@article {pmid38496489, year = {2024}, author = {Rinker, DC and Sauters, TJC and Steffen, K and Gumilang, A and Raja, HA and Rangel-Grimaldo, M and Pinzan, CF and de Castro, PA and Dos Reis, TF and Delbaje, E and Houbraken, J and Goldman, GH and Oberlies, NH and Rokas, A}, title = {Strain heterogeneity in a non-pathogenic fungus highlights factors contributing to virulence.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.03.08.583994}, pmid = {38496489}, abstract = {Fungal pathogens exhibit extensive strain heterogeneity, including variation in virulence. Whether closely related non-pathogenic species also exhibit strain heterogeneity remains unknown. Here, we comprehensively characterized the pathogenic potentials (i.e., the ability to cause morbidity and mortality) of 16 diverse strains of Aspergillus fischeri, a non-pathogenic close relative of the major pathogen Aspergillus fumigatus. In vitro immune response assays and in vivo virulence assays using a mouse model of pulmonary aspergillosis showed that A. fischeri strains varied widely in their pathogenic potential. Furthermore, pangenome analyses suggest that A. fischeri genomic and phenotypic diversity is even greater. Genomic, transcriptomic, and metabolomic profiling identified several pathways and secondary metabolites associated with variation in virulence. Notably, strain virulence was associated with the simultaneous presence of the secondary metabolites hexadehydroastechrome and gliotoxin. 
We submit that examining the pathogenic potentials of non-pathogenic close relatives is key for understanding the origins of fungal pathogenicity.}, } @article {pmid38495945, year = {2024}, author = {Lecomte, L and Árnyasi, M and Ferchaud, AL and Kent, M and Lien, S and Stenløkk, K and Sylvestre, F and Bernatchez, L and Mérot, C}, title = {Investigating structural variant, indel and single nucleotide polymorphism differentiation between locally adapted Atlantic salmon populations.}, journal = {Evolutionary applications}, volume = {17}, number = {3}, pages = {e13653}, pmid = {38495945}, issn = {1752-4571}, abstract = {Genomic structural variants (SVs) are now recognized as an integral component of intraspecific polymorphism and are known to contribute to evolutionary processes in various organisms. However, they are inherently difficult to detect and genotype from readily available short-read sequencing data, and therefore remain poorly documented in wild populations. Salmonid species displaying strong interpopulation variability in both life history traits and habitat characteristics, such as Atlantic salmon (Salmo salar), offer a prime context for studying adaptive polymorphism, but the contribution of SVs to fine-scale local adaptation has yet to be explored. Here, we performed a comparative analysis of SVs, single nucleotide polymorphisms (SNPs) and small indels (<50 bp) segregating in the Romaine and Puyjalon salmon, two putatively locally adapted populations inhabiting neighboring rivers (Québec, Canada) and showing pronounced variation in life history traits, namely growth, fecundity, and age at maturity and smoltification. We first catalogued polymorphism using a hybrid SV characterization approach pairing both short- (16X) and long-read sequencing (20X) for variant discovery with graph-based genotyping of SVs across 60 salmon genomes, along with characterization of SNPs and small indels from short reads. 
We thus identified 115,907 SVs, 8,777,832 SNPs and 1,089,321 short indels, with SVs covering 4.8 times more base pairs than SNPs. All three variant types revealed a highly congruent population structure and similar patterns of $F_{ST}$ and density variation along the genome. Finally, we performed outlier detection and redundancy analysis (RDA) to identify variants of interest in the putative local adaptation of Romaine and Puyjalon salmon. Genes located near these variants were enriched for biological processes related to nervous system function, suggesting that observed variation in traits such as age at smoltification could arise from differences in neural development. This study therefore demonstrates the feasibility of large-scale SV characterization and highlights its relevance for salmonid population genomics.}, } @article {pmid38492232, year = {2024}, author = {Poretsky, E and Cagirici, HB and Andorf, CM and Sen, TZ}, title = {Harnessing the predicted maize pan-interactome for putative gene function prediction and prioritization of candidate genes for important traits.}, journal = {G3 (Bethesda, Md.)}, volume = {}, number = {}, pages = {}, doi = {10.1093/g3journal/jkae059}, pmid = {38492232}, issn = {2160-1836}, abstract = {The recent assembly and annotation of the 26 maize nested association mapping (NAM) population founder inbreds have enabled large-scale pan-genomic comparative studies. These studies have expanded our understanding of agronomically important traits by integrating pan-transcriptomic data with trait-specific gene candidates from previous association mapping results. In contrast to the availability of pan-transcriptomic data, obtaining reliable protein-protein interaction (PPI) data has remained a challenge due to its high cost and complexity. We generated predicted PPI networks for each of the 26 genomes using the established STRING database. The individual genome-interactomes were then integrated to generate core- and pan-interactomes. 
We deployed the PPI clustering algorithm ClusterONE to identify numerous PPI clusters that were functionally annotated using gene ontology (GO) functional enrichment, demonstrating a diverse range of enriched GO terms across different clusters. Additional cluster annotations were generated by integrating gene co-expression data and gene description annotations, providing additional useful information. We show that the functionally annotated PPI clusters establish a useful framework for protein function prediction and prioritization of candidate genes of interest. Our study not only provides a comprehensive resource of predicted PPI networks for 26 maize genomes, but also offers annotated interactome clusters for predicting protein functions and prioritizing gene candidates. The source code for the Python implementation of the analysis workflow and a standalone web application for accessing the analysis results are available at https://github.com/eporetsky/PanPPI.}, } @article {pmid38491145, year = {2024}, author = {Wang, Y and Tang, H and Wang, X and Sun, Y and Joseph, PV and Paterson, AH}, title = {Detection of colinear blocks and synteny and evolutionary analyses based on utilization of MCScanX.}, journal = {Nature protocols}, volume = {}, number = {}, pages = {}, pmid = {38491145}, issn = {1750-2799}, abstract = {As different taxa evolve, gene order often changes slowly enough that chromosomal 'blocks' with conserved gene orders (synteny) are discernible. The MCScanX toolkit (https://github.com/wyp1125/MCScanX) was published in 2012 as freely available software for the detection of such 'colinear blocks' and subsequent synteny and evolutionary analyses based on genome-wide gene location and protein sequence information. Owing to its simplicity and high efficiency for colinear block detection, MCScanX provides a powerful tool for conducting diverse synteny and evolutionary analyses. 
Moreover, the detection of colinear blocks has been embraced as an integral step for pangenome graph construction. Here, new application trends of MCScanX are explored, striving to better connect this increasingly used tool to other tools and accelerate insight generation from exponentially growing sequence data. We provide a detailed protocol that covers how to install MCScanX on diverse platforms, tune parameters, prepare input files from data from the National Center for Biotechnology Information, run MCScanX and its visualization and evolutionary analysis tools, and connect MCScanX with external tools, including MCScanX-transposed, Circos and SynVisio. This protocol is easily implemented by users with minimal computational background and is adaptable to new data of interest to them. The data and utility programs for this protocol can be obtained from http://bdx-consulting.com/mcscanx-protocol .}, } @article {pmid38488860, year = {2024}, author = {Freddi, S and Rajabal, V and Tetu, SG and Gillings, MR and Penesyan, A}, title = {Microbial biofilms on macroalgae harbour diverse integron gene cassettes.}, journal = {Microbiology (Reading, England)}, volume = {170}, number = {3}, pages = {}, doi = {10.1099/mic.0.001446}, pmid = {38488860}, issn = {1465-2080}, abstract = {Integrons are genetic platforms that capture, rearrange and express mobile modules called gene cassettes. The best characterized gene cassettes encode antibiotic resistance, but the function of most integron gene cassettes remains unknown. Functional predictions suggest that many gene cassettes could encode proteins that facilitate interactions with other cells and with the extracellular environment. Because cell interactions are essential for biofilm stability, we sequenced gene cassettes from biofilms growing on the surface of the marine macroalgae Ulva australis and Sargassum linearifolium. 
Algal samples were obtained from coastal rock platforms around Sydney, Australia, using seawater as a control. We demonstrated that integrons in microbial biofilms did not sample genes randomly from the surrounding seawater, but harboured specific functions that potentially provided an adaptive advantage to both the bacterial cells in biofilm communities and their macroalgal host. Further, integron gene cassettes had a well-defined spatial distribution, suggesting that each bacterial biofilm acquired these genetic elements via sampling from a large but localized pool of gene cassettes. These findings suggest two forms of filtering: a selective acquisition of different integron-containing bacterial species into the distinct biofilms on Ulva and Sargassum surfaces, and a selective retention of unique populations of gene cassettes at each sampling location.}, } @article {pmid38488392, year = {2024}, author = {Wang, M and Li, X and Liu, X and Hou, X and He, Y and Yu, J-H and Hu, S and Yin, H and Xie, B-B}, title = {Annotation of 2,507 Saccharomyces cerevisiae genomes.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0358223}, doi = {10.1128/spectrum.03582-23}, pmid = {38488392}, issn = {2165-0497}, abstract = {Saccharomyces cerevisiae (baker's yeast, budding yeast) is one of the most important model organisms for biological research and is a crucial microorganism in industry. Currently, a huge number of Saccharomyces cerevisiae genome sequences are available at the public domain. However, these genomes are distributed at different websites and a large number of them are released without annotation information. To provide one complete annotated genome data resource, we collected 2,507 Saccharomyces cerevisiae genome assemblies and re-annotated 2,506 assemblies using a custom annotation pipeline, producing a total of 15,407,164 protein-coding gene models. With a custom pipeline, all these gene sequences were clustered into families. 
A total of 1,506 single-copy genes were selected as marker genes, which were then used to evaluate the genome completeness and base qualities of all assemblies. Pangenomic analyses were performed based on a selected subset of 847 medium-high-quality genomes. Statistical comparisons revealed a number of gene families showing copy number variations among different organism sources. To the authors' knowledge, this study represents the largest genome annotation project of S. cerevisiae so far, providing rich genomic resources for the future studies of the model organism S. cerevisiae and its relatives. IMPORTANCE: Saccharomyces cerevisiae (baker's yeast, budding yeast) is one of the most important model organisms for biological research and is a crucial microorganism in industry. Though a huge number of Saccharomyces cerevisiae genome sequences are available at the public domain, these genomes are distributed at different websites and most are released without annotation, hindering the efficient reuse of these genome resources. Here, we collected 2,507 genomes for Saccharomyces cerevisiae, performed genome annotation, and evaluated the genome qualities. All the obtained data have been deposited at public repositories and are freely accessible to the community. This study represents the largest genome annotation project of S. cerevisiae so far, providing one complete annotated genome data set for S. 
cerevisiae, an important workhorse for fundamental biology, biotechnology, and industry.}, } @article {pmid38488280, year = {2024}, author = {Giacomini, JJ and Torres-Morales, J and Tang, J and Dewhirst, FE and Borisy, GG and Mark Welch, JL}, title = {Spatial ecology of Haemophilus and Aggregatibacter in the human oral cavity.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0401723}, doi = {10.1128/spectrum.04017-23}, pmid = {38488280}, issn = {2165-0497}, abstract = {UNLABELLED: Haemophilus and Aggregatibacter are two of the most common bacterial genera in the human oral cavity, encompassing both commensals and pathogens of substantial ecological and medical significance. In this study, we conducted a metapangenomic analysis of oral Haemophilus and Aggregatibacter species to uncover genomic diversity, phylogenetic relationships, and habitat specialization within the human oral cavity. Using three metrics---pangenomic gene content, phylogenomics, and average nucleotide identity (ANI)---we first identified distinct species and sub-species groups among these genera. Mapping of metagenomic reads then revealed clear patterns of habitat specialization, such as Aggregatibacter species predominantly in dental plaque, a distinctive Haemophilus parainfluenzae sub-species group on the tongue dorsum, and H. sp. HMT-036 predominantly in keratinized gingiva and buccal mucosa. In addition, we found that supragingival plaque samples contained predominantly only one out of the three taxa, H. parainfluenzae, Aggregatibacter aphrophilus, and A. sp. HMT-458, suggesting independent niches or a competitive relationship. Functional analyses revealed the presence of key metabolic genes, such as oxaloacetate decarboxylase, correlated with habitat specialization, suggesting metabolic versatility as a driving force. Additionally, heme synthesis distinguishes H. sp. 
HMT-036 from closely related Haemophilus haemolyticus, suggesting that the availability of micronutrients, particularly iron, was important in the evolutionary ecology of these species. Overall, our study exemplifies the power of metapangenomics to identify factors that may affect ecological interactions within microbial communities, including genomic diversity, habitat specialization, and metabolic versatility.

IMPORTANCE: Understanding the microbial ecology of the mouth is essential for comprehending human physiology. This study employs metapangenomics to reveal that various Haemophilus and Aggregatibacter species exhibit distinct ecological preferences within the oral cavity of healthy individuals, thereby supporting the site-specialist hypothesis. Additionally, it was observed that the gene pool of different Haemophilus species correlates with their ecological niches. These findings shed light on the significance of key metabolic functions in shaping microbial distribution patterns and interspecies interactions in the oral ecosystem.}, } @article {pmid38487210, year = {2023}, author = {Grizon, A and Theil, S and Callon, C and Gerber, P and Helinck, S and Dugat-Bony, E and Bonnarme, P and Chassard, C}, title = {Genetic and technological diversity of Streptococcus thermophilus isolated from the Saint-Nectaire PDO cheese-producing area.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1245510}, doi = {10.3389/fmicb.2023.1245510}, pmid = {38487210}, issn = {1664-302X}, abstract = {Streptococcus thermophilus is of major importance for cheese manufacturing to ensure rapid acidification; however, studies indicate that intensive use of commercial strains leads to the loss of typical characteristics of the products. To strengthen the link between the product and its geographical area and improve the sensory qualities of cheeses, cheese-producing protected designations of origin (PDO) are increasingly interested in the development of specific autochthonous starter cultures. The present study is therefore investigating the genetic and functional diversity of S. thermophilus strains isolated from a local cheese-producing PDO area. Putative S. thermophilus isolates were isolated and identified from milk collected in the Saint-Nectaire cheese-producing PDO area and from commercial starters. 
Whole genomes of isolates were sequenced, and a comparative analysis based on their pan-genome was carried out. Important functional properties were studied, including acidifying and proteolytic activities. Twenty-two isolates representative of the diversity of the geographical area and four commercial strains were selected for comparison. The resulting phylogenetic trees do not correspond to the geographical distribution of isolates. The clustering based on the pan-genome analysis indicates that isolates are divided into five distinct groups. A Kyoto Encyclopedia of Genes and Genomes (KEGG) functional annotation of the accessory genes indicates that the accessory gene contents of isolates are involved in different functional categories. High variability in acidifying activities and less diversity in proteolytic activities were also observed. These results indicate that high genetic and functional variabilities of the species S. thermophilus may arise from a small (1,800 km$^{2}$) geographical area and may be exploited to meet demand for use as autochthonous starters.}, } @article {pmid38486452, year = {2024}, author = {Shi, T and Zhang, X and Hou, Y and Jia, C and Dan, X and Zhang, Y and Jiang, Y and Lai, Q and Feng, J and Feng, J and Ma, T and Wu, J and Liu, S and Zhang, L and Long, Z and Chen, L and Street, NR and Ingvarsson, PK and Liu, J and Yin, T and Wang, J}, title = {The super-pangenome of Populus unveil genomic facets for its adaptation and diversification in widespread forest trees.}, journal = {Molecular plant}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.molp.2024.03.009}, pmid = {38486452}, issn = {1752-9867}, abstract = {Understanding the underlying mechanisms and links between genome evolution and adaptive innovations stands as a key goal in evolutionary studies. Poplars, among the world's most widely distributed and cultivated trees, exhibit extensive phenotypic diversity and environmental adaptability. 
In this study, we present a genus-level super-pangenome comprising 19 Populus genomes, revealing the likely pivotal role of private genes in facilitating local environmental and climate adaptation. Through the integration of pan-genomes with transcriptomes, methylomes and chromatin accessibility mapping, we unveil that the evolutionary trajectories of pan-genes and duplicated genes are closely linked to local genomic landscapes of regulatory and epigenetic architectures, notably CG methylation in gene-body regions. Further comparative genomic analyses have enabled the identification of 142,202 structural variants (SVs) across species, which intersect with a significant number of genes and contribute substantially to both phenotypic and adaptive divergence. We have experimentally validated a ∼180 bp presence/absence variant impacting the expression of the CUC2 gene, crucial for leaf serration formation. Finally, we developed a user-friendly web-based tool encompassing the multi-omics resources associated with the Populus super-pangenome (http://www.populus-superpangenome.com/). Together, the present pioneering super-pangenome resource in forest trees not only aid in the advancement of breeding efforts of this globally important tree genus but also offer valuable insights into potential avenues for comprehending tree biology.}, } @article {pmid38478130, year = {2024}, author = {Wisal, A and Saeed, N and Aurongzeb, M and Shafique, M and Sohail, S and Anwar, W and Basharat, Z and Irfan, M and Ullah, A and Hassan, SS}, title = {Bridging drug discovery through hierarchical subtractive genomics against asd, trpG, and secY of pneumonia causing MDR Staphylococcus aureus.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {34}, pmid = {38478130}, issn = {1617-4623}, abstract = {Staphylococcus aureus (S. 
aureus) is an opportunistic gram-positive, non-motile, and non-sporulating bacteria that induces pneumonia, a provocative lung infection affecting mainly the terminal bronchioles and the small air sacs known as alveoli. Recently, it has developed antibiotic resistance to the available consortium as per the WHO reports; thereby, novel remedial targets and resilient medications to forestall and cure this illness are desperately needed. Here, using pan-genomics, a total of 1,387 core proteins were identified. Subtractive proteome analyses further identified 12 proteins that are vital for bacteria. One membrane protein (secY) and two cytoplasmic proteins (asd and trpG) were chosen as possible therapeutic targets concerning minimum % host identity, essentiality, and other cutoff values, such as high resistance in the MDR S. aureus. The UniProt AA sequences of the selected targets were modelled and docked against 3 drug-like chemical libraries. The top-ranked compounds i.e., ZINC82049692, ZINC85492658 and 3a of Isosteviol derivative for Aspartate-semialdehyde dehydrogenase (asd); ZINC38222743, ZINC70455378, and 5 m Isosteviol derivative for Anthranilate synthase component II (trpG); and finally, ZINC72292296, ZINC85632684, and 7 m Isosteviol derivative for Protein translocase subunit secY (secY), were further subjected to molecular dynamics studies for thermodynamic stability and energy calculation. Our study proposes new therapeutic targets in S. aureus, some of which have previously been reported in other pathogenic microorganisms. Owing to further experimental validation, we anticipate that the adapted methodology and the predicted results in this work could make major contributions towards novel drug discovery and their targets in S. 
aureus caused pneumonia.}, } @article {pmid38472486, year = {2024}, author = {Martínez-Gallardo, MJ and Villicaña, C and Yocupicio-Monroy, M and Alcaraz-Estrada, SL and Salazar-Salinas, J and Mendoza-Vázquez, OF and Damazo-Hernández, G and León-Félix, J}, title = {Comparative genomic analysis of Pseudomonas aeruginosa strains susceptible and resistant to carbapenems and aztreonam isolated from patients with healthcare-associated infections in a Mexican hospital.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {29}, pmid = {38472486}, issn = {1617-4623}, support = {E05//Ciencia y Tecnología ISSSTE/ ; }, abstract = {Pseudomonas aeruginosa (PA) is an important opportunistic pathogen that causes different infections on immunocompromised patients. Within PA accessory genome, differences in virulence, antibiotic resistance and biofilm formation have been described between strains, leading to the emergence of multidrug-resistant strains. The genome sequences of 17 strains isolated from patients with healthcare-associated infections in a Mexican hospital were genomically and phylogenetically analyzed and antibiotic resistance genes, virulence genes, and biofilm formation genes were detected. Fifteen of the 17 strains were resistant to at least two of the carbapenems meropenem, imipenem, and the monobactam aztreonam. The antibiotic resistance (mexA, mexB, and oprM) and the biofilm formation (pslA and pslD) genes were detected in all strains. Differences were found between strains in accessory genome size. The strains had different sequence types, and seven strains had sequence types associated with global high risk epidemic PA clones. All strains were represented in two groups among PA global strains. In the 17 strains, horizontally acquired resistance genes to aminoglycosides and beta-lactams were found, mainly, and between 230 and 240 genes that encode virulence factors. 
The strains under study were variable in terms of their accessory genome, antibiotic resistance, and virulence genes. With these characteristics, we provide information about the genomic diversity of clinically relevant PA strains.}, } @article {pmid38470044, year = {2024}, author = {Liu, D and Xie, L-S and Lian, S and Li, K and Yang, Y and Wang, W-Z and Hu, S and Liu, S-J and Liu, C and He, Z}, title = {Anaerostipes hadrus, a butyrate-producing bacterium capable of metabolizing 5-fluorouracil.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0081623}, doi = {10.1128/msphere.00816-23}, pmid = {38470044}, issn = {2379-5042}, abstract = {UNLABELLED: Anaerostipes hadrus (A. hadrus) is a dominant species in the human gut microbiota and considered a beneficial bacterium for producing probiotic butyrate. However, recent studies have suggested that A. hadrus may negatively affect the host through synthesizing fatty acid and metabolizing the anticancer drug 5-fluorouracil, indicating that the impact of A. hadrus is complex and unclear. Therefore, comprehensive genomic studies on A. hadrus need to be performed. We integrated 527 high-quality public A. hadrus genomes and five distinct metagenomic cohorts. We analyzed these data using the approaches of comparative genomics, metagenomics, and protein structure prediction. We also performed validations with culture-based in vitro assays. We constructed the first large-scale pan-genome of A. hadrus (n = 527) and identified 5-fluorouracil metabolism genes as ubiquitous in A. hadrus genomes as butyrate-producing genes. Metagenomic analysis revealed the wide and stable distribution of A. hadrus in healthy individuals, patients with inflammatory bowel disease, and patients with colorectal cancer, with healthy individuals carrying more A. hadrus. The predicted high-quality protein structure indicated that A. 
hadrus might metabolize 5-fluorouracil by producing bacterial dihydropyrimidine dehydrogenase (encoded by the preTA operon). Through in vitro assays, we validated the short-chain fatty acid production and 5-fluorouracil metabolism abilities of A. hadrus. We observed for the first time that A. hadrus can convert 5-fluorouracil to α-fluoro-β-ureidopropionic acid, which may result from the combined action of the preTA operon and adjacent hydA (encoding bacterial dihydropyrimidinase). Our results offer novel understandings of A. hadrus, exceptionally functional features, and potential applications.

IMPORTANCE: This work provides new insights into the evolutionary relationships, functional characteristics, prevalence, and potential applications of Anaerostipes hadrus.}, } @article {pmid38469580, year = {2024}, author = {Yakubu, B and Appiah, EM and Adu, AF}, title = {Pangenome Analysis of Helicobacter pylori Isolates from Selected Areas of Africa Indicated Diverse Antibiotic Resistance and Virulence Genes.}, journal = {International journal of genomics}, volume = {2024}, number = {}, pages = {5536117}, pmid = {38469580}, issn = {2314-4378}, abstract = {The challenge facing Helicobacter pylori (H. pylori) infection management in some parts of Africa is the evolution of drug-resistant species, the lack of gold standard in diagnostic methods, and the ineffectiveness of current vaccines against the bacteria. It is being established that even though clinical consequences linked to the bacteria vary geographically, there is rather a generic approach to treatment. This situation has remained problematic in the successful fight against the bacteria in parts of Africa. As a result, this study compared the genomes of selected H. pylori isolates from selected areas of Africa and evaluated their virulence and antibiotic drug resistance, those that are highly pathogenic and are associated with specific clinical outcomes and those that are less virulent and rarely associated with clinical outcomes. 146 genomes of H. pylori isolated from selected locations of Africa were sampled, and bioinformatic tools such as Abricate, CARD RGI, MLST, Prokka, Roary, Phandango, Google Sheets, and iTOLS were used to compare the isolates and their antibiotic resistance or susceptibility. Over 20 k virulence and AMR genes were observed. About 95% of the isolates were genetically diverse, 90% of the isolates harbored shell genes, and 50% harbored cloud and core genes. Some isolates did not retain the cagA and vacA genes. 
Clarithromycin, metronidazole, amoxicillin, and tinidazole were resistant to most AMR genes (vacA, cagA, oip, and bab). Conclusion. This study found both virulence and AMR genes in all H. pylori strains in all the selected geographies around Africa with differing quantities. MLST, Pangenome, and ORF analyses showed disparities among the isolates. This in general could imply diversities in terms of genetics, evolution, and protein production. Therefore, generic administration of antibiotics such as clarithromycin, amoxicillin, and erythromycin as treatment methods in the African subregion could be contributing to the spread of the bacterium's antibiotic resistance.}, } @article {pmid38463963, year = {2024}, author = {Young, MG and Straub, TJ and Worby, CJ and Metsky, HC and Gnirke, A and Bronson, RA and van Dijk, LR and Desjardins, CA and Matranga, C and Qu, J and Dodson, K and Schreiber, HL and Manson, AL and Hultgren, SJ and Earl, AM}, title = {Distinct Escherichia coli transcriptional profiles in the guts of recurrent UTI sufferers revealed by pan-genome hybrid selection.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.29.582780}, pmid = {38463963}, abstract = {Low-abundance members of microbial communities are difficult to study in their native habitat. This includes Escherichia coli, a minor, but common inhabitant of the gastrointestinal tract and opportunistic pathogen, including of the urinary tract, where it causes most infections. While our understanding of the interactions between uropathogenic Escherichia coli (UPEC) and the bladder is increasing, comparatively little is known about UPEC in its pre-infection reservoir, partly due to its low abundance there (<1% relative abundance). In order to specifically and sensitively explore the genomes and transcriptomes of diverse E. coli from gastrointestinal communities, we developed E. coli PanSelect, a set of probes designed to enrich E. 
coli's broad pangenome. First we demonstrated the ability of PanSelect to enrich diverse strains in an unbiased way using a mock community of known composition. Then we enriched E. coli DNA and RNA from human stool microbiomes by 158 and 30-fold, respectively. We also used E. coli PanSelect to explore the gene content and transcriptome of E. coli within the gut microbiomes of women with history of recurrent urinary tract infection (rUTI), finding differential regulation of pathways that suggests that the rUTI gut environment promotes respiratory vs fermentative metabolism. E. coli PanSelect technology holds promise for investigations of native in vivo biology of diverse E. coli in the gut and other environments, where it is a minor component of the microbial community, using unbiased, culture-free shotgun sequencing. This method could also be generally applied to other highly diverse, low abundance bacteria.}, } @article {pmid38463499, year = {2024}, author = {Li, H and Marin, M and Farhat, MR}, title = {Exploring gene content with pangenome gene graphs.}, journal = {ArXiv}, volume = {}, number = {}, pages = {}, pmid = {38463499}, issn = {2331-8422}, abstract = {MOTIVATION: The gene content regulates the biology of an organism. It varies between species and between individuals of the same species. Although tools have been developed to identify gene content changes in bacterial genomes, none is applicable to collections of large eukaryotic genomes such as the human pangenome.

RESULTS: We developed pangene, a computational tool to identify gene orientation, gene order and gene copy-number changes in a collection of genomes. Pangene aligns a set of input protein sequences to the genomes, resolves redundancies between protein sequences and constructs a gene graph with each genome represented as a walk in the graph. It additionally finds subgraphs that encodes gene content changes. Applied to the human pangenome, pangene identifies known gene-level variations and reveals complex haplotypes that are not well studied before. Pangene also works with high-quality bacterial pangenome and reports similar numbers of core and accessory genes in comparison to existing tools.

Source code at https://github.com/lh3/pangene; pre-built pangene graphs can be downloaded from https://zenodo.org/records/8118576 and visualized at https://pangene.bioinweb.org.}, } @article {pmid38461665, year = {2024}, author = {Feng, NX and Li, DW and Zhang, F and Bin, H and Huang, YT and Xiang, L and Liu, BL and Cai, QY and Li, YW and Xu, DL and Xie, Y and Mo, CH}, title = {Biodegradation of phthalate acid esters and whole-genome analysis of a novel Streptomyces sp. FZ201 isolated from natural habitats.}, journal = {Journal of hazardous materials}, volume = {469}, number = {}, pages = {133972}, doi = {10.1016/j.jhazmat.2024.133972}, pmid = {38461665}, issn = {1873-3336}, abstract = {Di-n-butyl phthalate (DBP) is one of the most extensively used phthalic acid esters (PAEs) and is considered to be an emerging, globally concerning pollutant. The genus Streptomyces holds promise as a degrader of various organic pollutants, but PAE biodegradation mechanisms by Streptomyces species remain unsolved. In this study, a novel PAE-degrading Streptomyces sp. FZ201 isolated from natural habitats efficiently degraded various PAEs. FZ201 had strong resilience against DBP and exhibited immediate degradation, with kinetics adhering to a first-order model. The comprehensive biodegradation of DBP involves de-esterification, β-oxidation, trans-esterification, and aromatic ring cleavage. FZ201 contains numerous catabolic genes that potentially facilitate PAE biodegradation. The DBP metabolic pathway was reconstructed by genome annotation and intermediate identification. Streptomyces species have an open pangenome with substantial genome expansion events during the evolutionary process, enabling extensive genetic diversity and highly plastic genomes within the Streptomyces genus. FZ201 had a diverse array of highly expressed genes associated with the degradation of PAEs, potentially contributing significantly to its adaptive advantage and efficiency of PAE degradation. 
Thus, FZ201 is a promising candidate for remediating highly PAE-contaminated environments. These findings enhance our preliminary understanding of the molecular mechanisms employed by Streptomyces for the removal of PAEs.}, } @article {pmid38459435, year = {2024}, author = {Zhu, L and Liu, H and Li, X and Shi, Y and Yin, X and Pi, X}, title = {Whole-genome sequencing and analysis of Chryseobacterium arthrosphaerae from Rana nigromaculata.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {80}, pmid = {38459435}, issn = {1471-2180}, support = {2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; }, abstract = {Chryseobacterium arthrosphaerae strain FS91703 was isolated from Rana nigromaculata in our previous study. To investigate the genomic characteristics, pathogenicity-related genes, antimicrobial resistance, and phylogenetic relationship of this strain, PacBio RS II and Illumina HiSeq 2000 platforms were used for the whole genome sequencing. The genome size of strain FS91703 was 5,435,691 bp and GC content was 37.78%. A total of 4,951 coding genes were predicted; 99 potential virulence factors homologs were identified. 
Analysis of antibiotic resistance genes revealed that strain FS91703 harbored 10 antibiotic resistance genes in 6 categories and 2 multidrug-resistant efflux pump genes, including adeG and farA. Strain FS91703 was sensitive to β-lactam combination drugs, cephem, monobactam and carbapenems, intermediately resistant to phenicol, and resistant to penicillin, aminoglycosides, tetracycline, fluoroquinolones, and folate pathway inhibitors. Phylogenetic analysis revealed that strain FS91703 and C. arthrosphaerae CC-VM-7[T] were on the same branch of the phylogenetic tree based on 16S rRNA; the ANI value between them was 96.99%; and the DDH values were 80.2, 72.2 and 81.6% by three default calculation formulae. These results suggested that strain FS91703 was a species of C. arthrosphaerae. Pan-genome analysis showed FS91703 had 566 unique genes compared with 13 other C. arthrosphaerae strains, and had a distant phylogenetic relationship with the other C. arthrosphaerae strains of the same branch in phylogenetic tree based on orthologous genes. The results of this study suggest that strain FS91703 is a multidrug-resistant and highly virulent bacterium, that differs from other C. arthrosphaerae strains at the genomic level. The knowledge about the genomic characteristics and antimicrobial resistance of strain FS91703 provides valuable insights into this rare species, as well as guidance for the treatment of the disease caused by FS91703 in Rana nigromaculata.}, } @article {pmid38450165, year = {2024}, author = {Zhou, Y and Tu, T and Yao, X and Luo, Y and Yang, Z and Ren, M and Zhang, G and Yu, Y and Lu, A and Wang, Y}, title = {Pan-genome analysis of Streptococcus suis serotype 2 highlights genes associated with virulence and antibiotic resistance.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1362316}, pmid = {38450165}, issn = {1664-302X}, abstract = {Streptococcus suis serotype 2 (SS2) is a Gram-positive bacterium. 
It is a common and significant pathogen in pigs and a common cause of zoonotic meningitis in humans. It can lead to sepsis, endocarditis, arthritis, and pneumonia. If not diagnosed and treated promptly, it has a high mortality rate. The pan-genome of SS2 is open, and with an increasing number of genes, the core genome and accessory genome may exhibit more pronounced differences. Due to the diversity of SS2, the genes related to its virulence and resistance are still unclear. In this study, a strain of SS2 was isolated from a pig farm in Sichuan Province, China, and subjected to whole-genome sequencing and characterization. Subsequently, we conducted a Pan-Genome-Wide Association Study (Pan-GWAS) on 230 strains of SS2. Our analysis indicates that the core genome is composed of 1,458 genes related to the basic life processes of the bacterium. The accessory genome, consisting of 4,337 genes, is highly variable and a major contributor to the genetic diversity of SS2. Furthermore, we identified important virulence and resistance genes in SS2 through pan-GWAS. The virulence genes of SS2 are mainly associated with bacterial adhesion. In addition, resistance genes in the core genome may confer natural resistance of SS2 to fluoroquinolone and glycopeptide antibiotics. This study lays the foundation for further research on the virulence and resistance of SS2, providing potential new drug and vaccine targets against SS2.}, } @article {pmid38448140, year = {2024}, author = {Mathur, S and Singh, D and Ranjan, R}, title = {Recent advances in plant translational genomics for crop improvement.}, journal = {Advances in protein chemistry and structural biology}, volume = {139}, number = {}, pages = {335-382}, doi = {10.1016/bs.apcsb.2023.11.009}, pmid = {38448140}, issn = {1876-1631}, abstract = {The growing population, climate change, and limited agricultural resources put enormous pressure on agricultural systems. 
A plateau in crop yields is occurring and extreme weather events and urbanization threaten the livelihood of farmers. It is imperative that immediate attention is paid to addressing the increasing food demand, ensuring resilience against emerging threats, and meeting the demand for more nutritious, safer food. Under uncertain conditions, it is essential to expand genetic diversity and discover novel crop varieties or variations to develop higher and more stable yields. Genomics plays a significant role in developing abundant and nutrient-dense food crops. An alternative to traditional breeding approach, translational genomics is able to improve breeding programs in a more efficient and precise manner by translating genomic concepts into practical tools. Crop breeding based on genomics offers potential solutions to overcome the limitations of conventional breeding methods, including improved crop varieties that provide more nutritional value and are protected from biotic and abiotic stresses. Genetic markers, such as SNPs and ESTs, contribute to the discovery of QTLs controlling agronomic traits and stress tolerance. In order to meet the growing demand for food, there is a need to incorporate QTLs into breeding programs using marker-assisted selection/breeding and transgenic technologies. This chapter primarily focuses on the recent advances that are made in translational genomics for crop improvement and various omics techniques including transcriptomics, metagenomics, pangenomics, single cell omics etc. 
Numerous genome editing techniques including CRISPR Cas technology and their applications in crop improvement had been discussed.}, } @article {pmid38439049, year = {2024}, author = {Chen, C and Wu, S and Sun, Y and Zhou, J and Chen, Y and Zhang, J and Birchler, JA and Han, F and Yang, N and Su, H}, title = {Three near-complete genome assemblies reveal substantial centromere dynamics from diploid to tetraploid in Brachypodium genus.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {63}, pmid = {38439049}, issn = {1474-760X}, support = {2021YFF1000800//National Key Research and Development Program of China/ ; 32170571//National Natural Science Foundation of China/ ; 2021ZKPY008//Fundamental Research Funds for the Central Universities/ ; No. B21HJ0504//Hainan Yazhou Bay Seed Laboratory/ ; }, abstract = {BACKGROUND: Centromeres are critical for maintaining genomic stability in eukaryotes, and their turnover shapes genome architectures and drives karyotype evolution. However, the co-evolution of centromeres from different species in allopolyploids over millions of years remains largely unknown.

RESULTS: Here, we generate three near-complete genome assemblies, a tetraploid Brachypodium hybridum and its two diploid ancestors, Brachypodium distachyon and Brachypodium stacei. We detect high degrees of sequence, structural, and epigenetic variations of centromeres at base-pair resolution between closely related Brachypodium genomes, indicating the appearance and accumulation of species-specific centromere repeats from a common origin during evolution. We also find that centromere homogenization is accompanied by local satellite repeats bursting and retrotransposon purging, and the frequency of retrotransposon invasions drives the degree of interspecies centromere diversification. We further investigate the dynamics of centromeres during alloploidization process, and find that dramatic genetics and epigenetics architecture variations are associated with the turnover of centromeres between homologous chromosomal pairs from diploid to tetraploid. Additionally, our pangenomes analysis reveals the ongoing variations of satellite repeats and stable evolutionary homeostasis within centromeres among individuals of each Brachypodium genome with different polyploidy levels.

CONCLUSIONS: Our results provide unprecedented information on the genomic, epigenomic, and functional diversity of highly repetitive DNA between closely related species and their allopolyploid genomes at both coarse and fine scale.}, } @article {pmid38438804, year = {2024}, author = {Niu, D and Feng, N and Xi, S and Xu, J and Su, Y}, title = {Genomics-based analysis of four porcine-derived lactic acid bacteria strains and their evaluation as potential probiotics.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {24}, pmid = {38438804}, issn = {1617-4623}, support = {2022YFD1300402//Key Technologies Research and Development Program/ ; 31872362//National Natural Science Foundation of China/ ; 32072688//National Natural Science Foundation of China/ ; }, abstract = {The search for probiotics and exploration of their functions are crucial for livestock farming. Recently, porcine-derived lactic acid bacteria (LAB) have shown great potential as probiotics. However, research on the evaluation of porcine-derived LAB as potential probiotics through genomics-based analysis is relatively limited. The present study analyzed four porcine-derived LAB strains (Lactobacillus johnsonii L16, Latilactobacillus curvatus ZHA1, Ligilactobacillus salivarius ZSA5 and Ligilactobacillus animalis ZSB1) using genomic techniques and combined with in vitro tests to evaluate their potential as probiotics. The genome sizes of the four strains ranged from 1,897,301 bp to 2,318,470 bp with the GC contents from 33.03 to 41.97%. Pan-genomic analysis and collinearity analysis indicated differences among the genomes of four strains. Carbohydrate active enzymes analysis revealed that L. johnsonii L16 encoded more carbohydrate active enzymes than other strains. KEGG pathway analysis and in vitro tests confirmed that L. johnsonii L16 could utilize a wide range of carbohydrates and had good utilization capacity for each carbohydrate. 
The four strains had genes related to acid tolerance and were tolerant to low pH, with L. johnsonii L16 showing the greatest tolerance. The four strains contained genes related to bile salt tolerance and were able to tolerate 0.1% bile salt. Four strains had antioxidant related genes and exhibited antioxidant activity in in vitro tests. They contained the genes linked with organic acid biosynthesis and exhibited antibacterial activity against enterotoxigenic Escherichia coli K88 (ETEC K88) and Salmonella 6,7:c:1,5, wherein, L. johnsonii L16 and L. salivarius ZSA5 had gene clusters encoding bacteriocin. Results suggest that genome analysis combined with in vitro tests is an effective approach for evaluating different strains as probiotics. The findings of this study indicate that L. johnsonii L16 has the potential as a probiotic strain among the four strains and provide theoretical basis for the development of probiotics in swine production.}, } @article {pmid38421269, year = {2024}, author = {Deery, J and Carmody, M and Flavin, R and Tomanek, M and O'Keeffe, M and McGlacken, GP and Reen, FJ}, title = {Comparative genomics reveals distinct diversification patterns among LysR-type transcriptional regulators in the ESKAPE pathogen Pseudomonas aeruginosa.}, journal = {Microbial genomics}, volume = {10}, number = {2}, pages = {}, doi = {10.1099/mgen.0.001205}, pmid = {38421269}, issn = {2057-5858}, mesh = {Humans ; Pseudomonas aeruginosa/genetics ; Genomics ; *Pseudomonas Infections ; Pseudomonas ; *Cystic Fibrosis/genetics ; }, abstract = {Pseudomonas aeruginosa, a harmful nosocomial pathogen associated with cystic fibrosis and burn wounds, encodes for a large number of LysR-type transcriptional regulator proteins. To understand how and why LTTR proteins evolved with such frequency and to establish whether any relationships exist within the distribution we set out to identify the patterns underpinning LTTR distribution in P. 
aeruginosa and to uncover cluster-based relationships within the pangenome. Comparative genomic studies revealed that in the JGI IMG database alone ~86 000 LTTRs are present across the sequenced genomes (n=699). They are widely distributed across the species, with core LTTRs present in >93 % of the genomes and accessory LTTRs present in <7 %. Analysis showed that subsets of core LTTRs can be classified as either variable (typically specific to P. aeruginosa) or conserved (and found to be distributed in other Pseudomonas species). Extending the analysis to the more extensive Pseudomonas database, PA14 rooted analysis confirmed the diversification patterns and revealed PqsR, the receptor for the Pseudomonas quinolone signal (PQS) and 2-heptyl-4-quinolone (HHQ) quorum-sensing signals, to be amongst the most variable in the dataset. Successful complementation of the PAO1 pqsR [-] mutant using representative variant pqsR sequences suggests a degree of structural promiscuity within the most variable of LTTRs, several of which play a prominent role in signalling and communication. These findings provide a new insight into the diversification of LTTR proteins within the P. 
aeruginosa species and suggests a functional significance to the cluster, conservation and distribution patterns identified.}, } @article {pmid38421062, year = {2024}, author = {Ji, G and Long, Y and Cai, G and Wang, A and Yan, G and Li, H and Gao, G and Xu, K and Huang, Q and Chen, B and Li, L and Li, F and Nishio, T and Shen, J and Wu, X}, title = {The chromosome-scale genome of wild Brassica oleracea provides insights into the domestication of Brassica plants.}, journal = {Journal of experimental botany}, volume = {}, number = {}, pages = {}, doi = {10.1093/jxb/erae079}, pmid = {38421062}, issn = {1460-2431}, abstract = {The cultivated diploid Brassica oleracea is an important vegetable crop, but the genetic basis of domestication remains largely unclear without high-quality reference genomes of wild B. oleracea. Here, we report the first chromosome-level assembly of the wild Brassica oleracea L. W03 genome, (total genome size, 630.7 Mb; scaffold N50, 64.6 Mb). Using newly assembled W03 genome, we constructed a gene-based B. oleracea pangenome and identified 29,744 core genes, 23,306 dispensable genes, and 1,896 private genes. We resequenced 53 accessions, which represent six potential wild B. oleracea progenitor species. The results of the population genomic analysis showed that wild B. oleracea population had the highest level of diversity and represented the more closely related population of horticultural B. oleracea. Additionally, the WUSCHEL gene was found to play a decisive role in domestication and to be involved in cauliflower and broccoli curd formation. We also illustrate the loss of disease resistance genes during domestication selection. Our results provide deep insights into B. 
oleracea domestication and will facilitate Brassica crop genetic improvement.}, } @article {pmid38418560, year = {2024}, author = {Chao, P and Zhang, X and Zhang, L and Yang, A and Wang, Y and Chen, X}, title = {Proteomics-based vaccine targets annotation and design of multi-epitope vaccine against antibiotic-resistant Streptococcus gallolyticus.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {4836}, pmid = {38418560}, issn = {2045-2322}, abstract = {Streptococcus gallolyticus is a non-motile, gram-positive bacterium that causes infective endocarditis. S. gallolyticus has developed resistance to existing antibiotics, and no vaccine is currently available. Therefore, it is essential to develop an effective S. gallolyticus vaccine. Core proteomics was used in this study together with subtractive proteomics and reverse vaccinology approach to find antigenic proteins that could be utilized for the design of the S. gallolyticus multi-epitope vaccine. The pipeline identified two antigenic proteins as potential vaccine targets: penicillin-binding protein and the ATP synthase subunit. T and B cell epitopes from the specific proteins were forecasted employing several immunoinformatics and bioinformatics resources. A vaccine (360 amino acids) was created using a combination of seven cytotoxic T cell lymphocyte (CTL), three helper T cell lymphocyte (HTL), and five linear B cell lymphocyte (LBL) epitopes. To increase immune responses, the vaccine was paired with a cholera enterotoxin subunit B (CTB) adjuvant. The developed vaccine was highly antigenic, non-allergenic, and stable for human use. The vaccine's binding affinity and molecular interactions with the human immunological receptor TLR4 were studied using molecular mechanics/generalized Born surface area (MMGBSA), molecular docking, and molecular dynamic (MD) simulation analyses. 
Escherichia coli (strain K12) plasmid vector pET-28a (+) was used to examine the ability of the vaccine to be expressed. According to the outcomes of these computer experiments, the vaccine is quite promising in terms of developing a protective immunity against diseases. However, in vitro and animal research are required to validate our findings.}, } @article {pmid38417638, year = {2024}, author = {Banerjee, R and Robinson, SM and Lahiri, A and Verma, P and Banerjee, AK and Basak, S and Basak, K and Paul, S}, title = {Exploring the resistome and virulome in major sequence types of Acinetobacter baumannii genomes: Correlations with genome divergence and sequence types.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {}, number = {}, pages = {105579}, doi = {10.1016/j.meegid.2024.105579}, pmid = {38417638}, issn = {1567-7257}, abstract = {The increasing global prevalence of antimicrobial resistance in Acinetobacter baumannii has led to concerns regarding the effectiveness of infection treatment. Moreover, the critical role of virulence factor genes in A. baumannii's pathogenesis and its propensity to cause severe disease is of particular importance. Comparative genomics, including multi-locus sequence typing (MLST), enhances our understanding of A. baumannii epidemiology. While there is substantial documentation on A. baumannii, a comprehensive study of the antibiotic-resistant mechanisms and the virulence factors contributing to pathogenesis, and their correlation with Sequence Types (STs) remains incompletely elucidated. In this study, we aim to explore the relationship between antimicrobial resistance genes, virulence factor genes, and STs using genomic data from 223 publicly available A. baumannii strains. The core phylogeny analysis revealed five predominant STs in A. baumannii genomes, linked to their geographical sources of isolation. 
Furthermore, the resistome and virulome of A. baumannii followed an evolutionary pattern consistent with their pan-genome evolution. Among the major STs, we observed significant variations in resistant genes against "aminoglycoside" and "sulphonamide" antibiotics, highlighting the role of genotypic variations in determining resistance profiles. Furthermore, the presence of virulence factor genes, particularly exotoxin and nutritional / metabolic factor genes, played a crucial role in distinguishing the major STs, suggesting a potential link between genetic makeup and pathogenicity. Understanding these associations can provide valuable insights into A. baumannii's virulence potential and clinical outcomes, enabling the development of effective strategies to combat infections caused by this opportunistic pathogen.}, } @article {pmid38415665, year = {2024}, author = {Guillén, R and Salinas, C and Mendoza-Álvarez, A and Rubio Rodríguez, LA and Díaz-de Usera, A and Lorenzo-Salazar, JM and González-Montelongo, R and Flores, C and Rodríguez, F}, title = {Genomic epidemiology of the primary methicillin-resistant Staphylococcus aureus clones causing invasive infections in Paraguayan children.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0301223}, doi = {10.1128/spectrum.03012-23}, pmid = {38415665}, issn = {2165-0497}, abstract = {UNLABELLED: Methicillin-resistant Staphylococcus aureus (MRSA) is one of the major human pathogens. It could carry numerous resistance genes and virulence factors in its genome, some of which are related to the severity of the infection. An observational, descriptive, cross-sectional study was designed to molecularly analyze MRSA isolates that cause invasive infections in Paraguayan children from 2009 to 2013. Ten representative MRSA isolates of the main clonal complex identified were analyzed with short-read paired-end sequencing and assessed for the virulome, resistome, and phylogenetic relationships. 
All the genetically linked MRSA isolates were recovered from diverse clinical sources, patients, and hospitals at broad gap periods. The pan-genomic analysis of these clones revealed three major and different clonal complexes (CC30, CC5, and CC8), each composed of clones closely related to each other. The CC30 genomes prove to be a successful clone, strongly installed and disseminated throughout our country, and closely related to other CC30 public genomes from the region and the world. The CC5 shows the highest genetic variability, and the CC8 carried the complete arginine catabolic mobile element (ACME), closely related to the USA300-NAE-ACME+, identified as the major cause of CA-MRSA infections in North America. Multiple virulence and resistance genes were identified for the first time in this study, highlighting the complex virulence profiles of MRSA circulating in the country. This study opens a wide range of new possibilities for future projects and trials to improve the existing knowledge on the epidemiology of MRSA circulating in Paraguay.

IMPORTANCE: The increasing prevalence of methicillin-resistant Staphylococcus aureus (MRSA) is a public health problem worldwide. The most frequent MRSA clones identified in Paraguay in previous studies (including community and hospital acquired) were the Pediatric (CC5-ST5-IV), the Cordobes-Chilean (CC5-ST5-I), the SouthWest Pacific (CC30-ST30-IV), and the Brazilian (CC8-ST239-III) clones. In this study, the pan-genomic analysis of the most representative MRSA clones circulating in invasive infection in Paraguayan children over the years 2009-2013, such as the CC30-ST30-IV, CC5-ST5-IV, and CC8-ST8-IV, was carried out to evaluate their genetic diversity, their repertoire of virulence factors, and antimicrobial resistance determinants. This revealed multiple virulence and resistance genes, highlighting the complex virulence profiles of MRSA circulating in Paraguay. Our work is the first genomic study of MRSA in Paraguay and will contribute to the development of genomic surveillance in the region and our understanding of the global epidemiology of this pathogen.}, } @article {pmid38413855, year = {2024}, author = {Wang, H and Xia, F and Xia, Y and Li, J and Hu, Y and Deng, Y and Zou, M}, title = {Pangenome analysis of Shewanella xiamenensis revealed important genetic traits concerning genetic diversity, pathogenicity and antibiotic resistance.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {216}, pmid = {38413855}, issn = {1471-2164}, support = {No. 2023JJ30942//Natural Science Foundation of Hunan Province/ ; }, abstract = {BACKGROUND: Shewanella xiamenensis, widely distributed in natural environments, has long been considered as opportunistic pathogen. Recently, significant changes in the resistance spectrum have been observed in S. xiamenensis, due to acquired antibiotic resistance genes. Therefore, a pan-genome analysis was conducted to illuminate the genomic changes in S. xiamenensis.

RESULTS: Phylogenetic analysis revealed three major clusters and three singletons, among which close relationship between several strains was discovered, regardless of their host and niches. The "open" genomes with diversity of accessory and strain-specific genomes took advantage towards diversity environments. The purifying selection pressure was the main force on genome evolution, especially in conservative genes. Only 53 gene families were under positive selection pressure. Phenotypic resistance analysis revealed 21 strains were classified as multi-drug resistance (MDR). Ten types of antibiotic resistance genes and two heavy metal resistance operons were discovered in S. xiamenensis. Mobile genetic elements and horizontal gene transfer increased genome diversity and were closely related to MDR strains. S. xiamenensis carried a variety of virulence genes and macromolecular secretion systems, indicating their important roles in pathogenicity and adaptability. Type IV secretion system was discovered in 15 genomes with various sequence structures, indicating it was originated from different donors through horizontal gene transfer.

CONCLUSIONS: This study provided with a detailed insight into the changes in the pan-genome of S. xiamenensis, highlighting its capability to acquire new mobile genetic elements and resistance genes for its adaptation to environment and pathogenicity to human and animals.}, } @article {pmid38413611, year = {2024}, author = {Go, S and Koo, H and Jung, M and Hong, S and Yi, G and Kim, YM}, title = {Pan-chloroplast genomes for accession-specific marker development in Hibiscus syriacus.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {246}, pmid = {38413611}, issn = {2052-4463}, abstract = {Hibiscus syriacus L. is a renowned ornamental plant. We constructed 95 chloroplast genomes of H. syriacus L. cultivars using a short-read sequencing platform (Illumina) and a long-read sequencing platform (Oxford Nanopore Technology). The following genome assembly, we delineate quadripartite structures encompassing large single-copy, small single-copy, and inverted repeat (IRa and IRb) regions, from 160,231 bp to 161,041 bp. Our comprehensive analyses confirmed the presence of 79 protein-coding genes, 30 tRNA genes, and 4 rRNA genes in the pan-chloroplast genome, consistent with prior research on the H. syriacus chloroplast genome. Subsequent pangenome analysis unveiled widespread genome sequence conservation alongside unique cultivar-specific variant patterns consisting of 193 single-nucleotide polymorphisms and 61 insertions or deletions. The region containing intra-species variant patterns, as identified in this study, has the potential to develop accession-specific molecular markers, enhancing precision in cultivar classification. These findings are anticipated to drive advancements in breeding strategies, augment biodiversity, and unlock the agricultural potential inherent in H. 
syriacus.}, } @article {pmid38412041, year = {2024}, author = {Dong, X and Jia, H and Yu, Y and Xiang, Y and Zhang, Y}, title = {Genomic revisitation and reclassification of the genus Providencia.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0073123}, doi = {10.1128/msphere.00731-23}, pmid = {38412041}, issn = {2379-5042}, abstract = {Members of Providencia, although typically opportunistic, can cause severe infections in immunocompromised hosts. Recent advances in genome sequencing provide an opportunity for more precise study of this genus. In this study, we first identified and characterized a novel species named Providencia zhijiangensis sp. nov. It has ≤88.23% average nucleotide identity (ANI) and ≤31.8% in silico DNA-DNA hybridization (dDDH) values with all known Providencia species, which fall significantly below the species-defining thresholds. Interestingly, we found that Providencia stuartii and Providencia thailandensis actually fall under the same species, evidenced by an ANI of 98.59% and a dDDH value of 90.4%. By fusing ANI with phylogeny, we have reclassified 545 genomes within this genus into 20 species, including seven unnamed taxa (provisionally titled Taxon 1-7), which can be further subdivided into 23 lineages. Pangenomic analysis identified 1,550 genus-core genes in Providencia, with coenzymes being the predominant category at 10.56%, suggesting significant intermediate metabolism activity. Resistance analysis revealed that most lineages of the genus (82.61%, 19/23) carry a high number of antibiotic-resistance genes (ARGs) and display diverse resistance profiles. Notably, the majority of ARGs are located on plasmids, underscoring the significant role of plasmids in the resistance evolution within this genus. Three species or lineages (P. stuartii, Taxon 3, and Providencia hangzhouensis L12) that possess the highest number of carbapenem-resistance genes suggest their potential influence on clinical treatment. 
These findings underscore the need for continued surveillance and study of this genus, particularly due to their role in harboring antibiotic-resistance genes. IMPORTANCE: The Providencia genus, known to harbor opportunistic pathogens, has been a subject of interest due to its potential to cause severe infections, particularly in vulnerable individuals. Our research offers groundbreaking insights into this genus, unveiling a novel species, Providencia zhijiangensis sp. nov., and highlighting the need for a re-evaluation of existing classifications. Our comprehensive genomic assessment offers a detailed classification of 545 genomes into distinct species and lineages, revealing the rich biodiversity and intricate species diversity within the genus. The substantial presence of antibiotic-resistance genes in the Providencia genus underscores potential challenges for public health and clinical treatments. Our study highlights the pressing need for increased surveillance and research, enriching our understanding of antibiotic resistance in this realm.}, } @article {pmid38412007, year = {2024}, author = {Kim, M and Kim, W and Park, Y and Jung, J and Park, W}, title = {Lineage-specific evolution of Aquibium, a close relative of Mesorhizobium, during habitat adaptation.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0209123}, doi = {10.1128/aem.02091-23}, pmid = {38412007}, issn = {1098-5336}, abstract = {The novel genus Aquibium that lacks nitrogenase was recently reclassified from the Mesorhizobium genus. The genomes of Aquibium species isolated from water were smaller and had higher GC contents than those of Mesorhizobium species. Six Mesorhizobium species lacking nitrogenase were found to exhibit low similarity in the average nucleotide identity values to the other 24 Mesorhizobium species. Therefore, they were classified as the non-N2-fixing Mesorhizobium lineage (N-ML), an evolutionary intermediate species. 
The results of our phylogenomic analyses and the loss of Rhizobiales-specific fur/mur indicated that Mesorhizobium species may have evolved from Aquibium species through an ecological transition. Halotolerant and alkali-resistant Aquibium and Mesorhizobium microcysteis belonging to N-ML possessed many tripartite ATP-independent periplasmic transporter and sodium/proton antiporter subunits composed of seven genes (mrpABCDEFG). These genes were not present in the N2-fixing Mesorhizobium lineage (ML), suggesting that genes acquired for adaptation to highly saline and alkaline environments were lost during the evolution of ML as the habitat changed to soil. Land-to-water habitat changes in Aquibium species, close relatives of Mesorhizobium species, could have influenced their genomic evolution by the gain and loss of genes. Our study indicated that lineage-specific evolution could have played a significant role in shaping their genome architecture and conferring their ability to thrive in different habitats. IMPORTANCE: Phylogenetic analyses revealed that the Aquibium lineage (AL) and non-N2-fixing Mesorhizobium lineage (N-ML) were monophyletically grouped into distinct clusters separate from the N2-fixing Mesorhizobium lineage (ML). The N-ML, an evolutionary intermediate species having characteristics of both ancestral and descendant species, could provide a genomic snapshot of the genetic changes that occur during adaptation. Genomic analyses of AL, N-ML, and ML revealed that changes in the levels of genes related to transporters, chemotaxis, and nitrogen fixation likely reflect adaptations to different environmental conditions. 
Our study sheds light on the complex and dynamic nature of the evolution of rhizobia in response to changes in their environment and highlights the crucial role of genomic analysis in understanding these processes.}, } @article {pmid38411865, year = {2024}, author = {Seo, B and Jeon, K and Kim, WK and Jang, YJ and Cha, KH and Ko, G}, title = {Strain-Specific Anti-Inflammatory Effects of Faecalibacterium prausnitzii Strain KBL1027 in Koreans.}, journal = {Probiotics and antimicrobial proteins}, volume = {}, number = {}, pages = {}, pmid = {38411865}, issn = {1867-1314}, support = {E0170600-07//Korea Food Research Institute/ ; RS-2023-00223831//National Research Foundation of Korea/ ; }, abstract = {Faecalibacterium prausnitzii is one of the most dominant commensal bacteria in the human gut, and certain anti-inflammatory functions have been attributed to a single microbial anti-inflammatory molecule (MAM). Simultaneously, substantial diversity among F. prausnitzii strains is acknowledged, emphasizing the need for strain-level functional studies aimed at developing innovative probiotics. Here, two distinct F. prausnitzii strains, KBL1026 and KBL1027, were isolated from Korean donors, exhibiting notable differences in the relative abundance of F. prausnitzii. Both strains were identified as the core Faecalibacterium amplicon sequence variant (ASV) within the healthy Korean cohort, and their MAM sequences showed a high similarity of 98.6%. However, when a single strain was introduced to mice with dextran sulfate sodium (DSS)-induced colitis, KBL1027 showed the most significant ameliorative effects, including alleviation of colonic inflammation and restoration of gut microbial dysbiosis. Moreover, the supernatant from KBL1027 elevated the secretion of IL-10 cytokine more than that of KBL1026 in mouse bone marrow-derived macrophage (BMDM) cells, suggesting that the strain-specific, anti-inflammatory efficacy of KBL1027 might involve effector compounds other than MAM. 
Through analysis of the Faecalibacterium pan-genome and comparative genomics, strain-specific functions related to extracellular polysaccharide biosynthesis were identified in KBL1027, which could contribute to the observed morphological disparities. Collectively, our findings highlight the strain-specific, anti-inflammatory functions of F. prausnitzii, even within the same core ASV, emphasizing the influence of their human origin.}, } @article {pmid38410456, year = {2024}, author = {Kogay, R and Wolf, YI and Koonin, EV}, title = {Defense systems and horizontal gene transfer in bacteria.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.09.579689}, pmid = {38410456}, abstract = {Horizontal gene transfer (HGT) is a fundamental process in the evolution of prokaryotes, making major contributions to diversification and adaptation. Typically, HGT is facilitated by mobile genetic elements (MGEs), such as conjugative plasmids and phages that generally impose fitness costs on their hosts. However, a substantial fraction of bacterial genes is involved in defense mechanisms that limit the propagation of MGEs, raising the possibility that they can actively restrict HGT. Here we examine whether defense systems curb HGT by exploring the connections between HGT rate and the presence of 73 defense systems in 12 bacterial species. We found that only 6 defense systems, 3 of which are different CRISPR-Cas subtypes, are associated with the reduced gene gain rate on the scale of species evolution. The hosts of such defense systems tend to have a smaller pangenome size and harbor fewer phage-related genes compared to genomes lacking these systems, suggesting that these defense mechanisms inhibit HGT by limiting the integration of prophages. 
We hypothesize that restriction of HGT by defense systems is species-specific and depends on various ecological and genetic factors, including the burden of MGEs and fitness effect of HGT in bacterial populations.}, } @article {pmid38408562, year = {2024}, author = {Huy, NQ and Linh, NC and Son, NT and Ngoc, DB and Tam, TTT and Hang, LTT and Thuyet, BT and Song, LH and Van Quyen, D and Hayer, J and Bañuls, AL and Sy, BT}, title = {Genomic insights into an extensively drug-resistant and hypervirulent Burkholderia dolosa N149 isolate of a novel sequence type (ST2237) from a Vietnamese patient hospitalized for stroke.}, journal = {Journal of global antimicrobial resistance}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgar.2024.02.009}, pmid = {38408562}, issn = {2213-7173}, abstract = {OBJECTIVES: Burkholderia dolosa is a clinically important opportunistic pathogen in inpatients. Here we characterized an extensively drug-resistant and hypervirulent B. dolosa isolate from a patient hospitalized for stroke.

METHODS: Resistance to 41 antibiotics was tested with the agar disc diffusion, minimum inhibitory concentration, or broth microdilution method. The complete genome was assembled using short-reads and long-reads and the hybrid de novo assembly method. Allelic profiles obtained by multilocus sequence typing were analyzed using the PubMLST database. Antibiotic-resistance and virulence genes were predicted in silico using public databases and the "baargin" workflow. B. dolosa N149 phylogenetic relationships with all available B. dolosa strains and Burkholderia cepacia complex strains were analyzed using the pangenome obtained with Roary.

RESULTS: B. dolosa N149 displayed extensive resistance to 31 antibiotics and intermediate resistance to 4 antibiotics. The complete genome included three circular chromosomes (6,338,630 bp in total) and one plasmid (167,591 bp). Genotypic analysis revealed various gene clusters (acr, amr, amp, emr, ade, bla and tet) associated with resistance to 35 antibiotic classes. The major intrinsic resistance mechanisms were multidrug efflux pump alterations, inactivation and reduced permeability of targeted antibiotics. Moreover, 91 virulence genes (encoding proteins involved in adherence, formation of capsule, biofilm and colony, motility, phagocytosis inhibition, secretion systems, protease secretion, transmission and quorum sensing) were identified. B. dolosa N149 was assigned to a novel sequence type (ST2237) and formed a mono-phylogenetic clade separated from other B. dolosa strains.

CONCLUSION: This study provided insights into the antimicrobial resistance and virulence mechanisms of B. dolosa.}, } @article {pmid38407244, year = {2024}, author = {Selvaraj Anand, S and Wu, CT and Bremer, J and Bhatti, M and Treangen, TJ and Kalia, A and Shelburne, SA and Shropshire, WC}, title = {Identification of a novel CG307 sub-clade in third-generation-cephalosporin-resistant Klebsiella pneumoniae causing invasive infections in the USA.}, journal = {Microbial genomics}, volume = {10}, number = {2}, pages = {}, doi = {10.1099/mgen.0.001201}, pmid = {38407244}, issn = {2057-5858}, abstract = {Despite the notable clinical impact, recent molecular epidemiology regarding third-generation-cephalosporin-resistant (3GC-R) Klebsiella pneumoniae in the USA remains limited. We performed whole-genome sequencing of 3GC-R K. pneumoniae bacteraemia isolates collected from March 2016 to May 2022 at a tertiary care cancer centre in Houston, TX, USA, using Illumina and Oxford Nanopore Technologies platforms. A comprehensive comparative genomic analysis was performed to dissect population structure, transmission dynamics and pan-genomic signatures of our 3GC-R K. pneumoniae population. Of the 178 3GC-R K. pneumoniae bacteraemias that occurred during our study time frame, we were able to analyse 153 (86 %) bacteraemia isolates, 126 initial and 27 recurrent isolates. While isolates belonging to the widely prevalent clonal group (CG) 258 were rarely observed, the predominant CG, 307, accounted for 37 (29 %) index isolates and displayed a significant correlation (Pearson correlation test P value=0.03) with the annual frequency of 3GC-R K. pneumoniae bacteraemia. Interestingly, only 11 % (4/37) of CG307 isolates belonged to the commonly detected 'Texas-specific' clade that has been observed in previous Texas-based K. pneumoniae antimicrobial-resistance surveillance studies. 
We identified nearly half of our CG307 isolates (n=18) belonged to a novel, monophyletic CG307 sub-clade characterized by the chromosomally encoded bla SHV-205 and unique accessory genome content. This CG307 sub-clade was detected in various regions of the USA, with genome sequences from 24 additional strains becoming recently available in the National Center for Biotechnology Information (NCBI) SRA database. Collectively, this study underscores the emergence and dissemination of a distinct CG307 sub-clade that is a prevalent cause of 3GC-R K. pneumoniae bacteraemia among cancer patients seen in Houston, TX, and has recently been isolated throughout the USA.}, } @article {pmid38402521, year = {2024}, author = {van Westerhoven, AC and Aguilera-Galvez, C and Nakasato-Tagami, G and Shi-Kunne, X and Martinez de la Parte, E and Chavarro-Carrero, E and Meijer, HJG and Feurtey, A and Maryani, N and Ordóñez, N and Schneiders, H and Nijbroek, K and Wittenberg, AHJ and Hofstede, R and García-Bastidas, F and Sørensen, A and Swennen, R and Drenth, A and Stukenbrock, EH and Kema, GHJ and Seidl, MF}, title = {Segmental duplications drive the evolution of accessory regions in a major crop pathogen.}, journal = {The New phytologist}, volume = {}, number = {}, pages = {}, doi = {10.1111/nph.19604}, pmid = {38402521}, issn = {1469-8137}, support = {AG - 442//Bill and Melinda Gates Foundation/ ; 20 04 04 02//Stichting Dioraphte/ ; }, abstract = {Many pathogens evolved compartmentalized genomes with conserved core and variable accessory regions (ARs) that carry effector genes mediating virulence. The fungal plant pathogen Fusarium oxysporum has such ARs, often spanning entire chromosomes. The presence of specific ARs influences the host range, and horizontal transfer of ARs can modify the pathogenicity of the receiving strain. However, how these ARs evolve in strains that infect the same host remains largely unknown. We defined the pan-genome of 69 diverse F. 
oxysporum strains that cause Fusarium wilt of banana, a significant constraint to global banana production, and analyzed the diversity and evolution of the ARs. Accessory regions in F. oxysporum strains infecting the same banana cultivar are highly diverse, and we could not identify any shared genomic regions and in planta-induced effectors. We demonstrate that segmental duplications drive the evolution of ARs. Furthermore, we show that recent segmental duplications specifically in accessory chromosomes cause the expansion of ARs in F. oxysporum. Taken together, we conclude that extensive recent duplications drive the evolution of ARs in F. oxysporum, which contribute to the evolution of virulence.}, } @article {pmid38399738, year = {2024}, author = {Straková, D and Sánchez-Porro, C and de la Haba, RR and Ventosa, A}, title = {Decoding the Genomic Profile of the Halomicroarcula Genus: Comparative Analysis and Characterization of Two Novel Species.}, journal = {Microorganisms}, volume = {12}, number = {2}, pages = {}, pmid = {38399738}, issn = {2076-2607}, support = {PID2020-118136GB-I00//MCIN/AEI/10.13039/501100011033/ ; P20_01066 and BIO-213//Junta de Andalucía/ ; }, abstract = {The genus Halomicroarcula, classified within the family Haloarculaceae, presently comprises eight haloarchaeal species isolated from diverse saline habitats, such as solar salterns, hypersaline soils, marine salt, and marine algae. Here, a detailed taxogenomic study and comparative genomic analysis of the genus Halomicroarcula was carried out. In addition, two strains, designated S1CR25-12[T] and S3CR25-11[T], that were isolated from hypersaline soils located in the Odiel Saltmarshes in Huelva (Spain) were included in this study. The 16S rRNA and rpoB' gene sequence analyses affiliated the two strains to the genus Halomicroarcula. 
Typically, the species of the genus Halomicroarcula possess multiple heterogeneous copies of the 16S rRNA gene, which can lead to misclassification of the taxa and overestimation of the prokaryotic diversity. In contrast, the application of overall genome relatedness indexes (OGRIs) augments the capacity for the precise taxonomic classification and categorization of prokaryotic organisms. The relatedness indexes of the two new isolates, particularly digital DNA-DNA hybridization (dDDH), orthologous average nucleotide identity (OrthoANI), and average amino acid identity (AAI), confirmed that strains S1CR25-12[T] (= CECT 30620[T] = CCM 9252[T]) and S3CR25-11[T] (= CECT 30621[T] = CCM 9254[T]) constitute two novel species of the genus Halomicroarcula. The names Halomicroarcula saliterrae sp. nov. and Halomicroarcula onubensis sp. nov. are proposed for S1CR25-12[T] and S3CR25-11[T], respectively. Metagenomic fragment recruitment analysis, conducted using seven shotgun metagenomic datasets, revealed that the species belonging to the genus Halomicroarcula were predominantly recruited from hypersaline soils found in the Odiel Saltmarshes and the ponds of salterns with high salt concentrations. This reinforces the understanding of the extreme halophilic characteristics associated with the genus Halomicroarcula. 
Finally, comparing pan-genomes across the twenty Halomicroarcula and Haloarcula species allowed for the identification of commonalities and differences between the species of these two related genera.}, } @article {pmid38399654, year = {2024}, author = {Rhoads, DD and Pummill, J and Alrubaye, AAK}, title = {Molecular Genomic Analyses of Enterococcus cecorum from Sepsis Outbreaks in Broilers.}, journal = {Microorganisms}, volume = {12}, number = {2}, pages = {}, doi = {10.3390/microorganisms12020250}, pmid = {38399654}, issn = {2076-2607}, support = {none//Arkansas Biosciences Institute/ ; }, abstract = {Extensive genomic analyses of Enterococcus cecorum isolates from sepsis outbreaks in broilers suggest a polyphyletic origin, likely arising from core genome mutations rather than gene acquisition. This species is a normal intestinal flora of avian species with particular isolates associated with osteomyelitis. More recently, this species has been associated with sepsis outbreaks affecting broilers during the first 3 weeks post-hatch. Understanding the genetic and management basis of this new phenotype is critical for developing strategies to mitigate this emerging problem. Phylogenomic analyses of 227 genomes suggest that sepsis isolates are polyphyletic and closely related to both commensal and osteomyelitis isolate genomes. Pangenome analyses detect no gene acquisitions that distinguish all the sepsis isolates. Core genome single nucleotide polymorphism analyses have identified a number of mutations, affecting the protein-coding sequences, that are enriched in sepsis isolates. 
The analysis of the protein substitutions supports the mutational origins of sepsis isolates.}, } @article {pmid38397433, year = {2024}, author = {Nedashkovskaya, O and Balabanova, L and Otstavnykh, N and Zhukova, N and Detkova, E and Seitkalieva, A and Bystritskaya, E and Noskova, Y and Tekutyeva, L and Isaeva, M}, title = {In-Depth Genome Characterization and Pan-Genome Analysis of Strain KMM 296, a Producer of Highly Active Alkaline Phosphatase; Proposal for the Reclassification of Cobetia litoralis and Cobetia pacifica as the Later Heterotypic Synonyms of Cobetia amphilecti and Cobetia marina, and Emended Description of the Species Cobetia amphilecti and Cobetia marina.}, journal = {Biomolecules}, volume = {14}, number = {2}, pages = {}, doi = {10.3390/biom14020196}, pmid = {38397433}, issn = {2218-273X}, support = {15.BRK.21.0004 (contract no. 075-15-2021-1052)//the Ministry of Science and Higher Education, Russian Federation/ ; }, abstract = {A strictly aerobic, Gram-stain-negative, rod-shaped, and motile bacterium, designated strain KMM 296, isolated from the coelomic fluid of the mussel Crenomytilus grayanus, was investigated in detail due to its ability to produce a highly active alkaline phosphatase CmAP of the structural family PhoA. A previous taxonomic study allocated the strain to the species Cobetia marina, a member of the family Halomonadaceae of the class Gammaproteobacteria. However, 16S rRNA gene sequencing showed KMM 296's relatedness to Cobetia amphilecti NRIC 0815[T]. The isolate grew with 0.5-19% NaCl at 4-42 °C and hydrolyzed Tweens 20 and 40 and L-tyrosine. The DNA G+C content was 62.5 mol%. The prevalent fatty acids were C18:1 ω7c, C12:0 3-OH, C18:1 ω7c, C12:0, and C17:0 cyclo. The polar lipid profile was characterized by the presence of phosphatidylethanolamine, phosphatidylglycerol, phosphatidic acid, and also an unidentified aminolipid, phospholipid, and a few unidentified lipids. The major respiratory quinone was Q-8. 
According to phylogenomic and chemotaxonomic evidence, and the nearest neighbors, the strain KMM 296 represents a member of the species C. amphilecti. The genome-based analysis of C. amphilecti NRIC 0815[T] and C. litoralis NRIC 0814[T] showed their belonging to a single species. In addition, the high similarity between the C. pacifica NRIC 0813[T] and C. marina LMG 2217[T] genomes suggests their affiliation to one species. Based on the rules of priority, C. litoralis should be reclassified as a later heterotypic synonym of C. amphilecti, and C. pacifica is a later heterotypic synonym of C. marina. The emended descriptions of the species C. amphilecti and C. marina are also proposed.}, } @article {pmid38396752, year = {2024}, author = {Evseev, PV and Shneider, MM and Kolupaeva, LV and Kasimova, AA and Timoshina, OY and Perepelov, AV and Shpirt, AM and Shelenkov, AA and Mikhailova, YV and Suzina, NE and Knirel, YA and Miroshnikov, KA and Popova, AV}, title = {New Obolenskvirus Phages Brutus and Scipio: Biology, Evolution, and Phage-Host Interaction.}, journal = {International journal of molecular sciences}, volume = {25}, number = {4}, pages = {}, doi = {10.3390/ijms25042074}, pmid = {38396752}, issn = {1422-0067}, support = {20-75-10113//Russian Science Foundation/ ; }, abstract = {Two novel virulent phages of the genus Obolenskvirus infecting Acinetobacter baumannii, a significant nosocomial pathogen, have been isolated and studied. Phages Brutus and Scipio were able to infect A. baumannii strains belonging to the K116 and K82 capsular types, respectively. The biological properties and genomic organization of the phages were characterized. Comparative genomic, phylogenetic, and pangenomic analyses were performed to investigate the relationship of Brutus and Scipio to other bacterial viruses and to trace the possible origin and evolutionary history of these phages and other representatives of the genus Obolenskvirus. 
The investigation of enzymatic activity of the tailspike depolymerase encoded in the genome of phage Scipio, the first reported virus infecting A. baumannii of the K82 capsular type, was performed. The study of new representatives of the genus Obolenskvirus and mechanisms of action of depolymerases encoded in their genomes expands knowledge about the diversity of viruses within this taxonomic group and strategies of Obolenskvirus-host bacteria interaction.}, } @article {pmid38396294, year = {2024}, author = {Sepich-Poore, GD and McDonald, D and Kopylova, E and Guccione, C and Zhu, Q and Austin, G and Carpenter, C and Fraraccio, S and Wandro, S and Kosciolek, T and Janssen, S and Metcalf, JL and Song, SJ and Kanbar, J and Miller-Montgomery, S and Heaton, R and Mckay, R and Patel, SP and Swafford, AD and Korem, T and Knight, R}, title = {Robustness of cancer microbiome signals over a broad range of methodological variation.}, journal = {Oncogene}, volume = {}, number = {}, pages = {}, pmid = {38396294}, issn = {1476-5594}, support = {U24 CA248454/CA/NCI NIH HHS/United States ; }, abstract = {In 2020, we identified cancer-specific microbial signals in The Cancer Genome Atlas (TCGA) [1]. Multiple peer-reviewed papers independently verified or extended our findings [2-12]. Given this impact, we carefully considered concerns by Gihawi et al. [13] that batch correction and database contamination with host sequences artificially created the appearance of cancer type-specific microbiomes. (1) We tested batch correction by comparing raw and Voom-SNM-corrected data per-batch, finding predictive equivalence and significantly similar features. We found consistent results with a modern microbiome-specific method (ConQuR [14]), and when restricting to taxa found in an independent, highly-decontaminated cohort. (2) Using Conterminator [15], we found low levels of human contamination in our original databases (~1% of genomes). 
We demonstrated that the increased detection of human reads in Gihawi et al. [13] was due to using a newer human genome reference. (3) We developed Exhaustive, a method twice as sensitive as Conterminator, to clean RefSeq. We comprehensively host-deplete TCGA with many human (pan)genome references. We repeated all analyses with this and the Gihawi et al. [13] pipeline, and found cancer type-specific microbiomes. These extensive re-analyses and updated methods validate our original conclusion that cancer type-specific microbial signatures exist in TCGA, and show they are robust to methodology.}, } @article {pmid38389535, year = {2024}, author = {Patakova, P and Vasylkivska, M and Sedlar, K and Jureckova, K and Bezdicek, M and Lovecka, P and Branska, B and Kastanek, P and Krofta, K}, title = {Whole genome sequencing and characterization of Pantoea agglomerans DBM 3797, endophyte, isolated from fresh hop (Humulus lupulus L.).}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1305338}, pmid = {38389535}, issn = {1664-302X}, abstract = {BACKGROUND: This paper brings new information about the genome and phenotypic characteristics of Pantoea agglomerans strain DBM 3797, isolated from fresh Czech hop (Humulus lupulus) in the Saaz hop-growing region. Although P. agglomerans strains are frequently isolated from different materials, they are not usually thoroughly characterized even if they have versatile metabolism and those isolated from plants may have a considerable potential for application in agriculture as a support culture for plant growth.

METHODS: P. agglomerans DBM 3797 was cultured under aerobic and anaerobic conditions, its metabolites were analyzed by HPLC and it was tested for plant growth promotion abilities, such as phosphate solubilization, siderophore and indol-3-acetic acid productions. In addition, genomic DNA was extracted, sequenced and de novo assembly was performed. Further, genome annotation, pan-genome analysis and selected genome analyses, such as CRISPR arrays detection, antibiotic resistance and secondary metabolite genes identification were carried out.

RESULTS AND DISCUSSION: The typical appearance characteristics of the strain include the formation of symplasmata in submerged liquid culture and the formation of pale yellow colonies on agar. The genetic information of the strain (in total 4.8 Mb) is divided between a chromosome and two plasmids. The strain lacks any CRISPR-Cas system but is equipped with four restriction-modification systems. The phenotypic analysis focused on growth under both aerobic and anaerobic conditions, as well as traits associated with plant growth promotion. At both levels (genomic and phenotypic), the production of siderophores, indoleacetic acid-derived growth promoters, gluconic acid, and enzyme activities related to the degradation of complex organic compounds were found. Extracellular gluconic acid production under aerobic conditions (up to 8 g/l) is probably the result of glucose oxidation by the membrane-bound pyrroloquinoline quinone-dependent enzyme glucose dehydrogenase. The strain has a number of properties potentially beneficial to the hop plant and its closest relatives include the strains also isolated from the aerial parts of plants, yet its safety profile needs to be addressed in follow-up research.}, } @article {pmid38389084, year = {2024}, author = {Miao, J and Wei, X and Cao, C and Sun, J and Xu, Y and Zhang, Z and Wang, Q and Pan, Y and Wang, Z}, title = {Pig pangenome graph reveals functional features of non-reference sequences.}, journal = {Journal of animal science and biotechnology}, volume = {15}, number = {1}, pages = {32}, pmid = {38389084}, issn = {1674-9782}, support = {2022YFF1000500//National Key Research and Development Program of China/ ; 31941007//National Natural Science Foundation of China/ ; 2016C02054-2//Zhejiang province agriculture (livestock) varieties breeding Key Technology R&D Program/ ; }, abstract = {BACKGROUND: The reliance on a solitary linear reference genome has imposed a significant constraint on our comprehensive understanding of 
genetic variation in animals. This constraint is particularly pronounced for non-reference sequences (NRSs), which have not been extensively studied.

RESULTS: In this study, we constructed a pig pangenome graph using 21 pig assemblies and identified 23,831 NRSs with a total length of 105 Mb. Our findings revealed that NRSs were more prevalent in breeds exhibiting greater genetic divergence from the reference genome. Furthermore, we observed that NRSs were rarely found within coding sequences, while NRS insertions were enriched in immune-related Gene Ontology terms. Notably, our investigation also unveiled a close association between novel genes and the immune capacity of pigs. We observed substantial differences in terms of frequencies of NRSs between Eastern and Western pigs, and the heat-resistant pigs exhibited a substantial number of NRS insertions in an 11.6 Mb interval on chromosome X. Additionally, we discovered a 665 bp insertion in the fourth intron of the TNFRSF19 gene that may be associated with the ability of heat tolerance in Southern Chinese pigs.

CONCLUSIONS: Our findings demonstrate the potential of a graph genome approach to reveal important functional features of NRSs in pig populations.}, } @article {pmid38388650, year = {2024}, author = {Pena-Fernández, N and Ocejo, M and van der Graaf-van Bloois, L and Lavín, JL and Kortabarria, N and Collantes-Fernández, E and Hurtado, A and Aduriz, G}, title = {Comparative pangenomic analysis of Campylobacter fetus isolated from Spanish bulls and other mammalian species.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {4347}, pmid = {38388650}, issn = {2045-2322}, support = {Pre2018-086113 funded by MCIN/AEI/ 10.13039/501100011033 and by "ESF Investing in your future"//Ministerio de Ciencia e Innovación/ ; }, abstract = {Campylobacter fetus comprises two closely related mammal-associated subspecies: Campylobacter fetus subsp. fetus (Cff) and Campylobacter fetus subsp. venerealis (Cfv). The latter causes bovine genital campylobacteriosis, a sexually-transmitted disease endemic in Spain that results in significant economic losses in the cattle industry. Here, 33 C. fetus Spanish isolates were whole-genome sequenced and compared with 62 publicly available C. fetus genomes from other countries. Genome-based taxonomic identification revealed high concordance with in silico PCR, confirming Spanish isolates as Cff (n = 4), Cfv (n = 9) and Cfv biovar intermedius (Cfvi, n = 20). MLST analysis assigned the Spanish isolates to 6 STs, including three novel: ST-76 and ST-77 for Cfv and ST-78 for Cff. Core genome SNP phylogenetic analysis of the 95 genomes identified multiple clusters, revealing associations at subspecies and biovar level between genomes with the same ST and separating the Cfvi genomes from Spain and other countries. A genome-wide association study identified pqqL as a Cfv-specific gene and a potential candidate for more accurate identification methods. Functionality analysis revealed variations in the accessory genome of C. 
fetus subspecies and biovars that deserve further studies. These results provide valuable information about the regional variants of C. fetus present in Spain and the genetic diversity and predicted functionality of the different subspecies.}, } @article {pmid38385549, year = {2024}, author = {Arizala, D and Arif, M}, title = {Impact of homologous recombination on core genome evolution and host adaptation of Pectobacterium parmentieri.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae032}, pmid = {38385549}, issn = {1759-6653}, abstract = {Homologous recombination is a major force mechanism driving bacterial evolution, host adaptability and acquisition of novel virulence traits. Pectobacterium parmentieri is a plant bacterial pathogen distributed worldwide, primarily affecting potatoes, by causing soft rot and blackleg diseases. The goal of this investigation was to understand the impact of homologous recombination on the genomic evolution of P. parmentieri. Analysis of P. parmentieri genomes using Roary revealed a dynamic pan-genome with 3,742 core genes and over 55% accessory genome variability. Bayesian population structure analysis identified seven lineages, indicating species heterogeneity. ClonalFrameML analysis displayed 5,125 recombination events, with the lineage 4 exhibiting the highest events. fastGEAR analysis identified 486 ancestral and 941 recent recombination events ranging 43 bp - 119 kb and 36 bp - 13.96 kb, respectively, suggesting ongoing adaptation. Notably, 11% (412 genes) of the core genome underwent recent recombination, with lineage 1 as the main donor. The prevalence of recent recombination (double compared to ancient) events implies continuous adaptation, possibly driven by global potato trade. 
Recombination events were found in genes involved in vital cellular processes (DNA replication, DNA repair, RNA processing, homeostasis, and metabolism), pathogenicity determinants (type secretion systems, cell-wall degrading enzymes, iron scavengers, lipopolysaccharides, flagellum, etc.), antimicrobial compounds (phenazine and colicin) and even CRISPR-Cas genes. Overall, these results emphasize the potential role of homologous recombination in P. parmentieri's evolutionary dynamics, influencing host colonization, pathogenicity, adaptive immunity, and ecological fitness.}, } @article {pmid38385476, year = {2024}, author = {Tariq, DE}, title = {Pangenomic analyses of tuberculosis strains to identify resistomes using computational approaches.}, journal = {JPMA. The Journal of the Pakistan Medical Association}, volume = {74}, number = {1 (Supple-2)}, pages = {S74--S78}, doi = {10.47391/JPMA-DUHS-S15}, pmid = {38385476}, issn = {0030-9982}, abstract = {OBJECTIVE: To locate resistomes in tuberculosis strains, to determine the severity of drug resistance, and to infer its implications with respect to high tuberculosis prevalence in a Third World setting.

METHODS: The pangenomic study was conducted from October 2022 to January 2023 in Sir Syed University of Engineering and Technology, Karachi, and comprised 2012-22 data on multiple sequence alignment to assess the genetic evolution of tuberculosis strains. Antibiotic resistance drug classes were identified using the Canadian Antibiotic Resistance Database, which entailed multidrug-resistant and extremely drug-resistant strains. Also, GenBank was used for tuberculosis genome FASTA (fast-all; nucleotide and protein sequence representation) files, prediction of resistome sequences on the basis of Canadian Antibiotic Resistance Database, and multiple sequence alignment was done in Mauve.

RESULTS: Evolutionarily, the 6 strains identified were structurally similar with polymorphisms in their core chromosomal regions. Their resistome genes showed perfect hits for isoniazid, rifamycin, cephalosporin, fluoroquinolone, aminoglycosides, penem, penam and cephamycin.

CONCLUSION: Drugs discovered in antibiotic resistance genes are now less effective in treatment, and have the potential to develop into more dangerous bacteria, if not monitored. For treatment, staying long durations in hospitals for quality healthcare and supervision in third world countries is unaffordable.}, } @article {pmid38379925, year = {2024}, author = {Turco, S and Russo, S and Pietrucci, D and Filippi, A and Milanesi, M and Luzzago, C and Garbarino, C and Palladini, G and Chillemi, G and Ricchi, M}, title = {High clonality of Mycobacterium avium subsp. paratuberculosis field isolates from red deer revealed by two different methodological approaches of comparative genomic analysis.}, journal = {Frontiers in veterinary science}, volume = {11}, number = {}, pages = {1301667}, pmid = {38379925}, issn = {2297-1769}, abstract = {Mycobacterium avium subsp. paratuberculosis (MAP) is the aetiological agent of paratuberculosis (Johne's disease) in both domestic and wild ruminants. In the present study, using a whole-genome sequence (WGS) approach, we investigated the genetic diversity of 15 Mycobacterium avium field strains isolated in the last 10 years from red deer inhabiting the Stelvio National Park and affected by paratuberculosis. Combining de novo assembly and a reference-based method, followed by a pangenome analysis, we highlight a very close relationship among 13 MAP field isolates, suggesting that a single infecting event occurred in this population. Moreover, two isolates have been classified as Mycobacterium avium subsp. hominissuis, distinct from the other MAPs under comparison but close to each other. This is the first time that this subspecies has been found in Italy in samples without evident epidemiological correlations, having been isolated in two different locations of the Stelvio National Park and in different years. 
Our study highlights the importance of a multidisciplinary approach incorporating molecular epidemiology and ecology into traditional infectious disease knowledge in order to investigate the nature of infectious disease in wildlife populations.}, } @article {pmid38378816, year = {2024}, author = {Schreiber, M and Jayakodi, M and Stein, N and Mascher, M}, title = {Plant pangenomes for crop improvement, biodiversity and evolution.}, journal = {Nature reviews. Genetics}, volume = {}, number = {}, pages = {}, pmid = {38378816}, issn = {1471-0064}, abstract = {Plant genome sequences catalogue genes and the genetic elements that regulate their expression. Such inventories further research aims as diverse as mapping the molecular basis of trait diversity in domesticated plants or inquiries into the origin of evolutionary innovations in flowering plants millions of years ago. The transformative technological progress of DNA sequencing in the past two decades has enabled researchers to sequence ever more genomes with greater ease. Pangenomes - complete sequences of multiple individuals of a species or higher taxonomic unit - have now entered the geneticists' toolkit. The genomes of crop plants and their wild relatives are being studied with translational applications in breeding in mind. 
But pangenomes are applicable also in ecological and evolutionary studies, as they help classify and monitor biodiversity across the tree of life, deepen our understanding of how plant species diverged and show how plants adapt to changing environments or new selection pressures exerted by human beings.}, } @article {pmid38376942, year = {2024}, author = {Truong, TC and Park, H and Kim, JH and Tran, VT and Kim, W}, title = {The evolutionary phylodynamics of human parechovirus A type 3 reveal multiple recombination events in South Korea.}, journal = {Journal of medical virology}, volume = {96}, number = {2}, pages = {e29477}, doi = {10.1002/jmv.29477}, pmid = {38376942}, issn = {1096-9071}, support = {NRF-2021R1C1C2003223//National Research Foundation of Korea/ ; NRF-2022R1A2C2012209//National Research Foundation of Korea/ ; }, abstract = {Human parechovirus A (HPeV-A) is a causative agent of respiratory and gastrointestinal illnesses, acute flaccid paralysis encephalitis, meningitis, and neonatal sepsis. To clarify the characteristics of HPeV-A infection in children, 391 fecal specimens were collected from January 2014 to October 2015 from patients with acute gastroenteritis in Seoul, South Korea. Of these, 221/391 (56.5%) HPeV-A positive samples were found in children less than 2 years old. Three HPeV-A genotypes HPeV-A1 (117/221; 52.94%), HPeV-A3 (100/221; 45.25%), and HPeV-A6 (4/221; 1.81%) were detected, among which HPeV-A3 was predominant with the highest recorded value of 58.6% in 2015. Moreover, recombination events in the Korean HPeV-A3 strains were detected. 
Phylogenetic analysis revealed that the capsid-encoding regions and noncapsid gene 2A of the four Korean HPeV-A3 strains are closely related to the HPeV-A3 strains isolated in Canada in 2007 (Can82853-01), Japan in 2008 (A308/99), and Taiwan in 2011 (TW-03067-2011) while noncapsid genes P2 (2B-2C) and P3 (3A-3D) are closely related to those of HPeV-A1 strains BNI-788St (Germany in 2008) and TW-71594-2010 (Taiwan in 2010). This first report on the whole-genome analysis of HPeV-A3 in Korea provides insight into the evolving status and pathogenesis of HPeVs in children.}, } @article {pmid38376382, year = {2024}, author = {Cooper, HB and Vezina, B and Hawkey, J and Passet, V and López-Fernández, S and Monk, JM and Brisse, S and Holt, KE and Wyres, KL}, title = {A validated pangenome-scale metabolic model for the Klebsiella pneumoniae species complex.}, journal = {Microbial genomics}, volume = {10}, number = {2}, pages = {}, doi = {10.1099/mgen.0.001206}, pmid = {38376382}, issn = {2057-5858}, abstract = {The Klebsiella pneumoniae species complex (KpSC) is a major source of nosocomial infections globally with high rates of resistance to antimicrobials. Consequently, there is growing interest in understanding virulence factors and their association with cellular metabolic processes for developing novel anti-KpSC therapeutics. Phenotypic assays have revealed metabolic diversity within the KpSC, but metabolism research has been neglected due to experiments being difficult and cost-intensive. Genome-scale metabolic models (GSMMs) represent a rapid and scalable in silico approach for exploring metabolic diversity, which compile genomic and biochemical data to reconstruct the metabolic network of an organism. Here we use a diverse collection of 507 KpSC isolates, including representatives of globally distributed clinically relevant lineages, to construct the most comprehensive KpSC pan-metabolic model to date, KpSC pan v2. 
Candidate metabolic reactions were identified using gene orthology to known metabolic genes, prior to manual curation via extensive literature and database searches. The final model comprised a total of 3550 reactions, 2403 genes and can simulate growth on 360 unique substrates. We used KpSC pan v2 as a reference to derive strain-specific GSMMs for all 507 KpSC isolates, and compared these to GSMMs generated using a prior KpSC pan-reference (KpSC pan v1) and two single-strain references. We show that KpSC pan v2 includes a greater proportion of accessory reactions (8.8 %) than KpSC pan v1 (2.5 %). GSMMs derived from KpSC pan v2 also generate more accurate growth predictions, with high median accuracies of 95.4 % (aerobic, n=37 isolates) and 78.8 % (anaerobic, n=36 isolates) for 124 matched carbon substrates. KpSC pan v2 is freely available at https://github.com/kelwyres/KpSC-pan-metabolic-model, representing a valuable resource for the scientific community, both as a source of curated metabolic information and as a reference to derive accurate strain-specific GSMMs. The latter can be used to investigate the relationship between KpSC metabolism and traits of interest, such as reservoirs, epidemiology, drug resistance or virulence, and ultimately to inform novel KpSC control strategies.}, } @article {pmid38376357, year = {2024}, author = {Benning, S and Pritsch, K and Radl, V and Siani, R and Wang, Z and Schloter, M}, title = {(Pan)genomic analysis of two Rhodococcus isolates and their role in phenolic compound degradation.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0378323}, doi = {10.1128/spectrum.03783-23}, pmid = {38376357}, issn = {2165-0497}, abstract = {The genus Rhodococcus is recognized for its potential to degrade a large range of aromatic substances, including plant-derived phenolic compounds. 
We used comparative genomics in the context of the broader Rhodococcus pan-genome to study genomic traits of two newly described Rhodococcus strains (type-strain Rhodococcus pseudokoreensis R79[T] and Rhodococcus koreensis R85) isolated from apple rhizosphere. Of particular interest was their ability to degrade phenolic compounds as part of an integrated approach to treat apple replant disease (ARD) syndrome. The pan-genome of the genus Rhodococcus based on 109 high-quality genomes was open with a small core (1.3%) consisting of genes assigned to basic cell functioning. The range of genome sizes in Rhodococcus was high, from 3.7 to 10.9 Mbp. Genomes from host-associated strains were generally smaller compared to environmental isolates which were characterized by exceptionally large genome sizes. Due to large genomic differences, we propose the reclassification of distinct groups of rhodococci like the Rhodococcus equi cluster to new genera. Taxonomic species affiliation was the most important factor in predicting genetic content and clustering of the genomes. Additionally, we found genes that discriminated between the strains based on habitat. All members of the genus Rhodococcus had at least one gene involved in the pathway for the degradation of benzoate, while biphenyl degradation was mainly restricted to strains in close phylogenetic relationships with our isolates. The ~40% of genes still unclassified in larger Rhodococcus genomes, particularly those of environmental isolates, need more research to explore the metabolic potential of this genus.

IMPORTANCE: Rhodococcus is a diverse, metabolically powerful genus, with high potential to adapt to different habitats due to the linear plasmids and large genome sizes. The analysis of its pan-genome allowed us to separate host-associated from environmental strains, supporting taxonomic reclassification. 
It was shown which genes contribute to the differentiation of the genomes based on habitat, which can possibly be used for targeted isolation and screening for desired traits. With respect to apple replant disease (ARD), our isolates showed genome traits that suggest potential for application in reducing plant-derived phenolic substances in soil, which makes them good candidates for further testing against ARD.}, } @article {pmid38375235, year = {2024}, author = {Lagerstrom, KM and Scales, NC and Hadly, EA}, title = {Impressive pan-genomic diversity of E. coli from a wild animal community near urban development reflects human impacts.}, journal = {iScience}, volume = {27}, number = {3}, pages = {109072}, pmid = {38375235}, issn = {2589-0042}, abstract = {Human and domesticated animal waste infiltrates global freshwater, terrestrial, and marine environments, widely disseminating fecal microbes, antibiotics, and other chemical pollutants. Emerging evidence suggests that guts of wild animals are being invaded by our microbes, including Escherichia coli, which face anthropogenic selective pressures to gain antimicrobial resistance (AMR) and increase virulence. However, wild animal sources remain starkly under-represented among genomic sequence repositories. We sequenced whole genomes of 145 E. coli isolates from 55 wild and 13 domestic animal fecal samples, averaging 2 (ranging 1-7) isolates per sample, on a preserve imbedded in a human-dominated landscape in California Bay Area, USA, to assess AMR, virulence, and pan-genomic diversity. With single nucleotide polymorphism analyses we predict potential transmission routes. We illustrate the usefulness of E. 
coli to aid our understanding of and ability to surveil the emergence of zoonotic pathogens created by the mixing of human and wild bacteria in the environment.}, } @article {pmid38370750, year = {2024}, author = {Bolognini, D and Halgren, A and Lou, RN and Raveane, A and Rocha, JL and Guarracino, A and Soranzo, N and Chin, J and Garrison, E and Sudmant, PH}, title = {Global diversity, recurrent evolution, and recent selection on amylase structural haplotypes in humans.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.07.579378}, pmid = {38370750}, abstract = {The adoption of agriculture, first documented ∼12,000 years ago in the Fertile Crescent, triggered a rapid shift toward starch-rich diets in human populations. Amylase genes facilitate starch digestion and increased salivary amylase copy number has been observed in some modern human populations with high starch intake, though evidence of recent selection is lacking. Here, using 52 long-read diploid assemblies and short read data from ∼5,600 contemporary and ancient humans, we resolve the diversity, evolutionary history, and selective impact of structural variation at the amylase locus. We find that both salivary and pancreatic amylase genes have higher copy numbers in populations with agricultural subsistence compared to fishing, hunting, and pastoral groups. We identify 28 distinct amylase structural architectures and demonstrate that identical structures have arisen independently multiple times throughout recent human history. Using a pangenome graph-based approach to infer structural haplotypes across thousands of humans, we identify extensively duplicated haplotypes present at higher frequencies in modern agricultural populations. 
Leveraging 534 ancient human genomes we find that duplication-containing haplotypes have increased in frequency more than seven-fold over the last 12,000 years providing evidence for recent selection in Eurasians at this locus comparable in magnitude to that at lactase. Together, our study highlights the strong impact of the agricultural revolution on human genomes and the importance of long-read sequencing in identifying signatures of selection at structurally complex loci.}, } @article {pmid38370713, year = {2024}, author = {Lypaczewski, P and Chac, D and Dunmire, CN and Tandoc, KM and Chowdhury, F and Khan, AI and Bhuiyan, T and Harris, JB and LaRocque, RC and Calderwood, SB and Ryan, ET and Qadri, F and Shapiro, BJ and Weil, AA}, title = {Diversity of Vibrio cholerae O1 through the human gastrointestinal tract during cholera.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.08.579476}, pmid = {38370713}, abstract = {UNLABELLED: Vibrio cholerae O1 causes the diarrheal disease cholera, and the small intestine is the site of active infection. During cholera, cholera toxin is secreted from V. cholerae and induces a massive fluid influx into the small intestine, which causes vomiting and diarrhea. Typically, V. cholerae genomes are sequenced from bacteria passed in stool, but rarely from vomit, a fluid that may more closely represents the site of active infection. We hypothesized that the V. cholerae O1 population bottlenecks along the gastrointestinal tract would result in reduced genetic variation in stool compared to vomit. To test this, we sequenced V. cholerae genomes from ten cholera patients with paired vomit and stool samples. Genetic diversity was low in both vomit and stool, consistent with a single infecting population rather than co-infection with divergent V. cholerae O1 lineages. 
The number of single nucleotide variants decreased between vomit and stool in four patients, increased in two, and remained unchanged in four. The number of genes encoded in the V. cholerae genome decreased between vomit and stool in eight patients and increased in two. Pangenome analysis of assembled short-read sequencing demonstrated that the toxin-coregulated pilus operon more frequently contained deletions in genomes from vomit compared to stool. However, these deletions were not detected by PCR or long-read sequencing, indicating that interpreting gene presence or absence patterns from short-read data alone may be incomplete. Overall, we found that V. cholerae O1 isolated from stool is genetically similar to V. cholerae recovered from the upper intestinal tract.

IMPORTANCE: Vibrio cholerae O1, the bacterium that causes cholera, is ingested in contaminated food or water and then colonizes the upper small intestine and is excreted in stool. Shed V. cholerae genomes are usually studied, but V. cholerae isolated from vomit may be more representative of where V. cholerae colonizes in the upper intestinal epithelium. V. cholerae may experience bottlenecks, or large reductions in bacterial population sizes or genetic diversity, as it passes through the gut. Passage through the gut may select for distinct V. cholerae mutants that are adapted for survival and gut colonization. We did not find strong evidence for such adaptive mutations, and instead observed that passage through the gut results in modest reductions in V. cholerae genetic diversity, and only in some patients. These results fill a gap in our understanding of the V. cholerae life cycle, transmission, and evolution.}, } @article {pmid38370577, year = {2023}, author = {Yuan, C and An, T and Li, X and Zou, J and Lin, Z and Gu, J and Hu, R and Fang, Z}, title = {Genomic analysis of Ralstonia pickettii reveals the genetic features for potential pathogenicity and adaptive evolution in drinking water.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1272636}, pmid = {38370577}, issn = {1664-302X}, abstract = {Ralstonia pickettii, the most critical clinical pathogen of the genus Ralstonia, has been identified as a causative agent of numerous harmful infections. Additionally, Ralstonia pickettii demonstrates adaptability to extreme environmental conditions, such as those found in drinking water. In this study, we conducted a comprehensive genomic analysis to investigate the genomic characteristics related to potential pathogenicity and adaptive evolution in drinking water environments of Ralstonia pickettii. 
Through phylogenetic analysis and population genetic analysis, we divided Ralstonia pickettii into five Groups, two of which were associated with drinking water environments. The open pan-genome with a large and flexible gene repertoire indicated a high genetic plasticity. Significant differences in functional enrichment were observed between the core- and pan-genome of different groups. Diverse mobile genetic elements (MGEs), extensive genomic rearrangements, and horizontal gene transfer (HGT) events played a crucial role in generating genetic diversity. In drinking water environments, Ralstonia pickettii exhibited strong adaptability, and the acquisition of specific adaptive genes was potentially facilitated by genomic islands (GIs) and HGT. Furthermore, environmental pressures drove the adaptive evolution of Ralstonia pickettii, leading to the accumulation of unique mutations in key genes. These mutations may have a significant impact on various physiological functions, particularly carbon metabolism and energy metabolism. The presence of virulence-related elements associated with macromolecular secretion systems, virulence factors, and antimicrobial resistance indicated the potential pathogenicity of Ralstonia pickettii, making it capable of causing multiple nosocomial infections. 
This study provides comprehensive insights into the potential pathogenicity and adaptive evolution of Ralstonia pickettii in drinking water environments from a genomic perspective.}, } @article {pmid38365240, year = {2024}, author = {Shen, L and Liu, Y and Chen, L and Lei, T and Ren, P and Ji, M and Song, W and Lin, H and Su, W and Wang, S and Rooman, M and Pucci, F}, title = {Genomic basis of environmental adaptation in the widespread poly-extremophilic Exiguobacterium group.}, journal = {The ISME journal}, volume = {18}, number = {1}, pages = {}, doi = {10.1093/ismejo/wrad020}, pmid = {38365240}, issn = {1751-7370}, support = {U21A20176//National Natural Science Foundation of China/ ; 2019QZKK0503//Second Tibetan Plateau Scientific Expedition and Research/ ; 92251304//National Natural Science Foundation of China/ ; swzy202008//Open Project Fund of Anhui Provincial Key Laboratory of Protection and Utilization of Important Biological Resources/ ; 2022AH010012//Anhui Provincial Engineering Research Centre for Molecular Detection and Diagnostics/ ; }, abstract = {Delineating cohesive ecological units and determining the genetic basis for their environmental adaptation are among the most important objectives in microbiology. In the last decade, many studies have been devoted to characterizing the genetic diversity in microbial populations to address these issues. However, the impact of extreme environmental conditions, such as temperature and salinity, on microbial ecology and evolution remains unclear so far. In order to better understand the mechanisms of adaptation, we studied the (pan)genome of Exiguobacterium, a poly-extremophile bacterium able to grow in a wide range of environments, from permafrost to hot springs. To have the genome for all known Exiguobacterium type strains, we first sequenced those that were not yet available. 
Using a reverse-ecology approach, we showed how the integration of phylogenomic information, genomic features, gene and pathway enrichment data, regulatory element analyses, protein amino acid composition, and protein structure analyses of the entire Exiguobacterium pangenome allows to sharply delineate ecological units consisting of mesophilic, psychrophilic, halophilic-mesophilic, and halophilic-thermophilic ecotypes. This in-depth study clarified the genetic basis of the defined ecotypes and identified some key mechanisms driving the environmental adaptation to extreme environments. Our study points the way to organizing the vast microbial diversity into meaningful ecologically units, which, in turn, provides insight into how microbial communities adapt and respond to different environmental conditions in a changing world.}, } @article {pmid38364871, year = {2024}, author = {Wu, Z and Li, T and Jiang, Z and Zheng, J and Gu, Y and Liu, Y and Liu, Y and Xie, Z}, title = {Human pangenome analysis of sequences missing from the reference genome reveals their widespread evolutionary, phenotypic, and functional roles.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae086}, pmid = {38364871}, issn = {1362-4962}, support = {2019YFA0904400//National Key Research and Development Program of China/ ; 202201020336//Science and Technology Program of Guangzhou, China/ ; }, abstract = {Nonreference sequences (NRSs) are DNA sequences present in global populations but absent in the current human reference genome. However, the extent and functional significance of NRSs in the human genomes and populations remains unclear. Here, we de novo assembled 539 genomes from five genetically divergent human populations using long-read sequencing technology, resulting in the identification of 5.1 million NRSs. These were merged into 45284 unique NRSs, with 29.7% being novel discoveries. 
Among these NRSs, 38.7% were common across the five populations, and 35.6% were population specific. The use of a graph-based pangenome approach allowed for the detection of 565 transcript expression quantitative trait loci on NRSs, with 426 of these being novel findings. Moreover, 26 NRS candidates displayed evidence of adaptive selection within human populations. Genes situated in close proximity to or intersecting with these candidates may be associated with metabolism and type 2 diabetes. Genome-wide association studies revealed 14 NRSs to be significantly associated with eight phenotypes. Additionally, 154 NRSs were found to be in strong linkage disequilibrium with 258 phenotype-associated SNPs in the GWAS catalogue. Our work expands the understanding of human NRSs and provides novel insights into their functions, facilitating evolutionary and biomedical researches.}, } @article {pmid38361606, year = {2024}, author = {Bonnie, JK and Ahmed, OY and Langmead, B}, title = {DandD: Efficient measurement of sequence growth and similarity.}, journal = {iScience}, volume = {27}, number = {3}, pages = {109054}, doi = {10.1016/j.isci.2024.109054}, pmid = {38361606}, issn = {2589-0042}, abstract = {Genome assembly databases are growing rapidly. The redundancy of sequence content between a new assembly and previous ones is neither conceptually nor algorithmically easy to measure. We introduce pertinent methods and DandD, a tool addressing how much new sequence is gained when a sequence collection grows. DandD can describe how much structural variation is discovered in each new human genome assembly and when discoveries will level off in the future. DandD uses a measure called δ ("delta"), developed initially for data compression and chiefly dependent on k-mer counts. DandD rapidly estimates δ using genomic sketches. We propose δ as an alternative to k-mer-specific cardinalities when computing the Jaccard coefficient, thereby avoiding the pitfalls of a poor choice of k. 
We demonstrate the utility of DandD's functions for estimating δ, characterizing the rate of pangenome growth, and computing all-pairs similarities using k-independent Jaccard.}, } @article {pmid38356529, year = {2024}, author = {Zhou, L and Liu, D and Zhu, Y and Zhang, Z and Chen, S and Zhao, G and Zheng, H}, title = {Advance typing of Vibrio parahaemolyticus through the mtlA and aer gene: A high-resolution, cost-effective approach.}, journal = {Heliyon}, volume = {10}, number = {3}, pages = {e25642}, pmid = {38356529}, issn = {2405-8440}, abstract = {Vibrio parahaemolyticus is a significant cause of foodborne illness, and its incidence worldwide is on the rise. It is thus imperative to develop a straightforward and efficient method for typing strains of this pathogen. In this study, we conducted a pangenome analysis of 75 complete genomes of V. parahaemolyticus and identified the core gene mtlA with the highest degree of variation, which distinguished 44 strains and outperformed traditional seven-gene-based MLST when combined with aer, another core gene with high degree of variation. The mtlA gene had higher resolution to type strains with a close relationship compared to the traditional MLST genes in the phylogenetic tree built by core genomes. Strong positive selection was also detected in the gene mtlA (ω > 1), representing adaptive and evolution in response to the environment. Therefore, the panel of gene mtlA and aer may serve as a tool for the typing of V. 
parahaemolyticus, potentially contributing to the prevention and control of this foodborne disease.}, } @article {pmid38355307, year = {2024}, author = {Leonard, AS and Mapel, XM and Pausch, H}, title = {Pangenome genotyped structural variation improves molecular phenotype mapping in cattle.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278267.123}, pmid = {38355307}, issn = {1549-5469}, abstract = {Expression and splicing quantitative trait loci (e/sQTL) are large contributors to phenotypic variability. Achieving sufficient statistical power for e/sQTL mapping requires large cohorts with both genotypes and molecular phenotypes, and so the genomic variation is often called from short-read alignments which are unable to comprehensively resolve structural variation. Here we build a pangenome from 16 HiFi haplotype-resolved assemblies to identify small and structural variation and genotype them with PanGenie in 307 short-read samples. We find high (>90%) concordance of PanGenie-genotyped and DeepVariant-called small variation, and confidently genotype close to 21M small and 43k structural variants in the larger population. We validate 85% of these structural variants (with MAF>0.1) directly with a subset of 25 short-read samples that also have medium coverage HiFi reads. We then conduct e/sQTL mapping with this comprehensive variant set in a subset of 117 cattle that have testis transcriptome data and find 92 structural variants as causal candidates for eQTL and 73 for sQTL. 
We find that roughly half of top associated structural variants affecting expression or splicing are transposable elements, such as SV-eQTLs for STN1 and MYH7 and SV-sQTLs for CEP89 and ASAH2. Extensive linkage disequilibrium between small and structural variation results in only 28 additional eQTL and 17 sQTL discovered when including SVs, although many top associated SVs are compelling candidates.}, } @article {pmid38352482, year = {2024}, author = {Raghuram, V and Petit, RA and Karol, Z and Mehta, R and Weissman, DB and Read, TD}, title = {Average Nucleotide Identity based Staphylococcus aureus strain grouping allows identification of strain-specific genes in the pangenome.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.01.29.577756}, pmid = {38352482}, abstract = {UNLABELLED: Staphylococcus aureus causes both hospital and community acquired infections in humans worldwide. Due to the high incidence of infection S. aureus is also one of the most sampled and sequenced pathogens today, providing an outstanding resource to understand variation at the bacterial subspecies level. We processed and downsampled 83,383 public S. aureus Illumina whole genome shotgun sequences and 1,263 complete genomes to produce 7,954 representative substrains. Pairwise comparison of core gene Average Nucleotide Identity (ANI) revealed a natural boundary of 99.5% that could be used to define 145 distinct strains within the species. We found that intermediate frequency genes in the pangenome (present in 10-95% of genomes) could be divided into those closely linked to strain background ("strain-concentrated") and those highly variable within strains ("strain-diffuse"). Non-core genes had different patterns of chromosome location; notably, strain-diffuse associated with prophages, strain-concentrated with the vSaβ genome island and rare genes (<10% frequency) concentrated near the origin of replication. 
Antibiotic genes were enriched in the strain-diffuse class, while virulence genes were distributed between strain-diffuse, strain-concentrated, core and rare classes. This study shows how different patterns of gene movement help create strains as distinct subspecies entities and provide insight into the diverse histories of important S. aureus functions.

IMPORTANCE: We analyzed the genomic diversity of Staphylococcus aureus, a globally prevalent bacterial species that causes serious infections in humans. Our goal was to build a genetic picture of the different strains of S. aureus and which genes may be associated with them. We used a large public dataset (>84,000 genomes) that was re-processed and subsampled to remove redundancy. We found that individual genomes could be grouped into strains by sharing > 99.5% identical nucleotide sequence of the core part of their genome. We also showed that a portion of genes that are present in intermediate frequency in the species are strongly associated with some strains but completely absent from others, suggesting a role in strain-specificity. This work lays the foundation for understanding individual gene histories of the S. aureus species and also outlines strategies for processing large bacterial genomic datasets.}, } @article {pmid38351383, year = {2024}, author = {Li, X and Wang, Y and Cai, C and Ji, J and Han, F and Zhang, L and Chen, S and Zhang, L and Yang, Y and Tang, Q and Bucher, J and Wang, X and Yang, L and Zhuang, M and Zhang, K and Lv, H and Bonnema, G and Zhang, Y and Cheng, F}, title = {Large-scale gene expression alterations introduced by structural variation drive morphotype diversification in Brassica oleracea.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {38351383}, issn = {1546-1718}, support = {31972411//National Natural Science Foundation of China (National Science Foundation of China)/ ; 31722048//National Natural Science Foundation of China (National Science Foundation of China)/ ; 32172578//National Natural Science Foundation of China (National Science Foundation of China)/ ; 201809110159//China Scholarship Council (CSC)/ ; }, abstract = {Brassica oleracea, globally cultivated for its vegetable crops, consists of very diverse morphotypes, characterized by specialized enlarged organs as harvested products. 
This makes B. oleracea an ideal model for studying rapid evolution and domestication. We constructed a B. oleracea pan-genome from 27 high-quality genomes representing all morphotypes and their wild relatives. We identified structural variations (SVs) among these genomes and characterized these in 704 B. oleracea accessions using graph-based genome tools. We show that SVs exert bidirectional effects on the expression of numerous genes, either suppressing through DNA methylation or promoting probably by harboring transcription factor-binding elements. The following examples illustrate the role of SVs modulating gene expression: SVs promoting BoPNY and suppressing BoCKX3 in cauliflower/broccoli, suppressing BoKAN1 and BoACS4 in cabbage and promoting BoMYBtf in ornamental kale. These results provide solid evidence for the role of SVs as dosage regulators of gene expression, driving B. oleracea domestication and diversification.}, } @article {pmid38346372, year = {2024}, author = {Chen, Y and Li, X and Liu, Z and Hu, M and Ma, J and Luo, Y and Zhang, Q and Li, L and Zhao, X and Zhao, M and Liu, W and Liu, Y}, title = {Genomic analysis and experimental pathogenic characterization of Riemerella anatipestifer isolates from chickens in China.}, journal = {Poultry science}, volume = {103}, number = {4}, pages = {103497}, doi = {10.1016/j.psj.2024.103497}, pmid = {38346372}, issn = {1525-3171}, abstract = {Waterfowl have a high likelihood of being infected with Riemerella anatipestifer. Although the pathogen is found in domestic ducks, turkeys, geese, and wild birds, there is little information available about the consequences of infection during egg laying and hatching in chickens. Here, we present the first report of a novel sequence type of R. anatipestifer S63 isolated from chickens in China. On the basis of pan-genome analysis, we showed S63's genome occupies a distinct branch with other R. anatipestifer isolates from other hosts. 
Galleria mellonella larval tests indicated that S63 is less virulent than R. anatipestifer Ra36 isolated from ducks. Ducks and hens are susceptible to S63 infection. There is no mortality rate for chickens or ducks, but adult chickens experience neurological symptoms that reduce egg production and hatching rates. In chickens, S63 might be passed vertically from parents to offspring, resulting in "jelly-like" lifeless embryos. Using quantitative PCR, S63 was detected in the brain, liver, reproductive organs, and embryos. As far as we know, this is the first report of R. anatipestifer in hens, a disease that can reduce egg productivity, lower hatching rates, and produce jelly-like lifeless embryos, and the first report to raise the possibility that hens can be infected by roosters via semen.}, } @article {pmid38339052, year = {2024}, author = {Zhang, T and Chen, X and Yan, W and Li, M and Huang, W and Liu, Q and Li, Y and Guo, C and Shu, Y}, title = {Comparative Analysis of Chloroplast Pan-Genomes and Transcriptomics Reveals Cold Adaptation in Medicago sativa.}, journal = {International journal of molecular sciences}, volume = {25}, number = {3}, pages = {}, doi = {10.3390/ijms25031776}, pmid = {38339052}, issn = {1422-0067}, support = {LH2022C050//Natural Science Foundation of Heilongjiang Province/ ; HSDSSCX2023-42//the Innovative Project for Postgraduate Students of Harbin Normal University/ ; FKL-202203//the Open Fund of Yunnan Province Flower Breeding Key Laboratory/ ; 202301BD070001-208//Agriculture Joint Special Project of Science and Technology Plan Project of Yunnan Science and Technology Department/ ; 530000210000000013742//the Green Food Brand Build a Special Project (Floriculture) supported by Yunnan Provincial Fi-nance Department/ ; U21A20182//the Natural and Science Foundation of China/ ; Qian Liu//Construction of Tengchong Rural Revitalization Technological Innovation County/ ; }, abstract = {Alfalfa (Medicago sativa) is a perennial forage legume that 
is widely distributed all over the world; therefore, it has an extremely complex genetic background. Though population structure and phylogenetic studies have been conducted on a large group of alfalfa nuclear genomes, information about the chloroplast genomes is still lacking. Chloroplast genomes are generally considered to be conservative and play an important role in population diversity analysis and species adaptation in plants. Here, 231 complete alfalfa chloroplast genomes were successfully assembled from 359 alfalfa resequencing data, on the basis of which the alfalfa chloroplast pan-genome was constructed. We investigated the genetic variations of the alfalfa chloroplast genome through comparative genomic, genetic diversity, phylogenetic, population genetic structure, and haplotype analysis. Meanwhile, the expression of alfalfa chloroplast genes under cold stress was explored through transcriptome analysis. As a result, chloroplast genomes of 231 alfalfa lack an IR region, and the size of the chloroplast genome ranges from 125,192 bp to 126,105 bp. Using population structure, haplotypes, and construction of a phylogenetic tree, it was found that alfalfa populations could be divided into four groups, and multiple highly variable regions were found in the alfalfa chloroplast genome. Transcriptome analysis showed that tRNA genes were significantly up-regulated in the cold-sensitive varieties, while rps7, rpl32, and ndhB were down-regulated, and the editing efficiency of ycf1, ycf2, and ndhF was decreased in the cold-tolerant varieties, which may be due to the fact that chloroplasts store nutrients through photosynthesis to resist cold. 
The huge number of genetic variants in this study provide powerful resources for molecular markers.}, } @article {pmid38337024, year = {2024}, author = {Andorf, CM and Haley, OC and Hayford, RK and Portwood, JL and Harding, S and Sen, S and Cannon, EK and Gardiner, JM and Kim, HS and Woodhouse, MR}, title = {PanEffect: a pan-genome visualization tool for variant effects in maize.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae073}, pmid = {38337024}, issn = {1367-4811}, abstract = {UNLABELLED: Understanding the effects of genetic variants is crucial for accurately predicting traits and functional outcomes. Recent approaches have utilized artificial intelligence and protein language models to score all possible missense variant effects at the proteome level for a single genome, but a reliable tool is needed to explore these effects at the pan-genome level. To address this gap, we introduce a new tool called PanEffect. We implemented PanEffect at MaizeGDB to enable a comprehensive examination of the potential effects of coding variants across 50 maize genomes. The tool allows users to visualize over 550 million possible amino acid substitutions in the B73 maize reference genome and to observe the effects of the 2.3 million natural variations in the maize pan-genome. Each variant effect score, calculated from the Evolutionary Scale Modeling (ESM) protein language model, shows the log-likelihood ratio difference between B73 and all variants in the pan-genome. These scores are shown using heatmaps spanning benign outcomes to potential functional consequences. Additionally, PanEffect displays secondary structures and functional domains along with the variant effects, offering additional functional and structural context. Using PanEffect, researchers now have a platform to explore protein variants and identify genetic targets for crop enhancement.

AVAILABILITY: The PanEffect code is freely available on GitHub (https://github.com/Maize-Genetics-and-Genomics-Database/PanEffect). A maize implementation of PanEffect and underlying datasets are available at MaizeGDB (https://www.maizegdb.org/effect/maize/).

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid38334660, year = {2024}, author = {Bachari, A and Nassar, N and Telukutla, S and Zomer, R and Piva, TJ and Mantri, N}, title = {Evaluating the Mechanism of Cell Death in Melanoma Induced by the Cannabis Extract PHEC-66.}, journal = {Cells}, volume = {13}, number = {3}, pages = {}, doi = {10.3390/cells13030268}, pmid = {38334660}, issn = {2073-4409}, support = {Not Applicable//MGC Pharmaceuticals Ltd/ ; }, abstract = {Research suggests the potential of using cannabinoid-derived compounds to function as anticancer agents against melanoma cells. Our recent study highlighted the remarkable in vitro anticancer effects of PHEC-66, an extract from Cannabis sativa, on the MM418-C1, MM329, and MM96L melanoma cell lines. However, the complete molecular mechanism behind this action remains to be elucidated. This study aims to unravel how PHEC-66 brings about its antiproliferative impact on these cell lines, utilising diverse techniques such as real-time polymerase chain reaction (qPCR), assays to assess the inhibition of CB1 and CB2 receptors, measurement of reactive oxygen species (ROS), apoptosis assays, and fluorescence-activated cell sorting (FACS) for apoptosis and cell cycle analysis. The outcomes obtained from this study suggest that PHEC-66 triggers apoptosis in these melanoma cell lines by increasing the expression of pro-apoptotic markers (BAX mRNA) while concurrently reducing the expression of anti-apoptotic markers (Bcl-2 mRNA). Additionally, PHEC-66 induces DNA fragmentation, halting cell progression at the G1 cell cycle checkpoint and substantially elevating intracellular ROS levels. These findings imply that PHEC-66 might have potential as an adjuvant therapy in the treatment of malignant melanoma. 
However, it is essential to conduct further preclinical investigations to delve deeper into its potential and efficacy.}, } @article {pmid38332778, year = {2024}, author = {Sakurai, A and Suzuki, M and Hayashi, K and Doi, Y}, title = {Taxonomic classification of genus Aeromonas using open reading frame-based binarized structure network analysis.}, journal = {Fujita medical journal}, volume = {10}, number = {1}, pages = {8--15}, doi = {10.20407/fmj.2023-007}, pmid = {38332778}, issn = {2189-7255}, abstract = {OBJECTIVES: Taxonomic assignment based on whole-genome sequencing data facilitates clear demarcation of species within a complex genus. Here, we applied a unique pan-genome phylogenetic method, open reading frame (ORF)-based binarized structure network analysis (OSNA), for taxonomic inference of Aeromonas spp., a complex taxonomic group consisting of 30 species.

METHODS: Data from 335 publicly available Aeromonas genomes, including the reference genomes of 30 species, were used to build a phylogenetic tree using OSNA. In OSNA, whole-genome structures are expressed as binary sequences based on the presence or absence of ORFs, and a tree is generated using neighbor-net, a distance-based method for constructing phylogenetic networks from binary sequences. The tree built by OSNA was compared to that constructed by a core-genome single-nucleotide polymorphism (SNP)-based analysis. Furthermore, the orthologous average nucleotide identity (OrthoANI) values of the sequences that clustered in a single clade in the OSNA-based tree were calculated.

RESULTS: The phylogenetic tree constructed with OSNA successfully delineated the majority of species of the genus Aeromonas forming conspecific clades for individual species, which was corroborated by OrthoANI values. Moreover, the OSNA-based phylogenetic tree demonstrated high compositional similarity to the core-genome SNP-based phylogenetic tree, supported by the Fowlkes-Mallows index.

CONCLUSIONS: We propose that OSNA is a useful tool in predicting the taxonomic classification of complex bacterial genera.}, } @article {pmid38329369, year = {2024}, author = {Newcomer, EP and Fishbein, SRS and Zhang, K and Hink, T and Reske, KA and Cass, C and Iqbal, ZH and Struttmann, EL and Burnham, C-AD and Dubberke, ER and Dantas, G}, title = {Genomic surveillance of Clostridioides difficile transmission and virulence in a healthcare setting.}, journal = {mBio}, volume = {}, number = {}, pages = {e0330023}, doi = {10.1128/mbio.03300-23}, pmid = {38329369}, issn = {2150-7511}, abstract = {Clostridioides difficile infection (CDI) is a major cause of healthcare-associated diarrhea, despite the widespread implementation of contact precautions for patients with CDI. Here, we investigate strain contamination in a hospital setting and the genomic determinants of disease outcomes. Across two wards over 6 months, we selectively cultured C. difficile from patients (n = 384) and their environments. Whole-genome sequencing (WGS) of 146 isolates revealed that most C. difficile isolates were from clade 1 (131/146, 89.7%), while only one isolate of the hypervirulent ST1 was recovered. Of culture-positive admissions (n = 79), 19 (24%) patients were colonized with toxigenic C. difficile on admission to the hospital. We defined 25 strain networks at ≤2 core gene single nucleotide polymorphisms; two of these networks contain strains from different patients. Strain networks were temporally linked (P < 0.0001). To understand the genomic correlates of the disease, we conducted WGS on an additional cohort of C. difficile (n = 102 isolates) from the same hospital and confirmed that clade 1 isolates are responsible for most CDI cases. We found that while toxigenic C. difficile isolates are associated with the presence of cdtR, nontoxigenic isolates have an increased abundance of prophages. 
Our pangenomic analysis of clade 1 isolates suggests that while toxin genes (tcdABER and cdtR) were associated with CDI symptoms, they are dispensable for patient colonization. These data indicate that toxigenic and nontoxigenic C. difficile contamination persist in a hospital setting and highlight further investigation into how accessory genomic repertoires contribute to C. difficile colonization and disease. IMPORTANCE: Clostridioides difficile infection remains a leading cause of hospital-associated diarrhea, despite increased antibiotic stewardship and transmission prevention strategies. This suggests a changing genomic landscape of C. difficile. Our study provides insight into the nature of prevalent C. difficile strains in a hospital setting and transmission patterns among carriers. Longitudinal sampling of surfaces and patient stool revealed that both toxigenic and nontoxigenic strains of C. difficile clade 1 dominate these two wards. Moreover, quantification of transmission in carriers of these clade 1 isolates underscores the need to revisit infection prevention measures in this patient group. We identified unique genetic signatures associated with virulence in this clade. Our data highlight the complexities of preventing transmission of this pathogen in a hospital setting and the need to investigate the mechanisms of in vivo persistence and virulence of prevalent lineages in the host gut microbiome.}, } @article {pmid38322985, year = {2024}, author = {Zhong, C and Hu, G and Hu, C and Xu, C and Zhang, Z and Ning, K}, title = {Comparative genomics analysis reveals genetic characteristics and nitrogen fixation profile of Bradyrhizobium.}, journal = {iScience}, volume = {27}, number = {2}, pages = {108948}, doi = {10.1016/j.isci.2024.108948}, pmid = {38322985}, issn = {2589-0042}, abstract = {Bradyrhizobium is a genus of nitrogen-fixing bacteria, with some species producing nodules in leguminous plants. 
Investigations into Bradyrhizobium have recently revealed its substantial genetic resources and agricultural benefits, but a comprehensive survey of its genetic diversity and functional properties is lacking. Using a panel of various strains (N = 278), this study performed a comparative genomics analysis to anticipate genes linked with symbiotic nitrogen fixation. Bradyrhizobium's pan-genome consisted of 84,078 gene families, containing 824 core genes and 42,409 accessory genes. Core genes were mainly involved in crucial cell processes, while accessory genes served diverse functions, including nitrogen fixation and nodulation. Three distinct genetic profiles were identified based on the presence/absence of gene clusters related to nodulation, nitrogen fixation, and secretion systems. Most Bradyrhizobium strains from soil and non-leguminous plants lacked major nif/nod genes and were evolutionarily more closely related. These findings shed light on Bradyrhizobium's genetic features for symbiotic nitrogen fixation.}, } @article {pmid38307885, year = {2024}, author = {Zheng, Z and Zhu, M and Zhang, J and Liu, X and Hou, L and Liu, W and Yuan, S and Luo, C and Yao, X and Liu, J and Yang, Y}, title = {A sequence-aware merger of genomic structural variations at population scale.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {960}, pmid = {38307885}, issn = {2041-1723}, abstract = {Merging structural variations (SVs) at the population level presents a significant challenge, yet it is essential for conducting comprehensive genotypic analyses, especially in the era of pangenomics. Here, we introduce PanPop, a tool that utilizes an advanced sequence-aware SV merging algorithm to efficiently merge SVs of various types. We demonstrate that PanPop can merge and optimize the majority of multiallelic SVs into informative biallelic variants. We show its superior precision and lower rates of missing data compared to alternative software solutions. 
Our approach not only enables the filtering of SVs by leveraging multiple SV callers for enhanced accuracy but also facilitates the accurate merging of large-scale population SVs. These capabilities of PanPop will help to accelerate future SV-related studies.}, } @article {pmid38304712, year = {2024}, author = {Chen, P and Wang, S and Li, H and Qi, X and Hou, Y and Ma, T}, title = {Comparative genomic analyses of Cutibacterium granulosum provide insights into genomic diversity.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1343227}, doi = {10.3389/fmicb.2024.1343227}, pmid = {38304712}, issn = {1664-302X}, abstract = {Cutibacterium granulosum, a commensal bacterium found on human skin, formerly known as Propionibacterium granulosum, rarely causes infections and is generally considered non-pathogenic. Recent research has revealed the transferability of the multidrug-resistant plasmid pTZC1 between C. granulosum and Cutibacterium acnes, the latter being an opportunistic pathogen in surgical site infections. However, there is a noticeable lack of research on the genome of C. granulosum, and the genetic landscape of this species remains largely uncharted. We investigated the genomic features and evolutionary structure of C. granulosum by analyzing a total of 30 Metagenome-Assembled Genomes (MAGs) and isolate genomes retrieved from public databases, as well as those generated in this study. A pan-genome of 6,077 genes was identified for C. granulosum. Remarkably, the 'cloud genes' constituted 62.38% of the pan-genome. Genes associated with mobilome: prophages, transposons [X], defense mechanisms [V] and replication, recombination and repair [L] were enriched in the cloud genome. Phylogenomic analysis revealed two distinct mono-clades, highlighting the genomic diversity of C. granulosum. The genomic diversity was further confirmed by the distribution of Average Nucleotide Identity (ANI) values. The functional profiles analysis of C. 
granulosum unveiled a wide range of potential Antibiotic Resistance Genes (ARGs) and virulence factors, suggesting its potential tolerance to various environmental challenges. Subtype I-E of the CRISPR-Cas system was the most abundant in these genomes, a feature also detected in C. acnes genomes. Given the widespread distribution of C. granulosum strains within skin microbiome, our findings make a substantial contribution to our broader understanding of the genetic diversity, which may open new avenues for investigating the mechanisms and treatment of conditions such as acne vulgaris.}, } @article {pmid38302106, year = {2024}, author = {Hayeck, TJ and Li, Y and Mosbruger, TL and Bradfield, JP and Gleason, AG and Damianos, G and Shaw, GT and Duke, JL and Conlin, LK and Turner, TN and Fernández-Viña, MA and Sarmady, M and Monos, DS}, title = {The Impact of Patterns in Linkage Disequilibrium and Sequencing Quality on the Imprint of Balancing Selection.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae009}, pmid = {38302106}, issn = {1759-6653}, abstract = {Regions under balancing selection are characterized by dense polymorphisms and multiple persistent haplotypes, along with other sequence complexities. Successful identification of these patterns depends on both the statistical approach and the quality of sequencing. To address this challenge, at first, a new statistical method called LD-ABF was developed, employing efficient Bayesian techniques to effectively test for balancing selection. LD-ABF demonstrated the most robust detection of selection in a variety of simulation scenarios, compared against a range of existing tests/tools (Tajima's D, HKA, Dng, BetaScan, and BalLerMix). 
Furthermore, the impact of the quality of sequencing on detection of balancing selection was explored, as well, using: 1) SNP genotyping and exome data, 2) targeted high-resolution HLA genotyping (IHIW), and 3) whole-genome long-read sequencing data (Pangenome). In the analysis of SNP genotyping and exome data, we identified known targets and 38 new selection signatures in genes not previously linked to balancing selection. To further investigate the impact of sequencing quality on detection of balancing selection, a detailed investigation of the MHC was performed with high-resolution HLA typing data. Higher quality sequencing revealed the HLA-DQ genes consistently demonstrated strong selection signatures otherwise not observed from the sparser SNP array and exome data. The HLA-DQ selection signature was also replicated in the Pangenome samples using considerably less samples but, with high quality long-read sequence data. The improved statistical method, coupled with higher quality sequencing, leads to more consistent identification of selection and enhanced localization of variants under selection, particularly in complex regions.}, } @article {pmid38298071, year = {2024}, author = {Lee, J and Cha, IT and Lee, KE and Son, YK and Cho, S and Seol, D}, title = {Complete genome sequence and potential pathogenic assessment of Flavobacterium plurextorum RSG-18 isolated from the gut of Schlegel's black rockfish, Sebastes schlegelii.}, journal = {Environmental microbiology reports}, volume = {}, number = {}, pages = {}, doi = {10.1111/1758-2229.13226}, pmid = {38298071}, issn = {1758-2229}, support = {NIBR202134204//National Institute of Biological Resources, Ministry of Environment/ ; }, abstract = {Flavobacterium plurextorum is a potential fish pathogen of interest, previously isolated from diseased rainbow trout (Oncorhynchus mykiss) and oomycete-infected chum salmon (Oncorhynchus keta) eggs. We report here the first complete genome sequence of F. 
plurextorum RSG-18 isolated from the gut of Schlegel's black rockfish (Sebastes schlegelii). The genome of RSG-18 consists of a circular chromosome of 5,610,911 bp with a 33.57% GC content, containing 4858 protein-coding genes, 18 rRNAs, 63 tRNAs and 1 tmRNA. A comparative analysis was conducted on 11 Flavobacterium species previously reported as pathogens or isolated from diseased fish to confirm the potential pathogenicity of RSG-18. In the SEED classification, RSG-18 was found to have 36 genes categorized in 'Virulence, Disease and Defense'. Across all Flavobacterium species, a total of 16 antibiotic resistance genes and 61 putative virulence factors were identified. All species had at least one phage region and type I, III and IX secretion systems. In pan-genomic analysis, core genes consist of genes linked to phages, integrases and matrix-tolerated elements associated with pathology. The complete genome sequence of F. plurextorum RSG-18 will serve as a foundation for future research, enhancing our understanding of Flavobacterium pathogenicity in fish and contributing to the development of effective prevention strategies.}, } @article {pmid38295902, year = {2024}, author = {Chen, Y and Xiang, G and Liu, P and Zhou, X and Guo, P and Wu, Z and Yang, J and Chen, P and Huang, J and Liao, K}, title = {Prevalence and Molecular Characteristics of Ceftazidime-avibactam Resistance among carbapenem-resistant Pseudomonas aeruginosa Clinical Isolates.}, journal = {Journal of global antimicrobial resistance}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgar.2024.01.014}, pmid = {38295902}, issn = {2213-7173}, abstract = {BACKGROUND: Resistance against ceftazidime-avibactam (CZA) in carbapenem-resistant Pseudomonas aeruginosa (CRPA) is emerging. This study was aimed at detecting the prevalence and molecular characteristics of CZA-resistant CRPA clinical isolates in Guangdong Province, China.

METHODS: The antimicrobial susceptibility profile of these strains was determined. A subset of sixteen CZA-resistant CRPA isolates was analyzed by whole genome sequencing (WGS). Genetic surroundings of carbapenem resistance genes and pan-genome-wide association analysis were further studied.

RESULTS: Of the 250 CRPA isolates, CZA resistance rate was 6.4% (16/250). The minimum inhibitory concentration (MIC) of CZA range was from 0.25 to >256 mg/L. MIC50 and MIC90 were 2/4 and 8/4 mg/L, respectively. Among the sixteen CZA-resistant CRPA strains, 31.3% (5/16) of them carried class B carbapenem resistance genes including blaIMP-4, blaIMP-45 and blaVIM-2, located on IncP-2 megaplasmids or chromosome, respectively. Pan-genome-wide association analysis of accessory genes for CZA-susceptible or -resistant CRPA isolates showed that PA1874, a hypothetical protein containing BapA prefix-like domain, was enriched in CZA-resistant group significantly.

CONCLUSIONS: Class B carbapenem resistance genes play important roles in CZA resistance. Meanwhile, the PA1874 gene may be a novel mechanism involved in CZA resistance. It is necessary to continually monitor CZA-resistant CRPA isolates.}, } @article {pmid38293557, year = {2023}, author = {Kim, B and Han, SR and Lee, H and Oh, TJ}, title = {Insights into group-specific pattern of secondary metabolite gene cluster in Burkholderia genus.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1302236}, doi = {10.3389/fmicb.2023.1302236}, pmid = {38293557}, issn = {1664-302X}, abstract = {Burkholderia is a versatile strain that has expanded into several genera. It has been steadily reported that the genome features of Burkholderia exhibit activities ranging from plant growth promotion to pathogenicity across various isolation areas. The objective of this study was to investigate the secondary metabolite patterns of 366 Burkholderia species through comparative genomics. Samples were selected based on assembly quality assessment and similarity below 80% in average nucleotide identity. Duplicate samples were excluded. Samples were divided into two groups using FastANI analysis. Group A included B. pseudomallei complex. Group B included B. cepacia complex. The limitations of MLST were proposed. The detection of genes was performed, including environmental and virulence-related genes. In the pan-genome analysis, each complex possessed a similar pattern of cluster for orthologous groups. Group A (n = 185) had 14,066 cloud genes, 2,465 shell genes, 682 soft-core genes, and 2,553 strict-core genes. Group B (n = 181) had 39,867 cloud genes, 4,986 shell genes, 324 soft-core genes, 222 core genes, and 2,949 strict-core genes. AntiSMASH was employed to analyze the biosynthetic gene cluster (BGC). The results were then utilized for network analysis using BiG-SCAPE and CORASON. 
Principal component analysis was conducted and a table was constructed using the results obtained from antiSMASH. The results were divided into Group A and Group B. We expected the various species to show similar patterns of secondary metabolite gene clusters. For in-depth analysis, a network analysis of secondary metabolite gene clusters was conducted, exemplified by BiG-SCAPE analysis. Depending on the species and complex, Burkholderia possessed several kinds of siderophore. Among them, ornibactin was possessed in most Burkholderia and was clustered into 4,062 clans. There was a similar pattern of gene clusters depending on the species. NRPS_04014 belonged to siderophore BGCs including ornibactin and indigoidine. However, it was observed that each family included a similar species. This suggests that, besides siderophores being species-specific, the ornibactin gene cluster itself might also be species-specific. The results suggest that siderophores are associated with environmental adaptation, possessing a similar pattern of siderophore gene clusters among species, which could provide another perspective on species-specific environmental adaptation mechanisms.}, } @article {pmid38290434, year = {2024}, author = {Joubert, PM and Krasileva, KV}, title = {Distinct genomic contexts predict gene presence-absence variation in different pathotypes of Magnaporthe oryzae.}, journal = {Genetics}, volume = {}, number = {}, pages = {}, doi = {10.1093/genetics/iyae012}, pmid = {38290434}, issn = {1943-2631}, abstract = {Fungi use the accessory gene content of their pangenomes to adapt to their environments. While gene presence-absence variation (PAV) contributes to shaping accessory gene reservoirs, the genomic contexts that shape these events remain unclear. Since pangenome studies are typically species-wide and do not analyze different populations separately, it is yet to be uncovered whether PAV patterns and mechanisms are consistent across populations. 
Fungal plant pathogens are useful models for studying PAV because they rely on it to adapt to their hosts, and members of a species often infect distinct hosts. We analyzed gene PAV in the blast fungus, Magnaporthe oryzae (syn. Pyricularia oryzae), and found that PAV genes involved in host-pathogen and microbe-microbe interactions may drive the adaptation of the fungus to its environment. We then analyzed genomic and epigenomic features of PAV and observed that proximity to transposable elements, gene GC content, gene length, expression level in the host, and histone H3K27me3 marks were different between PAV genes and conserved genes. We used these features to construct a model that was able to predict whether a gene is likely to experience PAV with high precision (86.06%) and recall (92.88%) in M. oryzae. Finally, we found that PAV genes in the rice and wheat pathotypes of M. oryzae differed in their number and their genomic context. Our results suggest that genomic and epigenomic features of gene PAV can be used to better understand and predict fungal pangenome evolution. We also show that substantial intra-species variation can exist in these features.}, } @article {pmid38281938, year = {2024}, author = {Zaccaron, AZ and Stergiopoulos, I}, title = {Analysis of five near-complete genome assemblies of the tomato pathogen Cladosporium fulvum uncovers additional accessory chromosomes and structural variations induced by transposable elements effecting the loss of avirulence genes.}, journal = {BMC biology}, volume = {22}, number = {1}, pages = {25}, pmid = {38281938}, issn = {1741-7007}, support = {1557995//Directorate for Biological Sciences/ ; CA-D-PPA-2185-H//National Institute of Food and Agriculture/ ; }, abstract = {BACKGROUND: Fungal plant pathogens have dynamic genomes that allow them to rapidly adapt to adverse conditions and overcome host resistance. 
One way by which this dynamic genome plasticity is expressed is through effector gene loss, which enables plant pathogens to overcome recognition by cognate resistance genes in the host. However, the exact nature of these losses remains elusive in many fungi. This includes the tomato pathogen Cladosporium fulvum, which is the first fungal plant pathogen from which avirulence (Avr) genes were ever cloned and in which loss of Avr genes is often reported as a means of overcoming recognition by cognate tomato Cf resistance genes. A recent near-complete reference genome assembly of C. fulvum isolate Race 5 revealed a compartmentalized genome architecture and the presence of an accessory chromosome, thereby creating a basis for studying genome plasticity in fungal plant pathogens and its impact on avirulence genes.

RESULTS: Here, we obtained near-complete genome assemblies of four additional C. fulvum isolates. The genome assemblies had similar sizes (66.96 to 67.78 Mb), number of predicted genes (14,895 to 14,981), and estimated completeness (98.8 to 98.9%). Comparative analysis that included the genome of isolate Race 5 revealed high levels of synteny and colinearity, which extended to the density and distribution of repetitive elements and of repeat-induced point (RIP) mutations across homologous chromosomes. Nonetheless, structural variations, likely mediated by transposable elements and effecting the deletion of the avirulence genes Avr4E, Avr5, and Avr9, were also identified. The isolates further shared a core set of 13 chromosomes, but two accessory chromosomes were identified as well. Accessory chromosomes were significantly smaller in size, and one carried pseudogenized copies of two effector genes. Whole-genome alignments further revealed genomic islands of near-zero nucleotide diversity interspersed with islands of high nucleotide diversity that co-localized with repeat-rich regions. These regions were likely generated by RIP, which generally asymmetrically affected the genome of C. fulvum.

CONCLUSIONS: Our results reveal new evolutionary aspects of the C. fulvum genome and provide new insights on the importance of genomic structural variations in overcoming host resistance in fungal plant pathogens.}, } @article {pmid38279113, year = {2024}, author = {Rajput, J and Chandra, G and Jain, C}, title = {Co-linear chaining on pangenome graphs.}, journal = {Algorithms for molecular biology : AMB}, volume = {19}, number = {1}, pages = {4}, pmid = {38279113}, issn = {1748-7188}, abstract = {Pangenome reference graphs are useful in genomics because they compactly represent the genetic diversity within a species, a capability that linear references lack. However, efficiently aligning sequences to these graphs with complex topology and cycles can be challenging. The seed-chain-extend based alignment algorithms use co-linear chaining as a standard technique to identify a good cluster of exact seed matches that can be combined to form an alignment. Recent works show how the co-linear chaining problem can be efficiently solved for acyclic pangenome graphs by exploiting their small width and how incorporating gap cost in the scoring function improves alignment accuracy. However, it remains open on how to effectively generalize these techniques for general pangenome graphs which contain cycles. Here we present the first practical formulation and an exact algorithm for co-linear chaining on cyclic pangenome graphs. We rigorously prove the correctness and computational complexity of the proposed algorithm. We evaluate the empirical performance of our algorithm by aligning simulated long reads from the human genome to a cyclic pangenome graph constructed from 95 publicly available haplotype-resolved human genome assemblies. While the existing heuristic-based algorithms are faster, the proposed algorithm provides a significant advantage in terms of accuracy. 
Implementation (https://github.com/at-cg/PanAligner).}, } @article {pmid38278862, year = {2024}, author = {Mondol, SM and Islam, I and Islam, MR and Shakil, SK and Rakhi, NN and Mustary, JF and Amiruzzaman, and Gomes, DJ and Shahjalal, HM and Rahaman, MM}, title = {Genomic landscape of NDM-1 producing multidrug-resistant Providencia stuartii causing burn wound infections in Bangladesh.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {2246}, pmid = {38278862}, issn = {2045-2322}, support = {LS2019935//Ministry of Education, Government of the People's Republic of Bangladesh/ ; }, abstract = {The increasing antimicrobial resistance in Providencia stuartii (P. stuartii) worldwide, particularly concerning for immunocompromised and burn patients, has raised concern in Bangladesh, where the significance of this infectious opportunistic pathogen had been previously overlooked, prompting a need for investigation. The two strains of P. stuartii (P. stuartii SHNIBPS63 and P. stuartii SHNIBPS71) isolated from wound swab of two critically injured burn patients were found to be multidrug-resistant and P. stuartii SHNIBPS63 showed resistance to all the 22 antibiotics tested as well as revealed the co-existence of blaVEB-6 (Class A), blaNDM-1 (Class B), blaOXA-10 (Class D) beta lactamase genes. Complete resistance to carbapenems through the production of NDM-1, is indicative of an alarming situation as carbapenems are considered to be the last line antibiotic to combat this pathogen. Both isolates displayed strong biofilm-forming abilities and exhibited resistance to copper, zinc, and iron, in addition to carrying multiple genes associated with metal resistance and the formation of biofilms. The study also encompassed a pangenome analysis utilizing a dataset of eighty-six publicly available P. stuartii genomes (n = 86), revealing evidence of an open or expanding pangenome for P. stuartii. Also, an extensive genome-wide analysis of all the P. 
stuartii genomes revealed a concerning global prevalence of diverse antimicrobial resistance genes, with a particular alarm raised over the abundance of carbapenem resistance gene blaNDM-1. Additionally, this study highlighted the notable genetic diversity within P. stuartii, significant information about phylogenomic relationships and ancestry, as well as potential for cross-species transmission, raising important implications for public health and microbial adaptation across different environments.}, } @article {pmid38271481, year = {2024}, author = {Barbitoff, YA and Ushakov, MO and Lazareva, TE and Nasykhova, YA and Glotov, AS and Predeus, AV}, title = {Bioinformatics of germline variant discovery for rare disease diagnostics: current approaches and remaining challenges.}, journal = {Briefings in bioinformatics}, volume = {25}, number = {2}, pages = {}, doi = {10.1093/bib/bbad508}, pmid = {38271481}, issn = {1477-4054}, support = {075-15-2021-1058//Ministry of Science and Higher Education of Russian Federation/ ; }, abstract = {Next-generation sequencing (NGS) has revolutionized the field of rare disease diagnostics. Whole exome and whole genome sequencing are now routinely used for diagnostic purposes; however, the overall diagnosis rate remains lower than expected. In this work, we review current approaches used for calling and interpretation of germline genetic variants in the human genome, and discuss the most important challenges that persist in the bioinformatic analysis of NGS data in medical genetics. We describe and attempt to quantitatively assess the remaining problems, such as the quality of the reference genome sequence, reproducible coverage biases, or variant calling accuracy in complex regions of the genome. We also discuss the prospects of switching to the complete human genome assembly or the human pan-genome and important caveats associated with such a switch. 
We touch on arguably the hardest problem of NGS data analysis for medical genomics, namely, the annotation of genetic variants and their subsequent interpretation. We highlight the most challenging aspects of annotation and prioritization of both coding and non-coding variants. Finally, we demonstrate the persistent prevalence of pathogenic variants in the coding genome, and outline research directions that may enhance the efficiency of NGS-based disease diagnostics.}, } @article {pmid38270699, year = {2024}, author = {Singh, S and Singh, R and Priyadarsini, S and Ola, AL}, title = {Genomics empowering conservation action and improvement of celery in the face of climate change.}, journal = {Planta}, volume = {259}, number = {2}, pages = {42}, pmid = {38270699}, issn = {1432-2048}, abstract = {Integration of genomic approaches like whole genome sequencing, functional genomics, evolutionary genomics, and CRISPR/Cas9-based genome editing has accelerated the improvement of crop plants including leafy vegetables like celery in the face of climate change. The anthropogenic climate change is a real peril to the existence of life forms on our planet, including human and plant life. Climate change is predicted to be a significant threat to biodiversity and food security in the coming decades and is rapidly transforming global farming systems. To avoid the ghastly future in the face of climate change, the elucidation of shifts in the geographical range of plant species, species adaptation, and evolution is necessary for plant scientists to develop climate-resilient strategies. In the post-genomics era, the increasing availability of genomic resources and integration of multifaceted genomics elements is empowering biodiversity conservation action, restoration efforts, and identification of genomic regions adaptive to climate change. 
Genomics has accelerated the true characterization of crop wild relatives, genomic variations, and the development of climate-resilient varieties to ensure food security for 10 billion people by 2050. In this review, we have summarized the applications of multifaceted genomic tools, like conservation genomics, whole genome sequencing, functional genomics, genome editing, pangenomics, in the conservation and adaptation of plant species with a focus on celery, an aromatic and medicinal Apiaceae vegetable. We focus on how conservation scientists can utilize genomics and genomic data in conservation and improvement.}, } @article {pmid38268053, year = {2024}, author = {Uruén, C and Fernandez, A and Arnal, JL and Del Pozo, M and Amoribieta, MC and de Blas, I and Jurado, P and Calvo, JH and Gottschalk, M and González-Vázquez, LD and Arenas, M and Marín, CM and Arenas, J}, title = {Genomic and phenotypic analysis of invasive Streptococcus suis isolated in Spain reveals genetic diversification and associated virulence traits.}, journal = {Veterinary research}, volume = {55}, number = {1}, pages = {11}, pmid = {38268053}, issn = {1297-9716}, support = {PID2020-114617RB-100//Ministerio de Ciencia e Innovación/Agencia Española de investigación/ ; LMP58_21//Departamento de Educación, Cultura y Deporte, Gobierno de Aragón/ ; }, abstract = {Streptococcus suis is a zoonotic pathogen that causes a major health problem in the pig production industry worldwide. Spain is one of the largest pig producers in the world. This work aimed to investigate the genetic and phenotypic features of invasive S. suis isolates recovered in Spain. A panel of 156 clinical isolates recovered from 13 Autonomous Communities, representing the major pig producers, were analysed. MLST and serotyping analysis revealed that most isolates (61.6%) were assigned to ST1 (26.3%), ST123 (18.6%), ST29 (9.6%), and ST3 (7.1%). 
Interestingly, 34 new STs were identified, indicating the emergence of novel genetic lineages. Serotypes 9 (27.6%) and 1 (21.8%) prevailed, followed by serotypes 7 (12.8%) and 2 (12.2%). Analysis of 13 virulence-associated genes showed significant associations between ST, serotype, virulence patterns, and clinical features, evidencing particular virulence traits associated with genetic clusters. The pangenome was generated, and the core genome was distributed in 7 Bayesian groups where each group included a variable set of over- and under-represented genes of different categories. The study provides comprehensive data and knowledge to improve the design of new vaccines, antimicrobial treatments, and bacterial typing approaches.}, } @article {pmid38265421, year = {2024}, author = {Kothe, CI and Monnet, C and Irlinger, F and Virsolvy, M and Frühling, A and Neumann-Schaal, M and Wolf, J and Renault, P}, title = {Halomonas citrativorans sp. nov., Halomonas casei sp. nov. and Halomonas colorata sp. nov., isolated from French cheese rinds.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {74}, number = {1}, pages = {}, doi = {10.1099/ijsem.0.006234}, pmid = {38265421}, issn = {1466-5034}, abstract = {Eight Gram-stain-negative bacterial strains were isolated from cheese rinds sampled in France. On the basis of 16S rRNA gene sequence analysis, all isolates were assigned to the genus Halomonas. Phylogenetic investigations, including 16S rRNA gene studies, multilocus sequence analysis, reconstruction of a pan-genome phylogenetic tree with the concatenated core-genome content and average nucleotide identity (ANI) calculations, revealed that they constituted three novel and well-supported clusters. 
The closest relative species, determined using the whole-genome sequences of the strains, were Halomonas zhanjiangensis for two groups of cheese strains, sharing 82.4 and 93.1 % ANI, and another cluster sharing 92.2 % ANI with the Halomonas profundi type strain. The strains isolated herein differed from the previously described species by ANI values <95 % and several biochemical, enzymatic and colony characteristics. The results of phenotypic, phylogenetic and chemotaxonomic analyses indicated that the isolates belonged to three novel Halomonas species, for which the names Halomonas citrativorans sp. nov., Halomonas casei sp. nov. and Halomonas colorata sp. nov. are proposed, with isolates FME63[T] (=DSM 113315[T]=CIRM-BIA2430[T]=CIP 111880[T]=LMG 32013[T]), FME64[T] (=DSM 113316[T]=CIRM-BIA2431[T]=CIP 111877[T]=LMG 32015[T]) and FME66[T] (=DSM 113318[T]=CIRM-BIA2433[T]=CIP 111876[T]=LMG 32014[T]) as type strains, respectively.}, } @article {pmid38261993, year = {2024}, author = {Teyssonniere, EM and Shichino, Y and Mito, M and Friedrich, A and Iwasaki, S and Schacherer, J}, title = {Translation variation across genetic backgrounds reveals a post-transcriptional buffering signature in yeast.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae030}, pmid = {38261993}, issn = {1362-4962}, support = {772505/ERC_/European Research Council/International ; S10 OD018174/CD/ODCDC CDC HHS/United States ; }, abstract = {Gene expression is known to vary among individuals, and this variability can impact the phenotypic diversity observed in natural populations. While the transcriptome and proteome have been extensively studied, little is known about the translation process itself. Here, we therefore performed ribosome and transcriptomic profiling on a genetically and ecologically diverse set of natural isolates of the Saccharomyces cerevisiae yeast. 
Interestingly, we found that the Euclidean distances between each profile and the expression fold changes in each pairwise isolate comparison were higher at the transcriptomic level. This observation clearly indicates that the transcriptional variation observed in the different isolates is buffered through a phenomenon known as post-transcriptional buffering at the translation level. Furthermore, this phenomenon seemed to have a specific signature by preferentially affecting essential genes as well as genes involved in complex-forming proteins, and low transcribed genes. We also explored the translation of the S. cerevisiae pangenome and found that the accessory genes related to introgression events displayed similar transcription and translation levels as the core genome. By contrast, genes acquired through horizontal gene transfer events tended to be less efficiently translated. Together, our results highlight both the extent and signature of the post-transcriptional buffering.}, } @article {pmid38260597, year = {2024}, author = {Villani, F and Guarracino, A and Ward, RR and Green, T and Emms, M and Pravenec, M and Prins, P and Garrison, E and Williams, RW and Chen, H and Colonna, V}, title = {Pangenome reconstruction in rats enhances genotype-phenotype mapping and novel variant discovery.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.01.10.575041}, pmid = {38260597}, abstract = {The HXB/BXH family of recombinant inbred rat strains is a unique genetic resource that has been extensively phenotyped over 25 years, resulting in a vast dataset of quantitative molecular and physiological phenotypes. We built a pangenome graph from 10x Genomics linked-read data for 31 recombinant inbred rats to study genetic variation and association mapping. 
The pangenome length was on average 2.4 times greater than the corresponding length of the reference mRatBN7.2, confirming the capture of substantial additional variation. We validated variants in challenging regions, including complex structural variants resolving into multiple haplotypes. Phenome-wide association analysis of validated SNPs uncovered variants associated with glucose/insulin levels and hippocampal gene expression. We propose an interaction between Pirl1l1, Cromogranine expression, TNF-α levels, and insulin regulation. This study demonstrates the utility of linked-read pangenomes for comprehensive variant detection and mapping phenotypic diversity in a widely used rat genetic reference panel.}, } @article {pmid38259089, year = {2024}, author = {Chen, F and Yin, Y and Chen, H and Wang, R and Wang, S and Wang, H}, title = {Global genetic diversity and Asian clades evolution: a phylogeographic study of Staphylococcus aureus sequence type 5.}, journal = {Antimicrobial agents and chemotherapy}, volume = {}, number = {}, pages = {e0117523}, doi = {10.1128/aac.01175-23}, pmid = {38259089}, issn = {1098-6596}, abstract = {Staphylococcus aureus sequence type (ST) 5 has spread worldwide; however, phylogeographic studies on the evolution of global phylogenetic and Asian clades of ST5 are lacking. This study included 368 ST5 genome sequences, including 111 newly generated sequences. Primary phylogenetic analysis suggested that there are five clades, and geographical clustering of ST5 methicillin-resistant S. aureus (MRSA) was linked to the acquisition of S. aureus pathogenicity islands (SaPIs; enterotoxin gene island) and integration of the prophage φSa3. The most recent common ancestor of global S. aureus ST5 dates back to the mid-1940s, coinciding with the clinical introduction of penicillin. Bayesian phylogeographic inference allowed to ancestrally trace the Asian ST5 MRSA clade to Japan, which may have spread to major cities in China and Korea in the 1990s. 
Based on a pan-genome-wide association study, the emergence of Asian ST5 clades was attributed to the gain of prophages, SaPIs, and plasmids, as well as the coevolution of resistance genes. Clade IV displayed greater genomic diversity than the Asian MRSA clades. Collectively, our study provides in-depth insights into the global evolution of S. aureus ST5 mainly in China and the United States and reveals that different S. aureus ST5 clades have arisen independently in different parts of the world, with limited geographic dispersal across continents.}, } @article {pmid38257915, year = {2023}, author = {Afordoanyi, DM and Akosah, YA and Shnakhova, L and Saparmyradov, K and Diabankana, RGC and Validov, S}, title = {Biotechnological Key Genes of the Rhodococcus erythropolis MGMM8 Genome: Genes for Bioremediation, Antibiotics, Plant Protection, and Growth Stimulation.}, journal = {Microorganisms}, volume = {12}, number = {1}, pages = {}, doi = {10.3390/microorganisms12010088}, pmid = {38257915}, issn = {2076-2607}, support = {RF-1930.61321X0001/15.IP.21.0020//Ministry of Education 362 and Science of the Russian Federation/ ; }, abstract = {Anthropogenic pollution, including residues from the green revolution initially aimed at addressing food security and healthcare, has paradoxically exacerbated environmental challenges. The transition towards comprehensive green biotechnology and bioremediation, achieved with lower financial investment, hinges on microbial biotechnology, with the Rhodococcus genus emerging as a promising contender. The significance of fully annotating genome sequences lies in comprehending strain constituents, devising experimental protocols, and strategically deploying these strains to address pertinent issues using pivotal genes. This study revolves around Rhodococcus erythropolis MGMM8, an associate of winter wheat plants in the rhizosphere. 
Through the annotation of its chromosomal genome and subsequent comparison with other strains, its potential applications were explored. Using the antiSMASH server, 19 gene clusters were predicted, encompassing genes responsible for antibiotics and siderophores. Antibiotic resistance evaluation via the Comprehensive Antibiotic Resistance Database (CARD) identified five genes (vanW, vanY, RbpA, iri, and folC) that were parallel to strain CCM2595. Leveraging the NCBI Prokaryotic Genome Annotation Pipeline (PGAP) for biodegradation, heavy metal resistance, and remediation genes, the presence of chlorimuron-ethyl, formaldehyde, benzene-desulfurization degradation genes, and heavy metal-related genes (ACR3, arsC, corA, DsbA, modA, and recG) in MGMM8 was confirmed. Furthermore, quorum-quenching signal genes, critical for curbing biofilm formation and virulence elicited by quorum-sensing in pathogens, were also discerned within MGMM8's genome. In light of these predictions, the novel isolate MGMM8 warrants phenotypic assessment to gauge its potential in biocontrol and bioremediation. This evaluation extends to isolating active compounds for potential antimicrobial activities against pathogenic microorganisms. 
The comprehensive genome annotation process has facilitated the genetic characterization of MGMM8 and has solidified its potential as a biotechnological strain to address global anthropogenic predicaments.}, } @article {pmid38257891, year = {2023}, author = {Godoy, M and Montes de Oca, M and Suarez, R and Martinez, A and Pontigo, JP and Caro, D and Kusch, K and Coca, Y and Bohle, H and Bayliss, S and Kibenge, M and Kibenge, F}, title = {Genomics of Re-Emergent Aeromonas salmonicida in Atlantic Salmon Outbreaks.}, journal = {Microorganisms}, volume = {12}, number = {1}, pages = {}, doi = {10.3390/microorganisms12010064}, pmid = {38257891}, issn = {2076-2607}, abstract = {Furunculosis, caused by Aeromonas salmonicida, poses a significant threat to both salmonid and non-salmonid fish in diverse aquatic environments. This study explores the genomic intricacies of re-emergent A. salmonicida outbreaks in Atlantic salmon (Salmo salar). Previous clinical cases have exhibited pathological characteristics, such as periorbital hemorrhages and gastrointestinal abnormalities. Genomic sequencing of three Chilean isolates (ASA04, ASA05, and CIBA_5017) and 25 previously described genomes determined the pan-genome, phylogenomics, insertion sequences, and restriction-modification systems. Unique gene families have contributed to an improved understanding of the psychrophilic and mesophilic clades, while phylogenomic analysis has been used to identify mesophilic and psychrophilic strains, thereby further differentiating between typical and atypical psychrophilic isolates. Diverse insertion sequences and restriction-modification patterns have highlighted genomic structural differences, and virulence factor predictions can emphasize exotoxin disparities, especially between psychrophilic and mesophilic strains. Thus, a novel plasmid was characterized which emphasized the role of plasmids in virulence and antibiotic resistance. 
The analysis of antibiotic resistance factors revealed resistance against various drug classes in Chilean strains. Overall, this study elucidates the genomic dynamics of re-emergent A. salmonicida and provides novel insights into their virulence, antibiotic resistance, and population structure.}, } @article {pmid38254124, year = {2024}, author = {Fan, J and Khan, J and Singh, NP and Pibiri, GE and Patro, R}, title = {Fulgor: a fast and compact k-mer index for large-scale matching and color queries.}, journal = {Algorithms for molecular biology : AMB}, volume = {19}, number = {1}, pages = {3}, pmid = {38254124}, issn = {1748-7188}, support = {R01HG009937/NH/NIH HHS/United States ; }, abstract = {The problem of sequence identification or matching—determining the subset of reference sequences from a given collection that are likely to contain a short, queried nucleotide sequence—is relevant for many important tasks in Computational Biology, such as metagenomics and pangenome analysis. Due to the complex nature of such analyses and the large scale of the reference collections, a resource-efficient solution to this problem is of utmost importance. This poses the threefold challenge of representing the reference collection with a data structure that is efficient to query, has light memory usage, and scales well to large collections. To solve this problem, we describe an efficient colored de Bruijn graph index, arising as the combination of a k-mer dictionary with a compressed inverted index. The proposed index takes full advantage of the fact that unitigs in the colored compacted de Bruijn graph are monochromatic (i.e., all k-mers in a unitig have the same set of references of origin, or color). Specifically, the unitigs are kept in the dictionary in color order, thereby allowing for the encoding of the map from k-mers to their colors in as little as 1 + o(1) bits per unitig. Hence, one color per unitig is stored in the index with almost no space/time overhead. 
By combining this property with simple but effective compression methods for integer lists, the index achieves very small space. We implement these methods in a tool called Fulgor, and conduct an extensive experimental analysis to demonstrate the improvement of our tool over previous solutions. For example, compared to Themisto—the strongest competitor in terms of index space vs. query time trade-off—Fulgor requires significantly less space (up to 43% less space for a collection of 150,000 Salmonella enterica genomes), is at least twice as fast for color queries, and is 2-6× faster to construct.}, } @article {pmid38253726, year = {2024}, author = {Jeong, J and Ahn, S and Truong, TC and Kim, JH and Weerawongwiwat, V and Lee, JS and Yoon, JH and Sukhoom, A and Kim, W}, title = {Description of Mycolicibacterium arenosum sp. nov. Isolated from Coastal Sand on the Yellow Sea Coast.}, journal = {Current microbiology}, volume = {81}, number = {3}, pages = {73}, pmid = {38253726}, issn = {1432-0991}, support = {NIBR202102205//National Institute of Biological Resources/ ; NRF-2021R1C1C2003223//National Research Foundation of Korea/ ; 2017//Chung-Ang University/ ; }, abstract = {A Gram-staining-positive, aerobic, non-spore-forming bacterium was isolated from coastal sand samples from Incheon in the Republic of Korea and designated as strain CAU 1645[T]. The optimum conditions for growth were observed at 30 °C in growth media containing 1% (w/v) NaCl at pH 9.0. The predominant respiratory quinone was MK-9 and the major fatty acids were C16:0, C17:1 w7c, and summed feature 7. Similarly, the 16S rRNA gene sequence exhibited the highest similarity with Mycolicibacterium bacteremicum DSM 45578[T] and Mycolicibacterium neoaurum JCM 6365[T], both of which exhibited similarity rates of 97.2%. The genomic DNA G+C content was 68.2%. The whole genome of strain CAU 1645[T] was obtained and annotated using the RAST server. 
The pan-genome analysis was determined using Prokka, Roary, and Phandango. In the pan-genome analysis, the strain CAU 1645[T] shared 40 core genes with closely related Mycolicibacterium species, including the AcpM gene, the meromycolate extension acyl carrier protein involved in forming impermeable cell walls in mycobacteria. Therefore, our findings demonstrated that the isolate represents a novel species of the genus Mycolicibacterium, for which we propose the name Mycolicibacterium arenosum sp. nov. The type strain is CAU 1645[T] (= KCTC 49724[T] = MCCC 1K07087[T]).}, } @article {pmid38253606, year = {2024}, author = {Groza, C and Schwendinger-Schreck, C and Cheung, WA and Farrow, EG and Thiffault, I and Lake, J and Rizzo, WB and Evrony, G and Curran, T and Bourque, G and Pastinen, T}, title = {Pangenome graphs improve the analysis of structural variants in rare genetic diseases.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {657}, pmid = {38253606}, issn = {2041-1723}, abstract = {Rare DNA alterations that cause heritable diseases are only partially resolvable by clinical next-generation sequencing due to the difficulty of detecting structural variation (SV) in all genomic contexts. Long-read, high fidelity genome sequencing (HiFi-GS) detects SVs with increased sensitivity and enables assembling personal and graph genomes. We leverage standard reference genomes, public assemblies (n = 94) and a large collection of HiFi-GS data from a rare disease program (Genomic Answers for Kids, GA4K, n = 574 assemblies) to build a graph genome representing a unified SV callset in GA4K, identify common variation and prioritize SVs that are more likely to cause genetic disease (MAF < 0.01). Using graphs, we obtain a higher level of reproducibility than the standard reference approach. We observe over 200,000 SV alleles unique to GA4K, including nearly 1000 rare variants that impact coding sequence. 
With improved specificity for rare SVs, we isolate 30 candidate SVs in phenotypically prioritized genes, including known disease SVs. We isolate a novel diagnostic SV in KMT2E, demonstrating use of personal assemblies coupled with pangenome graphs for rare disease genomics. The community may interrogate our pangenome with additional assemblies to discover new SVs within the allele frequency spectrum relevant to genetic diseases.}, } @article {pmid38249481, year = {2023}, author = {Deng, Y and Jiang, ZM and Han, XF and Su, J and Yu, LY and Liu, WH and Zhang, YQ}, title = {Corrigendum: Pangenome analysis of the genus Herbiconiux and proposal of four new species associated with Chinese medicinal plants.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1295710}, doi = {10.3389/fmicb.2023.1295710}, pmid = {38249481}, issn = {1664-302X}, abstract = {[This corrects the article DOI: 10.3389/fmicb.2023.1119226.].}, } @article {pmid38246550, year = {2024}, author = {Song, Z and Ge, Y and Yu, X and Liu, R and Liu, C and Cheng, K and Guo, L and Yao, S}, title = {Development of a SNP-based strain-identified method for Streptococcus thermophilus CICC 6038 and Lactobacillus delbrueckii ssp. bulgaricus CICC 6047 using pan-genomics analysis.}, journal = {Journal of dairy science}, volume = {}, number = {}, pages = {}, doi = {10.3168/jds.2023-23655}, pmid = {38246550}, issn = {1525-3198}, abstract = {The health benefits conferred by probiotics are specific to individual probiotic strains, highlighting the importance of identifying specific strains for research and production purposes. Streptococcus thermophilus CICC 6038 and Lactobacillus delbrueckii ssp. bulgaricus CICC 6047 are exceedingly valuable for commercial use with an excellent mixed-culture fermentation. To differentiate these 2 strains from other S. thermophilus and L. delbrueckii ssp. bulgaricus, a specific, sensitive, accurate, rapid, convenient, and cost-effective method is required. 
In this study, we conducted a pan-genome analysis of S. thermophilus and L. delbrueckii ssp. bulgaricus to identify species-specific core genes, along with strain-specific single-nucleotide polymorphisms (SNPs). These genes were used to develop suitable PCR primers, and the conformity of sequence length and unique SNPs was confirmed by sequencing for qualitative identification at the strain level. The results demonstrated that SNPs analysis of PCR products derived from these primers could distinguish CICC 6038 and CICC 6047 accurately and reproducibly from the other strains of S. thermophilus and L. delbrueckii ssp. bulgaricus, respectively. The strain-specific PCR method based on SNPs herein is universally applicable for probiotics identification. It offers valuable insights into identifying probiotics at the strain level that is fit-for-purpose in quality control and compliance assessment of commercial dairy products.}, } @article {pmid38238664, year = {2024}, author = {Peng, M and Lin, W and Zhou, A and Jiang, Z and Zhou, F and Wang, Z}, title = {High genetic diversity and different type VI secretion systems in Enterobacter species revealed by comparative genomics analysis.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {26}, pmid = {38238664}, issn = {1471-2180}, support = {32200094//National Natural Science Foundation of China/ ; PT012201//Hubei Key Laboratory of Biological Resources Protection and Utilization (Hubei Minzu University)/ ; 2022CFB674//Natural Science Foundation of Hubei Province/ ; }, abstract = {The human-pathogenic Enterobacter species are widely distributed in diverse environmental conditions, however, the understanding of the virulence factors and genetic variations within the genus is very limited. 
In this study, we performed comparative genomics analysis of 49 strains originating from diverse niches and belonging to eight Enterobacter species, in order to further understand the mechanism of adaptation to the environment in Enterobacter. The results showed that they had an open pan-genome and high genomic diversity which allowed adaptation to distinctive ecological niches. We found the number of secretion systems was the highest among various virulence factors in these Enterobacter strains. Three types of T6SS gene clusters including T6SS-A, T6SS-B and T6SS-C were detected in most Enterobacter strains. T6SS-A and T6SS-B shared 13 specific core genes, but they had different gene structures, suggesting they probably have different biological functions. Notably, T6SS-C was restricted to E. cancerogenus. We detected a T6SS gene cluster, highly similar to T6SS-C (91.2%), in the remotely related Citrobacter rodentium, suggesting that this unique gene cluster was probably acquired by horizontal gene transfer. The genomes of Enterobacter strains possess high genetic diversity, a limited number of conserved core genes, and multiple copies of T6SS gene clusters with differentiated structures, suggesting that the origins of T6SS were not by duplication but instead by independent acquisition. 
These findings provide valuable information for better understanding of the functional features of Enterobacter species and their evolutionary relationships.}, } @article {pmid38230932, year = {2024}, author = {Silva-Pereira, TT and Soler-Camargo, NC and Guimarães, AMS}, title = {Diversification of gene content in the Mycobacterium tuberculosis complex is determined by phylogenetic and ecological signatures.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0228923}, doi = {10.1128/spectrum.02289-23}, pmid = {38230932}, issn = {2165-0497}, abstract = {In this study, we analyzed the gene content of different ecotypes of the Mycobacterium tuberculosis complex (MTBC), the pathogens of tuberculosis. We found that changes in their gene content are associated with their ecological features, such as host preference. Gene loss was identified as the primary driver of these changes, which can vary even among different strains of the same ecotype. Our study also revealed that the gene content relatedness of these bacteria does not always mirror their evolutionary relationships. In addition, some genes of virulence can be variably lost among strains of the same MTBC ecotype, likely helping them to evade the immune system. Overall, our study highlights the importance of understanding how gene loss can lead to new adaptations in these bacteria and how different selective pressures may influence their genetic makeup.}, } @article {pmid38229335, year = {2024}, author = {Venkatachalam, S and Jabir, T and Vipindas, PV and Krishnan, KP}, title = {Ecological significance of Candidatus ARS69 and Gemmatimonadota in the Arctic glacier foreland ecosystems.}, journal = {Applied microbiology and biotechnology}, volume = {108}, number = {1}, pages = {128}, pmid = {38229335}, issn = {1432-0614}, abstract = {The Gemmatimonadota phylum has been widely detected in diverse natural environments, yet their specific ecological roles in many habitats remain poorly investigated. 
Similarly, the Candidatus ARS69 phylum has been identified only in a few habitats, and literature on their metabolic functions is relatively scarce. In the present study, we investigated the ecological significance of phyla Ca. ARS69 and Gemmatimonadota in the Arctic glacier foreland (GF) ecosystems through genome-resolved metagenomics. We have reconstructed the first high-quality metagenome-assembled genome (MAG) belonging to Ca. ARS69 and 12 other MAGs belonging to phylum Gemmatimonadota from the three different Arctic GF samples. We further elucidated these two groups' phylogenetic lineage and their metabolic function through phylogenomic and pangenomic analysis. The analysis showed that all the reconstructed MAGs potentially belonged to novel species. The MAGs belonging to Ca. ARS69 consist of about 8296 gene clusters, of which only about 8% of single-copy core genes (n = 980) were shared among them. The study also revealed the potential ecological role of Ca. ARS69 is associated with carbon fixation, denitrification, sulfite oxidation, and reduction biochemical processes in the GF ecosystems. Similarly, the study demonstrates the widespread distribution of different classes of Gemmatimonadota across wide ranges of ecosystems and their metabolic functions, including in the polar region. KEY POINTS: • Glacier foreland ecosystems act as a natural laboratory to study microbial community structure. • We have reconstructed 13 metagenome-assembled genomes from the soil samples. • All the reconstructed MAGs belonged to novel species with different metabolic processes. • Ca. 
ARS69 and Gemmatimonadota MAGs were found to participate in carbon fixation and denitrification processes.}, } @article {pmid38225047, year = {2024}, author = {Han, DM and Baek, JH and Choi, DG and Jeon, MS and Eyun, SI and Jeon, CO}, title = {Comparative pangenome analysis of Aspergillus flavus and Aspergillus oryzae reveals their phylogenetic, genomic, and metabolic homogeneity.}, journal = {Food microbiology}, volume = {119}, number = {}, pages = {104435}, doi = {10.1016/j.fm.2023.104435}, pmid = {38225047}, issn = {1095-9998}, abstract = {Aspergillus flavus and Aspergillus oryzae are closely related fungal species with contrasting roles in food safety and fermentation. To comprehensively investigate their phylogenetic, genomic, and metabolic characteristics, we conducted an extensive comparative pangenome analysis using complete, dereplicated genome sets for both species. Phylogenetic analyses, employing both the entirety of the identified single-copy orthologous genes and six housekeeping genes commonly used for fungal classification, did not reveal clear differentiation between A. flavus and A. oryzae genomes. Upon analyzing the aflatoxin biosynthesis gene clusters within the genomes, we observed that non-aflatoxin-producing strains were dispersed throughout the phylogenetic tree, encompassing both A. flavus and A. oryzae strains. This suggests that aflatoxin production is not a distinguishing trait between the two species. Furthermore, A. oryzae and A. flavus strains displayed remarkably similar genomic attributes, including genome sizes, gene contents, and G + C contents, as well as metabolic features and pathways. The profiles of CAZyme genes and secondary metabolite biosynthesis gene clusters within the genomes of both species further highlight their similarity. Collectively, these findings challenge the conventional differentiation of A. flavus and A. 
oryzae as distinct species and highlight their phylogenetic, genomic, and metabolic homogeneity, potentially indicating that they may indeed belong to the same species.}, } @article {pmid38224489, year = {2024}, author = {Wendisch, VF and Brito, LF and Passaglia, LMP}, title = {Genome-based analyses to learn from and about Paenibacillus sonchi genomovar Riograndensis SBR5T.}, journal = {Genetics and molecular biology}, volume = {46}, number = {3 Suppl 1}, pages = {e20230115}, doi = {10.1590/1678-4685-GMB-2023-0115}, pmid = {38224489}, issn = {1415-4757}, abstract = {Paenibacillus sonchi genomovar Riograndensis SBR5T is a plant growth-promoting rhizobacterium (PGPR) isolated in the Brazilian state of Rio Grande do Sul from the rhizosphere of Triticum aestivum. It fixes nitrogen, produces siderophores as well as the phytohormone indole-3-acetic acid, solubilizes phosphate and displays antagonist activity against Listeria monocytogenes and Pectobacterium carotovorum. Comprehensive omics analysis and the development of genetic tools are key to characterizing and engineering such non-model microorganisms. Therefore, the complete genome of SBR5T was sequenced, and shown to encode 6,705 proteins, 87 tRNAs, and 27 rRNAs and it enabled a landscape transcriptome analysis that unveiled conserved transcriptional and translational patterns and characterized operon structures and riboswitches. The pangenome of P. sonchi species is open with a stable core pangenome. At the same time, the analysis of genes coding for nitrogenases revealed that the trait of nitrogen fixation is sparse within the Paenibacillaceae family and the presence of Fe-only nitrogenase in the P. sonchi group was exclusive to SBR5T. The development of genetic tools for SBR5T enabled genetic transformation, plasmid construction for constitutive and inducible gene expression, and gene repression using the CRISPRi system. Altogether, the work with P. 
sonchi can guide the study of non-model bacteria with economic potential.}, } @article {pmid38217963, year = {2024}, author = {Monterrubio-López, GP and Llamas-Monroy, JL and Martínez-Gómez, ÁA and Delgadillo-Gutiérrez, K}, title = {Novel vaccine candidates of Bordetella pertussis identified by reverse vaccinology.}, journal = {Biologicals : journal of the International Association of Biological Standardization}, volume = {85}, number = {}, pages = {101740}, doi = {10.1016/j.biologicals.2023.101740}, pmid = {38217963}, issn = {1095-8320}, abstract = {Whooping cough is a disease caused by Bordetella pertussis, whose morbidity has increased, motivating the improvement of current vaccines. Reverse vaccinology is a strategy that helps identify proteins with good characteristics fast and with fewer resources. In this work, we applied reverse vaccinology to study the B. pertussis proteome and pangenome with several in-silico tools. We analyzed the B. pertussis Tohama I proteome with NERVE software and compared 234 proteins with B. parapertussis, B. bronchiseptica, and B. holmesii. VaxiJen was used to calculate an antigenicity value; our threshold was 0.6, selecting 84 proteins. The candidates were depurated and grouped in eight family proteins to select representative candidates, according to bibliographic information and their immunological response predicted with ABCpred, Bcepred, IgPred, and C-ImmSim. Additionally, a pangenome study was conducted with 603 B. pertussis strains and PanRV software, identifying 3421 core proteins that were analyzed to select the best candidates. 
Finally, we selected 15 proteins from the proteome study and seven proteins from the pangenome analysis as good vaccine candidates.}, } @article {pmid38216873, year = {2024}, author = {Yang, Z and Yang, X and Wang, M and Jia, R and Chen, S and Liu, M and Zhao, X and Yang, Q and Wu, Y and Zhang, S and Huang, J and Ou, X and Mao, S and Gao, Q and Sun, D and Tian, B and Zhu, D and Cheng, A}, title = {Genome-wide association study reveals serovar-associated genetic loci in Riemerella anatipestifer.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {57}, pmid = {38216873}, issn = {1471-2164}, abstract = {BACKGROUND: The disease caused by Riemerella anatipestifer (R. anatipestifer, RA) results in large economic losses to the global duck industry every year. Serovar-related genomic variation, such as the O-antigen and capsular polysaccharide (CPS) gene clusters, has been widely used for serotyping in many gram-negative bacteria. RA has been classified into at least 21 serovars based on slide agglutination, but the molecular basis of serotyping is unknown. In this study, we performed a pan-genome-wide association study (Pan-GWAS) to identify the genetic loci associated with RA serovars.

RESULTS: The results revealed a significant association between the putative CPS synthesis gene locus and the serological phenotype. Further characterization of the CPS gene clusters in 11 representative serovar strains indicated that they were highly diverse and serovar-specific. The CPS gene cluster contained the key genes wzx and wzy, which are involved in the Wzx/Wzy-dependent pathway of CPS synthesis. Similar CPS loci have been found in some other species within the family Weeksellaceae. We have also shown that deletion of the wzy gene in RA results in capsular defects and cross-agglutination.

CONCLUSIONS: This study indicates that the CPS synthesis gene cluster of R. anatipestifer is a serotype-specific genetic locus. Importantly, our finding provides a new perspective for the systematic analysis of the genetic basis of the R. anatipestifer serovars and a potential target for establishing a complete molecular serotyping scheme.}, } @article {pmid38216606, year = {2024}, author = {Schreiber, M and Wonneberger, R and Haaning, AM and Coulter, M and Russell, J and Himmelbach, A and Fiebig, A and Muehlbauer, GJ and Stein, N and Waugh, R}, title = {Genomic resources for a historical collection of cultivated two-row European spring barley genotypes.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {66}, pmid = {38216606}, issn = {2052-4463}, support = {BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; MU 3589/1-1//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; MU 3589/1-1//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; 1844331//National Science Foundation (NSF)/ ; 1844331//National Science Foundation (NSF)/ ; }, abstract = {Barley genomic resources are increasing rapidly, with the publication of a barley pangenome as one of the latest developments. Two-row spring barley cultivars are intensely studied as they are the source of high-quality grain for malting and distilling. Here we provide data from a European two-row spring barley population containing 209 different genotypes registered for the UK market between 1830 and 2014. 
The dataset encompasses RNA-sequencing data from six different tissues across a range of barley developmental stages, phenotypic datasets from two consecutive years of field-grown trials in the United Kingdom, Germany and the USA; and whole genome shotgun sequencing from all cultivars, which was used to complement the RNA-sequencing data for variant calling. The outcomes are a filtered SNP marker file, a phenotypic database and a large gene expression dataset providing a comprehensive resource which allows for downstream analyses like genome wide association studies or expression associations.}, } @article {pmid38214698, year = {2024}, author = {Park, S and Kim, I and Chhetri, G and Jung, Y and Woo, H and Seo, T}, title = {Cellulomonas alba sp. nov. and Cellulomonas edaphi sp. nov., isolated from wetland soils.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {74}, number = {1}, pages = {}, doi = {10.1099/ijsem.0.006235}, pmid = {38214698}, issn = {1466-5034}, abstract = {Two novel strains were isolated from wetland soils in Goyang, Republic of Korea. The two Gram-stain-positive, facultatively anaerobic, rod-shaped bacterial-type strains were designated MW4[T] and MW9[T]. Phylogenomic analysis based on whole-genome sequences suggested that both strains belonged to the genus Cellulomonas. The cells of strain MW4[T] were non-motile and grew at 20-40 °C (optimum, 35 °C), at pH 6.0-10.0 (optimum, pH 8.0) and in the presence of 0-1.0% NaCl (optimum, 0 %). The cells of strain MW9[T] were non-motile and grew at 20-40 °C (optimum, 35 °C), at pH 5.0-9.0 (optimum, pH 8.0) and in the presence of 0-1.0% NaCl (optimum, 0 %). The average nucleotide identity (77.1-88.1 %) and digital DNA-DNA hybridization values (21.0-34.8 %) between the two novel strains and with their closely related strains fell within the range for the genus Cellulomonas. 
The novel strains MW4[T] and MW9[T] and reference strains possessed alkane synthesis gene clusters (oleA, oleB, oleC and oleD). Phylogenomic, phylogenetic, average nucleotide identity, digital DNA-DNA hybridization, physiological and biochemical data indicated that the novel strains were distinct from other members of the family Cellulomonadaceae. We propose the names Cellulomonas alba sp. nov. (type strain MW4[T]=KACC 23260[T]=TBRC 17645[T]) and Cellulomonas edaphi sp. nov. (type strain MW9[T]=KACC 23261[T]=TBRC 17646[T]) for the two strains.}, } @article {pmid38213027, year = {2024}, author = {Ferrero-Serrano, Á and Chakravorty, D and Kirven, KJ and Assmann, SM}, title = {Oryza CLIMtools: A Genome-Environment Association Resource Reveals Adaptive Roles for Heterotrimeric G Proteins in the Regulation of Rice Agronomic Traits.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100813}, doi = {10.1016/j.xplc.2024.100813}, pmid = {38213027}, issn = {2590-3462}, abstract = {Modern crop varieties display a degree of mismatch between their current distributions and the suitability of the local climate for their productivity. To this end, we present Oryza CLIMtools (https://gramene.org/CLIMtools/oryza_v1.0/), the first resource for pan-genome prediction of climate-associated genetic variants in a crop species. Oryza CLIMtools consists of interactive web-based databases that allow the user to: i) explore the local environments of traditional rice varieties (landraces) in South-Eastern Asia, and; ii) investigate the environment by genome associations for 658 Indica and 283 Japonica rice landrace accessions collected from georeferenced local environments and included in the 3K Rice Genomes Project. We exemplify the value of these resources, identifying an interplay between flowering time and temperature in the local environment that is facilitated by adaptive natural variation in OsHD2 and disrupted by a natural variant in OsSOC1. 
Prior QTL analysis has suggested the importance of heterotrimeric G proteins in the control of agronomic traits. Accordingly, we analyzed the climate associations of natural variants in the different heterotrimeric G protein subunits. We identified a coordinated role of G proteins in adaptation to the prevailing Potential Evapotranspiration gradient and their regulation of key agronomic traits including plant height and seed and panicle length. We conclude by highlighting the prospect of targeting heterotrimeric G proteins to produce crops that are climate resilient.}, } @article {pmid38203838, year = {2024}, author = {Bin Hafeez, A and Pełka, K and Worobo, R and Szweda, P}, title = {In Silico Safety Assessment of Bacillus Isolated from Polish Bee Pollen and Bee Bread as Novel Probiotic Candidates.}, journal = {International journal of molecular sciences}, volume = {25}, number = {1}, pages = {}, doi = {10.3390/ijms25010666}, pmid = {38203838}, issn = {1422-0067}, abstract = {Bacillus species isolated from Polish bee pollen (BP) and bee bread (BB) were characterized for in silico probiotic and safety attributes. A probiogenomics approach was used, and in-depth genomic analysis was performed using a wide array of bioinformatics tools to investigate the presence of virulence and antibiotic resistance properties, mobile genetic elements, and secondary metabolites. Functional annotation and Carbohydrate-Active enZYmes (CAZYme) profiling revealed the presence of genes and a repertoire of probiotics properties promoting enzymes. The isolates BB10.1, BP20.15 (isolated from bee bread), and PY2.3 (isolated from bee pollen) genome mining revealed the presence of several genes encoding acid, heat, cold, and other stress tolerance mechanisms, adhesion proteins required to survive and colonize harsh gastrointestinal environments, enzymes involved in the metabolism of dietary molecules, antioxidant activity, and genes associated with the synthesis of vitamins. 
In addition, genes responsible for the production of biogenic amines (BAs) and D-/L-lactate, hemolytic activity, and other toxic compounds were also analyzed. Pan-genome analyses were performed with 180 Bacillus subtilis and 204 Bacillus velezensis genomes to mine for any novel genes present in the genomes of our isolates. Moreover, all three isolates also consisted of gene clusters encoding secondary metabolites.}, } @article {pmid38203357, year = {2023}, author = {Liu, K and Xu, H and Gao, X and Lu, Y and Wang, L and Ren, Z and Chen, C}, title = {Pan-Genome Analysis of TIFY Gene Family and Functional Analysis of CsTIFY Genes in Cucumber.}, journal = {International journal of molecular sciences}, volume = {25}, number = {1}, pages = {}, doi = {10.3390/ijms25010185}, pmid = {38203357}, issn = {1422-0067}, support = {32372703//the National Natural Science Foundation of China/ ; 32172605//the National Natural Science Foundation of China/ ; ZR2022MC084//the Shandong Natural Science Foundation/ ; }, abstract = {Cucumbers are frequently affected by gray mold pathogen Botrytis cinerea, a pathogen that causes inhibited growth and reduced yield. Jasmonic acid (JA) plays a primary role in plant responses to biotic stresses, and the jasmonate-ZIM-Domain (JAZ) proteins are key regulators of the JA signaling pathway. In this study, we used the pan-genome of twelve cucumber varieties to identify cucumber TIFY genes. Our findings revealed that two CsTIFY genes were present in all twelve cucumber varieties and showed no differences in protein sequence, gene structure, and motif composition. This suggests their evolutionary conservation across different cucumber varieties and implies that they may play a crucial role in cucumber growth. 
On the other hand, the other fourteen CsTIFY genes exhibited variations in protein sequence and gene structure or conserved motifs, which could be the result of divergent evolution, as these genes adapt to different cultivation and environmental conditions. Analysis of the expression profiles of the CsTIFY genes showed differential regulation by B. cinerea. Transient transfection plants overexpressing CsJAZ2, CsJAZ6, or CsZML2 were found to be more susceptible to B. cinerea infection compared to control plants. Furthermore, these plants infected by the pathogen showed lower levels of the enzymatic activities of POD, SOD and CAT. Importantly, after B. cinerea infection, the content of JA was upregulated in the plants, and cucumber cotyledons pretreated with exogenous MeJA displayed increased resistance to B. cinerea infection compared to those pretreated with water. Therefore, this study explored key TIFY genes in the regulation of cucumber growth and adaptability to different cultivation environments based on bioinformatics analysis and demonstrated that CsJAZs negatively regulate cucumber disease resistance to gray mold via multiple signaling pathways.}, } @article {pmid38200255, year = {2024}, author = {Sosinsky, A and Ambrose, J and Cross, W and Turnbull, C and Henderson, S and Jones, L and Hamblin, A and Arumugam, P and Chan, G and Chubb, D and Noyvert, B and Mitchell, J and Walker, S and Bowman, K and Pasko, D and Buongermino Pereira, M and Volkova, N and Rueda-Martin, A and Perez-Gil, D and Lopez, J and Pullinger, J and Siddiq, A and Zainy, T and Choudhury, T and Yavorska, O and Fowler, T and Bentley, D and Kingsley, C and Hing, S and Deans, Z and Rendon, A and Hill, S and Caulfield, M and Murugaesu, N}, title = {Insights for precision oncology from the integration of genomic and clinical data of 13,880 tumors from the 100,000 Genomes Cancer Programme.}, journal = {Nature medicine}, volume = {}, number = {}, pages = {}, pmid = {38200255}, issn = {1546-170X}, 
support = {C1298/A8362//Cancer Research UK (CRUK)/ ; C17422/A25154.//Cancer Research UK (CRUK)/ ; Barts Biomedical Research Centre//DH | National Institute for Health Research (NIHR)/ ; }, abstract = {The Cancer Programme of the 100,000 Genomes Project was an initiative to provide whole-genome sequencing (WGS) for patients with cancer, evaluating opportunities for precision cancer care within the UK National Healthcare System (NHS). Genomics England, alongside NHS England, analyzed WGS data from 13,880 solid tumors spanning 33 cancer types, integrating genomic data with real-world treatment and outcome data, within a secure Research Environment. Incidence of somatic mutations in genes recommended for standard-of-care testing varied across cancer types. For instance, in glioblastoma multiforme, small variants were present in 94% of cases and copy number aberrations in at least one gene in 58% of cases, while sarcoma demonstrated the highest occurrence of actionable structural variants (13%). Homologous recombination deficiency was identified in 40% of high-grade serous ovarian cancer cases with 30% linked to pathogenic germline variants, highlighting the value of combined somatic and germline analysis. The linkage of WGS and longitudinal life course clinical data allowed the assessment of treatment outcomes for patients stratified according to pangenomic markers. 
Our findings demonstrate the utility of linking genomic and real-world clinical data to enable survival analysis to identify cancer genes that affect prognosis and advance our understanding of how cancer genomics impacts patient outcomes.}, } @article {pmid38191433, year = {2024}, author = {Zhang, RY and Wang, YR and Liu, RL and Rhee, SK and Zhao, GP and Quan, ZX}, title = {Metagenomic characterization of a novel non-ammonia-oxidizing Thaumarchaeota from hadal sediment.}, journal = {Microbiome}, volume = {12}, number = {1}, pages = {7}, pmid = {38191433}, issn = {2049-2618}, support = {2021R1A2C3004015//National Research Foundation of Korea/ ; 2018YFC0310600//the National Key R&D Program of China/ ; 31870109, 31811540398//the National Natural Science Foundation of China (NSFC)/ ; }, abstract = {BACKGROUND: The hadal sediment, found at an ocean depth of more than 6000 m, is geographically isolated and under extremely high hydrostatic pressure, resulting in a unique ecosystem. Thaumarchaeota are ubiquitous marine microorganisms predominantly present in hadal environments. While there have been several studies on Thaumarchaeota there, most of them have primarily focused on ammonia-oxidizing archaea (AOA). However, systematic metagenomic research specifically targeting heterotrophic non-AOA Thaumarchaeota is lacking.

RESULTS: In this study, we explored the metagenomes of Challenger Deep hadal sediment, focusing on the Thaumarchaeota. Functional analysis of sequence reads revealed the potential contribution of Thaumarchaeota to recalcitrant dissolved organic matter degradation. Metagenome assembly binned one new group of hadal sediment-specific and ubiquitously distributed non-AOA Thaumarchaeota, named Group-3.unk. Pathway reconstruction of this new type of Thaumarchaeota also supports heterotrophic characteristics of Group-3.unk, along with ABC transporters for the uptake of amino acids and carbohydrates and catabolic utilization of these substrates. This new clade of Thaumarchaeota also contains aerobic oxidation of carbon monoxide-related genes. Complete glyoxylate cycle is a distinctive feature of this clade in supplying intermediates of anabolic pathways. The pan-genomic and metabolic analyses of metagenome-assembled genomes belonging to Group-3.unk Thaumarchaeota have highlighted distinctions, including the dihydroxy phthalate decarboxylase gene associated with the degradation of aromatic compounds and the absence of genes related to the synthesis of some types of vitamins compared to AOA. Notably, Group-3.unk shares a common feature with deep ocean AOA, characterized by their high hydrostatic pressure resistance, potentially associated with the presence of V-type ATP and di-myo-inositol phosphate syntheses-related genes. The enrichment of organic matter in hadal sediments might be attributed to the high recruitment of sequence reads of the Group-3.unk clade of heterotrophic Thaumarchaeota in the trench sediment. Evolutionary and genetic dynamic analyses suggest that Group-3 non-AOA consists of mesophilic Thaumarchaeota organisms. These results indicate a potential role in the transition from non-AOA to AOA Thaumarchaeota and from thermophilic to mesophilic Thaumarchaeota, shedding light on recent evolutionary pathways.

CONCLUSIONS: One novel clade of heterotrophic non-AOA Thaumarchaeota was identified through metagenome analysis of sediments from Challenger Deep. Our study provides insight into the ecology and genomic characteristics of the new sub-group of heterotrophic non-AOA Thaumarchaeota, thereby extending the knowledge of the evolution of Thaumarchaeota. Video Abstract.}, } @article {pmid38189173, year = {2024}, author = {Biderre-Petit, C and Courtine, D and Hennequin, C and Galand, PE and Bertilsson, S and Debroas, D and Monjot, A and Lepère, C and Divne, AM and Hochart, C}, title = {A pan-genomic approach reveals novel Sulfurimonas clade in the ferruginous meromictic Lake Pavin.}, journal = {Molecular ecology resources}, volume = {}, number = {}, pages = {e13923}, doi = {10.1111/1755-0998.13923}, pmid = {38189173}, issn = {1755-0998}, abstract = {The permanently anoxic waters in meromictic lakes create suitable niches for the growth of bacteria using sulphur metabolisms like sulphur oxidation. In Lake Pavin, the anoxic water mass hosts an active cryptic sulphur cycle that interacts narrowly with iron cycling, however the metabolisms of the microorganisms involved are poorly known. Here we combined metagenomics, single-cell genomics, and pan-genomics to further expand our understanding of the bacteria and the corresponding metabolisms involved in sulphur oxidation in this ferruginous sulphide- and sulphate-poor meromictic lake. We highlighted two new species within the genus Sulfurimonas that belong to a novel clade of chemotrophic sulphur oxidisers exclusive to freshwaters. 
We moreover conclude that this genus holds a key-role not only in limiting sulphide accumulation in the upper part of the anoxic layer but also constraining carbon, phosphate and iron cycling.}, } @article {pmid38188626, year = {2023}, author = {Karthik, K and Subramanian, S and Vinoli Priyadharshini, M and Jawahar, A and Anbazhagan, S and Kathiravan, RS and Thomas, P and Babu, RPA and Gopalan Tirumurugaan, K and Raj, GD}, title = {Whole genome sequencing and comparative genomics of Mycobacterium orygis isolated from different animal hosts to identify specific diagnostic markers.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1302393}, pmid = {38188626}, issn = {2235-2988}, abstract = {INTRODUCTION: Mycobacterium orygis, a member of MTBC has been identified in higher numbers in the recent years from animals of South Asia. Comparative genomics of this important zoonotic pathogen is not available which can provide data on the molecular difference between other MTBC members. Hence, the present study was carried out to isolate, whole genome sequence M. orygis from different animal species (cattle, buffalo and deer) and to identify molecular marker for the differentiation of M. orygis from other MTBC members.

METHODS: Isolation and whole genome sequencing of M. orygis was carried out for 9 samples (4 cattle, 4 deer and 1 buffalo) died due to tuberculosis. Comparative genomics employing 53 genomes (44 from database and 9 newly sequenced) was performed to identify SNPs, spoligotype, pangenome structure, and region of difference.

RESULTS: M. orygis was isolated from water buffalo and sambar deer which is the first of its kind report worldwide. Comparative pangenomics of all M. orygis strains worldwide (n= 53) showed a closed pangenome structure which is also reported for the first time. Pairwise SNP between TANUVAS_2, TANUVAS_4, TANUVAS_5, TANUVAS_7 and NIRTAH144 was less than 15 indicating that the same M. orygis strain may be the cause for infection. Region of difference prediction showed absence of RD7, RD8, RD9, RD10, RD12, RD301, RD315 in all the M. orygis analyzed. SNPs in virulence gene, PE35 was found to be unique to M. orygis which can be used as marker for identification.

CONCLUSION: The present study is yet another supportive evidence that M. orygis is more prevalent among animals in South Asia and the zoonotic potential of this organism needs to be evaluated.}, } @article {pmid38187556, year = {2023}, author = {Oles, RE and Terrazas, MC and Loomis, LR and Hsu, CY and Tribelhorn, C and Ferre, PB and Ea, A and Bryant, M and Young, J and Carrow, HC and Sandborn, WJ and Dulai, P and Sivagnanam, M and Pride, D and Knight, R and Chu, H}, title = {Pangenome comparison of Bacteroides fragilis genomospecies unveil genetic diversity and ecological insights.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.12.20.572674}, pmid = {38187556}, abstract = {Bacteroides fragilis is a Gram-negative commensal bacterium commonly found in the human colon that differentiates into two genomospecies termed division I and II. We leverage a comprehensive collection of 694 B. fragilis whole genome sequences and report differential gene abundance to further support the recent proposal that divisions I and II represent separate species. In division I strains, we identify an increased abundance of genes related to complex carbohydrate degradation, colonization, and host niche occupancy, confirming the role of division I strains as gut commensals. In contrast, division II strains display an increased prevalence of plant cell wall degradation genes and exhibit a distinct geographic distribution, primarily originating from Asian countries, suggesting dietary influences. Notably, division II strains have an increased abundance of genes linked to virulence, survival in toxic conditions, and antimicrobial resistance, consistent with a higher incidence of these strains in bloodstream infections. This study provides new evidence supporting a recent proposal for classifying divisions I and II B. 
fragilis strains as distinct species, and our comparative genomic analysis reveals their niche-specific roles.}, } @article {pmid38183874, year = {2023}, author = {Yu, K and Huang, Z and Xiao, Y and Gao, H and Bai, X and Wang, D}, title = {Global spread characteristics of CTX-M-type extended-spectrum β-lactamases: A genomic epidemiology analysis.}, journal = {Drug resistance updates : reviews and commentaries in antimicrobial and anticancer chemotherapy}, volume = {73}, number = {}, pages = {101036}, doi = {10.1016/j.drup.2023.101036}, pmid = {38183874}, issn = {1532-2084}, abstract = {BACKGROUND: Extended-spectrum β-lactamases (ESBLs) producing bacteria have spread worldwide and become a global public health concern. Plasmid-mediated transfer of ESBLs is an important route for resistance acquisition.

METHODS: We collected 1345 complete sequences of plasmids containing CTX-Ms from public database. The global transmission pattern of plasmids and evolutionary dynamics of CTX-Ms have been inferred. We applied the pan-genome clustering based on plasmid genomes and evolution analysis to demonstrate the transmission events.

FINDINGS: Totally, 48 CTX-Ms genotypes and 186 incompatible types of plasmids were identified. The geographical distribution of CTX-Ms showed significant differences across countries and continents. CTX-M-14 and CTX-M-55 were found to be the dominant genotypes in Asia, while CTX-M-1 played a leading role in Europe. The plasmids can be divided into 12 lineages, some of which forming distinct geographical clusters in Asia and Europe, while others forming hybrid populations. The Inc types of plasmids are lineage-specific, with the CTX-M-1_IncI1-I (Alpha) and CTX-M-65_IncFII (pHN7A8)/R being the dominant patterns of cross-host and cross-regional transmission. The IncI-I (Alpha) plasmids with the highest number, were presumed to form communication groups in Europe-Asia and Asia-America-Oceania, showing the transmission model as global dissemination and regional microevolution. Meanwhile, the main kinetic elements of blaCTX-Ms showed genotypic preferences. ISEcpl and IS26 were most frequently involved in the transfer of CTX-M-14 and CTX-M-65, respectively. IS15 has become a crucial participant in mediating the dissemination of blaCTX-Ms. Interestingly, blaTEM and blaCTX-Ms often coexisted in the same transposable unit. Furthermore, antibiotic resistance genes associated with aminoglycosides, sulfonamides and cephalosporins showed a relatively high frequency of synergistic effects with CTX-Ms.

CONCLUSIONS: We recognized the dominant blaCTX-Ms and mainstream plasmids of different continents. The results of this study provide support for a more effective response to the risks associated with the evolution of blaCTX-Ms-bearing plasmids, and lay the foundation for genotype-specific epidemiological surveillance of resistance, which are of important public health implications.}, } @article {pmid38181886, year = {2024}, author = {Verma, N and Sharma, T and Bhardwaj, A and Ramana, VV}, title = {Comparative genomics and characterization of a multidrug-resistant Acinetobacter baumannii VRL-M19 isolated from a crowded setting in India.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {}, number = {}, pages = {105549}, doi = {10.1016/j.meegid.2023.105549}, pmid = {38181886}, issn = {1567-7257}, abstract = {A crowded vegetable market serves as a mass gathering, posing a potential risk for infection transmission. In this study, we isolated a multidrug-resistant Acinetobacter baumannii strain, VRL-M19, from the air of such a market and conducted comparative genomics and phenotypic characterization. Antimicrobial susceptibility testing, genome sequencing using Illumina HiSeq X10, and pan-genome analysis with 788 clinical isolates identified core, accessory, and unique drug-resistant determinants. Mutational analysis of drug-resistance genes, virulence factor annotation, in vitro pathogenicity assessment, subsystem analysis, Multilocus sequence typing, and whole genome phylogenetic analysis were performed. VRL-M19 exhibited multidrug resistance with 69 determinants, and analysis across 788 clinical isolates and 350 Indian isolates revealed more accessory genes (52 out of 69) in the Indian isolates. Multiple mutations were observed in drug target modification genes, and the strain was identified as a moderate biofilm-former with 55 virulence factors. 
Whole genome phylogenetics indicated a close relationship between VRL-M19 and clinical A. baumannii strains. In conclusion, our comprehensive study suggests that VRL-M19 is a multidrug-resistant, potential pathogen with biofilm-forming capabilities, closely associated with clinical A. baumannii strains.}, } @article {pmid38177691, year = {2024}, author = {Domingo-Sananes, MR and Meehan, CJ}, title = {The population genetics of prokaryotic pangenomes.}, journal = {Nature ecology \& evolution}, volume = {}, number = {}, pages = {}, pmid = {38177691}, issn = {2397-334X}, } @article {pmid38177690, year = {2024}, author = {Douglas, GM and Shapiro, BJ}, title = {Pseudogenes act as a neutral reference for detecting selection in prokaryotic pangenomes.}, journal = {Nature ecology \& evolution}, volume = {}, number = {}, pages = {}, pmid = {38177690}, issn = {2397-334X}, abstract = {A long-standing question is to what degree genetic drift and selection drive the divergence in rare accessory gene content between closely related bacteria. Rare genes, including singletons, make up a large proportion of pangenomes (all genes in a set of genomes), but it remains unclear how many such genes are adaptive, deleterious or neutral to their host genome. Estimates of species' effective population sizes (Ne) are positively associated with pangenome size and fluidity, which has independently been interpreted as evidence for both neutral and adaptive pangenome models. We hypothesized that pseudogenes, used as a neutral reference, could be used to distinguish these models. We find that most functional categories are depleted for rare pseudogenes when a genome encodes only a single intact copy of a gene family. In contrast, transposons are enriched in pseudogenes, suggesting they are mostly neutral or deleterious to the host genome. 
Thus, even if individual rare accessory genes vary in their effects on host fitness, we can confidently reject a model of entirely neutral or deleterious rare genes. We also define the ratio of singleton intact genes to singleton pseudogenes (si/sp) within a pangenome, compare this measure across 668 prokaryotic species and detect a signal consistent with the adaptive value of many rare accessory genes. Taken together, our work demonstrates that comparing with pseudogenes can improve inferences of the evolutionary forces driving pangenome variation.}, } @article {pmid38173673, year = {2023}, author = {Sarr, M and Alou, MT and Padane, A and Diouf, FS and Beye, M and Sokhna, C and Fenollar, F and Mboup, S and Raoult, D and Million, M}, title = {A review of the literature of Listeria monocytogenes in Africa highlights breast milk as an overlooked human source.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1213953}, pmid = {38173673}, issn = {1664-302X}, abstract = {According to the latest WHO estimates (2015) of the global burden of foodborne diseases, Listeria monocytogenes is responsible for one of the most serious foodborne infections and commonly results in severe clinical outcomes. The 2013 French MONALISA prospective cohort identified that women born in Africa has a 3-fold increase in the risk of maternal neonatal listeriosis. One of the largest L. monocytogenes outbreaks occurred in South Africa in 2017-2018 with over 1,000 cases. Moreover, recent findings identified L. monocytogenes in human breast milk in Mali and Senegal with its relative abundance positively correlated with severe acute malnutrition. These observations suggest that the carriage of L. monocytogenes in Africa should be further explored, starting with the existing literature. For that purpose, we searched the peer-reviewed and grey literature published dating back to 1926 to date using six databases. Ultimately, 225 articles were included in this review. 
We highlighted that L. monocytogenes is detected in various sample types including environmental samples, food samples as well as animal and human samples. These studies were mostly conducted in five east African countries, four west African countries, four north African countries, and two Southern African countries. Moreover, only ≈ 0.2% of the Listeria monocytogenes genomes available on NCBI were obtained from African samples, contracted with its detection. The pangenome resulting from the African Listeria monocytogenes samples revealed three clusters including two from South-African strains as well as one consisting of the strains isolated from breast milk in Mali and Senegal and, a vaginal post-miscarriage sample. This suggests there was a clonal complex circulating in Mali and Senegal. As this clone has not been associated to infections, further studies should be conducted to confirm its circulation in the region and explore its association with foodborne infections. Moreover, it is apparent that more resources should be allocated to the detection of L. monocytogenes as only 15/54 countries have reported its detection in the literature. It seems paramount to map the presence and carriage of L. 
monocytogenes in all African countries to prevent listeriosis outbreaks and the related miscarriages and confirm its association with severe acute malnutrition.}, } @article {pmid38172677, year = {2024}, author = {Choi, DG and Baek, JH and Han, DM and Khan, SA and Jeon, CO}, title = {Comparative pangenome analysis of Enterococcus faecium and Enterococcus lactis provides new insights into the adaptive evolution by horizontal gene acquisitions.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {28}, pmid = {38172677}, issn = {1471-2164}, support = {Graduate Research Scholarship in 2018//Chung-Ang University/ ; PJ01710102//Rural Development Administration/ ; 2018R1A5A1025077//Ministry of Science and ICT, South Korea/ ; }, abstract = {BACKGROUND: Enterococcus faecium and E. lactis are phylogenetically closely related lactic acid bacteria that are ubiquitous in nature and are known to be beneficial or pathogenic. Despite their considerable industrial and clinical importance, comprehensive studies on their evolutionary relationships and genomic, metabolic, and pathogenic traits are still lacking. Therefore, we conducted comparative pangenome analyses using all available dereplicated genomes of these species.

RESULTS: E. faecium was divided into two subclades: subclade I, comprising strains derived from humans, animals, and food, and the more recent phylogenetic subclade II, consisting exclusively of human-derived strains. In contrast, E. lactis strains, isolated from diverse sources including foods, humans, animals, and the environment, did not display distinct clustering based on their isolation sources. Despite having similar metabolic features, noticeable genomic differences were observed between E. faecium subclades I and II, as well as E. lactis. Notably, E. faecium subclade II strains exhibited significantly larger genome sizes and higher gene counts compared to both E. faecium subclade I and E. lactis strains. Furthermore, they carried a higher abundance of antibiotic resistance, virulence, bacteriocin, and mobile element genes. Phylogenetic analysis of antibiotic resistance and virulence genes suggests that E. faecium subclade II strains likely acquired these genes through horizontal gene transfer, facilitating their effective adaptation in response to antibiotic use in humans.

CONCLUSIONS: Our study offers valuable insights into the adaptive evolution of E. faecium strains, enabling their survival as pathogens in the human environment through horizontal gene acquisitions.}, } @article {pmid38170317, year = {2024}, author = {Lin, J and Xiao, Y and Liu, H and Gao, D and Duan, Y and Zhu, X}, title = {Combined transcriptomic and pangenomic analyses guide metabolic amelioration to enhance tiancimycins production.}, journal = {Applied microbiology and biotechnology}, volume = {108}, number = {1}, pages = {1--11}, pmid = {38170317}, issn = {1432-0614}, support = {2020zzts248//Fundamental Research Funds for Central Universities of the Central South University/ ; 81530092//National Natural Science Foundation of China/ ; B0803420//Chinese Ministry of Education 111/ ; }, abstract = {Exploration of high-yield mechanism is important for further titer improvement of valuable antibiotics, but how to achieve this goal is challenging. Tiancimycins (TNMs) are anthraquinone-fused enediynes with promising drug development potentials, but their prospective applications are limited by low titers. This work aimed to explore the intrinsic high-yield mechanism in previously obtained TNMs high-producing strain Streptomyces sp. CB03234-S for the further titer amelioration of TNMs. First, the typical ribosomal RpsL(K43N) mutation in CB03234-S was validated to be merely responsible for the streptomycin resistance but not the titer improvement of TNMs. Subsequently, the combined transcriptomic, pan-genomic and KEGG analyses revealed that the significant changes in the carbon and amino acid metabolisms could reinforce the metabolic fluxes of key CoA precursors, and thus prompted the overproduction of TNMs in CB03234-S. Moreover, fatty acid metabolism was considered to exert adverse effects on the biosynthesis of TNMs by shunting and reducing the accumulation of CoA precursors. 
Therefore, different combinations of relevant genes were respectively overexpressed in CB03234-S to strengthen fatty acid degradation. The resulting mutants all showed the enhanced production of TNMs. Among them, the overexpression of fadD, a key gene responsible for the first step of fatty acid degradation, achieved the highest 21.7 ± 1.1 mg/L TNMs with a 63.2% titer improvement. Our studies suggested that comprehensive bioinformatic analyses are effective to explore metabolic changes and guide rational metabolic reconstitution for further titer improvement of target products. KEY POINTS: • Comprehensive bioinformatic analyses effectively reveal primary metabolic changes. • Primary metabolic changes cause precursor enrichment to enhance TNMs production. • Strengthening of fatty acid degradation further improves the titer of TNMs.}, } @article {pmid38168881, year = {2024}, author = {Triesch, S and Denton, AK and Bouvier, JW and Buchmann, JP and Reichel-Deland, V and Guerreiro, RNFM and Busch, N and Schlüter, U and Stich, B and Kelly, S and Weber, APM}, title = {Transposable elements contribute to the establishment of the glycine shuttle in Brassicaceae species.}, journal = {Plant biology (Stuttgart, Germany)}, volume = {}, number = {}, pages = {}, doi = {10.1111/plb.13601}, pmid = {38168881}, issn = {1438-8677}, support = {391465903/GRK 2466//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; 390686111//Germany's Excellence Strategy EXC-2048/1/ ; WE 2231/20-1//ERA-CAPS (European Research Network for Coordinating Action in Plant Sciences)/ ; 456082119//CRC (Collaborative Research Center)/ ; BB/J014427/1//BBSRC/ ; //Royal Society University Research Fellowship/ ; }, abstract = {C3 -C4 intermediate photosynthesis has evolved at least five times convergently in the Brassicaceae, despite this family lacking bona fide C4 species. 
The establishment of this carbon concentrating mechanism is known to require a complex suite of ultrastructural modifications, as well as changes in spatial expression patterns, which are both thought to be underpinned by a reconfiguration of existing gene-regulatory networks. However, to date, the mechanisms which underpin the reconfiguration of these gene networks are largely unknown. In this study, we used a pan-genomic association approach to identify genomic features that could confer differential gene expression towards the C3 -C4 intermediate state by analysing eight C3 species and seven C3 -C4 species from five independent origins in the Brassicaceae. We found a strong correlation between transposable element (TE) insertions in cis-regulatory regions and C3 -C4 intermediacy. Specifically, our study revealed 113 gene models in which the presence of a TE within a gene correlates with C3 -C4 intermediate photosynthesis. In this set, genes involved in the photorespiratory glycine shuttle are enriched, including the glycine decarboxylase P-protein whose expression domain undergoes a spatial shift during the transition to C3 -C4 photosynthesis. When further interrogating this gene, we discovered independent TE insertions in its upstream region which we conclude to be responsible for causing the spatial shift in GLDP1 gene expression. 
Our findings hint at a pivotal role of TEs in the evolution of C3 -C4 intermediacy, especially in mediating differential spatial gene expression.}, } @article {pmid38168637, year = {2024}, author = {Guo, N and Wang, S and Wang, T and Duan, M and Zong, M and Miao, L and Han, S and Wang, G and Liu, X and Zhang, D and Jiao, C and Xu, H and Chen, L and Fei, Z and Li, J and Liu, F}, title = {Graph-based Pan-genome of Brassica oleracea Provides New Insights into Its Domestication and Morphotype Diversification.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100791}, doi = {10.1016/j.xplc.2023.100791}, pmid = {38168637}, issn = {2590-3462}, abstract = {The domestication of Brassica oleracea has resulted in diverse morphological types with distinct patterns of organ development. Here we report a graph-based pan-genome of B. oleracea constructed with high-quality genome assemblies of different morphotypes. The pan-genome harbors over 200 structural variant (SV) hotspot regions enriched with auxin and flowering-related genes. Population genomic analyses reveal that early domestication of B. oleracea focused on leaf or stem development. Gene flows resulting from agricultural practices and variety improvement are detected among different morphotypes. Selective sweep and pan-genome analyses identify an auxin-responsive SAUR gene and a CLE family gene as crucial players in the leaf-stem differentiation during the early stage of B. oleracea domestication, and the BoKAN1 gene as instrumental in shaping the leafy heads of cabbage and Brussels sprouts. Our pan-genome and functional analyses further discover that variations in the BoFLC2 gene play key roles in the divergence of vernalization and flowering characteristics among different morphotypes, and variations in the first intron of BoFLC3 are involved in fine-tuning the flowering process in cauliflower. This study provides a comprehensive understanding of the pan-genome of B. 
oleracea and sheds light on the domestication and differential organ development of this globally important crop species.}, } @article {pmid38168361, year = {2023}, author = {Sirén, J and Eskandar, P and Ungaro, MT and Hickey, G and Eizenga, JM and Novak, AM and Chang, X and Chang, PC and Kolmogorov, M and Carroll, A and Monlong, J and Paten, B}, title = {Personalized Pangenome References.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.12.13.571553}, pmid = {38168361}, abstract = {Pangenomes, by including genetic diversity, should reduce reference bias by better representing new samples compared to them. Yet when comparing a new sample to a pangenome, variants in the pangenome that are not part of the sample can be misleading, for example, causing false read mappings. These irrelevant variants are generally rarer in terms of allele frequency, and have previously been dealt with using allele frequency filters. However, this is a blunt heuristic that both fails to remove some irrelevant variants and removes many relevant variants. We propose a new approach, inspired by local ancestry inference methods, that imputes a personalized pangenome subgraph based on sampling local haplotypes according to k-mer counts in the reads. Our approach is tailored for the Giraffe short read aligner, as the indexes it needs for read mapping can be built quickly. We compare the accuracy of our approach to state-of-the-art methods using graphs from the Human Pangenome Reference Consortium. 
The resulting personalized pangenome pipelines provide faster pangenome read mapping than comparable pipelines that use a linear reference, reduce small variant genotyping errors by 4x relative to the Genome Analysis Toolkit (GATK) best-practice pipeline, and for the first time make short-read structural variant genotyping competitive with long-read discovery methods.}, } @article {pmid38168234, year = {2023}, author = {Qiu, X and McGee, L and Hammitt, LL and Grant, LR and O'Brien, KL and Hanage, WP and Lipsitch, M}, title = {Prediction of post-PCV13 pneumococcal evolution using invasive disease data enhanced by inverse-invasiveness weighting.}, journal = {medRxiv : the preprint server for health sciences}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.12.10.23299786}, pmid = {38168234}, abstract = {BACKGROUND: After introduction of pneumococcal conjugate vaccines (PCVs), serotype replacement occurred in the population of Streptococcus pneumoniae. Predicting which pneumococcal clones and serotypes will become more common in carriage after vaccination can enhance vaccine design and public health interventions, while also improving our understanding of pneumococcal evolution. We sought to use invasive disease data to assess how well negative frequency-dependent selection (NFDS) models could explain pneumococcal carriage population evolution in the post-PCV13 epoch by weighting invasive data to approximate strain proportions in the carriage population.

METHODS: Invasive pneumococcal isolates were collected and sequenced during 1998-2018 by the Active Bacterial Core surveillance (ABCs) from the Centers for Disease Control and Prevention (CDC). To predict the post-PCV13 population dynamics in the carriage population using a NFDS model, all genomic data were processed under a bioinformatic pipeline of assembly, annotation, and pangenome analysis to define genetically similar sequence clusters (i.e., strains) and a set of accessory genes present in 5% to 95% of the isolates. The NFDS model predicted the strain proportion by calculating the post-vaccine strain composition in the weighted invasive disease population that would best match pre-vaccine accessory gene frequencies. To overcome the biases of invasive disease data, serotype-specific inverse-invasiveness weights were defined as the ratio of the proportion of the serotype in the carriage data to the proportion in the invasive data, using data from 1998-2001 in the United States, before conjugate vaccine introduction. The weights were applied to adjust both the observed strain proportion and the accessory gene frequencies.

RESULTS: Inverse-invasiveness weighting increased the correlation of accessory gene frequencies between invasive and carriage data with reduced residuals in linear or logit scale for pre-vaccine, post-PCV7, and post-PCV13. Similarly, weighting increased the correlation of accessory gene frequencies between different time periods in the invasive data. By weighting the invasive data, we were able to use the NFDS model to predict strain proportions in the carriage population in the post-PCV13 epoch, with the adjusted R-squared between predicted and observed strain proportions increasing from 0.176 to 0.544 after weighting.

CONCLUSIONS: The weighting system adjusted the invasive disease surveillance data to better represent the carriage population of S. pneumoniae. The NFDS mechanism predicted the strain proportions in the projected carriage population as estimated from the weighted invasive disease frequencies in the post-PCV13 epoch. Our methods enrich the value of genomic sequences from invasive disease surveillance, which is readily available, easy to collect, and of direct interest to public health.

IMPORTANCE: Streptococcus pneumoniae, a common colonizer in the human nasopharynx, can cause invasive diseases including pneumonia, bacteremia, and meningitis mostly in children under 5 years or older adults. The PCV7 was introduced in 2000 in the United States within the pediatric population to prevent disease and reduce deaths, followed by PCV13 in 2010, PCV15 in 2022, and PCV20 in 2023. After the removal of vaccine serotypes, the prevalence of carriage remained stable as the vacated pediatric ecological niche was filled with certain non-vaccine serotypes. Predicting which pneumococcal clones, and which serotypes, will be most successful in colonization after vaccination can enhance vaccine design and public health interventions, while also improving our understanding of pneumococcal evolution. While carriage data, which are collected from the pneumococcal population that is competing to colonize and transmit, are most directly relevant to evolutionary studies, invasive disease data are often more plentiful. Previously, evolutionary models based on negative frequency-dependent selection (NFDS) on the accessory genome were shown to predict which non-vaccine strains and serotypes were most successful in colonization following the introduction of PCV7. Here, we show that an inverse-invasiveness weighting system applied to invasive disease surveillance data allows the NFDS model to predict strain proportions in the projected carriage population in the post-PCV13/pre-PCV15 and -PCV20 epoch. The significance of our research lies in using a sample of invasive disease surveillance data to extend the use of NFDS as an evolutionary mechanism to predict post-PCV13 population dynamics. 
This has shown that we can correct for biased sampling that arises from differences in virulence and can enrich the value of genomic data from disease surveillance and advances our understanding of how NFDS impacts carriage population dynamics after both PCV7 and PCV13 vaccination.}, } @article {pmid38163518, year = {2023}, author = {Abondio, P and Bruno, F and Passarino, G and Montesanto, A and Luiselli, D}, title = {Pangenomics: a new era in the field of neurodegenerative diseases.}, journal = {Ageing research reviews}, volume = {}, number = {}, pages = {102180}, doi = {10.1016/j.arr.2023.102180}, pmid = {38163518}, issn = {1872-9649}, abstract = {A pangenome is composed of all the genetic variability of a group of individuals, and its application to the study of neurodegenerative diseases may provide valuable insights into the underlying aspects of genetic heterogenetiy for these complex ailments, including gene expression, epigenetics, and translation mechanisms. Furthermore, a reference pangenome allows for the identification of previously undetected structural commonalities and differences among individuals, which may help in the diagnosis of a disease, support the prediction of what will happen over time (prognosis) and aid in developing novel treatments in the perspective of personalized medicine. 
Therefore, in the present review, the application of the pangenome concept to the study of neurodegenerative diseases will be discussed and analyzed for its potential to enable an improvement in diagnosis and prognosis for these illnesses, leading to the development of tailored treatments for individual patients from the knowledge of the genomic composition of a whole population.}, } @article {pmid38158885, year = {2023}, author = {Lv, Y and Liu, C and Li, X and Wang, Y and He, H and He, W and Chen, W and Yang, L and Dai, X and Cao, X and Yu, X and Liu, J and Zhang, B and Wei, H and Zhang, H and Qian, H and Shi, C and Leng, Y and Liu, X and Guo, M and Wang, X and Zhang, Z and Wang, T and Zhang, B and Xu, Q and Cui, Y and Zhang, Q and Yuan, Q and Jahan, N and Ma, J and Zheng, X and Zhou, Y and Qian, Q and Guo, L and Shang, L}, title = {A centromere map based on super pan-genome highlights the structure and function of rice centromeres.}, journal = {Journal of integrative plant biology}, volume = {}, number = {}, pages = {}, doi = {10.1111/jipb.13607}, pmid = {38158885}, issn = {1744-7909}, abstract = {Rice (Oryza sativa) is a significant crop worldwide with a genome shaped by various evolutionary factors. Rice centromeres are crucial for chromosome segregation, and contain some unreported genes. Due to the diverse and complex centromere region, a comprehensive understanding of rice centromere structure and function at the population level is needed. We constructed a high-quality centromere map based on the rice super pan-genome consisting of a 251-accession panel comprising both cultivated and wild species of Asian and African rice. We showed that rice centromeres have diverse satellite repeat CentO, which vary across chromosomes and subpopulations, reflecting their distinct evolutionary patterns. 
We also revealed that long terminal repeats (LTRs), especially young Gypsy-type LTRs, are abundant in the peripheral CentO-enriched regions (CoERs) and drive rice centromere expansion and evolution. Furthermore, high-quality genome assembly and complete T2T reference genome enable us to obtain more centromeric genome information despite the mapping and cloning of centromere genes is challenging. We investigated the association between structural variations (SVs) and gene expression in the rice centromere. A centromere gene, OsMAB, that positively regulates rice tiller number, was further confirmed by eQTL, haplotype analysis and CRISPR/Cas9 methods. By revealing the new insights into the evolutionary patterns and biological roles of rice centromeres, our finding will facilitate future research on centromere biology and crop improvement. This article is protected by copyright. All rights reserved.}, } @article {pmid38157192, year = {2023}, author = {Yu, Y and Chen, H}, title = {Human pangenome: far-reaching implications in precision medicine.}, journal = {Frontiers of medicine}, volume = {}, number = {}, pages = {}, pmid = {38157192}, issn = {2095-0225}, } @article {pmid38147560, year = {2024}, author = {Beavan, A and Domingo-Sananes, MR and McInerney, JO}, title = {Contingency, repeatability, and predictability in the evolution of a prokaryotic pangenome.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {121}, number = {1}, pages = {e2304934120}, doi = {10.1073/pnas.2304934120}, pmid = {38147560}, issn = {1091-6490}, support = {BB/Y513374/1//UKRI | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; }, abstract = {Pangenomes exhibit remarkable variability in many prokaryotic species, much of which is maintained through the processes of horizontal gene transfer and gene loss. 
Repeated acquisitions of near-identical homologs can easily be observed across pangenomes, leading to the question of whether these parallel events potentiate similar evolutionary trajectories, or whether the remarkably different genetic backgrounds of the recipients mean that postacquisition evolutionary trajectories end up being quite different. In this study, we present a machine learning method that predicts the presence or absence of genes in the Escherichia coli pangenome based on complex patterns of the presence or absence of other accessory genes within a genome. Our analysis leverages the repeated transfer of genes through the E. coli pangenome to observe patterns of repeated evolution following similar events. We find that the presence or absence of a substantial set of genes is highly predictable from other genes alone, indicating that selection potentiates and maintains gene-gene co-occurrence and avoidance relationships deterministically over long-term bacterial evolution and is robust to differences in host evolutionary history. We propose that at least part of the pangenome can be understood as a set of genes with relationships that govern their likely cohabitants, analogous to an ecosystem's set of interacting organisms. Our findings indicate that intragenomic gene fitness effects may be key drivers of prokaryotic evolution, influencing the repeated emergence of complex gene-gene relationships across the pangenome.}, } @article {pmid38145107, year = {2023}, author = {Dabbaghie, F and Srikakulam, SK and Marschall, T and Kalinina, OV}, title = {PanPA: generation and alignment of panproteome graphs.}, journal = {Bioinformatics advances}, volume = {3}, number = {1}, pages = {vbad167}, pmid = {38145107}, issn = {2635-0041}, abstract = {MOTIVATION: Compared to eukaryotes, prokaryote genomes are more diverse through different mechanisms, including a higher mutation rate and horizontal gene transfer. 
Therefore, using a linear representative reference can cause a reference bias. Graph-based pangenome methods have been developed to tackle this problem. However, comparisons in DNA space are still challenging due to this high diversity. In contrast, amino acid sequences have higher similarity due to evolutionary constraints, whereby a single amino acid may be encoded by several synonymous codons. Coding regions cover the majority of the genome in prokaryotes. Thus, panproteomes present an attractive alternative leveraging the higher sequence similarity while not losing much of the genome in non-coding regions.

RESULTS: We present PanPA, a method that takes a set of multiple sequence alignments of protein sequences, indexes them, and builds a graph for each multiple sequence alignment. In the querying step, it can align DNA or amino acid sequences back to these graphs. We first showcase that PanPA generates correct alignments on a panproteome from 1350 Escherichia coli. To demonstrate that panproteomes allow comparisons at longer phylogenetic distances, we compare DNA and protein alignments from 1073 Salmonella enterica assemblies against E.coli reference genome, pangenome, and panproteome using BWA, GraphAligner, and PanPA, respectively; with PanPA aligning around 22% more sequences. We also aligned a DNA short-reads whole genome sequencing (WGS) sample from S.enterica against the E.coli reference with BWA and the panproteome with PanPA, where PanPA was able to find alignment for 68% of the reads compared to 5% with BWA.

PanPA is available at https://github.com/fawaz-dabbaghieh/PanPA.}, } @article {pmid38139397, year = {2023}, author = {Yin, S and Zhao, L and Liu, J and Sun, Y and Li, B and Wang, L and Ren, Z and Chen, C}, title = {Pan-genome Analysis of WOX Gene Family and Function Exploration of CsWOX9 in Cucumber.}, journal = {International journal of molecular sciences}, volume = {24}, number = {24}, pages = {}, doi = {10.3390/ijms242417568}, pmid = {38139397}, issn = {1422-0067}, support = {ZR2022MC084//Shandong Natural Science Foundation/ ; 31701923//National Natural Science Foundation of China/ ; 32372703//National Natural Science Foundation of China/ ; 32172605//National Natural Science Foundation of China/ ; }, abstract = {Cucumber is an economically important vegetable crop, and the warts (composed of spines and Tubercules) of cucumber fruit are an important quality trait that influences its commercial value. WOX transcription factors are known to have pivotal roles in regulating various aspects of plant growth and development, but their studies in cucumber are limited. Here, genome-wide identification of cucumber WOX genes was performed using the pan-genome analysis of 12 cucumber varieties. Our findings revealed diverse CsWOX genes in different cucumber varieties, with variations observed in protein sequences and lengths, gene structure, and conserved protein domains, possibly resulting from the divergent evolution of CsWOX genes as they adapt to diverse cultivation and environmental conditions. Expression profiles of the CsWOX genes demonstrated that CsWOX9 was significantly expressed in unexpanded ovaries, especially in the epidermis. Additionally, analysis of the CsWOX9 promoter revealed two binding sites for the C2H2 zinc finger protein. 
We successfully executed a yeast one-hybrid assay (Y1H) and a dual-luciferase (LUC) transaction assay to demonstrate that CsWOX9 can be transcriptionally activated by the C2H2 zinc finger protein Tu, which is crucial for fruit Tubercule formation in cucumber. Overall, our results indicated that CsWOX9 is a key component of the molecular network that regulates wart formation in cucumber fruits, and provide further insight into the function of CsWOX genes in cucumber.}, } @article {pmid38138105, year = {2023}, author = {Zhang, Y and Pan, M and Wang, Q and Wang, L and Liao, L}, title = {Complete Genome Sequence and Pan-Genome Analysis of Shewanella oncorhynchi Z-P2, a Siderophore Putrebactin-Producing Bacterium.}, journal = {Microorganisms}, volume = {11}, number = {12}, pages = {}, doi = {10.3390/microorganisms11122961}, pmid = {38138105}, issn = {2076-2607}, support = {2022BEC030//Technological innovation Program of Hubei Province(Major project)/ ; }, abstract = {In this study, we reported the complete genome sequence of Shewanella oncorhynchi for the first time. S. oncorhynchi Z-P2 is a bacterium that produces the siderophore putrebactin. Its genome consists of a circular chromosome of 5,034,612 bp with a G + C content of 45.4%. A total of 4544 protein-coding genes, 109 tRNAs and 31 rRNAs were annotated by the RAST. Five non-ribosomal peptide synthetase (NRPS) and polyketide synthetase (PKS) gene clusters were identified by the antiSMASH analysis. The pan-genome analysis of Z-P2 and 10 Shewanella putrefaciens revealed 9228 pan-gene clusters and 2681 core gene clusters, with Z-P2 having 618 unique gene clusters. Additionally, the gene cluster involved in putrebactin biosynthesis in Z-P2 was annotated, and the mechanism of putrebactin biosynthesis was analyzed. The putrebactin produced by Z-P2 was detected using UPLC-MS analysis, with an [M + H][+] molecular ion at m/z 373.21. 
These findings provide valuable support for further research on the genetic engineering of putrebactin biosynthetic genes of Z-P2 and their potential applications.}, } @article {pmid38136976, year = {2023}, author = {Serag, M and Plutino, M and Charles, P and Azulay, JP and Chaussenot, A and Paquis-Flucklinger, V and Ait-El-Mkadem Saadi, S and Rouzier, C}, title = {A Case Report of SYNE1 Deficiency-Mimicking Mitochondrial Disease and the Value of Pangenomic Investigations.}, journal = {Genes}, volume = {14}, number = {12}, pages = {}, doi = {10.3390/genes14122154}, pmid = {38136976}, issn = {2073-4425}, abstract = {Mitochondrial disorders are characterized by a huge clinical, biochemical, and genetic heterogeneity, which poses significant diagnostic challenges. Several studies report that more than 50% of patients with suspected mitochondrial disease could have a non-mitochondrial disorder. Thus, only the identification of the causative pathogenic variant can confirm the diagnosis. Herein, we describe the diagnostic journey of a family suspected of having a mitochondrial disorder who were referred to our Genetics Department. The proband presented with the association of cerebellar ataxia, COX-negative fibers on muscle histology, and mtDNA deletions. Whole exome sequencing (WES), supplemented by a high-resolution array, comparative genomic hybridization (array-CGH), allowed us to identify two pathogenic variants in the non-mitochondrial SYNE1 gene. The proband and her affected sister were found to be compound heterozygous for a known nonsense variant (c.13258C>T, p.(Arg4420Ter)), and a large intragenic deletion that was predicted to result in a loss of function. To our knowledge, this is the first report of a large intragenic deletion of SYNE1 in patients with cerebellar ataxia (ARCA1). This report highlights the interest in a pangenomic approach to identify the genetic basis in heterogeneous neuromuscular patients with the possible cause of mitochondrial disease. 
Moreover, even rare copy number variations should be considered in patients with a phenotype suggestive of SYNE1 deficiency.}, } @article {pmid38134602, year = {2023}, author = {Mumtaz, MN and Irfan, M and Siraj, S and Khan, A and Khan, H and Imran, M and Khan, IA and Khan, A}, title = {Whole-genome sequencing of extensively drug-resistant Salmonella enterica serovar Typhi clinical isolates from the Peshawar region of Pakistan.}, journal = {Journal of infection and public health}, volume = {17}, number = {2}, pages = {271--282}, doi = {10.1016/j.jiph.2023.12.002}, pmid = {38134602}, issn = {1876-035X}, abstract = {BACKGROUND: Typhoid fever, caused by Salmonella enterica serovar Typhi, is a significant public health concern due to the escalating of antimicrobial resistance (AMR), with limited treatment options for extensively drug-resistant (XDR) S. Typhi strains pose a serious threat to disease management and control. This study aimed to investigate the genomic characteristics, epidemiology and AMR genes of XDR S. Typhi strains from typhoid fever patients in Pakistan.

METHODOLOGY: We assessed 200 patients with enteric fever symptoms, confirming 65 S. Typhi cases through culturing and biochemical tests. Subsequent antimicrobial susceptibility testing revealed 40 cases of extensively drug-resistant (XDR) and 25 cases of multi-drug resistance (MDR). Thirteen XDR strains were selected for whole-genome sequencing, to analyze their sequence type, phylogenetics, resistance genes, pathogenicity islands, and plasmid sequences using variety of data analysis resources. Pangenome analysis was conducted for 140 XDR strains, including thirteen in-house and 127 strains reported from other regions of Pakistan, to assess their genetic diversity and functional annotation.

RESULTS: MLST analysis classified all isolates as sequence type 1 (ST-1) with 4.3.1.1. P1 genotype characterization. Prophage and Salmonella Pathogenicity Island (SPI) analysis identified intact prophages and eight SPIs involved in Salmonella's invasion and replication within host cells. Genome data analysis revealed numerous AMR genes including dfrA7, sul1, qnrS1, TEM-1, Cat1, and CTX-M-15, and SNPs associated with antibiotics resistance. IncY, IncQ1, pMAC, and pAbTS2 plasmids, conferring antimicrobial resistance, were detected in a few XDR S. Typhi strains. Phylogenetic analysis inferred a close epidemiological linkage among XDR strains from different regions of Pakistan. Pangenome was noted closed among these strains and functional annotation highlighted genes related to metabolism and pathogenesis.

CONCLUSION: This study revealed a uniform genotypic background among XDR S. Typhi strains in Pakistan, signifying a persistence transmission of a single, highly antibiotic-resistant clone. The closed pan-genome observed underscores limited genetic diversity and highlights the importance of genomic surveillance for combating drug-resistant typhoid infections.}, } @article {pmid38128825, year = {2023}, author = {Wang, J and Peng, Y and Xu, Y and Li, Z and Zhan, G and Kang, Z and Zhao, J}, title = {Pan-genome analysis reveals a highly plastic genome and extensive secreted protein polymorphism in Puccinia striiformis f. sp. Tritici.}, journal = {Journal of genetics and genomics = Yi chuan xue bao}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgg.2023.12.004}, pmid = {38128825}, issn = {1673-8527}, } @article {pmid38126779, year = {2023}, author = {Ahmed, N and Joglekar, P and Deming, C and {NISC Comparative Sequencing Program} and Lemon, KP and Kong, HH and Segre, JA and Conlan, S}, title = {Genomic characterization of the C. tuberculostearicum species complex, a prominent member of the human skin microbiome.}, journal = {mSystems}, volume = {8}, number = {6}, pages = {e0063223}, doi = {10.1128/msystems.00632-23}, pmid = {38126779}, issn = {2379-5077}, support = {//HHS | NIH | National Human Genome Research Institute (NHGRI)/ ; //HHS | NIH | National Institute of Arthritis and Musculoskeletal and Skin Diseases (NIAMS)/ ; }, abstract = {Amplicon sequencing data combined with isolate whole genome sequencing have expanded our understanding of Corynebacterium on the skin. Healthy human skin is colonized by a diverse collection of Corynebacterium species, but Corynebacterium tuberculostearicum predominates on many skin sites. Our work supports the emerging idea that C. tuberculostearicum is a species complex encompassing several distinct species. 
We produced a collection of genomes that help define this complex, including a potentially new species we term Corynebacterium hallux based on a preference for sites on the feet, whole-genome average nucleotide identity, pangenomic analysis, and growth in skin-like media. This isolate collection and high-quality genome resource set the stage for developing engineered strains for both basic and translational clinical studies.}, } @article {pmid38125681, year = {2023}, author = {Charron, P and Gao, R and Chmara, J and Hoover, E and Nadin-Davis, S and Chauvin, D and Hazelwood, J and Makondo, K and Duceppe, MO and Kang, M}, title = {Influence of genomic variations on glanders serodiagnostic antigens using integrative genomic and transcriptomic approaches.}, journal = {Frontiers in veterinary science}, volume = {10}, number = {}, pages = {1217135}, pmid = {38125681}, issn = {2297-1769}, abstract = {Glanders is a highly contagious and life-threatening zoonotic disease caused by Burkholderia mallei (B. mallei). Without an effective vaccine or treatment, early diagnosis has been regarded as the most effective method to prevent glanders transmission. Currently, the diagnosis of glanders is heavily reliant on serological tests. However, given that markedly different host immune responses can be elicited by genetically different strains of the same bacterial species, infection by B. mallei, whose genome is unstable and plastic, may result in various immune responses. This variability can make the serodiagnosis of glanders challenging. Therefore, there is a need for a comprehensive understanding and assessment of how B. mallei genomic variations impact the appropriateness of specific target antigens for glanders serodiagnosis. In this study, we investigated how genomic variations in the B. mallei genome affect gene content (gene presence/absence) and expression, with a special focus on antigens used or potentially used in serodiagnosis. In all the genome sequences of B. 
mallei isolates available in NCBI's RefSeq database (accessed in July 2023) and in-house sequenced samples, extensive small and large variations were observed when compared to the type strain ATCC 23344. Further pan-genome analysis of those assemblies revealed variations of gene content among all available genomes of B. mallei. Specifically, differences in gene content ranging from 31 to 715 genes with an average of 334 gene presence-absence variations were found in strains with complete or chromosome-level genome assemblies, using the ATCC 23344 strain as a reference. The affected genes included some encoded proteins used as serodiagnostic antigens, which were lost due mainly to structural variations. Additionally, a transcriptomic analysis was performed using the type strain ATCC 23344 and strain Zagreb which has been widely utilized to produce glanders antigens. In total, 388 significant differentially expressed genes were identified between these two strains, including genes related to bacterial pathogenesis and virulence, some of which were associated with genomic variations, particularly structural variations. To our knowledge, this is the first comprehensive study to uncover the impacts of genetic variations of B. mallei on its gene content and expression. These differences would have significant impacts on host innate and adaptive immunity, including antibody production, during infection. This study provides novel insights into B. 
mallei genetic variants, knowledge which will help to improve glanders serodiagnosis.}, } @article {pmid38122983, year = {2023}, author = {Mondol, SM and Islam, MR and Rakhi, NN and Shakil, SK and Islam, I and Mustary, JF and {Amiruzzaman} and Shahjalal, HM and Gomes, DJ and Rahaman, MM}, title = {Unveiling a High-Risk Epidemic Clone (ST 357) of 'Difficult to Treat Extensively Drug-Resistant' (DT-XDR) Pseudomonas aeruginosa from a burn patient in Bangladesh: A Resilient Beast Revealing Co-existence of Four Classes of Beta Lactamases.}, journal = {Journal of global antimicrobial resistance}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgar.2023.11.014}, pmid = {38122983}, issn = {2213-7173}, abstract = {OBJECTIVES: Pseudomonas aeruginosa (P. aeruginosa) stands out as a key culprit in the colonization of burn wounds, instigating grave infections of heightened severity. In this study, we have performed comparative whole genome analysis of a difficult to treat extensively drug resistant (DT-XDR) P. aeruginosa isolated from a burn patient in order to elucidate genomic diversity, molecular patterns, mechanisms and genes responsible for conferring antimicrobial resistance and virulence.

METHOD: P. aeruginosa SHNIBPS206 was isolated from an infected burn wound of a critically injured burn patient. Whole genome sequencing was carried out and annotated with Prokka. Sequence type, serotype, antimicrobial resistance genes and mechanisms, virulence genes, metal resistance genes and CRISPR/Cas systems were investigated. Later, pangenome analysis was carried out to find out genomic diversity.

RESULT: P. aeruginosa SHNIBPS206 (MLST 357, Serotype O11) was resistant to 14 antibiotics including carbapenems and harboured all four classes of beta lactamase producing genes: Class A (blaPME-1, blaVEB-9), Class B (blaNDM-1), Class C (blaPDC-11) and Class D (blaOXA-846). Mutational analysis of Porin D gave valuable insights. Several efflux pump, virulence and metal resistance genes were also detected. Pangenome analysis revealed high genomic diversity among different strains of P. aeruginosa.

CONCLUSION: To our knowledge, this is the first report of an extensively drug resistant ST 357 P. aeruginosa from Bangladesh, which is an epidemic high-risk P. aeruginosa clone. Further research and in-depth comprehensive studies are required to investigate the prevalence of such high-risk clone of P. aeruginosa in Bangladesh.}, } @article {pmid38117845, year = {2023}, author = {Hollensteiner, J and Schneider, D and Poehlein, A and Brinkhoff, T and Daniel, R}, title = {Pan-genome analysis of six Paracoccus type strain genomes reveal lifestyle traits.}, journal = {PloS one}, volume = {18}, number = {12}, pages = {e0287947}, doi = {10.1371/journal.pone.0287947}, pmid = {38117845}, issn = {1932-6203}, abstract = {The genus Paracoccus capable of inhabiting a variety of different ecological niches both, marine and terrestrial, is globally distributed. In addition, Paracoccus is taxonomically, metabolically and regarding lifestyle highly diverse. Until now, little is known on how Paracoccus can adapt to such a range of different ecological niches and lifestyles. In the present study, the genus Paracoccus was phylogenomically analyzed (n = 160) and revisited, allowing species level classification of 16 so far unclassified Paracoccus sp. strains and detection of five misclassifications. Moreover, we performed pan-genome analysis of Paracoccus-type strains, isolated from a variety of ecological niches, including different soils, tidal flat sediment, host association such as the bluespotted cornetfish, Bugula plumosa, and the reef-building coral Stylophora pistillata to elucidate either i) the importance of lifestyle and adaptation potential, and ii) the role of the genomic equipment and niche adaptation potential. Six complete genomes were de novo hybrid assembled using a combination of short and long-read technologies. These Paracoccus genomes increase the number of completely closed high-quality genomes of type strains from 15 to 21. 
Pan-genome analysis revealed an open pan-genome composed of 13,819 genes with a minimal chromosomal core (8.84%) highlighting the genomic adaptation potential and the huge impact of extra-chromosomal elements. All genomes are shaped by the acquisition of various mobile genetic elements including genomic islands, prophages, transposases, and insertion sequences emphasizing their genomic plasticity. In terms of lifestyle, each mobile genetic element should be evaluated separately with respect to the ecological context. Free-living genomes, in contrast to host-associated, tend to comprise (1) larger genomes, or the highest number of extra-chromosomal elements, (2) higher number of genomic islands and insertion sequence elements, and (3) a lower number of intact prophage regions. Regarding lifestyle adaptations, free-living genomes share genes linked to genetic exchange via T4SS, especially relevant for Paracoccus, known for their numerous extrachromosomal elements, enabling adaptation to dynamic environments. Conversely, host-associated genomes feature diverse genes involved in molecule transport, cell wall modification, attachment, stress protection, DNA repair, carbon, and nitrogen metabolism. Due to the vast number of adaptive genes, Paracoccus can quickly adapt to changing environmental conditions.}, } @article {pmid38113358, year = {2023}, author = {Bourdin, A and Toutée, A and Fardeau, C}, title = {Intravenous Immunoglobulins for Bilateral Retinochoroiditis in Rhinovirus Infection: A Case Report.}, journal = {Ophthalmic surgery, lasers \& imaging retina}, volume = {54}, number = {12}, pages = {720--722}, doi = {10.3928/23258160-20231019-02}, pmid = {38113358}, issn = {2325-8179}, abstract = {A 43-year-old woman presented bilateral anterior granulomatous uveitis associated with bilateral disc edema and bilateral peripheral retinochoroidal lesions. 
Intravenous corticosteroids after negative investigations for infectious causes did not prevent spreading of the lesions and retinal atrophy. A diagnostic vitrectomy with vitreous analysis, including pan-genomic, next-generation sequencing showed a positive result for rhinovirus HRV B91, and the cytological analysis was suggestive of infection. Intravenous immunoglobulins associated with pegylated interferon-alpha strongly slowed the progression of the lesions and led to scarred and atrophic aspect in both eyes after 6 months. [Ophthalmic Surg Lasers Imaging Retina 2023;54:720-722.].}, } @article {pmid38112751, year = {2023}, author = {Gould, AL and Henderson, JB}, title = {Comparative genomics of symbiotic Photobacterium using highly contiguous genome assemblies from long read sequences.}, journal = {Microbial genomics}, volume = {9}, number = {12}, pages = {}, doi = {10.1099/mgen.0.001161}, pmid = {38112751}, issn = {2057-5858}, abstract = {This study presents the assembly and comparative genomic analysis of luminous Photobacterium strains isolated from the light organs of 12 fish species using Oxford Nanopore Technologies (ONT) sequencing. The majority of assemblies achieved chromosome-level continuity, consisting of one large (>3 Mbp) and one small (~1.5 Mbp) contig, with near complete BUSCO scores along with varying plasmid sequences. Leveraging this dataset, this study significantly expanded the available genomes for P. leiognathi and its subspecies P. 'mandapamensis', enabling a comparative genomic analysis between the two lineages. An analysis of the large and small chromosomes unveiled distinct patterns of core and accessory genes, with a larger fraction of the core genes residing on the large chromosome, supporting the hypothesis of secondary chromosome evolution from megaplasmids in Vibrionaceae. In addition, we discovered a proposed new species, Photobacterium acropomis sp. 
nov., isolated from an acropomatid host, with an average nucleotide identity (ANI) of 93 % compared to the P. leiognathi and P. 'mandapamensis' strains. A comparison of the P. leiognathi and P. 'mandapamensis' lineages revealed minimal differences in gene content, yet highlighted the former's larger genome size and potential for horizontal gene transfer. An investigation of the lux-rib operon, responsible for light production, indicated congruence between the presence of luxF and host family, challenging its role in differentiating P. 'mandapamensis' from P. leiognathi. Further insights were derived from the identification of metabolic differences, such as the presence of the NADH:quinone oxidoreductase respiratory complex I in P. leiognathi as well as variations in the type II secretion system (T2S) genes between the lineages, potentially impacting protein secretion and symbiosis. In summary, this study advances our understanding of Photobacterium genome evolution, highlighting subtle differences between closely related lineages, specifically P. leiognathi and P. 'mandapamensis'. 
These findings highlight the benefit of long read sequencing for bacterial genome assembly and pangenome analysis and provide a foundation for exploring early bacterial speciation processes of these facultative light organ symbionts.}, } @article {pmid38111050, year = {2023}, author = {Cochetel, N and Minio, A and Guarracino, A and Garcia, JF and Figueroa-Balderas, R and Massonnet, M and Kasuga, T and Londo, JP and Garrison, E and Gaut, BS and Cantu, D}, title = {A super-pangenome of the North American wild grape species.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {290}, pmid = {38111050}, issn = {1474-760X}, support = {1741627//National Science Foundation/ ; }, abstract = {BACKGROUND: Capturing the genetic diversity of wild relatives is crucial for improving crops because wild species are valuable sources of agronomic traits that are essential to enhance the sustainability and adaptability of domesticated cultivars. Genetic diversity across a genus can be captured in super-pangenomes, which provide a framework for interpreting genomic variations.

RESULTS: Here we report the sequencing, assembly, and annotation of nine wild North American grape genomes, which are phased and scaffolded at chromosome scale. We generate a reference-unbiased super-pangenome using pairwise whole-genome alignment methods, revealing the extent of the genomic diversity among wild grape species from sequence to gene level. The pangenome graph captures genomic variation between haplotypes within a species and across the different species, and it accurately assesses the similarity of hybrids to their parents. The species selected to build the pangenome are a great representation of the genus, as illustrated by capturing known allelic variants in the sex-determining region and for Pierce's disease resistance loci. Using pangenome-wide association analysis, we demonstrate the utility of the super-pangenome by effectively mapping short reads from genus-wide samples and identifying loci associated with salt tolerance in natural populations of grapes.

CONCLUSIONS: This study highlights how a reference-unbiased super-pangenome can reveal the genetic basis of adaptive traits from wild relatives and accelerate crop breeding research.}, } @article {pmid38110716, year = {2023}, author = {Vogan, K}, title = {Refining the apple pan-genome.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, doi = {10.1038/s41588-023-01629-y}, pmid = {38110716}, issn = {1546-1718}, } @article {pmid38107860, year = {2023}, author = {Jiang, ZM and Mou, T and Sun, Y and Su, J and Yu, LY and Zhang, YQ}, title = {Environmental distribution and genomic characteristics of Solirubrobacter, with proposal of two novel species.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1267771}, pmid = {38107860}, issn = {1664-302X}, abstract = {Solirubrobacter spp. were abundant in soil samples collected from deserts and other areas with high UV radiation. In addition, a novel Solirubrobacter species, with strain CPCC 204708[T] as the type, was isolated and identified from sandy soil sample collected from the Badain Jaran Desert of the Inner Mongolia autonomous region. Strain CPCC 204708[T] was Gram-stain positive, rod-shaped, non-motile, non-spore-forming, and grew optimally at 28-30°C, pH 7.0-8.0, and in the absence of NaCl. Analysis of the 16S rRNA gene sequence of strain CPCC 204708[T] showed its identity within the genus Solirubrobacter, with highest nucleotide similarities (97.4-98.2%) to other named Solirubrobacter species. Phylogenetic and genomic analyses indicated that the strain was most closely related to Solirubrobacter phytolaccae KCTC 29190[T], while represented a distinct species, as confirmed from physiological properties and comparison. The name Solirubrobacter deserti sp. nov. was consequently proposed, with CPCC 204708[T] (= DSM 105495[T] = NBRC 112942[T]) as the type strain. Genomic analyses of the Solirubrobacter spp. also suggested that Solirubrobacter sp. 
URHD0082 represents a novel species, for which the name Candidatus "Solirubrobacter pratensis" sp. nov. was proposed. Genomic analysis of CPCC 204708[T] revealed the presence of genes related to its adaptation to the harsh environments of deserts and may also harbor genes functional in plant-microbe interactions. Pan-genomic analysis of available Solirubrobacter spp. confirmed the presence of many of the above genes as core components of Solirubrobacter genomes and suggests they may possess beneficial potential for their associate plant and may be important resources for bioactive compounds.}, } @article {pmid38105952, year = {2023}, author = {Newcomer, EP and Fishbein, SRS and Zhang, K and Hink, T and Reske, KA and Cass, C and Iqbal, ZH and Struttmann, EL and Dubberke, ER and Dantas, G}, title = {Genomic surveillance of Clostridioides difficile transmission and virulence in a healthcare setting.}, journal = {medRxiv : the preprint server for health sciences}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.26.23295023}, pmid = {38105952}, abstract = {Clostridioides difficile infection (CDI) is a major cause of healthcare-associated diarrhea, despite the widespread implementation of contact precautions for patients with CDI. Here, we investigate strain contamination in a hospital setting and genomic determinants of disease outcomes. Across two wards over six months, we selectively cultured C. difficile from patients (n=384) and their environments. Whole-genome sequencing (WGS) of 146 isolates revealed that most C. difficile isolates were from clade 1 (131/146, 89.7%), while only one isolate of the hypervirulent ST1 was recovered. Of culture-positive admissions (n=79), 19 (24%) of patients were colonized with toxigenic C. difficile on admission to the hospital. We defined 25 strain networks at ≤ 2 core gene SNPs; 2 of these networks contain strains from different patients. Strain networks were temporally linked (p<0.0001). 
To understand genomic correlates of disease, we conducted WGS on an additional cohort of C. difficile (n=102 isolates) from the same hospital and confirmed that clade 1 isolates are responsible for most CDI cases. We found that while toxigenic C. difficile isolates are associated with the presence of cdtR , nontoxigenic isolates have an increased abundance of prophages. Our pangenomic analysis of clade 1 isolates suggests that while toxin genes (tcdABER and cdtR) were associated with CDI symptoms, they are dispensable for patient colonization. These data indicate toxigenic and nontoxigenic C. difficile contamination persists in a hospital setting and highlight further investigation into how accessory genomic repertoires contribute to C. difficile colonization and disease.}, } @article {pmid38103051, year = {2023}, author = {Kim, YH and Park, J and Chung, HS}, title = {Genetic characterization of tetracycline-resistant Staphylococcus aureus with reduced vancomycin susceptibility using whole-genome sequencing.}, journal = {Archives of microbiology}, volume = {206}, number = {1}, pages = {24}, pmid = {38103051}, issn = {1432-072X}, support = {2020R1C1C1013823//National Research Foundation of Korea (NRF) grant funded by the Korea government (MSIT)/ ; }, abstract = {This study aimed to analyze the genetic characteristics of Staphylococcus aureus with reduced vancomycin susceptibility (RVS-SA). Whole-genome sequencing was performed on 27 RVS-SA clinical isolates, and comparative genomic analysis was performed using S. aureus reference strains. Pan-genome orthologous groups (POGs) were identified that were present in RVS-SA but absent in the reference strains, but further analysis showed that the presence of these POGs was influenced by tetracycline resistance rather than vancomycin resistance. Therefore, we restricted our analysis to tetracycline-resistant (tetR) RVS-SA and tetR vancomycin-susceptible S. aureus (VSSA). 
Phylogenomic analysis showed them to be closely related, and further analysis revealed the presence of an uncharacterized protein SAB0394 and the absence of lytA in tetR RVS-SA, which are involved in cell wall thickening. In summary, using whole-genome sequencing we identified gain or loss of genes in tetR RVS-SA strains. These findings provide insights into the investigation of mechanisms associated with reduced vancomycin susceptibility and have the potential to contribute to the development of molecular biomarkers for the rapid and efficient detection of RVS-SA.}, } @article {pmid38084888, year = {2023}, author = {Do, VH and Nguyen, SH and Le, DQ and Nguyen, TT and Nguyen, CH and Ho, TH and Vo, NS and Nguyen, T and Nguyen, HA and Cao, MD}, title = {Pasa: leveraging population pangenome graph to scaffold prokaryote genome assemblies.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad1170}, pmid = {38084888}, issn = {1362-4962}, support = {VINIF.2019.DA11//VINIF/ ; }, abstract = {Whole genome sequencing has increasingly become the essential method for studying the genetic mechanisms of antimicrobial resistance and for surveillance of drug-resistant bacterial pathogens. The majority of bacterial genomes sequenced to date have been sequenced with Illumina sequencing technology, owing to its high-throughput, excellent sequence accuracy, and low cost. However, because of the short-read nature of the technology, these assemblies are fragmented into large numbers of contigs, hindering the obtaining of full information of the genome. We develop Pasa, a graph-based algorithm that utilizes the pangenome graph and the assembly graph information to improve scaffolding quality. By leveraging the population information of the bacteria species, Pasa is able to utilize the linkage information of the gene families of the species to resolve the contig graph of the assembly. 
We show that our method outperforms the current state of the arts in terms of accuracy, and at the same time, is computationally efficient to be applied to a large number of existing draft assemblies.}, } @article {pmid38076784, year = {2023}, author = {Vaddadi, NSK and Mun, T and Langmead, B}, title = {Minimizing Reference Bias with an Impute-First Approach.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.11.30.568362}, pmid = {38076784}, abstract = {Pangenome indexes reduce reference bias in sequencing data analysis. However, a greater reduction in bias can be achieved using a personalized reference, e.g. a diploid human reference constructed to match a donor individual's alleles. We present a novel impute-first alignment framework that combines elements of genotype imputation and pangenome alignment. It begins by genotyping the individual from a sub-sample of the input reads. It next uses a reference panel and efficient imputation algorithm to impute a personalized diploid reference. Finally, it indexes the personalized reference and applies a read aligner, which could be a linear or graph aligner, to align the full read set to the personalized reference. This framework has higher variant-calling recall (99.54% vs. 99.37%), precision (99.36% vs. 99.18%), and F1 (99.45% vs. 99.28%) compared to a graph-based pangenome. 
The personalized reference is also smaller and faster to query compared to a pangenome index, making it an overall advantageous choice for whole-genome DNA sequencing experiments.}, } @article {pmid38075907, year = {2023}, author = {Lan, Y and Liu, M and Song, Y and Cao, Y and Li, F and Luo, D and Qiao, D}, title = {Distribution, characterization, and evolution of heavy metal resistance genes and Tn7-like associated heavy metal resistance Gene Island of Burkholderia.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1252127}, pmid = {38075907}, issn = {1664-302X}, abstract = {INTRODUCTION: Burkholderia is a rod-shaped aerobic Gram-negative bacteria with considerable genetic and metabolic diversity, which can be used for bioremediation and production applications, and has great biotechnology potential. However, there are few studies on the heavy metal resistance of the Burkholderia genus.

METHODS: In this paper, the distribution, characteristics and evolution of heavy metal resistance genes in Burkholderia and the gene island of Tn7-like transposable element associated with heavy metal resistance genes in Burkholderia were studied by comparative genomic method based on the characteristics of heavy metal resistance.

RESULTS AND DISCUSSION: The classification status of some species of the Burkholderia genus was improved, and it was found that Burkholderia dabaoshanensis and Burkholderia novacaledonica do not belong to the Burkholderia genus. Secondly, comparative genomics studies and pan-genome analysis found that the core genome of Burkholderia has a larger proportion of heavy metal resistance genes and a greater variety of heavy metal resistance genes than the subsidiary genome and strain specific genes. Heavy metal resistance genes are mostly distributed in the genome in the form of various gene clusters (for example, mer clusters, ars clusters, czc/cusABC clusters). At the same time, transposase, recombinase, integrase and other genes were found upstream and downstream of heavy metal gene clusters, indicating that heavy metal resistance genes may be obtained through horizontal transfer. The analysis of natural selection pressure of heavy metal resistance genes showed that heavy metal resistance genes experienced strong purification selection under purification selection pressure in the genome. The Tn7 like transposable element of Burkholderia was associated with the heavy metal resistance gene island, and there were a large number of Tn7 transposable element insertion events in genomes. 
At the same time, BGI metal gene islands related to heavy metal resistance genes of Tn7 like transposable element were found, and these gene islands were only distributed in Burkholderia cepacia, Burkholderia polyvora, and Burkholderia contaminant.}, } @article {pmid38075893, year = {2023}, author = {You, M and Zhao, Q and Liu, Y and Zhang, W and Shen, Z and Ren, Z and Xu, C}, title = {Insights into lignocellulose degradation: comparative genomics of anaerobic and cellulolytic Ruminiclostridium-type species.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1288286}, pmid = {38075893}, issn = {1664-302X}, abstract = {Mesophilic, anaerobic, and cellulolytic Ruminiclostridium-type bacterial species can secrete an extracellular, multi-enzyme machinery cellulosome, which efficiently degrades cellulose. In this study, we first reported the complete genome of Ruminiclostridium papyrosolvens DSM2782, a single circular 5,027,861-bp chromosome with 37.1% G + C content, and compared it with other Ruminiclostridium-type species. Pan-genome analysis showed that Ruminiclostridium-type species share a large number of core genes to conserve basic functions, although they have a high level of intraspecific genetic diversity. Especially, KEGG mapping revealed that Ruminiclostridium-type species mainly use ABC transporters regulated by two-component systems (TCSs) to absorb extracellular sugars but not phosphotransferase systems (PTSs) that are employed by solventogenic clostridia, such as Clostridium acetobutylicum. Furthermore, we performed comparative analyses of the species-specific repertoire of CAZymes for each of the Ruminiclostridium-type species. The high similarity of their cohesins suggests a common ancestor and potential cross-species recognition. 
Additionally, both differences between the C-terminal cohesins and other cohesins of scaffoldins and between the dockerins linking with cellulases and other catalytic domains indicate a preference for the location of cellulosomal catalytic subunits at scaffoldins. The information gained in this study may be utilized directly or developed further by genetic engineering and optimizing enzyme systems or cell factories for enhanced biotechnological biomass deconstruction and biofuel production.}, } @article {pmid38075891, year = {2023}, author = {Zhu, X and Lu, Q and Li, Y and Long, Q and Zhang, X and Long, X and Cao, D}, title = {Contraction and expansion dynamics: deciphering genomic underpinnings of growth rate and pathogenicity in Mycobacterium.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1292897}, pmid = {38075891}, issn = {1664-302X}, abstract = {BACKGROUND: Mycobacterium bacteria, encompassing both slow growth (SGM) and rapid growth mycobacteria (RGM), along with true pathogenic (TP), opportunistic pathogenic (OP), and non-pathogenic (NP) types, exhibit diverse phenotypes. Yet, the genetic underpinnings of these variations remain elusive.

METHODS: Here, we conducted a comprehensive comparative genomics study involving 53 Mycobacterium species to unveil the genomic drivers behind growth rate and pathogenicity disparities.

RESULTS: Our core/pan-genome analysis highlighted 1,307 shared gene families, revealing an open pan-genome structure. A phylogenetic tree highlighted clear boundaries between SGM and RGM, as well as TP and other species. Gene family contraction emerged as the primary alteration associated with growth and pathogenicity transitions. Specifically, ABC transporters for amino acids and inorganic ions, along with quorum sensing genes, exhibited significant contractions in SGM species, potentially influencing their distinct traits. Conversely, TP strains displayed contraction in lipid and secondary metabolite biosynthesis and metabolism-related genes. Across the 53 species, we identified 26 core and 64 accessory virulence factors. Remarkably, TP and OP strains stood out for their expanded mycobactin biosynthesis and type VII secretion system gene families, pivotal for their pathogenicity.

CONCLUSION: Our findings underscore the importance of gene family contraction in nucleic acids, ions, and substance metabolism for host adaptation, while emphasizing the significance of virulence gene family expansion, including type VII secretion systems and mycobactin biosynthesis, in driving mycobacterial pathogenicity.}, } @article {pmid38075871, year = {2023}, author = {Pham, A and Volmer, JG and Chambers, DC and Smith, DJ and Reid, DW and Burr, L and Wells, TJ}, title = {Genomic analyses of Burkholderia respiratory isolates indicates two evolutionarily distinct B. anthina clades.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1274280}, pmid = {38075871}, issn = {1664-302X}, abstract = {INTRODUCTION: The Burkholderia cepacia complex (BCC) encompasses a group of at least 22 genetically distinct gram-negatives bacterial species ubiquitous in nature. Recognised as a group of genetically and phenotypically flexible species, the BCC inhabits diverse ecological niches causing both plant and human diseases. Comparative genomic analysis provides an in depth understanding into the population biology, phylogenetic relationship, and genomic architecture of species.

METHODS: Here, we genomically characterise Burkholderia anthina isolated from patients with chronic lung infections, an understudied pathogen within the Burkholderia cepacia complex.

RESULTS: We demonstrate that B. anthina is polyphyletic and constitutes two distinct evolutionary lineages. Core- and pan-genome analyses demonstrated substantial metabolic diversity, with B. anthina Clade I enriched in genes associated with microbial metabolism in diverse environments, including degradation of aromatic compounds and metabolism of xenobiotics, while B. anthina Clade II demonstrated an enhanced capability for siderophore biosynthesis.

DISCUSSION: Based on our phylogenetic and comparative genomic analyses, we suggest stratifying B. anthina to recognise a distinct species harbouring increased potential for iron metabolism via siderophore synthesis, for which we propose the name Burkholderia anthinoferum (sp. nov.).}, } @article {pmid38071270, year = {2023}, author = {Minich, JJ and Moore, ML and Allsing, NA and Aylward, A and Murray, ER and Tran, L and Michael, TP}, title = {Generating high-quality plant and fish reference genomes from field-collected specimens by optimizing preservation.}, journal = {Communications biology}, volume = {6}, number = {1}, pages = {1246}, pmid = {38071270}, issn = {2399-3642}, support = {INV-040541/GATES/Bill & Melinda Gates Foundation/United States ; }, abstract = {Sample preservation often impedes efforts to generate high-quality reference genomes or pangenomes for Earth's more than 2 million plant and animal species due to nucleotide degradation. Here we compare the impacts of storage methods including solution type, temperature, and time on DNA quality and Oxford Nanopore long-read sequencing quality in 9 fish and 4 plant species. We show 95% ethanol largely protects against degradation for fish blood (22 °C, ≤6 weeks) and plant tissue (4 °C, ≤3 weeks). From this furthest storage timepoint, we assemble high-quality reference genomes of 3 fish and 2 plant species with contiguity (contig N50) and completeness (BUSCO) that achieve the Vertebrate Genome Project benchmarking standards. For epigenetic applications, we also report methylation frequency compared to liquid nitrogen control. 
The results presented here remove the necessity for cryogenic storage in many long read applications and provide a framework for future studies focused on sampling in remote locations, which may represent a large portion of the future sequencing of novel organisms.}, } @article {pmid38071267, year = {2023}, author = {Norman, M and Chen, C and Miah, H and Patpour, M and Sørensen, C and Hovmøller, M and Forrest, K and Kumar, S and Prasad, P and Gangwar, OP and Bhardwaj, S and Bariana, H and Periyannan, S and Bansal, U}, title = {Sr65: a widely effective gene for stem rust resistance in wheat.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {137}, number = {1}, pages = {1}, pmid = {38071267}, issn = {1432-2242}, abstract = {Sr65 in chromosome 1A of Indian wheat landrace Hango-2 is a potentially useful all-stage resistance gene that currently protects wheat from stem rust in Australia, India, Africa and Europe. Stem rust, caused by Puccinia graminis f. sp. tritici (Pgt), threatened global wheat production with the appearance of widely virulent races that included TTKSK and TTRTF. Indian landrace Hango-2 showed resistance to Pgt races in India and Australia. Screening of a Hango-2/Avocet 'S' (AvS) recombinant inbred line population identified two stem rust resistance genes, a novel gene (temporarily named as SrH2) from Hango-2 and Sr26 from AvS. A mapping population segregating for SrH2 alone was developed from two recombinant lines. SrH2 was mapped on the short arm of chromosome 1A, where it was flanked by KASP markers KASP_7944 (proximal) and KASP_12147 (distal). SrH2 was delimited to an interval of 1.8-2.3 Mb on chromosome arm 1AS. The failure to detect candidate genes through MutRenSeq and comparative genomic analysis with the pan-genome dataset indicated the necessity to generate a Hango-2 specific assembly for detecting the gene sequence linked with SrH2 resistance. 
MutRenSeq however enabled identification of SrH2-linked KASP marker sunCS_265. Markers KASP_12147 and sunCS_265 showed 92% and 85% polymorphism among an Australian cereal cultivar diversity panel and can be used for marker-assisted selection of SrH2 in breeding programs. The effectiveness of SrH2 against Pgt races from Europe, Africa, India, and Australia makes it a valuable resource for breeding stem rust-resistant wheat cultivars. Since no wheat-derived gene was previously located in chromosome arm 1AS, SrH2 represents a new locus and named as SR65.}, } @article {pmid38070563, year = {2023}, author = {Lau, NS and Furusawa, G}, title = {Polysaccharide degradation in Cellvibrionaceae: Genomic insights of the novel chitin-degrading marine bacterium, strain KSP-S5-2, and its chitinolytic activity.}, journal = {The Science of the total environment}, volume = {}, number = {}, pages = {169134}, doi = {10.1016/j.scitotenv.2023.169134}, pmid = {38070563}, issn = {1879-1026}, abstract = {In this study, we present the genome characterization of a novel chitin-degrading strain, KSP-S5-2, and comparative genomics of 33 strains of Cellvibrionaceae. Strain KSP-S5-2 was isolated from mangrove sediment collected in Balik Pulau, Penang, Malaysia, and its 16S rRNA gene sequence showed the highest similarity (95.09%) to Teredinibacter franksiae. Genome-wide analyses including 16S rRNA gene sequence similarity, average nucleotide identity, digital DNA-DNA hybridization, and phylogenomics, suggested that KSP-S5-2 represents a novel species in the family Cellvibrionaceae. The Cellvibrionaceae pan-genome exhibited high genomic variability, with only 1.7 % representing the core genome, while the flexible genome showed a notable enrichment of genes related to carbohydrate metabolism and transport pathway. This observation sheds light on the genetic plasticity of the Cellvibrionaceae family and the gene pools that form the basis for the evolution of polysaccharide-degrading capabilities. 
Comparative analysis of the carbohydrate-active enzymes across Cellvibrionaceae strains revealed that the chitinolytic system is not universally present within the family, as only 18 of the 33 genomes encoded chitinases. Strain KSP-S5-2 displayed an expanded repertoire of chitinolytic enzymes (25 GH18, two GH19 chitinases, and five GH20 β-N-acetylhexosaminidases) but lacked genes for agar, xylan, and pectin degradation, indicating specialized enzymatic machinery focused primarily on chitin degradation. Further, the strain degraded 90 % of chitin after 10 days of incubation. In summary, our findings provided insights into strain KSP-S5-2's genomic potential, the genetics of its chitinolytic system, genomic diversity within the Cellvibrionaceae family in terms of polysaccharide degradation, and its application for chitin degradation.}, } @article {pmid38070037, year = {2023}, author = {Aziz, K and Gilbert, JA and Zaidi, AH}, title = {Genomic and Phenotypic Insight into the Probiotic Potential of Lactic Acid Bacterial spp. Associated with the Human Gut Mucosa.}, journal = {Probiotics and antimicrobial proteins}, volume = {}, number = {}, pages = {}, pmid = {38070037}, issn = {1867-1314}, support = {SIG # S10 OD026929/NH/NIH HHS/United States ; }, abstract = {Commensal microbiome-based health support is gaining respect in the medical community and new human gut-associated Lactic Acid Bacteria (LAB) strains must be evaluated for their probiotic potential. Here we characterized the phenotype and genomes of human ileocecal mucosa-associated LAB strains using metagenomic sequencing and in vitro testing. The strains characterized belonged to the genus Enterococcus (Enterococcus lactis NPL1366, NPL1371, and Enterococcus mundtii NPL1379) and Lactobacillus (Lactobacillus paragasseri, NPL1369, NPL1370, and Lactiplantibacillus plantarum NPL1378). Genome annotation suggested bacterial adaptation to both human physiological and industrial manufacturing-related stressors. 
Genes for histidine kinases in enterococci and Na + /K + antiporters and F0F1 ATP synthases in Lactobacillus strains may support their tolerance to acid seen in vitro. The bile salt hydrolase (BSH) gene in Lp. plantarum and L. paragasseri may help explain their reported bile salt deconjugation and cholesterol-lowering behavior. Thioredoxin is the principal antioxidant system, and several oxidases and general stress-related proteins are found in lactobacilli, most notably in L. plantarum NPL1378. Multiple adhesion and biofilm-related genes were predicted in the LAB genomes. Adhesion and biofilm-related genes figured prominently in the genomes of enterococcal strains, especially E. lactis, corresponding to its biofilm formation capacity in vitro. Bacteriocin and secondary metabolite biosynthetic gene clusters in the sequenced genomes of E. lactis NPL1366 and Lp. plantarum NPL1378 may explain their in vitro pathogenic antagonism. Moreover, folate producing Lp. plantarum strain holds potential to be used in therapeutics or biofortification of food. All the strains were deemed safe through in vitro and in silico analysis. This basic genetic and phenotypic information supports their contention as probiotic adjuncts to conventional medical therapy.}, } @article {pmid38070010, year = {2023}, author = {Gómez-Sánchez, I and Castelán-Sánchez, HG and Martínez-Castilla, LP and Hurtado-Ramírez, JM and López-Leal, G}, title = {Genetic insights into the microevolutionary dynamics and early introductions of human monkeypox virus in Mexico.}, journal = {Archives of virology}, volume = {169}, number = {1}, pages = {2}, pmid = {38070010}, issn = {1432-8798}, abstract = {The recent global outbreak of mpox, caused by monkeypox virus (MPV) emerged in Europe in 2022 and rapidly spread to over 40 countries. The Americas are currently facing the highest impact, reporting over 50,000 cases by early 2023. 
In this study, we analyzed 880 MPV isolates worldwide to gain insights into the evolutionary patterns and initial introduction events of the virus in Mexico. We found that MPV entered Mexico on multiple occasions, from the United Kingdom, Portugal, and Canada, and subsequently spread locally in different regions of Mexico. Additionally, we show that MPV has an open pangenome, highlighting the role of gene turnover in shaping its genomic diversity, rather than single-nucleotide polymorphisms (SNPs), which do not contribute significantly to genome diversity. Although the genome contains multiple SNPs in coding regions, these remain under purifying selection, suggesting their evolutionary conservation. One notable exception is amino acid position 63 of the protein encoded by the Cop-A4L gene, which is intricately related to viral maturity, which we found to be under strong positive selection. Ancestral state reconstruction indicated that the ancestral state at position 63 corresponds to the amino acid valine, which is present only in isolates of clade I. However, the isolates from the current outbreak contained threonine at position 63. Our findings contribute new information about the evolution of monkeypox virus.}, } @article {pmid38069258, year = {2023}, author = {Lyu, K and Xiao, J and Lyu, S and Liu, R}, title = {Comparative Analysis of Transposable Elements in Strawberry Genomes of Different Ploidy Levels.}, journal = {International journal of molecular sciences}, volume = {24}, number = {23}, pages = {}, doi = {10.3390/ijms242316935}, pmid = {38069258}, issn = {1422-0067}, abstract = {Transposable elements (TEs) make up a large portion of plant genomes and play a vital role in genome structure, function, and evolution. Cultivated strawberry (Fragaria x ananassa) is one of the most important fruit crops, and its octoploid genome was formed through several rounds of genome duplications from diploid ancestors. 
Here, we built a pan-genome TE library for the Fragaria genus using ten published strawberry genomes at different ploidy levels, including seven diploids, one tetraploid, and two octoploids, and performed comparative analysis of TE content in these genomes. The TEs comprise 51.83% (F. viridis) to 60.07% (F. nilgerrensis) of the genomes. Long terminal repeat retrotransposons (LTR-RTs) are the predominant TE type in the Fragaria genomes (20.16% to 34.94%), particularly in F. iinumae (34.94%). Estimating TE content and LTR-RT insertion times revealed that species-specific TEs have shaped each strawberry genome. Additionally, the copy number of different LTR-RT families inserted in the last one million years reflects the genetic distance between Fragaria species. Comparing cultivated strawberry subgenomes to extant diploid ancestors showed that F. vesca and F. iinumae are likely the diploid ancestors of the cultivated strawberry, but not F. viridis. These findings provide new insights into the TE variations in the strawberry genomes and their roles in strawberry genome evolution.}, } @article {pmid38069099, year = {2023}, author = {Shemesh-Mayer, E and Faigenboim, A and Sherman, A and Gao, S and Zeng, Z and Liu, T and Kamenetsky-Goldstein, R}, title = {Deprivation of Sexual Reproduction during Garlic Domestication and Crop Evolution.}, journal = {International journal of molecular sciences}, volume = {24}, number = {23}, pages = {}, doi = {10.3390/ijms242316777}, pmid = {38069099}, issn = {1422-0067}, abstract = {Garlic, originating in the mountains of Central Asia, has undergone domestication and subsequent widespread introduction to diverse regions. Human selection for adaptation to various climates has resulted in the development of numerous garlic varieties, each characterized by specific morphological and physiological traits. However, this process has led to a loss of fertility and seed production in garlic crops. 
In this study, we conducted morpho-physiological and transcriptome analyses, along with whole-genome resequencing of 41 garlic accessions from different regions, in order to assess the variations in reproductive traits among garlic populations. Our findings indicate that the evolution of garlic crops was associated with mutations in genes related to vernalization and the circadian clock. The decline in sexual reproduction is not solely attributed to a few mutations in specific genes, but is correlated with extensive alterations in the genetic regulation of the annual cycle, stress adaptations, and environmental requirements. The regulation of flowering ability, stress response, and metabolism occurs at both the genetic and transcriptional levels. We conclude that the migration and evolution of garlic crops involve substantial and diverse changes across the entire genome landscape. The construction of a garlic pan-genome, encompassing genetic diversity from various garlic populations, will provide further insights for research into and the improvement of garlic crops.}, } @article {pmid38062402, year = {2023}, author = {Liu, Q and Ye, L and Li, M and Wang, Z and Xiong, G and Ye, Y and Tu, T and Schwarzacher, T and Heslop-Harrison, JSP}, title = {Genome-wide expansion and reorganization during grass evolution: from 30 Mb chromosomes in rice and Brachypodium to 550 Mb in Avena.}, journal = {BMC plant biology}, volume = {23}, number = {1}, pages = {627}, pmid = {38062402}, issn = {1471-2229}, support = {32070359, 32370402//National Natural Science Foundation of China/ ; 2021A1515012410//Basic and Applied Basic Research Foundation of Guangdong Province/ ; KCJH-80107-2023-148//Sciences Innovative Training Programs for Undergraduates of Chinese Academy of Sciences/ ; GDZZDC20228704//Guangdong Provincial Special Fund for Natural Resource Affairs on Ecology and Forestry Construction/ ; Y861041001//Overseas Distinguished Scholar Project of South China Botanical Garden, 
Chinese Academy of Sciences/ ; BB/P02307X/1//Global Challenges Research Foundation for Global Agricultural and Food Systems Research/ ; }, abstract = {BACKGROUND: The BOP (Bambusoideae, Oryzoideae, and Pooideae) clade of the Poaceae has a common ancestor, with similarities to the genomes of rice, Oryza sativa (2n = 24; genome size 389 Mb) and Brachypodium, Brachypodium distachyon (2n = 10; 271 Mb). We exploit chromosome-scale genome assemblies to show the nature of genomic expansion, structural variation, and chromosomal rearrangements from rice and Brachypodium, to diploids in the tribe Aveneae (e.g., Avena longiglumis, 2n = 2x = 14; 3,961 Mb assembled to 3,850 Mb in chromosomes).

RESULTS: Most of the Avena chromosome arms show relatively uniform expansion over the 10-fold to 15-fold genome-size increase. Apart from non-coding sequence diversification and accumulation around the centromeres, blocks of genes are not interspersed with blocks of repeats, even in subterminal regions. As in the tribe Triticeae, blocks of conserved synteny are seen between the analyzed species with chromosome fusion, fission, and nesting (insertion) events showing deep evolutionary conservation of chromosome structure during genomic expansion. Unexpectedly, the terminal gene-rich chromosomal segments (representing about 50 Mb) show translocations between chromosomes during speciation, with homogenization of genome-specific repetitive elements within the tribe Aveneae. Newly-formed intergenomic translocations of similar extent are found in the hexaploid A. sativa.

CONCLUSIONS: The study provides insight into evolutionary mechanisms and speciation in the BOP clade, which is valuable for measurement of biodiversity, development of a clade-wide pangenome, and exploitation of genomic diversity through breeding programs in Poaceae.}, } @article {pmid38062371, year = {2023}, author = {Chenhaka, LH and Van Wyk, DAB and Mienie, C and Bezuidenhout, CC and Lekota, KE}, title = {The phylogenomic landscape of extended-spectrum β-lactamase producing Citrobacter species isolated from surface water.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {755}, pmid = {38062371}, issn = {1471-2164}, abstract = {BACKGROUND: Citrobacter species are Gram-negative opportunistic pathogens commonly reported in nosocomial-acquired infections. This study characterised four Citrobacter species that were isolated from surface water in the North West Province, South Africa.

RESULTS: Phenotypic antimicrobial susceptibility profiles of the isolates demonstrated their ability to produce the extended-spectrum β-lactamase (ESBL). Whole genomes were sequenced to profile antibiotic resistance and virulence genes, as well as mobile genetic elements. In silico taxonomic identification was conducted by using multi-locus sequence typing and average nucleotide identity. A pangenome was used to determine the phylogenomic landscape of the Citrobacter species by using 109 publicly available genomes. The strains S21 and S23 were identified as C. braakii, while strains S24 and S25 were C. murliniae and C. portucalensis, respectively. Comparative genomics and sequenced genomes of the ESBL-producing isolates consisted of n = 91; 83% Citrobacter species in which bla-CMY-101 (n = 19; 32,2%) and bla-CMY-59 (n = 12; 38,7%) were prevalent in C. braakii, and C. portucalensis strains, respectively. Macrolide (acrAB-TolC, and mdtG) and aminoglycoside (acrD) efflux pumps genes were identified in the four sequenced Citrobacter spp. isolates. The quinolone resistance gene, qnrB13, was exclusive to the C. portucalensis S25 strain. In silico analysis detected plasmid replicon types IncHI1A, IncP, and Col(VCM04) in C. murliniae S24 and C. portucalensis S25, respectively. These potentially facilitate the T4SS secretion system in Citrobacter species. In this study, the C. braakii genomes could be distinguished from C. murliniae and C. portucalensis on the basis of gene encoding for cell surface localisation of the CPS (vexC) and identification of genes involved in capsule polymer synthesis (tviB and tviE). A cluster for the salmochelin siderophore system (iro-BCDEN) was found in C. murliniae S24. This is important when it comes to the pathogenicity pathway that confers an advantage in colonisation.

CONCLUSIONS: The emerging and genomic landscapes of these ESBL-producing Citrobacter species are of significant concern due to their dissemination potential in freshwater systems. The presence of these ESBL and multidrug-resistant (MDR) pathogens in aquatic environments is of One Health importance, since they potentially impact the clinical domain, that is, in terms of human health and the agricultural domain, that is, in terms of animal health and food production as well as the environmental domain.}, } @article {pmid38062354, year = {2023}, author = {Hochstedler-Kramer, BR and Ene, A and Putonti, C and Wolfe, AJ}, title = {Comparative genomic analysis of clinical Enterococcus faecalis distinguishes strains isolated from the bladder.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {752}, pmid = {38062354}, issn = {1471-2164}, support = {U2CDK129917 and TL1DK132769//National Institute of Diabetes and Digestive Kidney Diseases of the National Institutes of Health/ ; }, abstract = {BACKGROUND: Enterococcus faecalis is the most commonly isolated enterococcal species in clinical infection. This bacterium is notorious for its ability to share genetic content within and outside of its species. With this increased proficiency for horizontal gene transfer, tremendous genomic diversity within this species has been identified. Many researchers have hypothesized E. faecalis exhibits niche adaptation to establish infections or colonize various parts of the human body. Here, we hypothesize that E. faecalis strains isolated from the human bladder will carry unique genomic content compared to clinical strains isolated from other sources.

RESULTS: This analysis includes comparison of 111 E. faecalis genomes isolated from bladder, urogenital, blood, and fecal samples. Phylogenomic comparison shows no association between isolation source and lineage; however, accessory genome comparison differentiates blood and bladder genomes. Further gene enrichment analysis identifies gene functions, virulence factors, antibiotic resistance genes, and plasmid-associated genes that are enriched or rare in bladder genomes compared to urogenital, blood, and fecal genomes. Using these findings as training data and 682 publicly available genomes as test data, machine learning classifiers successfully distinguished between bladder and non-bladder strains with high accuracy. Genes identified as important for this differentiation were often related to transposable elements and phage, including 3 prophage species found almost exclusively in bladder and urogenital genomes.

CONCLUSIONS: E. faecalis strains isolated from the bladder contain unique genomic content when compared to strains isolated from other body sites. This genomic diversity is most likely due to horizontal gene transfer, as evidenced by lack of phylogenomic clustering and enrichment of transposable elements and prophages. Investigation into how these enriched genes influence host-microbe interactions may elucidate gene functions required for successful bladder colonization and disease establishment.}, } @article {pmid38059630, year = {2023}, author = {Allegretti, YH and Yamaji, R and Adams-Sapper, S and Riley, LW}, title = {Genetic features of antimicrobial drug-susceptible extraintestinal pathogenic Escherichia coli pandemic sequence type 95.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0418922}, doi = {10.1128/spectrum.04189-22}, pmid = {38059630}, issn = {2165-0497}, abstract = {Despite the increasing prevalence of antibiotic-resistant Escherichia coli strains that cause urinary tract and bloodstream infections, a major pandemic lineage of extraintestinal pathogenic E. coli (ExPEC) ST95 has a comparatively low frequency of drug resistance. We compared the genomes of 1,749 ST95 isolates to identify genetic features that may explain why most strains of ST95 resist becoming drug-resistant. 
Identification of such genomic features could contribute to the development of novel strategies to prevent the spread of antibiotic-resistant genes and devise new measures to control antibiotic-resistant infections.}, } @article {pmid38057566, year = {2023}, author = {Zhu, F and Yin, ZT and Zhao, QS and Sun, YX and Jie, YC and Smith, J and Yang, YZ and Burt, DW and Hincke, M and Zhang, ZD and Yuan, MD and Kaufman, J and Sun, CJ and Li, JY and Shao, LW and Yang, N and Hou, ZC}, title = {A chromosome-level genome assembly for the Silkie chicken resolves complete sequences for key chicken metabolic, reproductive, and immunity genes.}, journal = {Communications biology}, volume = {6}, number = {1}, pages = {1233}, pmid = {38057566}, issn = {2399-3642}, abstract = {A set of high-quality pan-genomes would help identify important genes that are still hidden/incomplete in bird reference genomes. In an attempt to address these issues, we have assembled a de novo chromosome-level reference genome of the Silkie (Gallus gallus domesticus), which is an important avian model for unique traits, like fibromelanosis, with unclear genetic foundation. This Silkie genome includes the complete genomic sequences of well-known, but unresolved, evolutionarily, endocrinologically, and immunologically important genes, including leptin, ovocleidin-17, and tumor-necrosis factor-α. The gap-less and manually annotated MHC (major histocompatibility complex) region possesses 38 recently identified genes, with differentially regulated genes recovered in response to pathogen challenges. We also provide whole-genome methylation and genetic variation maps, and resolve a complex genetic region that may contribute to fibromelanosis in these animals. Finally, we experimentally show leptin binding to the identified leptin receptor in chicken, confirming an active leptin ligand-receptor system. 
The Silkie genome assembly not only provides a rich data resource for avian genome studies, but also lays a foundation for further functional validation of resolved genes.}, } @article {pmid38053559, year = {2023}, author = {Esteves, MAC and Viana, AS and Viçosa, GN and Botelho, AMN and Moustafa, AM and Mansoldo, FRP and Ferreira, ALP and Vermelho, AB and Ferreira-Carvalho, BT and Planet, PJ and Figueiredo, AMS}, title = {RdJ detection tests to identify a unique MRSA clone of ST105-SCCmecII lineage and its variants disseminated in the metropolitan region of Rio de Janeiro.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1275918}, pmid = {38053559}, issn = {1664-302X}, abstract = {Hospital bloodstream infection (BSI) caused by methicillin-resistant Staphylococcus aureus (MRSA) is a major cause of morbidity and mortality and is frequently related to invasive procedures and medically complex patients. An important feature of MRSA is the clonal structure of its population. Specific MRSA clones may differ in their pathogenic, epidemiological, and antimicrobial resistance profiles. Whole-genome sequencing is currently the most robust and discriminatory technique for tracking hypervirulent/well-adapted MRSA clones. However, it remains an expensive and time-consuming technique that requires specialized personnel. In this work, we describe a pangenome protocol, based on binary matrix (1,0) of open reading frames (ORFs), that can be used to quickly find diagnostic, apomorphic sequence mutations that can serve as biomarkers. We use this technique to create a diagnostic screen for MRSA isolates circulating in the Rio de Janeiro metropolitan area, the RdJ clone, which is prevalent in BSI. The method described here has 100% specificity and sensitivity, eliminating the need to use genomic sequencing for clonal identification. 
The protocol used is relatively simple and all the steps, formulas and commands used are described in this work, such that this strategy can also be used to identify other MRSA clones and even clones from other bacterial species.}, } @article {pmid38049764, year = {2023}, author = {Tian, X and Teo, WFA and Wee, WY and Yang, Y and Ahmed, H and Jakubovics, NS and Choo, SW and Tan, GYA}, title = {Genome characterization and taxonomy of Actinomyces acetigenes sp. nov., and Actinomyces stomatis sp. nov., previously isolated from the human oral cavity.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {734}, pmid = {38049764}, issn = {1471-2164}, support = {WB20211227000125//Wenzhou Municipal Key Laboratory for Applied Biomedical and the Biopharmaceutical Informatics/ ; WB20210429000008//Zhejiang Bioinformatics International Science and Technology Cooperation Center at Wenzhou-Kean University/ ; 5000105//The high-level talent recruitment program for academic and research platform construction from Wenzhou-Kean University/ ; }, abstract = {BACKGROUND: Actinomyces strains are commonly found as part of the normal microflora on human tissue surfaces, including the oropharynx, gastrointestinal tract, and female genital tract. Understanding the diversity and characterization of Actinomyces species is crucial for human health, as they play an important role in dental plaque formation and biofilm-related infections. Two Actinomyces strains ATCC 49340[T] and ATCC 51655[T] have been utilized in various studies, but their accurate species classification and description remain unresolved.

RESULTS: To investigate the genomic properties and taxonomic status of these strains, we employed both 16S rRNA Sanger sequencing and whole-genome sequencing using the Illumina HiSeq X Ten platform with PE151 (paired-end) sequencing. Our analyses revealed that the draft genome of Actinomyces acetigenes ATCC 49340[T] was 3.27 Mbp with a 68.0% GC content, and Actinomyces stomatis ATCC 51655[T] has a genome size of 3.08 Mbp with a 68.1% GC content. Multi-locus (atpA, rpoB, pgi, metG, gltA, gyrA, and core genome SNPs) sequence analysis supported the phylogenetic placement of strains ATCC 51655[T] and ATCC 49340[T] as independent lineages. Digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), and average amino acid identity (AAI) analyses indicated that both strains represented novel Actinomyces species, with values below the threshold for species demarcation (70% dDDH, 95% ANI and AAI). Pangenome analysis identified 5,731 gene clusters with strains ATCC 49340[T] and ATCC 51655[T] possessing 1,515 and 1,518 unique gene clusters, respectively. Additionally, genomic islands (GIs) prediction uncovered 24 putative GIs in strain ATCC 49340[T] and 16 in strain ATCC 51655[T], contributing to their genetic diversity and potential adaptive capabilities. Pathogenicity analysis highlighted the potential human pathogenicity risk associated with both strains, with several virulence-associated factors identified. CRISPR-Cas analysis exposed the presence of CRISPR and Cas genes in both strains, indicating these strains might evolve a robust defense mechanism against them.

CONCLUSION: This study supports the classification of strains ATCC 49340[T] and ATCC 51655[T] as novel species within the Actinomyces, in which the name Actinomyces acetigenes sp. nov. (type strain ATCC 49340[T] = VPI D163E-3[T] = CCUG 34286[T] = CCUG 35339[T]) and Actinomyces stomatis sp. nov. (type strain ATCC 51655[T] = PK606[T] = CCUG 33930[T]) are proposed.}, } @article {pmid38048088, year = {2023}, author = {Chai, K and Chen, S and Wang, P and Kong, W and Ma, X and Zhang, X}, title = {Multiomics Analysis Reveals the Genetic Basis of Volatile Terpenoid Formation in Oolong Tea.}, journal = {Journal of agricultural and food chemistry}, volume = {}, number = {}, pages = {}, doi = {10.1021/acs.jafc.3c06762}, pmid = {38048088}, issn = {1520-5118}, abstract = {Oolong tea has gained great popularity in China due to its pleasant floral and fruity aromas. Although numerous studies have investigated the aroma differences across various tea cultivars, the genetic mechanism is unclear. This study performed multiomics analysis of three varieties suitable for oolong tea and three others with different processing suitability. Our analysis revealed that oolong tea varieties contained higher levels of cadinane sesquiterpenoids. PanTFBS was developed to identify variants of transcription factor binding sites (TFBSs). We found that the CsDCS gene had two TFBS variants in the promoter sequence and a single nucleotide polymorphism (SNP) in the coding sequence. Integrating data on genetic variations, gene expression, and protein-binding sites indicated that CsDCS might be a pivotal gene involved in the biosynthesis of cadinane sesquiterpenoids. 
These findings advance our understanding of the genetic factors involved in the aroma formation of oolong tea and offer insights into the enhancement of tea aroma.}, } @article {pmid38047471, year = {2023}, author = {Kumar, K and Barbora, L and Moholkar, VS}, title = {Genomic insights into clostridia in bioenergy production: Comparison of metabolic capabilities and evolutionary relationships.}, journal = {Biotechnology and bioengineering}, volume = {}, number = {}, pages = {}, doi = {10.1002/bit.28610}, pmid = {38047471}, issn = {1097-0290}, abstract = {Bacteria from diverse genera, including Acetivibrio, Bacillus, Cellulosilyticum, Clostridium, Desulfotomaculum, Lachnoclostridium, Moorella, Ruminiclostridium, and Thermoanaerobacterium, have attracted significant attention due to their versatile metabolic capabilities encompassing acetogenic, cellulolytic, and C1-metabolic properties, and acetone-butanol-ethanol fermentation. Despite their biotechnological significance, a comprehensive understanding of clostridial physiology and evolution has remained elusive. This study reports an extensive comparative genomic analysis of 48 fully sequenced bacterial genomes from these genera. Our investigation, encompassing pan-genomic analysis, central carbon metabolism comparison, exploration of general genome features, and in-depth scrutiny of Cluster of Orthologous Groups genes, has established a holistic whole-genome-based phylogenetic framework. We have classified these strains into acetogenic, butanol-producing, cellulolytic, CO2-fixating, chemo(litho/organo)trophic, and heterotrophic categories, often exhibiting overlaps. Key outcomes include the identification of misclassified species and the revelation of insights into metabolic features, energy conservation, substrate utilization, stress responses, and regulatory mechanisms. These findings can provide guidance for the development of efficient microbial systems for sustainable bioenergy production. 
Furthermore, by addressing fundamental questions regarding genetic relationships, conserved genomic features, pivotal enzymes, and essential genes, this study has also contributed to our comprehension of clostridial biology, evolution, and their shared metabolic potential.}, } @article {pmid38046854, year = {2023}, author = {Zhang, X and Chen, Y and Wang, L and Yuan, Y and Fang, M and Shi, L and Lu, R and Comes, HP and Ma, Y and Chen, Y and Huang, G and Zhou, Y and Zheng, Z and Qiu, Y}, title = {Pangenome of water caltrop reveals structural variations and asymmetric subgenome divergence after allopolyploidization.}, journal = {Horticulture research}, volume = {10}, number = {11}, pages = {uhad203}, pmid = {38046854}, issn = {2662-6810}, abstract = {Water caltrop (Trapa spp., Lythraceae) is a traditional but currently underutilized non-cereal crop. Here, we generated chromosome-level genome assemblies for the two diploid progenitors of allotetraploid Trapa natans (4x, AABB), i.e., diploid T. natans (2x, AA) and Trapa incisa (2x, BB). In conjunction with four published (sub)genomes of Trapa, we used gene-based and graph-based pangenomic approaches and a pangenomic transposable element (TE) library to develop Trapa genomic resources. The pangenome displayed substantial gene-content variation with dispensable and private gene clusters occupying a large proportion (51.95%) of the total cluster sets in the six (sub)genomes. Genotyping of presence-absence variation (PAVs) identified 40 453 PAVs associated with 2570 genes specific to A- or B-lineages, of which 1428 were differentially expressed, and were enriched in organ development process, organic substance metabolic process and response to stimulus. Comparative genome analyses showed that the allotetraploid T. natans underwent asymmetric subgenome divergence, with the B-subgenome being more dominant than the A-subgenome. 
Multiple factors, including PAVs, asymmetrical amplification of TEs, homeologous exchanges (HEs), and homeolog expression divergence, together affected genome evolution after polyploidization. Overall, this study sheds light on the genome architecture and evolution of Trapa, and facilitates its functional genomic studies and breeding program.}, } @article {pmid38045253, year = {2023}, author = {Salamzade, R and Kalan, LR}, title = {skDER: microbial genome dereplication approaches for comparative and metagenomic applications.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.27.559801}, pmid = {38045253}, abstract = {skDER (https://github.com/raufs/skDER) combines recent advances to efficiently estimate average nucleotide identity (ANI) between thousands of microbial genomes by skani [1] with two low-memory methods for genomic dereplication. The first method implements a dynamic algorithm to determine a concise set of representative genomes. This approach is well-suited for selecting reference genomes to align metagenomic reads onto for tracking strain presence across related microbiome samples. This is because fewer representative genomes should alleviate the concern that reads belonging to the same strain get falsely partitioned across closely related genomes. The other method, which uses a greedy approach, is better suited for use in comparative genomics, where users might be overwhelmed with the high number of genomes available for certain taxa and aim to reduce redundancy and, therefore, computational requirements for downstream analytics. This method selects a larger number of representative genomes to comprehensively sample the pangenome space for the taxon of interest. 
To further aid usage for comparative genomics studies, skDER also features an option to automatically download genomes classified as a particular species or genus in the Genome Taxonomy Database [2-4], and we provide precomputed representative genomes for commonly studied bacterial taxa [5].}, } @article {pmid38040628, year = {2023}, author = {Liu, X and Wu, Z and Hu, T and Lin, X and Liang, H and Li, W and Jin, X and Xiao, L and Fang, X and Zou, Y}, title = {Comparative genomic analysis reveals niche adaption of Lactobacillus acidophilus.}, journal = {Journal of applied microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1093/jambio/lxad287}, pmid = {38040628}, issn = {1365-2672}, abstract = {AIMS: Lactobacillus acidophilus has been extensively applied in plentiful probiotic products. Although several studies have been performed to investigate the beneficial characteristics and genome function of L. acidophilus, comparative genomic analysis remains scarce. In this study, we collected 74 L. acidophilus genomes from our gut bacterial genome collection and the public database and conducted a comprehensive comparative genomic analysis.

METHODS AND RESULTS: This study revealed the potential correlation of the genomic diversity and niche adaptation of L. acidophilus from different perspectives. The pan-genome of L. acidophilus was found to be open, with metabolism, information storage and processing genes mainly distributed in the core genome. Phage- and peptidase-associated genes were found in the genome of the specificity of animal-derived strains, which were related to adaptation of animal gut. SNP analysis showed the differences of the utilization of vitamin B12 in cellular of L. acidophilus strains from animal gut and others.

CONCLUSIONS: This work provides new insights for the genomic diversity analysis of Lactobacillus acidophilus and uncovers the ecological adaptation of the specific strains.}, } @article {pmid38037131, year = {2023}, author = {Andreace, F and Lechat, P and Dufresne, Y and Chikhi, R}, title = {Comparing methods for constructing and representing human pangenome graphs.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {274}, pmid = {38037131}, issn = {1474-760X}, support = {ANR-22-CE45-0007//ANR Full-RNA/ ; ANR-19-CE45-0008//SeqDigger/ ; PIA/ANR16-CONV-0005//Inception/ ; ANR-19-P3IA-0001//PRAIRIE/ ; 956229//H2020 Marie Skłodowska-Curie Actions/ ; 872539//H2020 Marie Skłodowska-Curie Actions/ ; }, abstract = {BACKGROUND: As a single reference genome cannot possibly represent all the variation present across human individuals, pangenome graphs have been introduced to incorporate population diversity within a wide range of genomic analyses. Several data structures have been proposed for representing collections of genomes as pangenomes, in particular graphs.

RESULTS: In this work, we collect all publicly available high-quality human haplotypes and construct the largest human pangenome graphs to date, incorporating 52 individuals in addition to two synthetic references (CHM13 and GRCh38). We build variation graphs and de Bruijn graphs of this collection using five of the state-of-the-art tools: Bifrost, mdbg, Minigraph, Minigraph-Cactus and pggb. We examine differences in the way each of these tools represents variations between input sequences, both in terms of overall graph structure and representation of specific genetic loci.

CONCLUSION: This work sheds light on key differences between pangenome graph representations, informing end-users on how to select the most appropriate graph type for their application.}, } @article {pmid38036791, year = {2023}, author = {Chen, J and Liu, Y and Liu, M and Guo, W and Wang, Y and He, Q and Chen, W and Liao, Y and Zhang, W and Gao, Y and Dong, K and Ren, R and Yang, T and Zhang, L and Qi, M and Li, Z and Zhao, M and Wang, H and Wang, J and Qiao, Z and Li, H and Jiang, Y and Liu, G and Song, X and Deng, Y and Li, H and Yan, F and Dong, Y and Li, Q and Li, T and Yang, W and Cui, J and Wang, H and Zhou, Y and Zhang, X and Jia, G and Lu, P and Zhi, H and Tang, S and Diao, X}, title = {Pangenome analysis reveals genomic variations associated with domestication traits in broomcorn millet.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {38036791}, issn = {1546-1718}, abstract = {Broomcorn millet (Panicum miliaceum L.) is an orphan crop with the potential to improve cereal production and quality, and ensure food security. Here we present the genetic variations, population structure and diversity of a diverse worldwide collection of 516 broomcorn millet genomes. Population analysis indicated that the domesticated broomcorn millet originated from its wild progenitor in China. We then constructed a graph-based pangenome of broomcorn millet based on long-read de novo genome assemblies of 32 representative accessions. Our analysis revealed that the structural variations were highly associated with transposable elements, which influenced gene expression when located in the coding or regulatory regions. We also identified 139 loci associated with 31 key domestication and agronomic traits, including candidate genes and superior haplotypes, such as LG1, for panicle architecture. 
Thus, the study's findings provide foundational resources for developing genomics-assisted breeding programs in broomcorn millet.}, } @article {pmid38035008, year = {2023}, author = {Muhammad, SA and Guo, J and Noor, K and Mustafa, A and Amjad, A and Bai, B}, title = {Pangenomic and immunoinformatics based analysis of Nipah virus revealed CD4[+] and CD8[+] T-Cell epitopes as potential vaccine candidates.}, journal = {Frontiers in pharmacology}, volume = {14}, number = {}, pages = {1290436}, doi = {10.3389/fphar.2023.1290436}, pmid = {38035008}, issn = {1663-9812}, abstract = {Introduction: Nipah (NiV) is the zoonotic deadly bat-borne virus that causes neurological and respiratory infections which ultimately lead to death. There are 706 infected cases reported up till now especially in Asia, out of which 409 patients died. There is no vaccine and effective treatment available for NiV infections and we have to timely design such strategies as world could not bear another pandemic situation. Methods: In this study, we screened viral proteins of NiV strains based on pangenomics analysis, antigenicity, molecular weight, and sub-cellular localization. The immunoproteomics based approach was used to predict T-cell epitopes of MHC class-I and II as potential vaccine candidates. These epitopes are capable to activate CD4[+], CD8[+], and T-cell dependent B-lymphocytes. Results: The two surface proteins including fusion glycoprotein (F) and attachment glycoprotein (G) are antigenic with molecular weights of 60 kDa and 67 kDa respectively. Three epitopes of F protein (VNYNSEGIA, PNFILVRNT, and IKMIPNVSN) were ranked and selected based on the binding affinity with MHC class-I, and 3 epitopes (VILNKRYYS, ILVRNTLIS, and VKLQETAEK) with MHC-II molecules. Similarly, for G protein, 3 epitopes each for MHC-I (GKYDKVMPY, ILKPKLISY, and KNKIWCISL) and MHC-II (LRNIEKGKY, FLIDRINWI, and FLLKNKIWC) with substantial binding energies were predicted. 
Based on the physicochemical properties, all these epitopes are non-toxic, hydrophilic, and stable. Conclusion: Our vaccinomics and system-level investigation could help to trigger the host immune system to prevent NiV infection.}, } @article {pmid38033569, year = {2023}, author = {Feng, L and Zhang, M and Fan, Z}, title = {Population genomic analysis of clinical ST15 Klebsiella pneumoniae strains in China.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1272173}, doi = {10.3389/fmicb.2023.1272173}, pmid = {38033569}, issn = {1664-302X}, abstract = {ST15 Klebsiella pneumoniae (Kpn) is a growing public health concern in China and worldwide, yet its genomic and evolutionary dynamics in this region remain poorly understood. This study comprehensively elucidates the population genomics of ST15 Kpn in China by analyzing 287 publicly available genomes. The proportion of the genomes increased sharply from 2012 to 2021, and 92.3% of them were collected from the Yangtze River Delta (YRD) region of eastern China. Carbapenemase genes, including OXA-232, KPC-2, and NDM, were detected in 91.6% of the studied genomes, 69.2% of which were multidrug resistant (MDR) and hypervirulent (hv). Phylogenetic analysis revealed four clades, C1 (KL112, 59.2%), C2 (mainly KL19, 30.7%), C3 (KL48, 0.7%) and C4 (KL24, 9.4%). C1 appeared in 2007 and was OXA-232-producing and hv; C2 and C4 appeared between 2005 and 2007, and both were KPC-2-producing but with different levels of virulence. Transmission clustering detected 86.1% (n = 247) of the enrolled strains were grouped into 55 clusters (2-159 strains) and C1 was more transmissible than others. Plasmid profiling revealed 88 plasmid clusters (PCs) that were highly heterogeneous both between and within clades. 60.2% (n = 53) of the PCs carried AMR genes, 7 of which also harbored VFs. KPC-2, NDM and OXA-232 were distributed across 14, 4 and 1 PCs, respectively. 
The MDR-hv strains all carried one of two homologous PCs encoding iucABCD and rmpA2 genes. Pangenome analysis revealed two major coinciding accessory components predominantly located on plasmids. One component, associated with KPC-2, encompassed 15 additional AMR genes, while the other, linked to OXA-232, involved seven more AMR genes. This study provides essential insights into the genomic evolution of the high-risk ST15 CP-Kpn strains in China and warrants rigorous monitoring.}, } @article {pmid38029170, year = {2023}, author = {Wu, F and Zhang, T and Wu, Q and Li, X and Zhang, M and Luo, X and Zhang, Y and Lu, R}, title = {Complete genome sequence and comparative analysis of a Vibrio vulnificus strain isolated from a clinical patient.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1240835}, doi = {10.3389/fmicb.2023.1240835}, pmid = {38029170}, issn = {1664-302X}, abstract = {Vibrio vulnificus is an opportunistic, global pathogen that naturally inhabits sea water and is responsible for most vibriosis-related deaths. We investigated the genetic characteristics of V. vulnificus isolated from the clinical blood culture specimen of a patient with hepatitis B virus cirrhosis in 2018 (named as V. vulnificus VV2018) by whole genome sequencing (WGS). VV2018 belonged to a novel sequencing type 620 (ST620) and comprised two circular chromosomes, containing 4,389 potential coding sequences (CDSs) and 152 RNA genes. The phylogenetic tree of single nucleotide polymorphisms (SNPs) using 26 representative genomes revealed that VV2018 grouped with two other V. vulnificus strains isolated from humans. The pan-genome of V. vulnificus was constructed using 26 representative genomes to elucidate their genetic diversity, evolutionary characteristics, and virulence and antibiotic resistance profiles. 
The pan-genome analysis revealed that VV2018 shared a total of 3,016 core genes (≥99% presence), including 115 core virulence factors (VFs) and 5 core antibiotic resistance-related genes, and 309 soft core genes (≥95 and <99% presence) with 25 other V. vulnificus strains. The varG gene might account for the cefazolin resistance, and comparative analysis of the genetic context of varG revealed that two genes upstream and downstream of varG were conserved. The glycosylation (pgl) like genes were found in VV2018 compared with Pgl-related proteins in Neisseria that might affect the adherence of the strain in hosts. The comparative analysis of VV2018 would contribute to a better understanding of the virulence and antibiotic resistance profiles of V. vulnificus. Meanwhile much work remains to be done to better understand the function of pgl-like genes in V. vulnificus.}, } @article {pmid38029151, year = {2023}, author = {Cai, X and Peng, Y and Yang, G and Feng, L and Tian, X and Huang, P and Mao, Y and Xu, L}, title = {Populational genomic insights of Paraclostridium bifermentans as an emerging human pathogen.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1293206}, doi = {10.3389/fmicb.2023.1293206}, pmid = {38029151}, issn = {1664-302X}, abstract = {Paraclostridium bifermentans (P.b) is an emerging human pathogen that is phylogenomically close to Paeniclostridium sordellii (P.s), while their populational genomic features and virulence capacity remain understudied. Here, we performed comparative genomic analyses of P.b and compared their pan-genomic features and virulence coding profiles to those of P.s. Our results revealed that P.b has a more plastic pangenome, a larger genome size, and a higher GC content than P.s. Interestingly, the P.b and P.s share similar core-genomic functions, but P.b encodes more functions in nutrient metabolism and energy conversion and fewer functions in host defense in their accessory-genomes. 
The P.b may initiate extracellular infection processes similar to those of P.s and Clostridium perfringens by encoding three toxin homologs (i.e., microbial collagenase, thiol-activated cytolysin, phospholipase C, which are involved in extracellular matrices degradation and membrane damaging) in their core-genomes. However, P.b is less toxic than the P.s by encoding fewer secretion toxins in the core-genome and fewer lethal toxins in the accessory-genome. Notably, P.b carries more toxins genes in their accessory-genomes, particularly those of plasmid origin. Moreover, three within-species and highly conserved plasmid groups, encoding virulence, gene acquisition, and adaptation, were carried by 25-33% of P.b strains and clustered by isolation source rather than geography. This study characterized the pan-genomic virulence features of P.b for the first time, and revealed that P. bifermentans is an emerging pathogen that can threaten human health in many aspects, emphasizing the importance of phenotypic and genomic characterizations of in situ clinical isolates.}, } @article {pmid38029109, year = {2023}, author = {Crosby, KC and Rojas, M and Sharma, P and Johnson, MA and Mazloom, R and Kvitko, BH and Smits, THM and Venter, SN and Coutinho, TA and Heath, LS and Palmer, M and Vinatzer, BA}, title = {Genomic delineation and description of species and within-species lineages in the genus Pantoea.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1254999}, doi = {10.3389/fmicb.2023.1254999}, pmid = {38029109}, issn = {1664-302X}, abstract = {As the name of the genus Pantoea ("of all sorts and sources") suggests, this genus includes bacteria with a wide range of provenances, including plants, animals, soils, components of the water cycle, and humans. 
Some members of the genus are pathogenic to plants, and some are suspected to be opportunistic human pathogens; while others are used as microbial pesticides or show promise in biotechnological applications. During its taxonomic history, the genus and its species have seen many revisions. However, evolutionary and comparative genomics studies have started to provide a solid foundation for a more stable taxonomy. To move further toward this goal, we have built a 2,509-gene core genome tree of 437 public genome sequences representing the currently known diversity of the genus Pantoea. Clades were evaluated for being evolutionarily and ecologically significant by determining bootstrap support, gene content differences, and recent recombination events. These results were then integrated with genome metadata, published literature, descriptions of named species with standing in nomenclature, and circumscriptions of yet-unnamed species clusters, 15 of which we assigned names under the nascent SeqCode. Finally, genome-based circumscriptions and descriptions of each species and each significant genetic lineage within species were uploaded to the LINbase Web server so that newly sequenced genomes of isolates belonging to any of these groups could be precisely and accurately identified.}, } @article {pmid38029097, year = {2023}, author = {Shikov, AE and Merkushova, AV and Savina, IA and Nizhnikov, AA and Antonets, KS}, title = {The man, the plant, and the insect: shooting host specificity determinants in Serratia marcescens pangenome.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1211999}, doi = {10.3389/fmicb.2023.1211999}, pmid = {38029097}, issn = {1664-302X}, abstract = {INTRODUCTION: Serratia marcescens is most commonly known as an opportunistic pathogen causing nosocomial infections. 
It, however, was shown to infect a wide range of hosts apart from vertebrates such as insects or plants as well, being either pathogenic or growth-promoting for the latter. Despite being extensively studied in terms of virulence mechanisms during human infections, there has been little evidence of which factors determine S. marcescens host specificity. On that account, we analyzed S. marcescens pangenome to reveal possible specificity factors.

METHODS: We selected 73 high-quality genome assemblies of complete level and reconstructed the respective pangenome and reference phylogeny based on core genes alignment. To find an optimal pipeline, we tested current pangenomic tools and obtained several phylogenetic inferences. The pangenome was rich in its accessory component and was considered open according to the Heaps' law. We then applied the pangenome-wide associating method (pan-GWAS) and predicted positively associated gene clusters attributed to three host groups, namely, humans, insects, and plants.

RESULTS: According to the results, significant factors relating to human infections included transcriptional regulators, lipoproteins, ABC transporters, and membrane proteins. Host preference toward insects, in its turn, was associated with diverse enzymes, such as hydrolases, isochorismatase, and N-acetyltransferase with the latter possibly exerting a neurotoxic effect. Finally, plant infection may be conducted through type VI secretion systems and modulation of plant cell wall synthesis. Interestingly, factors associated with plants also included putative growth-promoting proteins like enzymes performing xenobiotic degradation and releasing ammonium ions. We also identified overrepresented functional annotations within the sets of specificity factors and found that their functional characteristics fell into separate clusters, thus, implying that host adaptation is represented by diverse functional pathways. Finally, we found that mobile genetic elements bore specificity determinants. In particular, prophages were mainly associated with factors related to humans, while genetic islands—with insects and plants, respectively.

DISCUSSION: In summary, functional enrichments coupled with pangenomic inferences allowed us to hypothesize that the respective host preference is carried out through distinct molecular mechanisms of virulence. To the best of our knowledge, the presented research is the first to identify specific genomic features of S. marcescens assemblies isolated from different hosts at the pangenomic level.}, } @article {pmid38028596, year = {2023}, author = {Kabata, F and Thaldar, D}, title = {The human genome as the common heritage of humanity.}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1282515}, doi = {10.3389/fgene.2023.1282515}, pmid = {38028596}, issn = {1664-8021}, abstract = {While debate on the international regulation of human genomic research remains unsettled, the Universal Declaration on the Human Genome and Human Rights, 1997 qualifies the human genome as "heritage of humankind" in a symbolic sense. Using document analysis this article assesses whether, how and to what extent the common heritage framework is relevant in regulation of human genomic research. The article traces the history of the Human Genome Project to reveal the international community's race against privatization of the human genome and its resulting qualification as the common heritage of humanity. Further, it reviews the archival records of UNESCO's International Bioethics Committee to discover the rationale for qualifying the human genome as common heritage of humankind. The article finds that the common heritage of mankind framework remains relevant to the application of the human genome at the collective level. However, the framework is at odds with the individual dimension of the human genome based on individual personality rights. 
The article thus argues that the right to benefit from scientific progress and its applications offers an alternative international regulatory framework for human genomic research.}, } @article {pmid38026211, year = {2023}, author = {Ghaly, TM and Rajabal, V and Penesyan, A and Coleman, NV and Paulsen, IT and Gillings, MR and Tetu, SG}, title = {Functional enrichment of integrons: Facilitators of antimicrobial resistance and niche adaptation.}, journal = {iScience}, volume = {26}, number = {11}, pages = {108301}, doi = {10.1016/j.isci.2023.108301}, pmid = {38026211}, issn = {2589-0042}, abstract = {Integrons are genetic elements, found among diverse bacteria and archaea, that capture and rearrange gene cassettes to rapidly generate genetic diversity and drive adaptation. Despite their broad taxonomic and geographic prevalence, and their role in microbial adaptation, the functions of gene cassettes remain poorly characterized. Here, using a combination of bioinformatic and experimental analyses, we examined the functional diversity of gene cassettes from different environments. We find that cassettes encode diverse antimicrobial resistance (AMR) determinants, including those conferring resistance to antibiotics currently in the developmental pipeline. Further, we find a subset of cassette functions is universally enriched relative to their broader metagenomes. These are largely involved in (a)biotic interactions, including AMR, phage defense, virulence, biodegradation, and stress tolerance. The remainder of functions are sample-specific, suggesting that they confer localised functions relevant to their microenvironment. 
Together, they comprise functional profiles different from bulk metagenomes, representing niche-adaptive components of the prokaryotic pangenome.}, } @article {pmid38023484, year = {2023}, author = {Yocca, AE and Platts, A and Alger, E and Teresi, S and Mengist, MF and Benevenuto, J and Ferrão, LFV and Jacobs, M and Babinski, M and Magallanes-Lundback, M and Bayer, P and Golicz, A and Humann, JL and Main, D and Espley, RV and Chagné, D and Albert, NW and Montanari, S and Vorsa, N and Polashock, J and Díaz-Garcia, L and Zalapa, J and Bassil, NV and Munoz, PR and Iorizzo, M and Edger, PP}, title = {Blueberry and cranberry pangenomes as a resource for future genetic studies and breeding efforts.}, journal = {Horticulture research}, volume = {10}, number = {11}, pages = {uhad202}, doi = {10.1093/hr/uhad202}, pmid = {38023484}, issn = {2662-6810}, abstract = {Domestication of cranberry and blueberry began in the United States in the early 1800s and 1900s, respectively, and in part owing to their flavors and health-promoting benefits are now cultivated and consumed worldwide. The industry continues to face a wide variety of production challenges (e.g. disease pressures), as well as a demand for higher-yielding cultivars with improved fruit quality characteristics. Unfortunately, molecular tools to help guide breeding efforts for these species have been relatively limited compared with those for other high-value crops. Here, we describe the construction and analysis of the first pangenome for both blueberry and cranberry. Our analysis of these pangenomes revealed both crops exhibit great genetic diversity, including the presence-absence variation of 48.4% genes in highbush blueberry and 47.0% genes in cranberry. 
Auxiliary genes, those not shared by all cultivars, are significantly enriched with molecular functions associated with disease resistance and the biosynthesis of specialized metabolites, including compounds previously associated with improving fruit quality traits. The discovery of thousands of genes, not present in the previous reference genomes for blueberry and cranberry, will serve as the basis of future research and as potential targets for future breeding efforts. The pangenome, as a multiple-sequence alignment, as well as individual annotated genomes, are publicly available for analysis on the Genome Database for Vaccinium-a curated and integrated web-based relational database. Lastly, the core-gene predictions from the pangenomes will serve useful to develop a community genotyping platform to guide future molecular breeding efforts across the family.}, } @article {pmid38017392, year = {2023}, author = {Jensen, MG and Svraka, L and Baez, E and Lund, M and Poehlein, A and Brüggemann, H}, title = {Species- and strain-level diversity of Corynebacteria isolated from human facial skin.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {366}, pmid = {38017392}, issn = {1471-2180}, support = {LF-OC-21-000826//LEO Fondet/ ; }, abstract = {BACKGROUND: Sequencing of the human skin microbiome revealed that Corynebacterium is an ubiquitous and abundant bacterial genus on human skin. Shotgun sequencing further highlighted the microbial "dark matter" of the skin microbiome, consisting of microorganisms, including corynebacterial species that were not cultivated and genome-sequenced so far. In this pilot project, facial human skin swabs of 13 persons were cultivated to selectively obtain corynebacteria. 54 isolates were collected and 15 of these were genome-sequenced and the pan-genome was determined. The strains were biochemically characterized and antibiotic susceptibility testing (AST) was performed.

RESULTS: Among the 15 sequenced strains, nine different corynebacterial species were found, including two so far undescribed species, tentatively named "Corynebacterium vikingii" and "Corynebacterium borealis", for which closed genome sequences were obtained. Strain variability beyond the species level was determined in biochemical tests, such as the variable presence of urease activity and the capacity to ferment different sugars. The ability to grow under anaerobic conditions on solid agar was found to be species-specific. AST revealed resistances to clindamycin in seven strains. A Corynebacterium pseudokroppenstedtii strain showed additional resistance towards beta-lactam and fluoroquinolone antibiotics; a chromosomally located 17 kb gene cluster with five antibiotic resistance genes was found in the closed genome of this strain.

CONCLUSIONS: Taken together, this pilot study identified an astonishing diversity of cutaneous corynebacterial species in a relatively small cohort and determined species- and strain-specific individualities regarding biochemical and resistance profiles. This further emphasizes the need for cultivation-based studies to be able to study these microorganisms in more detail, in particular regarding their host-interacting and, potentially, -beneficial and/or -detrimental properties.}, } @article {pmid38015202, year = {2023}, author = {Williams, AN and Ma, A and Croxen, MA and Demczuk, WHB and Martin, I and Tyrrell, GJ}, title = {Genomic analysis of Streptococcus pneumoniae serogroup 20 isolates in Alberta, Canada from 1993-2019.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001141}, pmid = {38015202}, issn = {2057-5858}, abstract = {In the province of Alberta, Canada, invasive disease caused by Streptococcus pneumoniae serogroup 20 (serotypes 20A/20B) has been increasing in incidence. Here, we characterize provincial invasive serogroup 20 isolates collected from 1993 to 2019 alongside invasive and non-invasive serogroup 20 isolates from the Global Pneumococcal Sequencing (GPS) Project collected from 1998 to 2015. Trends in clinical metadata and geographic location were evaluated, and serogroup 20 isolate genomes were subjected to molecular sequence typing, virulence and antimicrobial resistance factor mining, phylogenetic analysis and pangenome calculation. Two hundred and seventy-four serogroup 20 isolates from Alberta were sequenced, and analysed along with 95 GPS Project genomes. The majority of invasive Alberta serogroup 20 isolates were identified after 2007 in primarily middle-aged adults and typed predominantly as ST235, a sequence type that was rare among GPS Project isolates. Most Alberta isolates carried a full-length whaF capsular gene, suggestive of serotype 20B. 
All Alberta and GPS Project genomes carried molecular resistance determinants implicated in fluoroquinolone and macrolide resistance, with a few Alberta isolates exhibiting phenotypic resistance to azithromycin, clindamycin, erythromycin, tetracycline and trimethoprim-sulfamethoxazole, as well as non-susceptibility to tigecycline. All isolates carried multiple virulence factors including those involved in adherence, immune modulation and nutrient uptake, as well as exotoxins and exoenzymes. Phylogenetically, Alberta serogroup 20 isolates clustered with predominantly invasive GPS Project isolates from the USA, Israel, Brazil and Nepal. Overall, this study highlights the increasing incidence of invasive S. pneumoniae serogroup 20 disease in Alberta, Canada, and provides insights into the genetic and clinical characteristics of these isolates within a global context.}, } @article {pmid38014076, year = {2023}, author = {Ramsbottom, KA and Prakash, A and Riverol, YP and Camacho, OM and Sun, Z and Kundu, DJ and Bowler-Barnett, E and Martin, M and Fan, J and Chebotarov, D and McNally, KL and Deutsch, EW and Vizcaíno, JA and Jones, AR}, title = {A meta-analysis of rice phosphoproteomics data to understand variation in cell signalling across the rice pan-genome.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.11.17.567512}, pmid = {38014076}, abstract = {Phosphorylation is the most studied post-translational modification, and has multiple biological functions. In this study, we have re-analysed publicly available mass spectrometry proteomics datasets enriched for phosphopeptides from Asian rice (Oryza sativa). In total we identified 15,522 phosphosites on serine, threonine and tyrosine residues on rice proteins. We identified sequence motifs for phosphosites, and link motifs to enrichment of different biological processes, indicating different downstream regulation likely caused by different kinase groups. 
We cross-referenced phosphosites against the rice 3,000 genomes, to identify single amino acid variations (SAAVs) within or proximal to phosphosites that could cause loss of a site in a given rice variety. The data was clustered to identify groups of sites with similar patterns across rice family groups, for example those highly conserved in Japonica, but mostly absent in Aus type rice varieties - known to have different responses to drought. These resources can assist rice researchers to discover alleles with significantly different functional effects across rice varieties. The data has been loaded into UniProt Knowledge-Base - enabling researchers to visualise sites alongside other data on rice proteins e.g. structural models from AlphaFold2, PeptideAtlas and the PRIDE database - enabling visualisation of source evidence, including scores and supporting mass spectra.}, } @article {pmid38012560, year = {2023}, author = {Liu, H and Zhao, W and Hua, W and Liu, J}, title = {Correction: A large-scale population based organelle pan-genomes construction and phylogeny analysis reveal the genetic diversity and the evolutionary origins of chloroplast and mitochondrion in Brassica napus L.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {716}, pmid = {38012560}, issn = {1471-2164}, } @article {pmid38012347, year = {2023}, author = {Edwards, D and Batley, J}, title = {Teatime for pangenomics.}, journal = {Nature plants}, volume = {}, number = {}, pages = {}, pmid = {38012347}, issn = {2055-0278}, } @article {pmid38012346, year = {2023}, author = {Chen, S and Wang, P and Kong, W and Chai, K and Zhang, S and Yu, J and Wang, Y and Jiang, M and Lei, W and Chen, X and Wang, W and Gao, Y and Qu, S and Wang, F and Wang, Y and Zhang, Q and Gu, M and Fang, K and Ma, C and Sun, W and Ye, N and Wu, H and Zhang, X}, title = {Gene mining and genomics-assisted breeding empowered by the pangenome of tea plant Camellia sinensis.}, journal = {Nature plants}, volume = {}, 
number = {}, pages = {}, pmid = {38012346}, issn = {2055-0278}, support = {No. 32222019//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, abstract = {Tea is one of the world's oldest crops and is cultivated to produce beverages with various flavours. Despite advances in sequencing technologies, the genetic mechanisms underlying key agronomic traits of tea remain unclear. In this study, we present a high-quality pangenome of 22 elite cultivars, representing broad genetic diversity in the species. Our analysis reveals that a recent long terminal repeat burst contributed nearly 20% of gene copies, introducing functional genetic variants that affect phenotypes such as leaf colour. Our graphical pangenome improves the efficiency of genome-wide association studies and allows the identification of key genes controlling bud flush timing. We also identified strong correlations between allelic variants and flavour-related chemistries. These findings deepen our understanding of the genetic basis of tea quality and provide valuable genomic resources to facilitate its genomics-assisted breeding.}, } @article {pmid37961504, year = {2023}, author = {Hong, A and Oliva, M and Köppl, D and Bannai, H and Boucher, C and Gagie, T}, title = {PFP-FM: An Accelerated FM-index.}, journal = {Research square}, volume = {}, number = {}, pages = {}, pmid = {37961504}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; }, abstract = {FM-indexes are a crucial data structure in DNA alignment, but searching with them usually takes at least one random access per character in the query pattern. Ferragina and Fischer [1] observed in 2007 that word-based indexes often use fewer random accesses than character-based indexes, and thus support faster searches. Since DNA lacks natural word-boundaries, however, it is necessary to parse it somehow before applying word-based FM-indexing. Last year, Deng et al. 
[2] proposed parsing genomic data by induced suffix sorting, and showed the resulting word-based FM-indexes support faster counting queries than standard FM-indexes when patterns are a few thousand characters or longer. In this paper we show that using prefix-free parsing-which takes parameters that let us tune the average length of the phrases-instead of induced suffix sorting, gives a significant speedup for patterns of only a few hundred characters. We implement our method and demonstrate it is between 3 and 18 times faster than competing methods on queries to GRCh38, and is consistently faster on queries made to 25,000, 50,000 and 100,000 SARS-CoV-2 genomes. Hence, it seems our method accelerates the performance of count over all state-of-the-art methods with a minor increase in the memory. The source code for PFP-FM is available at https://github.com/marco-oliva/afm.}, } @article {pmid38008766, year = {2023}, author = {Mackenzie, A and Norman, M and Gessese, M and Chen, C and Sørensen, C and Hovmøller, M and Ma, L and Forrest, K and Hickey, L and Bariana, H and Bansal, U and Periyannan, S}, title = {Wheat stripe rust resistance locus YR63 is a hot spot for evolution of defence genes - a pangenome discovery.}, journal = {BMC plant biology}, volume = {23}, number = {1}, pages = {590}, pmid = {38008766}, issn = {1471-2229}, abstract = {BACKGROUND: Stripe rust, caused by Puccinia striiformis f. sp. tritici (Pst), poses a threat to global wheat production. Deployment of widely effective resistance genes underpins management of this ongoing threat. This study focused on the mapping of stripe rust resistance gene YR63 from a Portuguese hexaploid wheat landrace AUS27955 of the Watkins Collection.

RESULTS: YR63 exhibits resistance to a broad spectrum of Pst races from Australia, Africa, Asia, Europe, Middle East and South America. It was mapped to the short arm of chromosome 7B, between two single nucleotide polymorphic (SNP) markers sunCS_YR63 and sunCS_67, positioned at 0.8 and 3.7 Mb, respectively, in the Chinese Spring genome assembly v2.1. We characterised YR63 locus using an integrated approach engaging targeted genotyping-by-sequencing (tGBS), mutagenesis, resistance gene enrichment and sequencing (MutRenSeq), RNA sequencing (RNASeq) and comparative genomic analysis with tetraploid (Zavitan and Svevo) and hexaploid (Chinese Spring) wheat genome references and 10+ hexaploid wheat genomes. YR63 is positioned at a hot spot enriched with multiple nucleotide-binding and leucine rich repeat (NLR) and kinase domain encoding genes, known widely for defence against pests and diseases in plants and animals. Detection of YR63 within these gene clusters is not possible through short-read sequencing due to high homology between members. However, using the sequence of a NLR member we were successful in detecting a closely linked SNP marker for YR63 and validated on a panel of Australian bread wheat, durum and triticale cultivars.

CONCLUSIONS: This study highlights YR63 as a valuable source for resistance against Pst in Australia and elsewhere. The closely linked SNP marker will facilitate rapid introgression of YR63 into elite cultivars through marker-assisted selection. The bottleneck of this study reinforces the necessity for a long-read sequencing such as PacBio or Oxford Nanopore based techniques for accurate detection of the underlying resistance gene when it is part of a large gene cluster.}, } @article {pmid38004814, year = {2023}, author = {Carter, MQ and Quiñones, B and He, X and Pham, A and Carychao, D and Cooley, MB and Lo, CC and Chain, PSG and Lindsey, RL and Bono, JL}, title = {Genomic and Phenotypic Characterization of Shiga Toxin-Producing Escherichia albertii Strains Isolated from Wild Birds in a Major Agricultural Region in California.}, journal = {Microorganisms}, volume = {11}, number = {11}, pages = {}, doi = {10.3390/microorganisms11112803}, pmid = {38004814}, issn = {2076-2607}, support = {USDA-ARS CRIS projects 2030-42000-049-00D, 2030-42000-052-000D, and 2030-42000-055-000D//United States Department of Agriculture/ ; }, abstract = {Escherichia albertii is an emerging foodborne pathogen. To better understand the pathogenesis and health risk of this pathogen, comparative genomics and phenotypic characterization were applied to assess the pathogenicity potential of E. albertii strains isolated from wild birds in a major agricultural region in California. Shiga toxin genes stx2f were present in all avian strains. Pangenome analyses of 20 complete genomes revealed a total of 11,249 genes, of which nearly 80% were accessory genes. Both core gene-based phylogenetic and accessory gene-based relatedness analyses consistently grouped the three stx2f-positive clinical strains with the five avian strains carrying ST7971. Among the three Stx2f-converting prophage integration sites identified, ssrA was the most common one. 
Besides the locus of enterocyte effacement and type three secretion system, the high pathogenicity island, OI-122, and type six secretion systems were identified. Substantial strain variation in virulence gene repertoire, Shiga toxin production, and cytotoxicity were revealed. Six avian strains exhibited significantly higher cytotoxicity than that of stx2f-positive E. coli, and three of them exhibited a comparable level of cytotoxicity with that of enterohemorrhagic E. coli outbreak strains, suggesting that wild birds could serve as a reservoir of E. albertii strains with great potential to cause severe diseases in humans.}, } @article {pmid38004763, year = {2023}, author = {Xue, M and Gao, Q and Yan, R and Liu, L and Wang, L and Wen, B and Wen, C}, title = {Comparative Genomic Analysis of Shrimp-Pathogenic Vibrio parahaemolyticus LC and Intraspecific Strains with Emphasis on Virulent Factors of Mobile Genetic Elements.}, journal = {Microorganisms}, volume = {11}, number = {11}, pages = {}, doi = {10.3390/microorganisms11112752}, pmid = {38004763}, issn = {2076-2607}, support = {32072995//National Natural Science Foundation of China/ ; K22218//Modern Seed Industry Park for Whiteleg Shrimp of Guangdong Province/ ; 004//Lianjiang Shrimp Aquaculture Group Co., Ltd., Guangdong, China/ ; }, abstract = {Vibrio parahaemolyticus exhibits severe pathogenicity in humans and animals worldwide. In this study, genome sequencing and comparative analyses were conducted for in-depth characterization of the virulence factor (VF) repertoire of V. parahaemolyticus strain LC, which presented significant virulence to shrimp Litopenaeus vannamei. Strain LC, harboring two circular chromosomes and three linear plasmids, demonstrated ≥98.14% average nucleotide identities with 31 publicly available V. parahaemolyticus genomes, including 13, 11, and 7 shrimp-, human-, and non-pathogenic strains, respectively. 
Phylogeny analysis based on dispensable genes of pan-genome clustered 11 out of 14 shrimp-pathogenic strains and 7 out of 11 clinical strains into two distinct clades, indicating the close association between host-specific pathogenicity and accessory genes. The VFDB database revealed that 150 VFs of LC were mainly associated with the secretion system, adherence, antiphagocytosis, chemotaxis, motility, and iron uptake, whereas no homologs of the typical pathogenic genes pirA, pirB, tdh, and trh were detected. Four genes, mshB, wbfT, wbfU, and wbtI, were identified in both types of pathogenic strains but were absent in non-pathogens. Notably, a unique cluster similar to Yen-Tc, which encodes an insecticidal toxin complex, and diverse toxin-antitoxin (TA) systems, were identified on the mobile genetic elements (MGEs) of LC. Conclusively, in addition to the common VFs, various unique MGE-borne VFs, including the Yen-Tc cluster, TA components, and multiple chromosome-encoded chitinase genes, may contribute to the full spectrum of LC virulence. Moreover, V. parahaemolyticus demonstrates host-specific virulence, which potentially drives the origin and spread of pathogenic factors.}, } @article {pmid38004738, year = {2023}, author = {Wang, C and Mao, L and Bao, G and Zhu, H}, title = {Pan-Genome Analyses of the Genus Cohnella and Proposal of the Novel Species Cohnella silvisoli sp. 
nov., Isolated from Forest Soil.}, journal = {Microorganisms}, volume = {11}, number = {11}, pages = {}, doi = {10.3390/microorganisms11112726}, pmid = {38004738}, issn = {2076-2607}, support = {32001115//the Natural Science Foundation of China/ ; 2022JB087//the Initial Funding for Doctoral Research of Huizhou University/ ; 2022A1515111059//the grant from the Basic and Applied Basic Research Foundation of Guangdong Province/ ; 2023A04J1432//the grant from the Guangzhou Science and Technology Plan Project/ ; }, abstract = {Two strains, designated NL03-T5[T] and NL03-T5-1, were isolated from a soil sample collected from the Nanling National Forests, Guangdong Province, PR China. The two strains were Gram-stain-positive, aerobic, rod-shaped and had lophotrichous flagellation. Strain NL03-T5[T] could secrete extracellular mucus whereas NL03-T5-1 could not. Phylogenetic analysis based on 16S rRNA gene sequences revealed that the two strains belong to the genus Cohnella, were most closely related to Cohnella lupini LMG 27416[T] (95.9% and 96.1% similarities), and both showed 94.0% similarity with Cohnella arctica NRRL B-59459[T], respectively. The two strains showed 99.8% 16S rRNA gene sequence similarity between them. The draft genome size of strain NL03-T5[T] was 7.44 Mbp with a DNA G+C content of 49.2 mol%. The average nucleotide identities (ANI) and the digital DNA-DNA hybridization (dDDH) values between NL03-T5[T] and NL03-T5-1 were 99.98% and 100%, indicating the two strains were of the same species. Additionally, the ANI and dDDH values between NL03-T5[T] and C. lupini LMG 27416[T] were 76.1% and 20.4%, respectively. The major cellular fatty acids of strain NL03-T5[T] included anteiso-C15:0 and iso-C16:0. The major polar lipids and predominant respiratory quinone were diphosphatidylglycerol (DPG) and menaquinone-7 (MK-7). 
Based on phylogenetic analysis, phenotypic and chemotaxonomic characterization, genomic DNA G+C content, and ANI and dDDH values, strains NL03-T5[T] and NL03-T5-1 represent novel species in the genus Cohnella, for which the name Cohnella silvisoli is proposed. The type strain is NL03-T5[T] (=GDMCC 1.2294[T] = JCM 34999[T]). Furthermore, comparative genomics revealed that the genus Cohnella had an open pan-genome. The pan-genome of 29 Cohnella strains contained 41,356 gene families, and the number of strain-specific genes ranged from 6 to 1649. The results may explain the good adaptability of the Cohnella strains to different habitats at the genetic level.}, } @article {pmid38003271, year = {2023}, author = {Singh, G and Singh, N and Ellur, RK and Balamurugan, A and Prakash, G and Rathour, R and Mondal, KK and Bhowmick, PK and Gopala Krishnan, S and Nagarajan, M and Seth, R and Vinod, KK and Singh, V and Bollinedi, H and Singh, AK}, title = {Genetic Enhancement for Biotic Stress Resistance in Basmati Rice through Marker-Assisted Backcross Breeding.}, journal = {International journal of molecular sciences}, volume = {24}, number = {22}, pages = {}, doi = {10.3390/ijms242216081}, pmid = {38003271}, issn = {1422-0067}, support = {BT/PR13578/AG/106/991/2015 dated 05/01/2016//Department of Biotechnology/ ; }, abstract = {Pusa Basmati 1509 (PB1509) is one of the major foreign-exchange-earning varieties of Basmati rice; it is semi-dwarf and early maturing with exceptional cooking quality and strong aroma. However, it is highly susceptible to various biotic stresses including bacterial blight and blast. Therefore, bacterial blight resistance genes, namely, xa13 + Xa21 and Xa38, and fungal blast resistance genes Pi9 + Pib and Pita were incorporated into the genetic background of recurrent parent (RP) PB1509 using donor parents, namely, Pusa Basmati 1718 (PB1718), Pusa 1927 (P1927), Pusa 1929 (P1929) and Tetep, respectively. 
Foreground selection was carried out with respective gene-linked markers, stringent phenotypic selection for recurrent parent phenotype, early generation background selection with Simple sequence repeat (SSR) markers, and background analysis at advanced generations with Rice Pan Genome Array comprising 80K SNPs. This has led to the development of Near isogenic lines (NILs), namely, Pusa 3037, Pusa 3054, Pusa 3060 and Pusa 3066 carrying genes xa13 + Xa21, Xa38, Pi9 + Pib and Pita with genomic similarity of 98.25%, 98.92%, 97.38% and 97.69%, respectively, as compared to the RP. Based on GGE-biplot analysis, Pusa 3037-1-44-3-164-20-249-2 carrying xa13 + Xa21, Pusa 3054-2-47-7-166-24-261-3 carrying Xa38, Pusa 3060-3-55-17-157-4-124-1 carrying Pi9 + Pib, and Pusa 3066-4-56-20-159-8-174-1 carrying Pita were identified to be relatively stable and better-performing individuals in the tested environments. Intercrossing between the best BC3F1s has led to the generation of Pusa 3122 (xa13 + Xa21 + Xa38), Pusa 3124 (Xa38 + Pi9 + Pib) and Pusa 3123 (Pi9 + Pib + Pita) with agronomy, grain and cooking quality parameters at par with PB1509. Cultivation of such improved varieties will help farmers reduce the cost of cultivation with decreased pesticide use and improve productivity with ensured safety to consumers.}, } @article {pmid38003233, year = {2023}, author = {Zhegalova, IV and Vasiluev, PA and Flyamer, IM and Shtompel, AS and Glazyrina, E and Shilova, N and Minzhenkova, M and Markova, Z and Petrova, NV and Dashinimaev, EB and Razin, SV and Ulianov, SV}, title = {Trisomies Reorganize Human 3D Genome.}, journal = {International journal of molecular sciences}, volume = {24}, number = {22}, pages = {}, doi = {10.3390/ijms242216044}, pmid = {38003233}, issn = {1422-0067}, support = {075-15-2021-1062//Russian Ministry of Science and Higher Education/ ; }, abstract = {Trisomy is the presence of one extra copy of an entire chromosome or its part in a cell nucleus. 
In humans, autosomal trisomies are associated with severe developmental abnormalities leading to embryonic lethality, miscarriage or pronounced deviations of various organs and systems at birth. Trisomies are characterized by alterations in gene expression level, not exclusively on the trisomic chromosome, but throughout the genome. Here, we applied the high-throughput chromosome conformation capture technique (Hi-C) to study chromatin 3D structure in human chorion cells carrying either additional chromosome 13 (Patau syndrome) or chromosome 16 and in cultured fibroblasts with extra chromosome 18 (Edwards syndrome). The presence of extra chromosomes results in systematic changes of contact frequencies between small and large chromosomes. Analyzing the behavior of individual chromosomes, we found that a limited number of chromosomes change their contact patterns stochastically in trisomic cells and that it could be associated with lamina-associated domains (LAD) and gene content. For trisomy 13 and 18, but not for trisomy 16, the proportion of compacted loci on a chromosome is correlated with LAD content. We also found that regions of the genome that become more compact in trisomic cells are enriched in housekeeping genes, indicating a possible decrease in chromatin accessibility and transcription level of these genes. These results provide a framework for understanding the mechanisms of pan-genome transcription dysregulation in trisomies in the context of chromatin spatial organization.}, } @article {pmid38002453, year = {2023}, author = {Qian, M and Han, X and Liu, J and Xu, P and Tao, F}, title = {Genomic Insights on the Carbon-Negative Workhorse: Systematical Comparative Genomic Analysis on 56 Synechococcus Strains.}, journal = {Bioengineering (Basel, Switzerland)}, volume = {10}, number = {11}, pages = {}, doi = {10.3390/bioengineering10111329}, pmid = {38002453}, issn = {2306-5354}, support = {No. 
2018YFA0903600//National Key Research and Development Program of China/ ; }, abstract = {Synechococcus, a type of ancient photosynthetic cyanobacteria, is crucial in modern carbon-negative synthetic biology due to its potential for producing bioenergy and high-value products. With its high biomass, fast growth rate, and established genetic manipulation tools, Synechococcus has become a research focus in recent years. Abundant germplasm resources have been accumulated from various habitats, including temperature and salinity conditions relevant to industrialization. In this study, a comprehensive analysis of complete genomes of the 56 Synechococcus strains currently available in public databases was performed, clarifying genetic relationships, the adaptability of Synechococcus to the environment, and its reflection at the genomic level. This was carried out via pan-genome analysis and a detailed comparison of the functional gene groups. The results revealed an open-genome pattern, with 275 core genes and variable genome sizes within these strains. The KEGG annotation and orthology composition comparisons unveiled that the cold and thermophile strains have 32 and 84 unique KO functional units in their shared core gene functional units, respectively. Each KO functional unit reflects unique gene families and pathways. In terms of salt tolerance and comparative genomics, there are 65 unique KO functional units in freshwater-adapted strains and 154 in strictly marine strains. 
By delving into these aspects, our understanding of the metabolic potential of Synechococcus was deepened, promoting the development and industrial application of cyanobacterial biotechnology.}, } @article {pmid38001096, year = {2023}, author = {Hyun, JC and Monk, JM and Szubin, R and Hefner, Y and Palsson, BO}, title = {Global pathogenomic analysis identifies known and candidate genetic antimicrobial resistance determinants in twelve species.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {7690}, pmid = {38001096}, issn = {2041-1723}, support = {U0AI124316//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; U0AI124316//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; T32GM8806//U.S. Department of Health & Human Services | National Institutes of Health (NIH)/ ; }, abstract = {Surveillance programs for managing antimicrobial resistance (AMR) have yielded thousands of genomes suited for data-driven mechanism discovery. We present a workflow integrating pangenomics, gene annotation, and machine learning to identify AMR genes at scale. When applied to 12 species, 27,155 genomes, and 69 drugs, we 1) find AMR gene transfer mostly confined within related species, with 925 genes in multiple species but just eight in multiple phylogenetic classes, 2) demonstrate that discovery-oriented support vector machines outperform contemporary methods at recovering known AMR genes, recovering 263 genes compared to 145 by Pyseer, and 3) identify 142 AMR gene candidates. Validation of two candidates in E. coli BW25113 reveals cases of conditional resistance: ΔcycA confers ciprofloxacin resistance in minimal media with D-serine, and frdD V111D confers ampicillin resistance in the presence of ampC by modifying the overlapping promoter. 
We expect this approach to be adaptable to other species and phenotypes.}, } @article {pmid38000216, year = {2023}, author = {Gmeiner, A and Njage, PMK and Hansen, LT and Aarestrup, FM and Leekitcharoenphon, P}, title = {Predicting Listeria monocytogenes virulence potential using whole genome sequencing and machine learning.}, journal = {International journal of food microbiology}, volume = {410}, number = {}, pages = {110491}, doi = {10.1016/j.ijfoodmicro.2023.110491}, pmid = {38000216}, issn = {1879-3460}, abstract = {Contamination with food-borne pathogens, such as Listeria monocytogenes, remains a big concern for food safety. Hence, rigorous and continuous microbial surveillance is a standard procedure. At this point, however, the food industry and authorities only focus on detection of Listeria monocytogenes without characterization of individual strains into groups of more or less concern. As whole genome sequencing (WGS) gains increasing interest in the industry, this methodology presents an opportunity to obtain finer resolution of microbial traits such as virulence. Within this study, we therefore aimed to explore the use of WGS in combination with Machine Learning (ML) to predict L. monocytogenes virulence potential on a sub-species level. The WGS datasets used in this study for ML model training consisted of i) national surveillance isolates (n = 169, covering 38 MLST types) and ii) publicly available isolates acquired through the GenomeTrakr network (n = 2880, spanning 80 MLST types). We used the clinical frequency, i.e., ratio of the number of clinical isolates to total amount of isolates, as estimate for virulence potential. 
The predictive performance of input features from three different genomic levels (i.e., virulence genes, pan-genome genes, and single nucleotide polymorphisms (SNPs)) and six machine learning algorithms (i.e., Support Vector Machine with a linear kernel, Support Vector Machine with a radial kernel, Random Forest, Neural Networks, LogitBoost, and Majority Voting) were compared. Our machine learning models predicted sub-species virulence potential with nested cross-validation F1-scores up to 0.88 for the majority voting classifier trained on national surveillance data and using pan-genome genes as input features. The validation of the pre-trained ML models based on 101 previously in vivo studied isolates resulted in F1-scores up to 0.76. Furthermore, we found that the more rapid and less computationally intensive raw read alignment yields comparably accurate models as de novo assembly. The results of our study suggest that a majority voting classifier trained on pan-genome genes is the best and most robust choice for the prediction of clinical frequency. Our study contributes to more rapid and precise characterization of L. monocytogenes virulence and its variation on a sub-species level. We further demonstrated a possible application of WGS data in the context of microbial hazard characterization for food safety. In the future, predictive models may assist case-specific microbial risk management in the food industry. 
The python code, pre-trained models, and prediction pipeline are deposited at (https://github.com/agmei/LmonoVirulenceML).}, } @article {pmid38001525, year = {2023}, author = {Gao, G and Zhang, H and Ni, J and Zhao, X and Zhang, K and Wang, J and Kong, X and Wang, Q}, title = {Insights into genetic diversity and phenotypic variations in domestic geese through comprehensive population and pan-genome analysis.}, journal = {Journal of animal science and biotechnology}, volume = {14}, number = {1}, pages = {150}, pmid = {38001525}, issn = {1674-9782}, support = {cstc2022jxjl80007//Chongqing Scientific Research Institution Performance Incentive Project/ ; CARS-42-51//Earmarked Fund for China Agriculture Research System/ ; 22534C-22//Key R&D Project in Agriculture and Animal Husbandry of Rongchang/ ; CSTB2022NSCQ-MSX0434//Natural Science Foundation of Chongqing Project/ ; 2022NSFSC0605//Natural Science Foundation of Sichuan Province/ ; 2021YFS0379//Natural Science Foundation of Sichuan Province/ ; cstc2021ycjh-bgzxm0248//Chongqing Technology Innovation and Application Development Project/ ; }, abstract = {BACKGROUND: Domestic goose breeds are descended from either the Swan goose (Anser cygnoides) or the Greylag goose (Anser anser), exhibiting variations in body size, reproductive performance, egg production, feather color, and other phenotypic traits. Constructing a pan-genome facilitates a thorough identification of genetic variations, thereby deepening our comprehension of the molecular mechanisms underlying genetic diversity and phenotypic variability.

RESULTS: To comprehensively facilitate population genomic and pan-genomic analyses in geese, we embarked on the task of analyzing whole-genome resequencing data from 659 geese and compiling a database of 155 RNA-seq samples. By constructing the pan-genome for geese, we generated non-reference contigs totaling 612 Mb, unveiling a collection of 2,813 novel genes and pinpointing 15,567 core genes, 1,324 softcore genes, 2,734 shell genes, and 878 cloud genes in goose genomes. Furthermore, we detected an 81.97 Mb genomic region showing signs of genome selection, encompassing the TGFBR2 gene correlated with variations in body weight among geese. Genome-wide association studies utilizing single nucleotide polymorphisms (SNPs) and presence-absence variation revealed significant genomic associations with various goose meat quality, reproductive, and body composition traits. For instance, a gene encoding the SVEP1 protein was linked to carcass oblique length, and a distinct gene-CDS haplotype of the SVEP1 gene exhibited an association with carcass oblique length. Notably, the pan-genome analysis revealed enrichment of variable genes in the "hair follicle maturation" Gene Ontology term, potentially linked to the selection of feather-related traits in geese. A gene presence-absence variation analysis suggested a reduced frequency of genes associated with "regulation of heart contraction" in domesticated geese compared to their wild counterparts. Our study provided novel insights into gene expression features and functions by integrating gene expression patterns across multiple organs and tissues in geese and analyzing population variation.

CONCLUSION: This accomplishment originates from the discernment of a multitude of selection signals and candidate genes associated with a wide array of traits, thereby markedly enhancing our understanding of the processes underlying domestication and breeding in geese. Moreover, assembling the pan-genome for geese has yielded a comprehensive apprehension of the goose genome, establishing it as an indispensable asset poised to offer innovative viewpoints and make substantial contributions to future geese breeding initiatives.}, } @article {pmid37996991, year = {2023}, author = {Li, Y and Yao, J and Sang, H and Wang, Q and Su, L and Zhao, X and Xia, Z and Wang, F and Wang, K and Lou, D and Wang, G and Waterhouse, RM and Wang, H and Luo, S and Sun, C}, title = {Pan-genome analysis highlights the role of structural variation in the evolution and environmental adaptation of Asian honeybees.}, journal = {Molecular ecology resources}, volume = {}, number = {}, pages = {}, doi = {10.1111/1755-0998.13905}, pmid = {37996991}, issn = {1755-0998}, support = {31971397//National Natural Science Foundation of China/ ; 32270445//National Natural Science Foundation of China/ ; PP00P3_202669//Swiss National Science Foundation (SNSF)/ ; Y2019XK13//Central Public-interest Scientific Institution Basal Research Fund for Chinese Academy of Agricultural Sciences/ ; Y2021XK16//Central Public-interest Scientific Institution Basal Research Fund for Chinese Academy of Agricultural Sciences/ ; }, abstract = {The Asian honeybee, Apis cerana, is an ecologically and economically important pollinator. Mapping its genetic variation is key to understanding population-level health, histories and potential capacities to respond to environmental changes. However, most efforts to date were focused on single nucleotide polymorphisms (SNPs) based on a single reference genome, thereby ignoring larger scale genomic variation. 
We employed long-read sequencing technologies to generate a chromosome-scale reference genome for the ancestral group of A. cerana. Integrating this with 525 resequencing data sets, we constructed the first pan-genome of A. cerana, encompassing almost the entire gene content. We found that 31.32% of genes in the pan-genome were variably present across populations, providing a broad gene pool for environmental adaptation. We identified and characterized structural variations (SVs) and found that they were not closely linked with SNP distributions; however, the formation of SVs was closely associated with transposable elements. Furthermore, phylogenetic analysis using SVs revealed a novel A. cerana ecological group not recoverable from the SNP data. Performing environmental association analysis identified a total of 44 SVs likely to be associated with environmental adaptation. Verification and analysis of one of these, a 330 bp deletion in the Atpalpha gene, indicated that this SV may promote the cold adaptation of A. cerana by altering gene expression. Taken together, our study demonstrates the feasibility and utility of applying pan-genome approaches to map and explore genetic feature variations of honeybee populations, and in particular to examine the role of SVs in the evolution and environmental adaptation of A. cerana.}, } @article {pmid37996397, year = {2023}, author = {Vos, M and Padfield, D and Quince, C and Vos, R}, title = {Adaptive radiations in natural populations of prokaryotes: innovation is key.}, journal = {FEMS microbiology ecology}, volume = {}, number = {}, pages = {}, doi = {10.1093/femsec/fiad154}, pmid = {37996397}, issn = {1574-6941}, abstract = {Prokaryote diversity makes up most of the tree of life and is crucial to the functioning of the biosphere and human health. However, the patterns and mechanisms of prokaryote diversification have received relatively little attention compared to animals and plants. 
Adaptive radiation, the rapid diversification of an ancestor species into multiple ecologically divergent species, is a fundamental process by which macrobiological diversity is generated. Here, we discuss whether ecological opportunity could lead to similar bursts of diversification in bacteria. We explore how adaptive radiations in prokaryotes can be kickstarted by horizontally acquired key innovations allowing lineages to invade new niche space that subsequently is partitioned among diversifying specialist descendants. We discuss how novel adaptive zones are colonised and exploited after the evolution of a key innovation and whether certain types of prokaryotes are more prone to adaptive radiation. Radiation into niche specialists does not necessarily lead to speciation in bacteria when barriers to recombination are absent. We propose that in this scenario, niche-specific genes could accumulate within a single lineage, leading to the evolution of an open pan-genome.}, } @article {pmid37995844, year = {2023}, author = {Bonnici, V and Mengoni, C and Mangoni, M and Franco, G and Giugno, R}, title = {PanDelos-frags: A methodology for discovering pangenomic content of incomplete microbial assemblies.}, journal = {Journal of biomedical informatics}, volume = {}, number = {}, pages = {104552}, doi = {10.1016/j.jbi.2023.104552}, pmid = {37995844}, issn = {1532-0480}, abstract = {Pangenomics was originally defined as the problem of comparing the composition of genes into gene families within a set of bacterial isolates belonging to the same species. The problem requires the calculation of sequence homology among such genes. When combined with metagenomics, namely for human microbiome composition analysis, gene-oriented pangenome detection becomes a promising method to decipher ecosystem functions and population-level evolution. Established computational tools are able to investigate the genetic content of isolates for which a complete genomic sequence is available. 
However, there is a plethora of incomplete genomes that are available on public resources, which only a few tools may analyze. Incomplete means that the process for reconstructing their genomic sequence is not complete, and only fragments of their sequence are currently available. However, the information contained in these fragments may play an essential role in the analyses. Here, we present PanDelos-frags, a computational tool which exploits and extends previous results in analysing complete genomes. It provides a new methodology for inferring missing genetic information and thus for managing incomplete genomes. PanDelos-frags outperforms state-of-the-art approaches in reconstructing gene families in synthetic benchmarks and in a real use case of metagenomics. PanDelos-frags is publicly available at https://github.com/InfOmics/PanDelos-frags.}, } @article {pmid37993882, year = {2023}, author = {Rice, ES and Alberdi, A and Alfieri, J and Athrey, G and Balacco, JR and Bardou, P and Blackmon, H and Charles, M and Cheng, HH and Fedrigo, O and Fiddaman, SR and Formenti, G and Frantz, LAF and Gilbert, MTP and Hearn, CJ and Jarvis, ED and Klopp, C and Marcos, S and Mason, AS and Velez-Irizarry, D and Xu, L and Warren, WC}, title = {A pangenome graph reference of 30 chicken genomes allows genotyping of large and complex structural variants.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {267}, pmid = {37993882}, issn = {1741-7007}, support = {2020-67015-31574//National Institute of Food and Agriculture/ ; 2022-67015-36218//National Institute of Food and Agriculture/ ; 817729//HORIZON EUROPE Research Infrastructures/ ; }, abstract = {BACKGROUND: The red junglefowl, the wild outgroup of domestic chickens, has historically served as a reference for genomic studies of domestic chickens. These studies have provided insight into the etiology of traits of commercial importance. 
However, the use of a single reference genome does not capture diversity present among modern breeds, many of which have accumulated molecular changes due to drift and selection. While reference-based resequencing is well-suited to cataloging simple variants such as single-nucleotide changes and short insertions and deletions, it is mostly inadequate to discover more complex structural variation in the genome.

METHODS: We present a pangenome for the domestic chicken consisting of thirty assemblies of chickens from different breeds and research lines.

RESULTS: We demonstrate how this pangenome can be used to catalog structural variants present in modern breeds and untangle complex nested variation. We show that alignment of short reads from 100 diverse wild and domestic chickens to this pangenome reduces reference bias by 38%, which affects downstream genotyping results. This approach also allows for the accurate genotyping of a large and complex pair of structural variants at the K feathering locus using short reads, which would not be possible using a linear reference.

CONCLUSIONS: We expect that this new paradigm of genomic reference will allow better pinpointing of exact mutations responsible for specific phenotypes, which will in turn be necessary for breeding chickens that meet new sustainability criteria and are resilient to quickly evolving pathogen threats.}, } @article {pmid37991492, year = {2023}, author = {Glad, HM and Tralamazza, SM and Croll, D}, title = {The expression landscape and pangenome of long non-coding RNA in the fungal wheat pathogen Zymoseptoria tritici.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001136}, pmid = {37991492}, issn = {2057-5858}, abstract = {Long non-coding RNAs (lncRNAs) are regulatory molecules interacting in a wide array of biological processes. lncRNAs in fungal pathogens can be responsive to stress and play roles in regulating growth and nutrient acquisition. Recent evidence suggests that lncRNAs may also play roles in virulence, such as regulating pathogenicity-associated enzymes and on-host reproductive cycles. Despite the importance of lncRNAs, only a few model fungi have well-documented inventories of lncRNA. In this study, we apply a recent computational pipeline to predict high-confidence lncRNA candidates in Zymoseptoria tritici, an important global pathogen of wheat impacting global food production. We analyse genomic features of lncRNAs and the most likely associated processes through analyses of expression over a host infection cycle. We find that lncRNAs are frequently expressed during early infection, before the switch to necrotrophic growth. They are mostly located in facultative heterochromatic regions, which are known to contain many genes associated with pathogenicity. Furthermore, we find that lncRNAs are frequently co-expressed with genes that may be involved in responding to host defence signals, such as oxidative stress. 
Finally, we assess pangenome features of lncRNAs using four additional reference-quality genomes. We find evidence that the repertoire of expressed lncRNAs varies substantially between individuals, even though lncRNA loci tend to be shared at the genomic level. Overall, this study provides a repertoire and putative functions of lncRNAs in Z. tritici enabling future molecular genetics and functional analyses in an important pathogen.}, } @article {pmid37986038, year = {2023}, author = {Hernández-Soto, LM and Martínez-Abarca, F and Ramírez-Saad, H and López-Pérez, M and Aguirre-Garrido, JF}, title = {Genome analysis of haloalkaline isolates from the soda saline crater lake of Isabel Island; comparative genomics and potential metabolic analysis within the genus Halomonas.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {696}, pmid = {37986038}, issn = {1471-2164}, abstract = {BACKGROUND: Isabel Island is a Mexican volcanic island primarily composed of basaltic stones. It features a maar known as Laguna Fragatas, which is classified as a meromictic thalassohaline lake. The constant deposition of guano in this maar results in increased levels of phosphorus, nitrogen, and carbon. The aim of this study was to utilize high-quality genomes from the genus Halomonas found in specialized databases as a reference for genome mining of moderately halophilic bacteria isolated from Laguna Fragatas. This research involved genomic comparisons employing phylogenetic, pangenomic, and metabolic-inference approaches.

RESULTS: The Halomonas genus exhibited a large open pangenome, but several genes associated with salt metabolism and homeostatic regulation (ectABC and betABC), nitrogen intake through nitrate and nitrite transporters (nasA, and narGI), and phosphorus uptake (pstABCS) were shared among the Halomonas isolates.

CONCLUSIONS: The isolated bacteria demonstrate consistent adaptation to high salt concentrations, and their nitrogen and phosphorus uptake mechanisms are highly optimized. This optimization is expected in an extremophile environment characterized by minimal disturbances or abrupt seasonal variations. The primary significance of this study lies in the dearth of genomic information available for this saline and low-disturbance environment. This makes it important for ecosystem conservation and enabling an exploration of its biotechnological potential. Additionally, the study presents the first two draft genomes of H. janggokensis.}, } @article {pmid37976619, year = {2023}, author = {Cheng, J and Wu, S and Ye, Q and Gu, Q and Zhang, Y and Ye, Q and Lin, R and Liang, X and Liu, Z and Bai, J and Zhang, J and Chen, M and Wu, Q}, title = {A novel multiplex PCR based method for the detection of Listeria monocytogenes clonal complex 8.}, journal = {International journal of food microbiology}, volume = {409}, number = {}, pages = {110475}, doi = {10.1016/j.ijfoodmicro.2023.110475}, pmid = {37976619}, issn = {1879-3460}, abstract = {Listeria monocytogenes is an important foodborne pathogen worldwide, which could cause listeriosis with a 20-30 % fatality rate in immunocompromised individuals. Listeria monocytogenes MLST clonal complex (CC) 8 strain is a common clone in food and clinical cases. The aim of this study was to develop multiplex PCR (mPCR) and high-resolution melting (HRM) qPCR to simultaneously detect L. monocytogenes CC8 and the other L. monocytogenes strains based on pan-genome analysis. A novel multiplex PCR and HRM qPCR targeted for the genes LM5578_1180 (specific for CC8) and LM5578_2262 (for L. monocytogenes) were developed. The specificity of this multiplex PCR and HRM qPCR was verified with other CCs of L. monocytogenes and other species strains. 
The detection limit of this multiplex PCR and HRM qPCR is 2.1 × 10[3] CFU/mL and 2.1 × 10[0] CFU/mL, respectively. This multiplex PCR and HRM qPCR could accurately detect CC8 strains with the interference of different ratios of L. monocytogenes CC9, CC87, CC121, CC155, and L. innocua strains. Subsequently, the detection ability of mPCR and HRM qPCR was also evaluated in spiked samples. The mPCR method could successfully detect 6.2 × 10[3] CFU/mL of CC8 L. monocytogenes after 6 h enrichment while the multiplex HRM qPCR method could successfully detect 6.2 × 10[4] CFU/mL of CC8 L. monocytogenes after 3 h enrichment. The feasibility of these methods was satisfactory in terms of sensitivity, specificity, and efficiency after evaluating 12 mushroom samples and was consistent with that of the National Standard Detection Method (GB4789.30-2016). In conclusion, the developed assays could be applied for rapid screening and detection of L. monocytogenes CC8 strains both in food and food production environments, providing accurate results to adopt monitoring measures to improve microbiological safety.}, } @article {pmid37976215, year = {2023}, author = {Corut, AK and Wallace, JG}, title = {kGWASflow: a modular, flexible, and reproducible Snakemake workflow for k-mers-based GWAS.}, journal = {G3 (Bethesda, Md.)}, volume = {}, number = {}, pages = {}, doi = {10.1093/g3journal/jkad246}, pmid = {37976215}, issn = {2160-1836}, support = {//University of Georgia/ ; grant #1764127//National Science Foundation/ ; }, abstract = {Genome-wide association studies (GWAS) have been widely used to identify genetic variation associated with complex traits. Despite its success and popularity, the traditional GWAS approach comes with a variety of limitations. For this reason, newer methods for GWAS have been developed, including the use of pan-genomes instead of a reference genome and the utilization of markers beyond single-nucleotide polymorphisms, such as structural variations and k-mers. 
The k-mers-based GWAS approach has especially gained attention from researchers in recent years. However, these new methodologies can be complicated and challenging to implement. Here, we present kGWASflow, a modular, user-friendly, and scalable workflow to perform GWAS using k-mers. We adopted an existing kmersGWAS method into an easier and more accessible workflow using management tools like Snakemake and Conda and eliminated the challenges caused by missing dependencies and version conflicts. kGWASflow increases the reproducibility of the kmersGWAS method by automating each step with Snakemake and using containerization tools like Docker. The workflow encompasses supplemental components such as quality control, read-trimming procedures, and generating summary statistics. kGWASflow also offers post-GWAS analysis options to identify the genomic location and context of trait-associated k-mers. kGWASflow can be applied to any organism and requires minimal programming skills. kGWASflow is freely available on GitHub (https://github.com/akcorut/kGWASflow) and Bioconda (https://anaconda.org/bioconda/kgwasflow).}, } @article {pmid37975995, year = {2023}, author = {Khan, K and Jalal, K and Uddin, R}, title = {Pangenome diversification and resistance gene characterization in Salmonella Typhi prioritized RfaJ as a significant therapeutic marker.}, journal = {Journal, genetic engineering & biotechnology}, volume = {21}, number = {1}, pages = {125}, pmid = {37975995}, issn = {2090-5920}, abstract = {BACKGROUND: Salmonella Typhi stands as the etiological agent responsible for the onset of human typhoid fever. The pressing demand for innovative therapeutic targets against S. Typhi is underscored by the escalating prevalence of this pathogen and the severe nature of its infections. Consequently, this study employs pangenome analysis to scrutinize 119 S. Typhi-resistant strains, aiming to identify the most promising therapeutic targets originating from its core genome.

RESULTS: Subtractive genomics was employed to systematically eliminate non-homologous (n=1147), essential (n=551), drug-like (n=80), and pathogenicity-related (n=18) proteins from the initial pool of 3351 core genome proteins. Consequently, lipopolysaccharide 1,2-glucosyltransferase RfaJ was designated as the optimal pharmacological target due to its potential versatility. Furthermore, a compendium of 9000 FDA-approved compounds was repurposed for evaluation against the RfaJ drug target, with the specific intent of prioritizing novel, high-potency therapeutic candidates for combating S. Typhi. Ultimately, four compounds, namely DB00549 (Zafirlukast), DB15637 (Fluzoparib), DB15688 (Zavegepant), and DB12411 (Bemcentinib), were singled out as potential inhibitors based on the ligand-protein binding affinity (indicated by the lowest anticipated binding energy) and the overall stability of these compounds. Notably, molecular dynamics simulations, conducted over a 50 nanosecond interval, convincingly demonstrated the stability of these compounds in the context of the RfaJ protein.

CONCLUSION: In summary, the present findings hold significant promise as an initial stride in the broader drug discovery endeavor against S. Typhi infections. However, the experimental validation of the identified drug target and drug candidate is further required to increase the effectiveness of the applied methodology.}, } @article {pmid37974222, year = {2023}, author = {Baril, T and Croll, D}, title = {A pangenome-guided manually curated library of transposable elements for Zymoseptoria tritici.}, journal = {BMC research notes}, volume = {16}, number = {1}, pages = {335}, pmid = {37974222}, issn = {1756-0500}, support = {201149//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung,Switzerland/ ; }, abstract = {OBJECTIVES: High-quality species-specific transposable element (TE) libraries are required for studies to elucidate the evolutionary dynamics of TEs and gain an understanding of their impacts on host genomes. Such high-quality TE resources are severely lacking for species in the fungal kingdom. To facilitate future studies on the putative role of TEs in rapid adaptation observed in the fungal wheat pathogen Zymoseptoria tritici, we produced a manually curated TE library. This was generated by detecting TEs in 19 reference genome assemblies representing the global diversity of the species supplemented by multiple sister species genomes. Improvements over previous TE libraries have been made on TE boundary resolution, detection of ORFs, TE domains, terminal inverted repeats, and class-specific motifs.

DATA DESCRIPTION: A TE consensus library for Z. tritici formatted for use with RepeatMasker. This data is relevant to other researchers investigating TE-host evolutionary dynamics in Z. tritici or who are interested in comparative studies of the fungal kingdom. Further, this TE library can be used to improve gene annotation. Finally, this TE library increases the number of manually curated TE datasets, providing resources to further our understanding of TE diversity.}, } @article {pmid37974097, year = {2023}, author = {Ferhaoui, N and Tanaka, R and Sekizuka, T and Kuroda, M and Sebaihia, M}, title = {Whole genome sequencing and pan-genome analysis of Staphylococcus/Mammaliicoccus spp. isolated from diabetic foot ulcers and contralateral healthy skin of Algerian patients.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {342}, pmid = {37974097}, issn = {1471-2180}, abstract = {BACKGROUND: Diabetic foot infections (DFIs) are the most common complications of diabetic foot ulcers (DFUs), and a significant cause of lower extremity amputation. In this study we used whole genome sequencing to characterize the clonal composition, virulence and resistance genetic determinants of 58 Staphylococcus/Mammaliicoccus spp. isolates from contralateral healthy skin and DFU from 44 hospitalized patients.

RESULTS: S. aureus (n = 32) and S. epidermidis (n = 10) isolates were recovered from both DFUs and healthy skin, whereas, S. haemolyticus (n = 8), M. sciuri (n = 1), S. hominis (n = 1) and S. simulans (n = 3) were recovered exclusively from healthy skin. In contrast, S. caprae (n = 2) and S. saprophyticus (n = 1) were recovered only from DFUs. Among S. aureus isolates, MRSA were present with high prevalence (27/32, 84.4%), 18 of which (66.7%) were from DFUs and 9 (33.3%) from healthy skin. In contrast, the coagulase-negative Staphylococcus (CoNS)/Mammaliicoccus isolates (n = 26), in particular S. epidermidis and S. haemolyticus were more prevalent in healthy skin, (10/26, 38.5%) and (8/26, 30.8%), respectively. MLST, spa and SCCmec typing classified the 32 S. aureus isolates into 6 STs, ST672, ST80, ST241, ST1, ST97, ST291 and 4 unknown STs (STNF); 8 spa types, t044, t037, t3841, t1247, t127, t639, t937 and t9432 and 2 SCCmec types, type IV and type III(A). Among CoNS, the S. epidermidis isolates belonged to ST54, ST35 and ST640. S. haemolyticus belonged to ST3, ST25, ST29, ST1 and ST56. The sole M. sciuri isolate was found to carry an SCCmec type III(A). A wide range of virulence genes and antimicrobial resistance genes were found among our isolates, with varying distribution between species or STs. The pan-genome analysis revealed a highly clonal population of Staphylococcus isolates, particularly among S. aureus isolates. Interestingly, the majority of S. aureus isolates including MRSA, recovered from the healthy skin and DFUs of the same patient belonged to the same clone and exhibited similar virulence/resistance genotype.

CONCLUSIONS: Our study provides clinically relevant information on the population profile, virulence and antibiotic resistance of Staphylococcus/Mammaliicoccus spp. in DFIs, which could serve as a basis for further studies on these as well as other groups of pathogens associated with DFIs.}, } @article {pmid37972151, year = {2023}, author = {McLaughlin, M and Fiebig, A and Crosson, S}, title = {XRE transcription factors conserved in Caulobacter and φCbK modulate adhesin development and phage production.}, journal = {PLoS genetics}, volume = {19}, number = {11}, pages = {e1011048}, doi = {10.1371/journal.pgen.1011048}, pmid = {37972151}, issn = {1553-7404}, abstract = {The xenobiotic response element (XRE) family of transcription factors (TFs), which are commonly encoded by bacteria and bacteriophage, regulate diverse features of bacterial cell physiology and impact phage infection dynamics. Through a pangenome analysis of Caulobacter species isolated from soil and aquatic ecosystems, we uncovered an apparent radiation of a paralogous XRE TF gene cluster, several of which have established functions in the regulation of holdfast adhesin development and biofilm formation in C. crescentus. We further discovered related XRE TFs throughout the class Alphaproteobacteria and its phages, including the φCbK Caulophage, suggesting that members of this cluster impact host-phage interactions. Here we show that a closely related group of XRE transcription factors encoded by both C. crescentus and φCbK can physically interact and function to control the transcription of a common gene set, influencing processes including holdfast development and the production of φCbK virions. The φCbK-encoded XRE paralog, tgrL, is highly expressed at the earliest stages of infection and can directly inhibit transcription of host genes including hfiA, a potent holdfast inhibitor, and gafYZ, an activator of prophage-like gene transfer agents (GTAs). XRE proteins encoded from the C. 
crescentus chromosome also directly repress gafYZ transcription, revealing a functionally redundant set of host regulators that may protect against spurious production of GTA particles and inadvertent cell lysis. Deleting the C. crescentus XRE transcription factors reduced φCbK burst size, while overexpressing these host genes or φCbK tgrL rescued this burst defect. We conclude that this XRE TF gene cluster, shared by C. crescentus and φCbK, plays an important role in adhesion regulation under phage-free conditions, and influences host-phage dynamics during infection.}, } @article {pmid37971714, year = {2023}, author = {Sharma, PK and Ahmed, HI and Heuberger, M and Koo, DH and Quiroz-Chavez, J and Adhikari, L and Raupp, J and Cauet, S and Rodde, N and Cravero, C and Callot, C and Yadav, IS and Kathiresan, N and Athiyannan, N and Ramirez-Gonzalez, RH and Uauy, C and Wicker, T and Abrouk, M and Gu, YQ and Poland, J and Krattinger, SG and Lazo, GR and Tiwari, VK}, title = {An online database for einkorn wheat to aid in gene discovery and functional genomics studies.}, journal = {Database : the journal of biological databases and curation}, volume = {2023}, number = {}, pages = {}, doi = {10.1093/database/baad079}, pmid = {37971714}, issn = {1758-0463}, support = {Award #2020-67013-31460//National Institute of Food and Agriculture/ ; Award #2020-67013-31460//National Institute of Food and Agriculture/ ; }, abstract = {Diploid A-genome wheat (einkorn wheat) presents a nutrition-rich option as an ancient grain crop and a resource for the improvement of bread wheat against abiotic and biotic stresses. Realizing the importance of this wheat species, reference-level assemblies of two einkorn wheat accessions were generated (wild and domesticated). This work reports an einkorn genome database that provides an interface to the cereals research community to perform comparative genomics, applied genetics and breeding research. 
It features queries for annotated genes, the use of a recent genome browser release, and the ability to search for sequence alignments using a modern BLAST interface. Other features include a comparison of reference einkorn assemblies with other wheat cultivars through genomic synteny visualization and an alignment visualization tool for BLAST results. Altogether, this resource will help wheat research and breeding. Database URL https://wheat.pw.usda.gov/GG3/pangenome.}, } @article {pmid37968318, year = {2023}, author = {Wang, T and Duan, S and Xu, C and Wang, Y and Zhang, X and Xu, X and Chen, L and Han, Z and Wu, T}, title = {Pan-genome analysis of 13 Malus accessions reveals structural and sequence variations associated with fruit traits.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {7377}, pmid = {37968318}, issn = {2041-1723}, support = {32072543//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, abstract = {Structural variations (SVs) and copy number variations (CNVs) contribute to trait variations in fleshy-fruited species. Here, we assemble 10 genomes of genetically diverse Malus accessions, including the ever-green cultivar 'Granny Smith' and the widely cultivated cultivar 'Red Fuji'. Combining with three previously reported genomes, we assemble the pan-genome of Malus species and identify 20,220 CNVs and 317,393 SVs. We also observe CNVs that are positively correlated with expression levels of the genes they are associated with. Furthermore, we show that the noncoding RNA generated from a 209 bp insertion in the intron of mitogen-activated protein kinase homology encoding gene, MMK2, regulates the gene expression and affects fruit coloration. Moreover, we identify overlapping SVs associated with fruit quality and biotic resistance. 
This pan-genome uncovers possible contributions of CNVs to gene expression and highlights the role of SVs in apple domestication and economically important traits.}, } @article {pmid37966169, year = {2023}, author = {Nagano, DS and Taniguchi, I and Ono, T and Nakamura, K and Gotoh, Y and Hayashi, T}, title = {Systematic analysis of plasmids of the Serratia marcescens complex using 142 closed genomes.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001135}, pmid = {37966169}, issn = {2057-5858}, abstract = {Plasmids play important roles in bacterial genome diversification. In the Serratia marcescens complex (SMC), a notable contribution of plasmids to genome diversification was also suggested by our recent analysis of >600 draft genomes. As accurate analyses of plasmids in draft genomes are difficult, in this study we analysed 142 closed genomes covering the entire complex, 67 of which were obtained in this study, and identified 132 plasmids (1.9-244.4 kb in length) in 77 strains. While the average numbers of plasmids in clinical and non-clinical strains showed no significant difference, strains belonging to clade 2 (one of the two hospital-adapted lineages) contained more plasmids than the others. Pangenome analysis revealed that of the 28 954 genes identified, 12.8 % were plasmid-specific, and 1.4 % were present in plasmids or chromosomes depending on the strain. In the latter group, while transposon-related genes were most prevalent (31.4 % of the function-predicted genes), genes related to antimicrobial resistance and heavy metal resistance accounted for a notable proportion (22.7 %). Mash distance-based clustering separated the 132 plasmids into 23 clusters and 50 singletons. Most clusters/singletons showed notably different GC contents compared to those of host chromosomes, suggesting their recent or relatively recent appearance in the SMC. 
Among the 23 clusters, 17 were found in only clinical or only non-clinical strains, suggesting the possible preference of their distribution on the environmental niches of host strains. Regarding the host strain phylogeny, 16 clusters were distributed in two or more clades, suggesting their interclade transmission. Moreover, for many plasmids, highly homologous plasmids were found in other species, indicating the broadness of their potential host ranges, beyond the genus, family, order, class or even phylum level. Importantly, highly homologous plasmids were most frequently found in Klebsiella pneumoniae and other species in the family Enterobacteriaceae, suggesting that this family, particularly K. pneumoniae, is the main source for plasmid exchanges with the SMC. These results highlight the power of closed genome-based analysis in the investigation of plasmids and provide important insights into the nature of plasmids distributed in the SMC.}, } @article {pmid37965675, year = {2023}, author = {Zhou, X and Kang, X and Chen, J and Song, Y and Jia, C and Teng, L and Tang, Y and Jiang, Z and Peng, X and Tao, X and Xu, Y and Huang, L and Xu, X and Xu, Y and Zhang, T and Yu, S and Gong, J and Wang, S and Liu, Y and Zhu, G and Kehrenberg, C and Weill, FX and Barrow, P and Li, Y and Zhao, G and Yue, M}, title = {Genome degradation promotes Salmonella pathoadaptation by remodeling fimbriae-mediated proinflammatory response.}, journal = {National science review}, volume = {10}, number = {10}, pages = {nwad228}, pmid = {37965675}, issn = {2053-714X}, abstract = {Understanding changes in pathogen behavior (e.g. increased virulence, a shift in transmission channel) is critical for the public health management of emerging infectious diseases. Genome degradation via gene depletion or inactivation is recognized as a pathoadaptive feature of the pathogen evolving with the host. 
However, little is known about the exact role of genome degradation in affecting pathogenic behavior, and the underlying molecular detail has yet to be examined. Using large-scale global avian-restricted Salmonella genomes spanning more than a century, we projected the genetic diversity of Salmonella Pullorum (bvSP) by showing increasingly antimicrobial-resistant ST92 prevalent in Chinese flocks. The phylogenomic analysis identified three lineages in bvSP, with an enhancement of virulence in the two recently emerged lineages (L2/L3), as evidenced in chicken and embryo infection assays. Notably, the ancestor L1 lineage resembles the Salmonella serovars with higher metabolic flexibilities and more robust environmental tolerance, indicating stepwise evolutionary trajectories towards avian-restricted lineages. Pan-genome analysis pinpointed fimbrial degradation from a virulent lineage. The later engineered fim-deletion mutant, and all other five fimbrial systems, revealed behavior switching that restricted horizontal fecal-oral transmission but boosted virulence in chicks. By depleting fimbrial appendages, bvSP established persistent replication with less proinflammation in chick macrophages and adopted vertical transovarial transmission, accompanied by ever-increasing intensification in the poultry industry. 
Together, we uncovered a previously unseen paradigm for remodeling bacterial surface appendages that supplements virulence-enhanced evolution with increased vertical transmission.}, } @article {pmid37965009, year = {2023}, author = {Tahir Ul Qamar, M and Sadaqat, M and Zhu, XT and Li, H and Huang, X and Fatima, K and Almutairi, MM and Chen, LL}, title = {Comparative genomics profiling revealed multi-stress responsive roles of the CC-NBS-LRR genes in three mango cultivars.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1285547}, pmid = {37965009}, issn = {1664-462X}, abstract = {The nucleotide-binding site-leucine-rich repeat (NBS-LRR) gene family is the largest group of disease resistance (R) genes in plants and is active in response to viruses, bacteria, and fungi usually involved in effector-triggered immunity (ETI). Pangenome-wide studies allow researchers to analyze the genetic diversity of multiple species or their members simultaneously, providing a comprehensive understanding of the evolutionary relationships and diversity present among them. The draft pan-genome of three Mangifera indica cultivars (Alphonso, Hong Xiang Ya, and Tommy atkins) was constructed and Presence/absence variants (PAVs) were filtered through the ppsPCP pipeline. As a result, 2823 genes and 5907 PAVs from H. Xiang Ya, and 1266 genes and 2098 PAVs from T. atkins were added to the reference genome. For the identification of CC-NBS-LRR (CNL) genes in these mango cultivars, this draft pan-genome study has successfully identified 47, 27, and 36 members in Alphonso, H. Xiang Ya, and T. atkins respectively. The phylogenetic analysis divided MiCNL proteins into four distinct subgroups. All MiCNL genes are unevenly distributed on chromosomes. Both tandem and segmental duplication events played a significant role in the expansion of the CNL gene family. These genes contain cis-elements related to light, stress, hormone, and development. 
The analysis of protein-protein interactions (PPI) revealed that MiCNL proteins interacted with other defense-responsive proteins. Gene Ontology (GO) analysis indicated that MiCNL genes play a role in defense mechanisms within the organism. The expression level of the identified genes in fruit peel was observed under disease and cold stress which showed that Mi_A_CNL13 and 14 were up-regulated while Mi_A_CNL15, 25, 30, 31, and 40 were down-regulated in disease stress. On the other hand, Mi_A_CNL2, 14, 41, and 45 were up-regulated and Mi_A_CNL47 is down-regulated in cold stress. Subsequently, the Random Forest (RF) classifier was used to assess the multi-stress response of MiCNLs. It was found that Mi_A_CNL14 is a gene that responds to multiple stress conditions. The CNLs have similar protein structures which show that they are involved in the same function. The above findings provide a foundation for a deeper understanding of the functional characteristics of the mango CNL gene family.}, } @article {pmid37961986, year = {2023}, author = {Hu, H and Scheben, A and Wang, J and Li, F and Li, C and Edwards, D and Zhao, J}, title = {Unravelling inversions: Technological advances, challenges, and potential impact on crop breeding.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14224}, pmid = {37961986}, issn = {1467-7652}, support = {2022-NPY-00-005//Seed industry revitalization project of the special fund for the rural revitalization strategy of Guangdong Province in 2022/ ; 2022-NPY-00-004//Seed industry revitalization project of the special fund for the rural revitalization strategy of Guangdong Province in 2022/ ; 2023YG01//the "YouGu" Plan of Rice Research Institute of Guangdong Academy of Agricultural Sciences/ ; 2023YG04//the "YouGu" Plan of Rice Research Institute of Guangdong Academy of Agricultural Sciences/ ; 2023KJ106//the Innovation Team Project of Guangdong Modern Agricultural Industrial System/ ; 
//Guangdong Key Laboratory of New Technology in Rice Breeding: 2023B1212060042/ ; R2023YJ-QC001//Introduction of Young Key Talents of Guangdong Academy of Agricultural Sciences/ ; }, abstract = {Inversions, a type of chromosomal structural variation, significantly influence plant adaptation and gene functions by impacting gene expression and recombination rates. However, compared with other structural variations, their roles in functional biology and crop improvement remain largely unexplored. In this review, we highlight technological and methodological advancements that have allowed a comprehensive understanding of inversion variants through the pangenome framework and machine learning algorithms. Genome editing is an efficient method for inducing or reversing inversion mutations in plants, providing an effective mechanism to modify local recombination rates. Given the potential of inversions in crop breeding, we anticipate increasing attention on inversions from the scientific community in future research and breeding applications.}, } @article {pmid37961660, year = {2023}, author = {Zakeri, M and Brown, NK and Ahmed, OY and Gagie, T and Langmead, B}, title = {Movi: a fast and cache-efficient full-text pangenome index.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.11.04.565615}, pmid = {37961660}, abstract = {Efficient pangenome indexes are promising tools for many applications, including rapid classification of nanopore sequencing reads. Recently, a compressed-index data structure called the "move structure" was proposed as an alternative to other BWT-based indexes like the FM index and r-index. The move structure uniquely achieves both O(r) space and O(1)-time queries, where r is the number of runs in the pangenome BWT. We implemented Movi, an efficient tool for building and querying move-structure pangenome indexes. 
While the size of the Movi's index is larger than the r-index, it scales at a smaller rate for pangenome references, as its size is exactly proportional to r, the number of runs in the BWT of the reference. Movi can compute sophisticated matching queries needed for classification - such as pseudo-matching lengths - at least ten times faster than the fastest available methods. Movi achieves this speed by leveraging the move structure's strong locality of reference, incurring close to the minimum possible number of cache misses for queries against large pangenomes. Movi's fast constant-time query loop makes it well suited to real-time applications like adaptive sampling for nanopore sequencing, where decisions must be made in a small and predictable time interval.}, } @article {pmid37961321, year = {2023}, author = {Krieger, M and AbdelRahman, YM and Choi, D and Palmer, EA and Yoo, A and McGuire, S and Kreth, J and Merritt, J}, title = {The prevalence of Fusobacterium nucleatum subspecies in the oral cavity stratifies by local health status.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.10.25.563997}, pmid = {37961321}, abstract = {The ubiquitous inflammophilic pathobiont Fusobacterium nucleatum is widely recognized for its strong association with a variety of human dysbiotic diseases such as periodontitis and oral/extraoral abscesses, as well as multiple types of cancer. F. nucleatum is currently subdivided into four subspecies: F. nucleatum subspecies nucleatum (Fn. nucleatum), animalis (Fn. animalis), polymorphum (Fn. polymorphum), and vincentii/fusiforme (Fn. vincentii). Although these subspecies have been historically considered as functionally interchangeable in the oral cavity, direct clinical evidence is largely lacking for this assertion. Consequently, we assembled a collection of oral clinical specimens to determine whether F. 
nucleatum subspecies prevalence in the oral cavity stratifies by local oral health status. Patient-matched clinical specimens of both disease-free dental plaque and odontogenic abscess were analyzed with newly developed culture-dependent and culture-independent approaches using 44 and 60 oral biofilm/tooth abscess paired specimens, respectively. Most oral cavities were found to simultaneously harbor multiple F. nucleatum subspecies, with a greater diversity present within dental plaque compared to abscesses. In dental plaque, Fn. polymorphum is clearly the dominant organism, but this changes dramatically within odontogenic abscesses where Fn. animalis is heavily favored over all other fusobacteria. Surprisingly, the most commonly studied F. nucleatum subspecies, Fn. nucleatum, is only a minor constituent in the oral cavity. To gain further insights into the genetic basis for these phenotypes, we subsequently performed pangenome, phylogenetic, and functional enrichment analyses of oral fusobacterial genomes using the Anvi'o platform, which revealed significant genotypic distinctions among F. nucleatum subspecies. Accordingly, our results strongly support a taxonomic reassignment of each F. nucleatum subspecies into distinct Fusobacterium species. Of these, Fn. 
animalis should be considered as the most clinically relevant at sites of active inflammation, despite being among the least characterized oral fusobacteria.}, } @article {pmid37960081, year = {2023}, author = {Pushkova, EN and Borkhert, EV and Novakovskiy, RO and Dvorianinova, EM and Rozhmina, TA and Zhuchenko, AA and Zhernova, DA and Turba, AA and Yablokov, AG and Sigova, EA and Krasnov, GS and Bolsheva, NL and Melnikova, NV and Dmitriev, AA}, title = {Selection of Flax Genotypes for Pan-Genomic Studies by Sequencing Tagmentation-Based Transcriptome Libraries.}, journal = {Plants (Basel, Switzerland)}, volume = {12}, number = {21}, pages = {}, doi = {10.3390/plants12213725}, pmid = {37960081}, issn = {2223-7747}, support = {075-15-2021-1064//Ministry of Science and Higher Education of the Russian Federation/ ; }, abstract = {Flax (Linum usitatissimum L.) products are used in the food, pharmaceutical, textile, polymer, medical, and other industries. The creation of a pan-genome will be an important advance in flax research and breeding. The selection of flax genotypes that sufficiently cover the species diversity is a crucial step for the pan-genomic study. For this purpose, we have adapted a method based on Illumina sequencing of transcriptome libraries prepared using the Tn5 transposase (tagmentase). This approach reduces the cost of sample preparation compared to commercial kits and allows the generation of a large number of cDNA libraries in a short time. RNA-seq data were obtained for 192 flax plants (3-6 individual plants from 44 flax accessions of different morphology and geographical origin). Evaluation of the genetic relationship between flax plants based on the sequencing data revealed incorrect species identification for five accessions. Therefore, these accessions were excluded from the sample set for the pan-genomic study. 
For the remaining samples, typical genotypes were selected to provide the most comprehensive genetic diversity of flax for pan-genome construction. Thus, high-throughput sequencing of tagmentation-based transcriptome libraries showed high efficiency in assessing the genetic relationship of flax samples and allowed us to select genotypes for the flax pan-genomic analysis.}, } @article {pmid37957573, year = {2023}, author = {Dutta, B and Halder, U and Chitikineni, A and Varshney, RK and Bandopadhyay, R}, title = {Delving into the lifestyle of Sundarban Wetland resident, biofilm producing, halotolerant Salinicoccus roseus: a comparative genomics-based intervention.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {681}, pmid = {37957573}, issn = {1471-2164}, abstract = {BACKGROUND: Microbial community played an essential role in ecosystem processes, be it mangrove wetland or other intertidal ecologies. Several enzymatic activities like hydrolases are effective ecological indicators of soil microbial function. So far, little is known on halophilic bacterial contribution and function on a genomic viewpoint of Indian Sundarban Wetland. Considering the above mentioned issues, the aims of this study was to understand the life style, metabolic functionalities and genomic features of the isolated bacterium, Salinicoccus roseus strain RF1H. A comparative genome-based study of S. roseus has not been reported yet. Henceforth, we have considered the inclusion of the intra-species genome comparison of S. roseus to gain insight into the high degree of variation in the genome of strain RF1H among others.

RESULTS: Salinicoccus roseus strain RF1H is a pink-red pigmented, Gram-positive and non-motile cocci. The bacterium exhibited high salt tolerance (up to 15% NaCl), antibiotic resistance, biofilm formation and secretion of extracellular hydrolytic enzymes. The circular genome was approximately 2.62978 Mb in size, encoding 574 predicted genes with GC content 49.5%. Presence of genomic elements (prophages, transposable elements, CRISPR-Cas system) represented bacterial virulence and multidrug-resistance. Furthermore, genes associated with salt tolerance, temperature adaptation and DNA repair system were distributed in 17 genomic islands. Genes related to hydrocarbon degradation manifested metabolic capability of the bacterium for potential biotechnological applications. A comparative pangenome analysis revealed two-component response regulator, modified C4-dicarboxylate transport system and osmotic stress regulated ATP-binding proteins. Presence of genes encoding arginine decarboxylase (ADC) enzyme being involved in biofilm formation was reported from the genome. In silico study revealed the protein is thermostable and made up with ~ 415 amino acids, and hydrophilic in nature. Three motifs appeared to be evolutionary conserved in all Salinicoccus sequences.

CONCLUSION: The first report of whole genome analysis of Salinicoccus roseus strain RF1H provided information of metabolic functionalities, biofilm formation, resistance mechanism and adaptation strategies to thrive in climate-change induced vulnerable spot like Sundarban. Comparative genome analysis highlighted the unique genome content that contributed the strain's adaptability. The biomolecules produced during metabolism are important sources of compounds with potential beneficial applications in pharmaceuticals.}, } @article {pmid37956283, year = {2023}, author = {Joglekar, P and Conlan, S and Lee-Lin, SQ and Deming, C and Kashaf, SS and {NISC Comparative Sequencing Program} and Kong, HH and Segre, JA}, title = {Integrated genomic and functional analyses of human skin-associated Staphylococcus reveal extensive inter- and intra-species diversity.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {47}, pages = {e2310585120}, doi = {10.1073/pnas.2310585120}, pmid = {37956283}, issn = {1091-6490}, support = {1ZIAHG000180-16//HHS | NIH | National Human Genome Research Institute (NHGRI)/ ; 1ZIABC010938-02//HHS | NIH | National Institute of Arthritis and Musculoskeletal and Skin Diseases (NIAMS)/ ; }, abstract = {Human skin is stably colonized by a distinct microbiota that functions together with epidermal cells to maintain a protective physical barrier. Staphylococcus, a prominent genus of the skin microbiota, participates in colonization resistance, tissue repair, and host immune regulation in strain-specific manners. To unlock the potential of engineering skin microbial communities, we aim to characterize the diversity of this genus within the context of the skin environment. We reanalyzed an extant 16S rRNA amplicon dataset obtained from distinct body sites of healthy volunteers, providing a detailed biogeographic depiction of staphylococcal species that colonize our skin. S. epidermidis, S. capitis, and S. 
hominis were the most abundant staphylococcal species present in all volunteers and were detected at all body sites. Pan-genome analysis of isolates from these three species revealed that the genus-core was dominated by central metabolism genes. Species-restricted-core genes encoded known host colonization functions. The majority (~68%) of genes were detected only in a fraction of isolate genomes, underscoring the immense strain-specific gene diversity. Conspecific genomes grouped into phylogenetic clades, exhibiting body site preference. Each clade was enriched for distinct gene sets that are potentially involved in site tropism. Finally, we conducted gene expression studies of select isolates showing variable growth phenotypes in skin-like medium. In vitro expression revealed extensive intra- and inter-species gene expression variation, substantially expanding the functional diversification within each species. Our study provides an important resource for future ecological and translational studies to examine the role of shared and strain-specific staphylococcal genes within the skin environment.}, } @article {pmid37953337, year = {2023}, author = {Harrison, PW and Amode, MR and Austine-Orimoloye, O and Azov, AG and Barba, M and Barnes, I and Becker, A and Bennett, R and Berry, A and Bhai, J and Bhurji, SK and Boddu, S and Branco Lins, PR and Brooks, L and Ramaraju, SB and Campbell, LI and Martinez, MC and Charkhchi, M and Chougule, K and Cockburn, A and Davidson, C and De Silva, NH and Dodiya, K and Donaldson, S and El Houdaigui, B and Naboulsi, TE and Fatima, R and Giron, CG and Genez, T and Grigoriadis, D and Ghattaoraya, GS and Martinez, JG and Gurbich, TA and Hardy, M and Hollis, Z and Hourlier, T and Hunt, T and Kay, M and Kaykala, V and Le, T and Lemos, D and Lodha, D and Marques-Coelho, D and Maslen, G and Merino, GA and Mirabueno, LP and Mushtaq, A and Hossain, SN and Ogeh, DN and Sakthivel, MP and Parker, A and Perry, M and Piližota, I and Poppleton, D 
and Prosovetskaia, I and Raj, S and Pérez-Silva, JG and Salam, AIA and Saraf, S and Saraiva-Agostinho, N and Sheppard, D and Sinha, S and Sipos, B and Sitnik, V and Stark, W and Steed, E and Suner, MM and Surapaneni, L and Sutinen, K and Tricomi, FF and Urbina-Gómez, D and Veidenberg, A and Walsh, TA and Ware, D and Wass, E and Willhoft, NL and Allen, J and Alvarez-Jarreta, J and Chakiachvili, M and Flint, B and Giorgetti, S and Haggerty, L and Ilsley, GR and Keatley, J and Loveland, JE and Moore, B and Mudge, JM and Naamati, G and Tate, J and Trevanion, SJ and Winterbottom, A and Frankish, A and Hunt, SE and Cunningham, F and Dyer, S and Finn, RD and Martin, FJ and Yates, AD}, title = {Ensembl 2024.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad1049}, pmid = {37953337}, issn = {1362-4962}, support = {222155/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; 75N93019C00077/AI/NIAID NIH HHS/United States ; 226458/Z/22/Z/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Ensembl (https://www.ensembl.org) is a freely available genomic resource that has produced high-quality annotations, tools, and services for vertebrates and model organisms for more than two decades. In recent years, there has been a dramatic shift in the genomic landscape, with a large increase in the number and phylogenetic breadth of high-quality reference genomes, alongside major advances in the pan-genome representations of higher species. In order to support these efforts and accelerate downstream research, Ensembl continues to focus on scaling for the rapid annotation of new genome assemblies, developing new methods for comparative analysis, and expanding the depth and quality of our genome annotations. 
This year we have continued our expansion to support global biodiversity research, doubling the number of annotated genomes we support on our Rapid Release site to over 1700, driven by our close collaboration with biodiversity projects such as Darwin Tree of Life. We have also strengthened support for key agricultural species, including the first regulatory builds for farmed animals, and have updated key tools and resources that support the global scientific community, notably the Ensembl Variant Effect Predictor. Ensembl data, software, and tools are freely available.}, } @article {pmid37953330, year = {2023}, author = {Raney, BJ and Barber, GP and Benet-Pagès, A and Casper, J and Clawson, H and Cline, MS and Diekhans, M and Fischer, C and Navarro Gonzalez, J and Hickey, G and Hinrichs, AS and Kuhn, RM and Lee, BT and Lee, CM and Le Mercier, P and Miga, KH and Nassar, LR and Nejad, P and Paten, B and Perez, G and Schmelter, D and Speir, ML and Wick, BD and Zweig, AS and Haussler, D and Kent, WJ and Haeussler, M}, title = {The UCSC Genome Browser database: 2024 update.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad987}, pmid = {37953330}, issn = {1362-4962}, support = {2U24HG002371/HG/NHGRI NIH HHS/United States ; 75N93019C00076/AI/NIAID NIH HHS/United States ; RF1MH132662/MH/NIMH NIH HHS/United States ; }, abstract = {The UCSC Genome Browser (https://genome.ucsc.edu) is a web-based genomic visualization and analysis tool that serves data to over 7,000 distinct users per day worldwide. It provides annotation data on thousands of genome assemblies, ranging from human to SARS-CoV2. This year, we have introduced new data from the Human Pangenome Reference Consortium and on viral genomes including SARS-CoV2. We have added 1,200 new genomes to our GenArk genome system, increasing the overall diversity of our genomic representation. 
We have added support for nine new user-contributed track hubs to our public hub system. Additionally, we have released 29 new tracks on the human genome and 11 new tracks on the mouse genome. Collectively, these new features expand both the breadth and depth of the genomic knowledge that we share publicly with users worldwide.}, } @article {pmid37953085, year = {2023}, author = {Li, Y and Wu, Y and Li, D and Du, L and Zhao, L and Wang, R and Chen, X and Jia, X and Ma, R and Wang, T and Li, J and Zhang, G and Wang, X and Hu, M and Chen, X and Wang, X and Kang, W and Sun, H and Xu, Y and Liu, Y}, title = {Multicenter comparative genomic study of Klebsiella oxytoca complex reveals a highly antibiotic-resistant subspecies of Klebsiella michiganensis.}, journal = {Journal of microbiology, immunology, and infection = Wei mian yu gan ran za zhi}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jmii.2023.10.014}, pmid = {37953085}, issn = {1995-9133}, abstract = {BACKGROUND: The Klebsiella oxytoca complex is an opportunistic pathogen that has been recently identified as an actual complex. However, the characteristics of each species remain largely unknown. We aimed to study the clinical prevalence, antimicrobial profiles, genetic differences, and interaction with the host of each species of this complex.

METHODS: One hundred and three clinical isolates of the K. oxytoca complex were collected from 33 hospitals belonging to 19 areas in China from 2020 to 2021. Species were identified using whole genome sequencing based on average nucleotide identity. Clinical infection characteristics of the species were analyzed. Comparative genomics and pan-genome analyses were performed on these isolates and an augmented dataset, including 622 assemblies from the National Center for Biotechnology Information. In vitro assays evaluating the adhesion ability of human respiratory epithelial cells and survivability against macrophages were performed on randomly selected isolates.

RESULTS: Klebsiella michiganensis (46.6%, 48/103) and K. oxytoca (35.92%, 37/103) were the major species of the complex causing human infections. K. michiganensis had a higher genomic diversity and larger pan-genome size than did K. oxytoca. K. michiganensis isolates with blaoxy-5 had a higher resistance rate to various antibiotics, antimicrobial gene carriage rate, adhesion ability to human respiratory epithelial cells, and survival rate against macrophages than isolates of other species.

CONCLUSION: Our study revealed the genetic diversity of K. michiganensis and firstly identified the highly antimicrobial-resistant profile of K. michiganensis carrying blaoxy-5.}, } @article {pmid37951618, year = {2023}, author = {Laux, M and Piroupo, CM and Setubal, JC and Giani, A}, title = {The Raphidiopsis (= Cylindrospermopsis) raciborskii pangenome updated: Two new metagenome-assembled genomes from the South American clade.}, journal = {Harmful algae}, volume = {129}, number = {}, pages = {102518}, doi = {10.1016/j.hal.2023.102518}, pmid = {37951618}, issn = {1878-1470}, abstract = {Two Raphidiopsis (=Cylindrospermopsis) raciborskii metagenome-assembled genomes (MAGs) were recovered from two freshwater metagenomic datasets sampled in 2011 and 2012 in Pampulha Lake, a hypereutrophic, artificial, shallow reservoir, located in the city of Belo Horizonte (MG), Brazil. Since the late 1970s, the lake has undergone increasing eutrophication pressure, due to wastewater input, leading to the occurrence of frequent cyanobacterial blooms. The major difference observed between PAMP2011 and PAMP2012 MAGs was the lack of the saxitoxin gene cluster in PAMP2012, which also presented a smaller genome, while PAMP2011 presented the complete sxt cluster and all essential proteins and clusters. The pangenome analysis was performed with all Raphidiopsis/Cylindrospermopsis genomes available at NCBI to date, with the addition of PAMP2011 and PAMP2012 MAGs (All33 subset), but also without the South American strains (noSA subset), and only among the South American strains (SA10 and SA8 subsets). 
We observed a substantial increase in the core genome size for the 'noSA' subset, in comparison to 'All33' subset, and since the core genome reflects the closeness among the pangenome members, the results strongly suggest that the conservation level of the essential gene repertoire seems to be affected by the geographic origin of the strains being analyzed, supporting the existence of a distinct SA clade. The Raphidiopsis pangenome comprised a total of 7943 orthologous protein clusters, and the two new MAGs increased the pangenome size by 11%. The pangenome based phylogenetic relationships among the 33 analyzed genomes showed that the SA genomes clustered together with 99% bootstrap support, reinforcing the metabolic particularity of the Raphidiopsis South American clade, related to its saxitoxin producing unique ability, while also indicating a different evolutionary history due to its geographic isolation.}, } @article {pmid37944674, year = {2023}, author = {Mahnoor, I and Shabbir, H and Nawaz, S and Aziz, K and Aziz, U and Khalid, K and Irum, S and Andleeb, S}, title = {Characterization of exclusively non-commensal Neisseria gonorrhoeae pangenome to prioritize globally conserved and thermodynamically stable vaccine candidates using immune-molecular dynamic simulations.}, journal = {Microbial pathogenesis}, volume = {}, number = {}, pages = {106439}, doi = {10.1016/j.micpath.2023.106439}, pmid = {37944674}, issn = {1096-1208}, abstract = {Neisseria gonorrhoeae (Ngo) has emerged as a global threat leading to one of the most common sexually transmitted diseases in the world. It has also become one of the leading antimicrobial resistant organisms, resulting in fewer treatment options and an increased morbidity. Therefore, in recent years, there has been an increased focus on the development of new treatments and preventive strategies to combat its infection. 
In this study, we have combined the most conserved epitopes from the completely assembled strains of Ngo to develop a universal and a thermodynamically stable vaccine candidate. For our vaccine design, the epitopes were selected for their high immunogenicity, non-allergenicity and non-cytotoxicity, making them the ideal candidates for vaccine development. For the screening process, several reverse vaccinology tools were employed to rigorously extract non-homologous and immunogenic epitopes from the selected proteins. Consequently, a total number of 3 B-cell epitopes and 6 T-cell epitopes were selected and joined by multiple immune-modulating adjuvants and linkers to generate a promiscuous immune response. Additionally, the stability and flexible nature of the vaccine construct was confirmed using various molecular dynamic simulation tools. Overall, the vaccine candidate showed promising binding affinity to various HLA alleles and TLR receptors; however, further studies are needed to assess its efficacy in-vivo. In this way, we have designed a multi-subunit vaccine candidate to potentially combat and control the spread of N. 
gonorrhoeae.}, } @article {pmid37941143, year = {2023}, author = {Lu, K and Pan, Y and Shen, J and Yang, L and Zhan, C and Liang, S and Tai, S and Wan, L and Li, T and Cheng, T and Ma, B and Pan, G and He, N and Lu, C and Westhof, E and Xiang, Z and Han, MJ and Tong, X and Dai, F}, title = {SilkMeta: a comprehensive platform for sharing and exploiting pan-genomic and multi-omic silkworm data.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad956}, pmid = {37941143}, issn = {1362-4962}, support = {31 830 094//National Natural Science Foundation of China/ ; CARS-18-ZJ0102//China Agriculture Research System of MOF and MARA/ ; cstc2021jcyj-cxtt0005//Natural Science Foundation of Chongqing/ ; 2022CQBSHTB3066//Special Funding for Postdoctoral Research of Chongqing, China/ ; }, abstract = {The silkworm Bombyx mori is a domesticated insect that serves as an animal model for research and agriculture. The silkworm super-pan-genome dataset, which we published last year, is a unique resource for the study of global genomic diversity and phenotype-genotype association. Here we present SilkMeta (http://silkmeta.org.cn), a comprehensive database covering the available silkworm pan-genome and multi-omics data. The database contains 1082 short-read genomes, 546 long-read assembled genomes, 1168 transcriptomes, 294 phenotype characterizations (phenome), tens of millions of variations (variome), 7253 long non-coding RNAs (lncRNAs), 18 717 full length transcripts and a set of population statistics. We have compiled publications on functional genomics research and genetic stock deciphering (mutant map). A range of bioinformatics tools is also provided for data visualization and retrieval. The large batch of omics data and tools were integrated in twelve functional modules that provide useful strategies and data for comparative and functional genomics research. 
The interactive bioinformatics platform SilkMeta will benefit not only the silkworm but also the insect biology communities.}, } @article {pmid37940013, year = {2023}, author = {Krishnan, S and Sasi, S and Kodakkattumannil, P and Al Senaani, S and Lekshmi, G and Kottackal, M and Amiri, KMA}, title = {Cationic and anionic detergent buffers in sequence yield high-quality genomic DNA from diverse plant species.}, journal = {Analytical biochemistry}, volume = {}, number = {}, pages = {115372}, doi = {10.1016/j.ab.2023.115372}, pmid = {37940013}, issn = {1096-0309}, abstract = {Because of the heterogeneity among seedlings of outbreeding species, the use of seedling tissues as a source of DNA is unsuitable for the genomic characterization of elite germplasms. High-quality DNA, free of RNA, proteins, polysaccharides, secondary metabolites, and shearing, is mandatory for downstream molecular biology applications, especially for next-generation genome sequencing and pangenome analysis aiming to capture the complete genetic diversity within a species. The study aimed to accomplish an efficient protocol for the extraction of high-quality DNA suitable for diverse plant species/tissues. We describe a reliable, and consistent protocol suitable for the extraction of DNA from 42 difficult-to-extract plant species belonging to 33 angiosperm (monocot and dicot) families, including tissues such as seeds, roots, endosperm, and flower/fruit tissues. The protocol was first optimized for the outbreeding recalcitrant trees viz., Prosopis cineraria, Conocarpus erectus, and Phoenix dactylifera, which are rich in proteins, polysaccharides, and secondary metabolites, and the quality of the extracted DNA was confirmed by downstream applications. Nine procedures were attempted to extract high-quality, impurities-free DNA from these three plant species. 
Extraction of the ethanol-precipitated DNA from cetyltrimethylammonium bromide (CTAB) protocol using sodium dodecyl sulfate (SDS) buffer, i.e., the extraction using a cationic (CTAB) detergent followed by an anionic (SDS) detergent was the key for high yield and high purity (1.75-1.85 against A260/280 and an A260/230 ratio of >2) DNA. A vice versa extraction procedure, i.e., SDS buffer followed by CTAB buffer, and also CTAB buffer followed by CTAB, did not yield good-quality DNA. PCR (using different primers) and restriction endonuclease digestion of the DNA extracted from these three plants validated the protocol. The accomplishment of the genome of P. cineraria using the DNA extracted using the modified protocol confirmed its applicability to genomic studies. The optimized protocol successful in extracting high-quality DNA from diverse plant species/tissues extends its applicability and is useful for accomplishing genome sequences of elite germplasm of recalcitrant plant species with quality reads.}, } @article {pmid37938300, year = {2022}, author = {Gushgari-Doyle, S and Lui, LM and Nielsen, TN and Wu, X and Malana, RG and Hendrickson, AJ and Carion, H and Poole, FL and Adams, MWW and Arkin, AP and Chakraborty, R}, title = {Genotype to ecotype in niche environments: adaptation of Arthrobacter to carbon availability and environmental conditions.}, journal = {ISME communications}, volume = {2}, number = {1}, pages = {32}, pmid = {37938300}, issn = {2730-6151}, support = {DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological 
and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; }, abstract = {Niche environmental conditions influence both the structure and function of microbial communities and the cellular function of individual strains. The terrestrial subsurface is a dynamic and diverse environment that exhibits specific biogeochemical conditions associated with depth, resulting in distinct environmental niches. Here, we present the characterization of seven distinct strains belonging to the genus Arthrobacter isolated from varying depths of a single sediment core and associated groundwater from an adjacent well. We characterized genotype and phenotype of each isolate to connect specific cellular functions and metabolisms to ecotype. Arthrobacter isolates from each ecotype demonstrated functional and genomic capacities specific to their biogeochemical conditions of origin, including laboratory-demonstrated characterization of salinity tolerance and optimal pH, and genes for utilization of carbohydrates and other carbon substrates. 
Analysis of the Arthrobacter pangenome revealed that it is notably open with a volatile accessory genome compared to previous pangenome studies on other genera, suggesting a high potential for adaptability to environmental niches.}, } @article {pmid37935586, year = {2023}, author = {Sen, S and Woodhouse, MR and Portwood, JL and Andorf, CM}, title = {Maize Feature Store: A centralized resource to manage and analyze curated maize multi-omics features for machine learning applications.}, journal = {Database : the journal of biological databases and curation}, volume = {2023}, number = {}, pages = {}, doi = {10.1093/database/baad078}, pmid = {37935586}, issn = {1758-0463}, support = {5030-21000-068-00-D//Department of Agriculture, Agricultural Research Service/ ; 5030-21000-068-00-D//Department of Agriculture, Agricultural Research Service/ ; }, abstract = {The big-data analysis of complex data associated with maize genomes accelerates genetic research and improves agronomic traits. As a result, efforts have increased to integrate diverse datasets and extract meaning from these measurements. Machine learning models are a powerful tool for gaining knowledge from large and complex datasets. However, these models must be trained on high-quality features to succeed. Currently, there are no solutions to host maize multi-omics datasets with end-to-end solutions for evaluating and linking features to target gene annotations. Our work presents the Maize Feature Store (MFS), a versatile application that combines features built on complex data to facilitate exploration, modeling and analysis. Feature stores allow researchers to rapidly deploy machine learning applications by managing and providing access to frequently used features. We populated the MFS for the maize reference genome with over 14 000 gene-based features based on published genomic, transcriptomic, epigenomic, variomic and proteomics datasets. 
Using the MFS, we created an accurate pan-genome classification model with an AUC-ROC score of 0.87. The MFS is publicly available through the maize genetics and genomics database. Database URL https://mfs.maizegdb.org/.}, } @article {pmid37935710, year = {2023}, author = {Radjasa, OK and Steven, R and Humaira, Z and Dwivany, FM and Nugrahapraja, H and Trinugroho, JP and Kristianti, T and Chahyadi, A and Natanael, Y and Priharto, N and Kamarisima and Sembiring, FAPB and Dwijayanti, A and Kusmita, L and Moeis, MR and Suhardi, VSH}, title = {Biosynthetic gene cluster profiling from North Java Sea Virgibacillus salarius reveals hidden potential metabolites.}, journal = {Scientific reports}, volume = {13}, number = {1}, pages = {19273}, pmid = {37935710}, issn = {2045-2322}, support = {223/IT1.B07.1/TA.00/2022//Institut Teknologi Bandung Priority Research/ ; }, abstract = {Virgibacillus salarius 19.PP.SC1.6 is a coral symbiont isolated from Indonesia's North Java Sea; it has the ability to produce secondary metabolites that provide survival advantages and biological functions, such as ectoine, which is synthesized by an ectoine gene cluster. Apart from being an osmoprotectant for bacteria, ectoine is also known as a chemical chaperone with numerous biological activities such as maintaining protein stability, which makes ectoine in high demand in the market industry and makes it beneficial to investigate V. salarius ectoine. However, there has been no research on genome-based secondary metabolite and ectoine gene cluster characterization from Indonesian marine V. salarius. In this study, we performed a genomic analysis and ectoine identification of V. salarius. A high-quality draft genome with total size of 4.45 Mb and 4426 coding sequence (CDS) was characterized and then mapped into the Cluster of Orthologous Groups (COG) category. The genus Virgibacillus has an "open" pangenome type with total of 18 genomic islands inside the V. salarius 19.PP.SC1.6 genome.
There were seven clusters of secondary metabolite-producing genes found, with a total of 80 genes classified as NRPS, PKS (type III), terpenes, and ectoine biosynthetic related genes. The ectoine gene cluster forms one operon consists of ectABC gene with 2190 bp gene cluster length, and is successfully characterized. The presence of ectoine in V. salarius was confirmed using UPLC-MS/MS operated in Multiple Reaction Monitoring (MRM) mode, which indicates that V. salarius has an intact ectoine gene clusters and is capable of producing ectoine as compatible solutes.}, } @article {pmid37934390, year = {2023}, author = {Ullah, A and Rehman, B and Khan, S and Almanaa, TN and Waheed, Y and Hassan, M and Naz, T and Ul Haq, M and Muhammad, R and Sanami, S and Irfan, M and Ahmad, S}, title = {An In Silico Multi-epitopes Vaccine Ensemble and Characterization Against Nosocomial Proteus penneri.}, journal = {Molecular biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37934390}, issn = {1559-0305}, support = {RSPD2023R632//Deanship of Scientific Research, King Saud University/ ; }, abstract = {Proteus penneri (P. penneri) is a bacillus-shaped, gram-negative, facultative anaerobe bacterium that is primarily an invasive pathogen and the etiological agent of several hospital-associated infections. P. penneri strains are naturally resistant to macrolides, amoxicillin, oxacillin, penicillin G, and cephalosporins; in addition, no vaccines are available against these strains. This warrants efforts to propose a theoretical based multi-epitope vaccine construct to prevent pathogen infections. In this research, reverse vaccinology bioinformatics and immunoinformatics approaches were adopted for vaccine target identification and construction of a multi-epitope vaccine. 
In the first phase, a core proteome dataset of the targeted pathogen was obtained using the NCBI database and subjected to bacterial pan-genome analysis using bacterial pan-genome analysis (BPGA) to predict core protein sequences which were then used to find good vaccine target candidates. This identified two proteins, Hcp family type VI secretion system effector and superoxide dismutase family protein, as promising vaccine targets. Afterward using the IEDB database, different B-cell and T-cell epitopes were predicted. A set of four epitopes "KGSVNVQDRE, NTGKLTGTR, IIHSDSWNER, and KDGKPVPALK" were chosen for the development of a multi-epitope vaccine construct. A 183 amino acid long vaccine design was built along with "EAAAK" and "GPGPG" linkers and a cholera toxin B-subunit adjuvant. The designed vaccine model comprised immunodominant, non-toxic, non-allergenic, and physicochemical stable epitopes. The model vaccine was docked with MHC-I, MHC-II, and TLR-4 immune cell receptors using the Cluspro2.0 web server. The binding energy score of the vaccine was - 654.7 kcal/mol for MHC-I, - 738.4 kcal/mol for MHC-II, and - 695.0 kcal/mol for TLR-4. A molecular dynamic simulation was done using AMBER v20 package for dynamic behavior in nanoseconds. Additionally, MM-PBSA binding free energy analysis was done to test intermolecular binding interactions between docked molecules. The MM-GBSA net binding energy score was - 148.00 kcal/mol, - 118.00 kcal/mol, and - 127.00 kcal/mol for vaccine with TLR-4, MHC-I, and MHC-II, respectively. Overall, these in silico-based predictions indicated that the vaccine is highly promising in terms of developing protective immunity against P. penneri. 
However, additional experimental validation is required to unveil the real immune response to the designed vaccine.}, } @article {pmid37934072, year = {2023}, author = {Raghuram, V and Gunoskey, JJ and Hofstetter, KS and Jacko, NF and Shumaker, MJ and Hu, YJ and Read, TD and David, MZ}, title = {Comparison of genomic diversity between single and pooled Staphylococcus aureus colonies isolated from human colonization cultures.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001111}, pmid = {37934072}, issn = {2057-5858}, abstract = {The most common approach to sampling the bacterial populations within an infected or colonized host is to sequence genomes from a single colony obtained from a culture plate. However, it is recognized that this method does not capture the genetic diversity in the population. Sequencing a mixture of several colonies (pool-seq) is a better approach to detect population heterogeneity, but it is more complex to analyse due to different types of heterogeneity, such as within-clone polymorphisms, multi-strain mixtures, multi-species mixtures and contamination. Here, we compared 8 single-colony isolates (singles) and pool-seq on a set of 2286 Staphylococcus aureus culture samples to identify features that can distinguish pure samples, samples undergoing intraclonal variation and mixed strain samples. The samples were obtained by swabbing 3 body sites on 85 human participants quarterly for a year, who initially presented with a methicillin-resistant S. aureus skin and soft-tissue infection (SSTI). We compared parameters such as sequence quality, contamination, allele frequency, nucleotide diversity and pangenome diversity in each pool to those for the corresponding singles. Comparing singles from the same culture plate, we found that 18% of sample collections contained mixtures of multiple multilocus sequence types (MLSTs or STs). 
We showed that pool-seq data alone could predict the presence of multi-ST populations with 95% accuracy. We also showed that pool-seq could be used to estimate the number of intra-clonal polymorphic sites in the population. Additionally, we found that the pool may contain clinically relevant genes such as antimicrobial resistance markers that may be missed when only examining singles. These results highlight the potential advantage of analysing genome sequences of total populations obtained from clinical cultures rather than single colonies.}, } @article {pmid37934071, year = {2023}, author = {Sommer, H and Djamalova, D and Galardini, M}, title = {Reduced ambiguity and improved interpretability of bacterial genome-wide associations using gene-cluster-centric k-mers.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001129}, pmid = {37934071}, issn = {2057-5858}, abstract = {The wide adoption of bacterial genome sequencing and encoding both core and accessory genome variation using k-mers has allowed bacterial genome-wide association studies (GWAS) to identify genetic variants associated with relevant phenotypes such as those linked to infection. Significant limitations still remain because of k-mers being duplicated across gene clusters and as far as the interpretation of association results is concerned, which affects the wider adoption of GWAS methods on microbial data sets. We have developed a simple computational method (panfeed) that explicitly links each k-mer to their gene cluster at base-resolution level, which allows us to avoid biases introduced by a global de Bruijn graph as well as more easily map and annotate associated variants. We tested panfeed on two independent data sets, correctly identifying previously characterized causal variants, which demonstrates the precision of the method, as well as its scalable performance. 
panfeed is a command line tool written in the python programming language and is available at https://github.com/microbial-pangenomes-lab/panfeed.}, } @article {pmid37934016, year = {2023}, author = {Garcia, J and Morales-Cruz, A and Cochetel, N and Minio, A and Figueroa-Balderas, R and Rolshausen, P and Baumgartner, K and Cantu, D}, title = {Comparative pangenomic insights into the distinct evolution of virulence factors among grapevine trunk pathogens.}, journal = {Molecular plant-microbe interactions : MPMI}, volume = {}, number = {}, pages = {}, doi = {10.1094/MPMI-09-23-0129-R}, pmid = {37934016}, issn = {0894-0282}, abstract = {The permanent organs of grapevines (V. vinifera L.), like other woody perennials, are colonized by various unrelated pathogenic ascomycete fungi secreting cell wall-degrading enzymes and phytotoxic secondary metabolites that contribute to host damage and disease symptoms. Trunk pathogens differ in the symptoms they induce and the extent and speed of damage. Isolates of the same species often display a wide virulence range, even within the same vineyard. This study focuses on Eutypa lata, Neofusicoccum parvum, and Phaeoacremonium minimum, causal agents of Eutypa dieback, Botryosphaeria dieback, and Esca, respectively. We sequenced fifty isolates from viticulture regions worldwide and built nucleotide-level, reference-free pangenomes for each species. Through examining genomic diversity and pangenome structure, we analyzed intraspecific conservation and variability of putative virulence factors, focusing on functions under positive selection, and recent gene-family dynamics of contraction and expansion. Our findings reveal contrasting distributions of putative virulence factors in the core, dispensable, and private genomes of each pangenome. For example, CAZymes were prevalent in the core genomes of each pangenome, whereas biosynthetic gene clusters were prevalent in the dispensable genomes of E. lata and P. minimum. 
The dispensable fractions were also enriched in Gypsy transposable elements and virulence factors under positive selection (polyketide synthases genes in E. lata and P. minimum glycosyltransferases in N. parvum). Our findings underscore the complexity of the genomic architecture in each species and provide insights into their adaptive strategies, enhancing our understanding of the underlying mechanisms of virulence.}, } @article {pmid37931775, year = {2023}, author = {Laufer, V and Glover, TW and Wilson, TE}, title = {Applications of advanced technologies for detecting genomic structural variation.}, journal = {Mutation research. Reviews in mutation research}, volume = {}, number = {}, pages = {108475}, doi = {10.1016/j.mrrev.2023.108475}, pmid = {37931775}, issn = {1388-2139}, abstract = {Chromosomal structural variation (SV) encompasses a heterogenous class of genetic variants that exerts strong influences on human health and disease. Despite their importance, many structural variants (SVs) have remained poorly characterized at even a basic level, a discrepancy predicated upon the technical limitations of prior genomic assays. However, recent advances in genomic technology can identify and localize SVs accurately, opening new questions regarding SV risk factors and their impacts in humans. Here, we first define and classify human SVs and their generative mechanisms, highlighting characteristics leveraged by various SV assays. We next examine the first-ever gapless assembly of the human genome and the technical process of assembling it, which required third-generation sequencing technologies to resolve structurally complex loci. The new portions of that "telomere-to-telomere" and subsequent pangenome assemblies highlight aspects of SV biology likely to develop in the near-term. 
We consider the strengths and limitations of the most promising new SV technologies and when they or longstanding approaches are best suited to meeting salient goals in the study of human SV in population-scale genomics research, clinical, and public health contexts. It is a watershed time in our understanding of human SV when new approaches are expected to fundamentally change genomic applications.}, } @article {pmid37928322, year = {2023}, author = {Magome, TG and Ramatla, T and Mokgokong, P and Thekisoe, O and Lekota, KE}, title = {The draft genome and pan-genome structure of Paraclostridium bifermentans strain T2 isolated from sheep faeces.}, journal = {Data in brief}, volume = {51}, number = {}, pages = {109660}, pmid = {37928322}, issn = {2352-3409}, abstract = {Paraclostridium bifermentans is a Gram-positive, rod-shaped bacterium that can inhabit various mesophilic environments such as soil, marine habitats, and polluted waters. Some species of Paraclostridium are reported to cause fatal infections in humans, although mechanisms and capacity for adaptation are still unknown. We hereby present the whole genome sequence data of P. bifermentans T2 strain isolated from sheep faecal matter in Potchefstroom, South Africa. DNA libraries were sequenced on the Oxford Nanopore Mk1B platform. The generated sequence data was assembled and polished using Flye assembler. Genome data analysis yielded a genome size of 2 911,782 bp, comprising of a 27.8 % G + C content. Rapid Annotation using Subsystem Technology (RAST) showed that the draft genome of this strain consists of 6 514 coding sequences (CDS). The pan-genome was defined by a total of 16 288 CDSs, grouping the strain with the genome of P. bifermentans SampleS7P1. 
The draft genome sequence has been deposited in NCBI GenBank with the accession number of JAUPET000000000.}, } @article {pmid37920964, year = {2023}, author = {Bachari, A and Nassar, N and Schanknecht, E and Telukutla, S and Piva, TJ and Mantri, N}, title = {Rationalizing a prospective coupling effect of cannabinoids with the current pharmacotherapy for melanoma treatment.}, journal = {WIREs mechanisms of disease}, volume = {}, number = {}, pages = {e1633}, doi = {10.1002/wsbm.1633}, pmid = {37920964}, issn = {2692-9368}, abstract = {Melanoma is one of the leading fatal forms of cancer, yet from a treatment perspective, we have minimal control over its reoccurrence and resistance to current pharmacotherapies. The endocannabinoid system (ECS) has recently been accepted as a multifaceted homeostatic regulator, influencing various physiological processes across different biological compartments, including the skin. This review presents an overview of the pathophysiology of melanoma, current pharmacotherapy used for treatment, and the challenges associated with the different pharmacological approaches. Furthermore, it highlights the utility of cannabinoids as an additive remedy for melanoma by restoring the balance between downregulated immunomodulatory pathways and elevated inflammatory cytokines during chronic skin conditions as one of the suggested critical approaches in treating this immunogenic tumor. This article is categorized under: Cancer > Molecular and Cellular Physiology.}, } @article {pmid37546988, year = {2023}, author = {Pibiri, GE and Fan, J and Patro, R}, title = {Meta-colored compacted de Bruijn graphs.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.07.21.550101}, pmid = {37546988}, abstract = {MOTIVATION: The colored compacted de Bruijn graph (c-dBG) has become a fundamental tool used across several areas of genomics and pangenomics. 
For example, it has been widely adopted by methods that perform read mapping or alignment, abundance estimation, and subsequent downstream analyses. These applications essentially regard the c-dBG as a map from k-mers to the set of references in which they appear. The c-dBG data structure should retrieve this set -- the color of the k-mer -- efficiently for any given k-mer, while using little memory. To aid retrieval, the colors are stored explicitly in the data structure and take considerable space for large reference collections, even when compressed. Reducing the space of the colors is therefore of utmost importance for large-scale sequence indexing.

RESULTS: We describe the meta-colored compacted de Bruijn graph (Mac-dBG) -- a new colored de Bruijn graph data structure where colors are represented holistically, i.e., taking into account their redundancy across the whole collection being indexed, rather than individually as atomic integer lists. This allows the factorization and compression of common sub-patterns across colors. While optimizing the space of our data structure is NP-hard, we propose a simple heuristic algorithm that yields practically good solutions. Results show that the Mac-dBG data structure improves substantially over the best previous space/time trade-off, by providing remarkably better compression effectiveness for the same (or better) query efficiency. This improved space/time trade-off is robust across different datasets and query workloads. Code availability: A C++17 implementation of the Mac-dBG is publicly available on GitHub at: https://github.com/jermp/fulgor.}, } @article {pmid37918082, year = {2023}, author = {Zhuang, Z and Cheng, YY and Deng, J and Cai, Z and Zhong, L and Qu, JX and Wang, K and Yang, L}, title = {Genomic insights into the phage-defense systems of Stenotrophomonas maltophilia clinical isolates.}, journal = {Microbiological research}, volume = {278}, number = {}, pages = {127528}, doi = {10.1016/j.micres.2023.127528}, pmid = {37918082}, issn = {1618-0623}, abstract = {Stenotrophomonas maltophilia is a rapidly evolving multidrug-resistant opportunistic pathogen that can cause serious infections in immunocompromised patients. Although phage therapy is one of promising strategies for dealing with MDR bacteria, the main challenges of phage therapeutics include accumulation of phage resistant mutations and acquisition of the phage defense systems. To systematically evaluate the impact of (pro)phages in shaping genetic and evolutionary diversity of S. maltophilia, we collected 166 S. 
maltophilia isolates from three hospitals in southern China to analyze its pangenome, virulence factors, prophage regions, and anti-viral immune systems. Pangenome analysis indicated that there are 1328 saturated core genes and 26961 unsaturated accessory genes in the pangenome, suggesting existence of highly variable parts of S. maltophilia genome. The presence of genes in relation to T3SS and T6SS mechanisms suggests the great potential to secrete toxins by the S. maltophilia population, which is contrary to the conventional notion of low-virulence of S. maltophilia. Additionally, we characterized the pan-immune system maps of these clinical isolates against phage infections and revealed the co-harboring of CBASS and anti-CBASS in some strains, suggesting a never-ending arms race and the co-evolutionary dynamic between bacteria and phages. Furthermore, our study predicted 310 prophage regions in S. maltophilia with high genetic diversity. Six viral defense systems were found to be located at specific position of the S. maltophilia prophage genomes, indicating potential evolution of certain site/region similar to bacterial 'defense islands' in prophage. Our study provides novel insights of the S. maltophilia pangenome in relation to phage-defense mechanisms, which extends to our understanding of bacterial-phage interactions and might guide the application of phage therapy in combating S. maltophilia infections.}, } @article {pmid37919000, year = {2024}, author = {Mun, SY and Lee, W and Lee, SY and Chang, JY and Chang, HC}, title = {Pediococcus inopinatus with a well-developed CRISPR-Cas system dominates in long-term fermented kimchi, Mukeunji.}, journal = {Food microbiology}, volume = {117}, number = {}, pages = {104385}, doi = {10.1016/j.fm.2023.104385}, pmid = {37919000}, issn = {1095-9998}, abstract = {Kimchi is produced through a low-temperature fermentation without pre-sterilization, resulting in a heterogeneous microbial community. 
As fermentation progresses, dominant lactic acid bacteria (LAB) species emerge and undergo a transition process. In this study, LAB were isolated from Mukeunji, a long-term fermented kimchi that is in the final stage of kimchi fermentation process. It was confirmed, through culture-dependent and independent analysis, as well as metagenome analysis, that Pediococcus inopinatus are generally dominant in long-term fermented kimchi. Comparative analysis of the de novo assembled whole genome of P. inopinatus with other kimchi LAB revealed that this species has a well-developed clustered regularly interspaced short palindromic repeats (CRISPR) system. The CRISPR system of P. inopinatus has an additional copy of the csa3 gene, a transcription factor for cas genes. Indeed, this species not only highly expresses cas1 and cas2, which induce spacer acquisition, but also has many diverse spacers that are actively expressed. These findings indicate that the well-developed CRISPR-Cas system is enabling P. inopinatus to dominate in long-fermented kimchi. Overall, this study revealed that LAB with a robust defense system dominate in the final stage of kimchi fermentation and presented a model for the succession mechanism of kimchi LAB.}, } @article {pmid37917733, year = {2023}, author = {Chinchilla, D and Nieves, C and Gutiérrez, R and Sordoillet, V and Veyrier, FJ and Picardeau, M}, title = {Phylogenomics of Leptospira santarosai, a prevalent pathogenic species in the Americas.}, journal = {PLoS neglected tropical diseases}, volume = {17}, number = {11}, pages = {e0011733}, doi = {10.1371/journal.pntd.0011733}, pmid = {37917733}, issn = {1935-2735}, abstract = {BACKGROUND: Leptospirosis is a complex zoonotic disease mostly caused by a group of eight pathogenic species (L. interrogans, L. borgpetersenii, L. kirschneri, L. mayottensis, L. noguchii, L. santarosai, L. weilii, L. alexanderi), with a wide spectrum of animal reservoirs and patient outcomes. 
Leptospira interrogans is considered as the leading causative agent of leptospirosis worldwide and it is the most studied species. However, the genomic features and phylogeography of other Leptospira pathogenic species remain to be determined.

Here we investigated the genome diversity of the main pathogenic Leptospira species based on a collection of 914 genomes from strains isolated around the world. Genome analyses revealed species-specific genome size and GC content, and an open pangenome in the pathogenic species, except for L. mayottensis. Taking advantage of a new set of genomes of L. santarosai strains isolated from patients in Costa Rica, we took a closer look at this species. L. santarosai strains are largely distributed in America, including the Caribbean islands, with over 96% of the available genomes originating from this continent. Phylogenetic analysis showed high genetic diversity within L. santarosai, and the clonal groups identified by cgMLST were strongly associated with geographical areas. Serotype identification based on serogrouping and/or analysis of the O-antigen biosynthesis gene loci further confirmed the great diversity of strains within the species.

CONCLUSIONS/SIGNIFICANCE: In conclusion, we report a comprehensive genome analysis of pathogenic Leptospira species with a focus on L. santarosai. Our study sheds new light onto the genomic diversity, evolutionary history, and epidemiology of leptospirosis in America and globally. Our findings also expand our knowledge of the genes driving O-antigen diversity. In addition, our work provides a framework for understanding the virulence and spread of L. santarosai and for improving its surveillance in both humans and animals.}, } @article {pmid37914227, year = {2023}, author = {Li, Z and Liu, X and Wang, C and Li, Z and Jiang, B and Zhang, R and Tong, L and Qu, Y and He, S and Chen, H and Mao, Y and Li, Q and Pook, T and Wu, Y and Zan, Y and Zhang, H and Li, L and Wen, K and Chen, Y}, title = {The pig pangenome provides insights into the roles of coding structural variations in genetic diversity and adaptation.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.277638.122}, pmid = {37914227}, issn = {1549-5469}, abstract = {Structural variations have emerged as an important driving force for genome evolution and phenotypic variation in various organisms, yet their contributions to genetic diversity and adaptation in domesticated animals remain largely unknown. Here we constructed a pangenome based on 250 sequenced individuals from 32 pig breeds in Eurasia and systematically characterized coding sequence presence/absence variations (PAVs) within pigs. We identified 308.3-Mb nonreference sequences and 3438 novel genes absent from the current reference genome. Gene PAV analysis showed that 16.8% of the genes in the pangene catalog undergo PAV. A number of newly identified dispensable genes showed close associations with adaptation. 
For instance, several novel swine leukocyte antigen (SLA) genes discovered in nonreference sequences potentially participate in immune responses to porcine reproductive and respiratory syndrome virus (PRRSV) infection. We delineated previously unidentified features of the pig mobilome that contained 490,480 transposable element insertion polymorphisms (TIPs) resulting from recent mobilization of 970 TE families, and investigated their population dynamics along with influences on population differentiation and gene expression. In addition, several candidate adaptive TE insertions were detected to be co-opted into genes responsible for responses to hypoxia, skeletal development, regulation of heart contraction, and neuronal cell development, likely contributing to local adaptation of Tibetan wild boars. These findings enhance our understanding on hidden layers of the genetic diversity in pigs and provide novel insights into the role of SVs in the evolutionary adaptation of mammals.}, } @article {pmid37910550, year = {2023}, author = {Angelo, L and Vaillant, A and Blanchet, M and Labonté, P}, title = {Pangenomic antiviral effect of REP 2139 in CRISPR/Cas9 engineered cell lines expressing hepatitis B virus surface antigen.}, journal = {PloS one}, volume = {18}, number = {11}, pages = {e0293167}, doi = {10.1371/journal.pone.0293167}, pmid = {37910550}, issn = {1932-6203}, abstract = {Chronic hepatitis B remains a global health problem with 296 million people living with chronic HBV infection and being at risk of developing cirrhosis and hepatocellular carcinoma. Non-infectious subviral particles (SVP) are produced in large excess over infectious Dane particles in patients and are the major source of Hepatitis B surface antigen (HBsAg). They are thought to exhaust the immune system, and it is generally considered that functional cure requires the clearance of HBsAg from blood of patient. 
Nucleic acid polymers (NAPs) antiviral activity lead to the inhibition of HBsAg release, resulting in rapid clearance of HBsAg from circulation in vivo. However, their efficacy has only been demonstrated in limited genotypes in small scale clinical trials. HBV exists as nine main genotypes (A to I). In this study, the HBsAg ORFs from the most prevalent genotypes (A, B, C, D, E, G), which account for over 96% of human cases, were inserted into the AAVS1 safe-harbor of HepG2 cells using CRISPR/Cas9 knock-in. A cell line producing the D144A vaccine escape mutant was also engineered. The secretion of HBsAg was confirmed into these new genotype cell lines (GCLs) and the antiviral activity of the NAP REP 2139 was then assessed. The results demonstrate that REP 2139 exerts an antiviral effect in all genotypes and serotypes tested in this study, including the vaccine escape mutant, suggesting a pangenomic effect of the NAPs.}, } @article {pmid37910167, year = {2023}, author = {English, J and Newberry, F and Hoyles, L and Patrick, S and Stewart, L}, title = {Genomic analyses of Bacteroides fragilis: subdivisions I and II represent distinct species.}, journal = {Journal of medical microbiology}, volume = {72}, number = {11}, pages = {}, doi = {10.1099/jmm.0.001768}, pmid = {37910167}, issn = {1473-5644}, abstract = {Introduction. Bacteroides fragilis is a Gram-negative anaerobe that is a member of the human gastrointestinal microbiota and is frequently found as an extra-intestinal opportunistic pathogen. B. fragilis comprises two distinct groups - divisions I and II - characterized by the presence/absence of genes [cepA and ccrA (cfiA), respectively] that confer resistance to β-lactam antibiotics by either serine or metallo-β-lactamase production. No large-scale analyses of publicly available B. fragilis sequence data have been undertaken, and the resistome of the species remains poorly defined. Hypothesis/Gap Statement. Reclassification of divisions I and II B. 
fragilis as two distinct species has been proposed but additional evidence is required. Aims. To investigate the genomic diversity of GenBank B. fragilis genomes and establish the prevalence of division I and II strains among publicly available B. fragilis genomes, and to generate further evidence to demonstrate that B. fragilis division I and II strains represent distinct genomospecies. Methodology. High-quality (n=377) genomes listed as B. fragilis in GenBank were included in pangenome and functional analyses. Genome data were also subject to resistome profiling using The Comprehensive Antibiotic Resistance Database. Results. Average nucleotide identity and phylogenetic analyses showed B. fragilis divisions I and II represent distinct species: B. fragilis sensu stricto (n=275 genomes) and B. fragilis A (n=102 genomes; Genome Taxonomy Database designation), respectively. Exploration of the pangenome of B. fragilis sensu stricto and B. fragilis A revealed separation of the two species at the core and accessory gene levels. Conclusion. The findings indicate that B. fragilis A, previously referred to as division II B. fragilis, is an individual species and distinct from B. fragilis sensu stricto. The B. fragilis pangenome analysis supported previous genomic, phylogenetic and resistome screening analyses collectively reinforcing that divisions I and II are two separate species. In addition, it was confirmed that differences in the accessory genes of B. fragilis divisions I and II are primarily associated with carbohydrate metabolism and suggest that differences other than antimicrobial resistance could also be used to distinguish between these two species.}, } @article {pmid37907856, year = {2023}, author = {Hodgeman, R and Mann, R and Djitro, N and Savin, K and Rochfort, S and Rodoni, B}, title = {The pan-genome of Mycobacterium avium subsp. 
paratuberculosis (Map) confirms ancestral lineage and reveals gene rearrangements within Map Type S.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {656}, pmid = {37907856}, issn = {1471-2164}, abstract = {BACKGROUND: To date genomic studies on Map have concentrated on Type C strains with only a few Type S strains included for comparison. In this study the entire pan-genome of 261 Map genomes (205 Type C, 52 Type S and 4 Type B) and 7 Mycobacterium avium complex (Mac) genomes were analysed to identify genomic similarities and differences between the strains and provide more insight into the evolutionary relationship within this Mycobacterial species.

RESULTS: Our analysis of the core genome of all the Map isolates identified two distinct lineages, Type S and Type C Map that is consistent with previous phylogenetic studies of Map. Pan-genome analysis revealed that Map has a larger accessory genome than Mycobacterium avium subsp. avium (Maa) and Type C Map has a larger accessory genome than Type S Map. In addition, we found large rearrangements within Type S strains of Map and little to none in Type C and Type B strains. There were 50 core genes identified that were unique to Type S Map and there were no unique core genes identified between Type B and Type C Map strains. In Type C Map we identified an additional CE10 CAZyme class which was identified as an alpha/beta hydrolase and an additional polyketide and non-ribosomal peptide synthetase cluster. Consistent with previous analysis no plasmids and only incomplete prophages were identified in the genomes of Map. There were 45 hypothetical CRISPR elements identified with no associated cas genes.

CONCLUSION: This is the most comprehensive comparison of the genomic content of Map isolates to date and included the closing of eight Map genomes. The analysis revealed that there is greater variation in gene synteny within Type S strains when compared to Type C indicating that the Type C Map strain emerged after Type S. Further analysis of Type C and Type B genomes revealed that they are structurally similar with little to no genetic variation and that Type B Map may be a distinct clade within Type C Map and not a different strain type of Map. The evolutionary lineage of Maa and Map was confirmed as emerging after M. hominissuis.}, } @article {pmid37904249, year = {2023}, author = {Manzano-Morales, S and Liu, Y and González-Bodí, S and Huerta-Cepas, J and Iranzo, J}, title = {Comparison of gene clustering criteria reveals intrinsic uncertainty in pangenome analyses.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {250}, pmid = {37904249}, issn = {1474-760X}, support = {PID2019-106618GA-I00//Agencia Estatal de Investigación/ ; RYC-2017-22524//Agencia Estatal de Investigación/ ; PGC2018-098073-A-I00//Agencia Estatal de Investigación/ ; SEV-2016-0672//Agencia Estatal de Investigación/ ; PID2021-127210NB-I00//Agencia Estatal de Investigación/ ; PTA2021-020636-I//Agencia Estatal de Investigación/ ; M190020074JIIS//Comunidad de Madrid/ ; 202008440425//China Scholarship Council/ ; }, abstract = {BACKGROUND: A key step for comparative genomics is to group open reading frames into functionally and evolutionarily meaningful gene clusters. Gene clustering is complicated by intraspecific duplications and horizontal gene transfers that are frequent in prokaryotes. In consequence, gene clustering methods must deal with a trade-off between identifying vertically transmitted representatives of multicopy gene families, which are recognizable by synteny conservation, and retrieving complete sets of species-level orthologs. 
We studied the implications of adopting homology, orthology, or synteny conservation as formal criteria for gene clustering by performing comparative analyses of 125 prokaryotic pangenomes.

RESULTS: Clustering criteria affect pangenome functional characterization, core genome inference, and reconstruction of ancestral gene content to different extents. Species-wise estimates of pangenome and core genome sizes change by the same factor when using different clustering criteria, allowing robust cross-species comparisons regardless of the clustering criterion. However, cross-species comparisons of genome plasticity and functional profiles are substantially affected by inconsistencies among clustering criteria. Such inconsistencies are driven not only by mobile genetic elements, but also by genes involved in defense, secondary metabolism, and other accessory functions. In some pangenome features, the variability attributed to methodological inconsistencies can even exceed the effect sizes of ecological and phylogenetic variables.

CONCLUSIONS: Choosing an appropriate criterion for gene clustering is critical to conduct unbiased pangenome analyses. We provide practical guidelines to choose the right method depending on the research goals and the quality of genome assemblies, and a benchmarking dataset to assess the robustness and reproducibility of future comparative studies.}, } @article {pmid37902967, year = {2023}, author = {Chandra, G and Jain, C}, title = {Gap-Sensitive Colinear Chaining Algorithms for Acyclic Pangenome Graphs.}, journal = {Journal of computational biology : a journal of computational molecular cell biology}, volume = {}, number = {}, pages = {}, doi = {10.1089/cmb.2023.0186}, pmid = {37902967}, issn = {1557-8666}, abstract = {A pangenome graph can serve as a better reference for genomic studies because it allows a compact representation of multiple genomes within a species. Aligning sequences to a graph is critical for pangenome-based resequencing. The seed-chain-extend heuristic works by finding short exact matches between a sequence and a graph. In this heuristic, colinear chaining helps identify a good cluster of exact matches that can be combined to form an alignment. Colinear chaining algorithms have been extensively studied for aligning two sequences with various gap costs, including linear, concave, and convex cost functions. However, extending these algorithms for sequence-to-graph alignment presents significant challenges. Recently, Makinen et al. introduced a sparse dynamic programming framework that exploits the small path cover property of acyclic pangenome graphs, enabling efficient chaining. However, this framework does not consider gap costs, limiting its practical effectiveness. We address this limitation by developing novel problem formulations and provably good chaining algorithms that support a variety of gap cost functions. 
These functions are carefully designed to enable fast chaining algorithms whose time requirements are parameterized in terms of the size of the minimum path cover. Through an empirical evaluation, we demonstrate the superior performance of our algorithm compared with existing aligners. When mapping simulated long reads to a pangenome graph comprising 95 human haplotypes, we achieved 98.7% precision while leaving <2% of reads unmapped.}, } @article {pmid37897717, year = {2023}, author = {Alsubaiyel, AM and Bukhari, SI}, title = {Computational exploration and design of a multi-epitopes vaccine construct against Chlamydia psittaci.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-17}, doi = {10.1080/07391102.2023.2268173}, pmid = {37897717}, issn = {1538-0254}, abstract = {Chlamydia psittaci is an intracellular pathogen and causes variety of deadly infections in humans. Antibiotics are effective against C. psittaci however high percentage of resistant strains have been reported in recent times. As there is no licensed vaccine, we used in-silico techniques to design a multi-epitopes vaccine against C. psittaci. Following a step-wise protocol, the proteome of available 26 strains was retrieved and filtered for subcellular localized proteins. Five proteins were selected (2 extracellular and 3 outer membrane) and were further analyzed for B-cell and T-cell epitopes prediction. Epitopes were further checked for antigenicity, solubility, stability, toxigenicity, allergenicity, and adhesive properties. Filtered epitopes were linked via linkers and the 3D structure of the designed vaccine construct was predicted. Binding of the designed vaccine with immune receptors: MHC-I, MHC-II, and TLR-4 was analyzed, which resulted in docking energy scores of -4.37 kcal/mol, -0.20 kcal/mol and -22.38 kcal/mol, respectively. 
Further, the docked complexes showed stable dynamics with a maximum value of vaccine-MHC-I complex (7.8 Å), vaccine-MHC-II complex (6.2 Å) and vaccine-TLR4 complex (5.2 Å). As per the results, the designed vaccine construct reported robust immune responses to protect the host against C. psittaci infections. In the study, the C. psittaci proteomes were considered in pan-genome analysis to extract core proteins. The pan-genome analysis was conducted using bacterial pan-genome analysis (BPGA) software. The core proteins were checked further for non-redundant proteins using a CD-Hit server. Surface localized proteins were investigated using PSORTb v 3.0. The surface proteins were BLASTp against Virulence Factor Data Base (VFDB) to predict virulent factors. Antigenicity prediction of the shortlisted proteins was further done using VAXIGEN v 2.0. The epitope mapping was done using the immune epitope database (IEDB). A multi-epitopes vaccine was built and a 3D structure was generated using 3Dprot online server. The docking analysis of the designed vaccine with immune receptors was carried out using PATCHDOCK. Molecular dynamics and post-simulation analyses were carried out using AMBER v20 to decipher the dynamics stability and intermolecular binding energies of the docked complexes. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37897710, year = {2023}, author = {Hamed, SM and Mohamed, HO and Ashour, HM and Fahmy, LI}, title = {Comparative genomic analysis of strong biofilm-forming Klebsiella pneumoniae isolates uncovers novel ISEcp1-mediated chromosomal integration of a full plasmid-like sequence.}, journal = {Infectious diseases (London, England)}, volume = {}, number = {}, pages = {1-19}, doi = {10.1080/23744235.2023.2272624}, pmid = {37897710}, issn = {2374-4243}, abstract = {BACKGROUND: The goal of the current study was to elucidate the genomic background of biofilm formation in Klebsiella pneumoniae.

METHODS: Clinical isolates were screened for biofilm formation using the crystal violet assay. Antimicrobial resistance (AMR) profiles were assessed by disk diffusion and broth microdilution tests. Biofilm formation was correlated to virulence and resistance genes screened by PCR. Draft genomes of three isolates that form strong biofilm were generated by Illumina sequencing.

RESULTS: Only the siderophore-coding gene iutA was significantly associated with more pronounced biofilm formation. ST1399-KL43-O1/O2v1 and ST11-KL15-O4 were assigned to the multidrug-resistant strain K21 and the extensively drug-resistant strain K237, respectively. ST1999-KL38-O12 was assigned to K57. Correlated with CRISPR/Cas distribution, more plasmid replicons and prophage sequences were identified in K21 and K237 compared to K57. The acquired AMR genes (blaOXA-48, rmtF, aac(6')-Ib and qnrB) and (blaNDM-1, blaCTX-M, aph(3')-VI, qnrS, and aac(6')-Ib-cr) were found in K237 and K21, respectively. The latter showed a novel ISEcp1-mediated chromosomal integration of replicon type IncM1 plasmid-like structure harboring blaCTX-M-14 and aph(3')-VI that uniquely interrupted rcsC. The plasmid-mediated heavy metal resistance genes merACDEPRT and arsABCDR were spotted in K21, which also exclusively carried the acquired virulence genes mrkABCDF and the hypervirulence-associated genes iucABCD-iutA, and rmpA/A2. Pangenome analysis revealed NTUH-K2044 accessory genes most frequently shared with K21.

CONCLUSIONS: While less virulent to Galleria mellonella than ST1999 (K57), the strong biofilm former, multidrug-resistant, NDM-producer K. pneumoniae K21 (ST1399-KL43-O1/O2v1) carries a novel chromosomally integrated plasmid-like structure and hypervirulence-associated genes and represents a serious threat to countries in the area.}, } @article {pmid37897361, year = {2023}, author = {Hu, R and Li, F and Chen, Y and Liu, C and Li, J and Ma, Z and Wang, Y and Cui, C and Luo, C and Zhou, P and Ni, W and Yang, QY and Hu, S}, title = {AnimalMetaOmics: a multi-omics data resources for exploring animal microbial genomes and microbiomes.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad931}, pmid = {37897361}, issn = {1362-4962}, support = {2021ZD01//Foundation of state key laboratory of sheep genetic improvement and healthy production/ ; //Tianshan Talent Project/ ; 2022xjkk1202//The Third Xinjiang Scientific Expedition Program/ ; }, abstract = {The Animal Meta-omics landscape database (AnimalMetaOmics, https://yanglab.hzau.edu.cn/animalmetaomics#/) is a comprehensive and freely available resource that includes metagenomic, metatranscriptomic, and metaproteomic data from various non-human animal species and provides abundant information on animal microbiomes, including cluster analysis of microbial cognate genes, functional gene annotations, active microbiota composition, gene expression abundance, and microbial protein identification. In this work, 55 898 microbial genomes were annotated from 581 animal species, including 42 924 bacterial genomes, 12 336 virus genomes, 496 archaea genomes and 142 fungi genomes. Moreover, 321 metatranscriptomic datasets were analyzed from 31 animal species and 326 metaproteomic datasets from four animal species, as well as the pan-genomic dynamics and compositional characteristics of 679 bacterial species and 13 archaea species from animal hosts. 
Researchers can efficiently access and acquire the information of cross-host microbiota through a user-friendly interface, such as species, genomes, activity levels, expressed protein sequences and functions, and pan-genome composition. These valuable resources provide an important reference for better exploring the classification, functional diversity, biological process diversity and functional genes of animal microbiota.}, } @article {pmid37897345, year = {2023}, author = {Dimonaco, NJ and Clare, A and Kenobi, K and Aubrey, W and Creevey, CJ}, title = {StORF-Reporter: finding genes between genes.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad814}, pmid = {37897345}, issn = {1362-4962}, support = {//Aberystwyth University/ ; //McMaster University/ ; //Weston Family Microbiome Initiative/ ; BB/E/W/10964A01//BBSRC/ ; R3192GFS//DAFM Ireland/DAERA Northern Ireland/ ; 818368//Horizon 2020/ ; }, abstract = {Large regions of prokaryotic genomes are currently without any annotation, in part due to well-established limitations of annotation tools. For example, it is routine for genes using alternative start codons to be misreported or completely omitted. Therefore, we present StORF-Reporter, a tool that takes an annotated genome and returns regions that may contain missing CDS genes from unannotated regions. StORF-Reporter consists of two parts. The first begins with the extraction of unannotated regions from an annotated genome. Next, Stop-ORFs (StORFs) are identified in these unannotated regions. StORFs are open reading frames that are delimited by stop codons and thus can capture those genes most often missing in genome annotations. We show this methodology recovers genes missing from canonical genome annotations. We inspect the results of the genomes of model organisms, the pangenome of Escherichia coli, and a set of 5109 prokaryotic genomes of 247 genera from the Ensembl Bacteria database. 
StORF-Reporter extended the core, soft-core and accessory gene collections, identified novel gene families and extended families into additional genera. The high levels of sequence conservation observed between genera suggest that many of these StORFs are likely to be functional genes that should now be considered for inclusion in canonical annotations.}, } @article {pmid37894252, year = {2023}, author = {Kurihara, MNL and Santos, INM and Eisen, AKA and Caleiro, GS and Araújo, J and Sales, RO and Pignatari, AC and Salles, MJ}, title = {Phenotypic and Genotypic Characterization of Cutibacterium acnes Isolated from Shoulder Surgery Reveals Insights into Genetic Diversity.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102594}, pmid = {37894252}, issn = {2076-2607}, support = {88887.500796/2020-00//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; 88887.627094/2021-00//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; }, abstract = {Specific virulence factors that likely influence C. acnes invasion into deep tissues remain to be elucidated. Herein, we describe the frequency of C. acnes identification in deep tissue specimens of patients undergoing clean shoulder surgery and assess its phenotypic and genetic traits associated with virulence and antibiotic resistance patterns, compared with isolates from the skin of healthy volunteers. Multiple deep tissue specimens from the bone fragments, tendons, and bursa of 84 otherwise healthy patients undergoing primary clean-open and arthroscopic shoulder surgeries were aseptically collected. The overall yield of tissue sample cultures was 21.5% (55/255), with 11.8% (30/255) identified as C. acnes in 27.3% (23/84) of patients. Antibiotic resistance rates were low, with most strains expressing susceptibility to first-line antibiotics, while a few were resistant to penicillin and rifampicin. 
Phylotypes IB (73.3%) and II (23.3%) were predominant in deep tissue samples. Genomic analysis demonstrated differences in the pangenome of the isolates from the same clade. Even though strains displayed a range of pathogenic markers, such as biofilm formation, patients did not evolve to infection during the 1-year follow-up. This suggests that the presence of polyclonal C. acnes in multiple deep tissue samples does not necessarily indicate infection.}, } @article {pmid37894121, year = {2023}, author = {Nedashkovskaya, O and Otstavnykh, N and Balabanova, L and Bystritskaya, E and Kim, SG and Zhukova, N and Tekutyeva, L and Isaeva, M}, title = {Rhodoalgimonas zhirmunskyi gen. nov., sp. nov., a Marine Alphaproteobacterium Isolated from the Pacific Red Alga Ahnfeltia tobuchiensis: Phenotypic Characterization and Pan-Genome Analysis.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102463}, pmid = {37894121}, issn = {2076-2607}, support = {15.BRK.21.0004 (Contract No. 075-15-2021-1052)//the Ministry of Science and Higher Education, Russian Federation/ ; }, abstract = {A novel Gram-staining negative, strictly aerobic, rod-shaped, and non-motile bacterium, designated strain 10Alg 79[T], was isolated from the red alga Ahnfeltia tobuchiensis. A phylogenetic analysis based on 16S rRNA gene sequences placed the novel strain within the family Roseobacteraceae, class Alphaproteobacteria, phylum Pseudomonadota, where the nearest neighbor was Shimia sediminis ZQ172[T] (97.33% of identity). However, a phylogenomic study clearly showed that strain 10Alg 79[T] forms a distinct evolutionary lineage at the genus level within the family Roseobacteraceae combining with strains Aquicoccus porphyridii L1 8-17[T], Marimonas arenosa KCTC 52189[T], and Lentibacter algarum DSM 24677[T]. The ANI, AAI, and dDDH values between them were 75.63-78.15%, 67.41-73.08%, and 18.8-19.8%, respectively. 
The genome comprises 3,754,741 bp with a DNA GC content of 62.1 mol%. The prevalent fatty acids of strain 10Alg 79[T] were C18:1 ω7c and C16:0. The polar lipid profile consisted of phosphatidylethanolamine, phosphatidylglycerol, phosphatidylcholine, an unidentified aminolipid, an unidentified phospholipid and an unidentified lipid. A pan-genome analysis showed that the unique part of the 10Alg 79[T] genome consists of 13 genus-specific clusters and 413 singletons. The annotated singletons were more often related to transport protein systems, transcriptional regulators, and enzymes. A functional annotation of the draft genome sequence revealed that this bacterium could be a source of a new phosphorylase, which may be used for phosphoglycoside synthesis. A combination of the genotypic and phenotypic data showed that the bacterial isolate represents a novel species and a novel genus, for which the name Rhodoalgimonas zhirmunskyi gen. nov., sp. nov. is proposed. The type strain is 10Alg 79[T] (=KCTC 72611[T] = KMM 6723[T]).}, } @article {pmid37894103, year = {2023}, author = {Covas, C and Figueiredo, G and Gomes, M and Santos, T and Mendo, S and Caetano, TS}, title = {The Pangenome of Gram-Negative Environmental Bacteria Hides a Promising Biotechnological Potential.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102445}, pmid = {37894103}, issn = {2076-2607}, support = {SFRH/BD/98446/2013//Fundação para a Ciência e Tecnologia/ ; CEECIND/01463/2017//Fundação para a Ciência e Tecnologia/ ; UIDP/50017/2020+UIDB/50017/2020+LA/P/0094/2020//Fundação para a Ciência e Tecnologia/ ; }, abstract = {Secondary metabolites (SMs) from environmental bacteria offer viable solutions for various health and environmental challenges. Researchers are employing advanced bioinformatic tools to investigate less-explored microorganisms and unearth novel bioactive compounds. 
In this research area, our understanding of SMs from environmental Gram-negative bacteria lags behind that of its Gram-positive counterparts. In this regard, Pedobacter spp. have recently gained attention, not only for their role as plant growth promoters but also for their potential in producing antimicrobials. This study focuses on the genomic analysis of Pedobacter spp. to unveil the diversity of the SMs encoded in their genomes. Among the 41 genomes analyzed, a total of 233 biosynthetic gene clusters (BGCs) were identified, revealing the potential for the production of diverse SMs, including RiPPs (27%), terpenes (22%), hybrid SMs (17%), PKs (12%), NRPs (9%) and siderophores (6%). Overall, BGC distribution did not correlate with phylogenetic lineage and most of the BGCs showed no significant hits in the MIBiG database, emphasizing the uniqueness of the compounds that Pedobacter spp. can produce. Of all the species examined, P. cryoconitis and P. lusitanus stood out for having the highest number and diversity of BGCs. Focusing on their applicability and ecological functions, we investigated in greater detail the BGCs responsible for siderophore and terpenoid production in these species and their relatives. Our findings suggest that P. cryoconitis and P. lusitanus have the potential to produce novel mixtures of siderophores, involving bifunctional IucAC/AcD NIS synthetases, as well as carotenoids and squalene. This study highlights the biotechnological potential of Pedobacter spp. 
in medicine, agriculture and other industries, emphasizing the need for a continued exploration of its SMs and their applications.}, } @article {pmid37894090, year = {2023}, author = {Alghamdi, M and Al-Judaibi, E and Al-Rashede, M and Al-Judaibi, A}, title = {Comparative De Novo and Pan-Genome Analysis of MDR Nosocomial Bacteria Isolated from Hospitals in Jeddah, Saudi Arabia.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102432}, pmid = {37894090}, issn = {2076-2607}, abstract = {Multidrug-resistant (MDR) bacteria are one of the most serious threats to public health, and one of the most important types of MDR bacteria are those that are acquired in a hospital, known as nosocomial. This study aimed to isolate and identify MDR bacteria from selected hospitals in Jeddah and analyze their antibiotic-resistant genes. Bacteria were collected from different sources and wards of hospitals in Jeddah City. Phoenix BD was used to identify the strains and perform susceptibility testing. Identification of selected isolates showing MDR to more than three classes of antibiotics was based on 16S rRNA gene and whole genome sequencing. Genes conferring resistance were characterized using de novo and pan-genome analyses. In total, we isolated 108 bacterial strains, of which 75 (69.44%) were found to be MDR. Taxonomic identification revealed that 24 (32%) isolates were identified as Escherichia coli, 19 (25.3%) corresponded to Klebsiella pneumoniae, and 17 (22.67%) were methicillin-resistant Staphylococcus aureus (MRSA). Among the Gram-negative bacteria, K. pneumoniae isolates showed the highest resistance levels to most antibiotics. Of the Gram-positive bacteria, S. aureus (MRSA) strains were noticed to exhibit the uppermost degree of resistance to the tested antibiotics, which is higher than that observed for K. pneumoniae isolates. 
Taken together, our results illustrated that MDR Gram-negative bacteria are the most common cause of nosocomial infections, while MDR Gram-positive bacteria are characterized by a wider antibiotic resistance spectrum. Whole genome sequencing found the appearance of antibiotic resistance genes, including SHV, OXA, CTX-M, TEM-1, NDM-1, VIM-1, ere(A), ermA, ermB, ermC, msrA, qacA, qacB, and qacC.}, } @article {pmid37892197, year = {2023}, author = {Yaraguppi, DA and Bagewadi, ZK and Patil, NR and Mantri, N}, title = {Iturin: A Promising Cyclic Lipopeptide with Diverse Applications.}, journal = {Biomolecules}, volume = {13}, number = {10}, pages = {}, doi = {10.3390/biom13101515}, pmid = {37892197}, issn = {2218-273X}, abstract = {This comprehensive review examines iturin, a cyclic lipopeptide originating from Bacillus subtilis and related bacteria. These compounds are structurally diverse and possess potent inhibitory effects against plant disease-causing bacteria and fungi. Notably, Iturin A exhibits strong antifungal properties and low toxicity, making it valuable for bio-pesticides and mycosis treatment. Emerging research reveals additional capabilities, including anticancer and hemolytic features. Iturin finds applications across industries. In food, iturin as a biosurfactant serves beyond surface tension reduction, enhancing emulsions and texture. Biosurfactants are significant in soil remediation, agriculture, wound healing, and sustainability. They also show promise in Microbial Enhanced Oil Recovery (MEOR) in the petroleum industry. The pharmaceutical and cosmetic industries recognize iturin's diverse properties, such as antibacterial, antifungal, antiviral, anticancer, and anti-obesity effects. Cosmetic applications span emulsification, anti-wrinkle, and antibacterial use. Understanding iturin's structure, synthesis, and applications gains importance as biosurfactant and lipopeptide research advances. 
This review focuses on emphasizing iturin's structural characteristics, production methods, biological effects, and applications across industries. It probes iturin's antibacterial, antifungal potential, antiviral efficacy, and cancer treatment capabilities. It explores diverse applications in food, petroleum, pharmaceuticals, and cosmetics, considering recent developments, challenges, and prospects.}, } @article {pmid37891426, year = {2023}, author = {Gould, AL and Donohoo, SA and Román, ED and Neff, EE}, title = {Strain-level diversity of symbiont communities between individuals and populations of a bioluminescent fish.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, pmid = {37891426}, issn = {1751-7370}, abstract = {The bioluminescent symbiosis involving the urchin cardinalfish, Siphamia tubifer, and Photobacterium mandapamensis, a luminous member of the Vibrionaceae, is highly specific compared to other bioluminescent fish-bacteria associations. Despite this high degree of specificity, patterns of genetic diversity have been observed for the symbionts from hosts sampled over relatively small spatial scales. We characterized and compared sub-species, strain-level symbiont diversity within and between S. tubifer hosts sampled from the Philippines and Japan using PCR fingerprinting. We then carried out whole genome sequencing of the unique symbiont genotypes identified to characterize the genetic diversity of the symbiont community and the symbiont pangenome. We determined that an individual light organ contains six symbiont genotypes on average, but varied between 1-13. Additionally, we found that there were few genotypes shared between hosts from the same location. A phylogenetic analysis of the unique symbiont strains indicated location-specific clades, suggesting some genetic differentiation in the symbionts between host populations. 
We also identified symbiont genes that were variable between strains, including luxF, a member of the lux operon, which is responsible for light production. We quantified the light emission and growth rate of two strains missing luxF along with the other strains isolated from the same light organs and determined that strains lacking luxF were dimmer but grew faster than most of the other strains, suggesting a potential metabolic trade-off. This study highlights the importance of strain-level diversity in microbial associations and provides new insight into the underlying genetic architecture of intraspecific symbiont communities within a host.}, } @article {pmid37887294, year = {2023}, author = {Bachari, A and Nassar, N and Telukutla, S and Zomer, R and Dekiwadia, C and Piva, TJ and Mantri, N}, title = {In Vitro Antiproliferative Effect of Cannabis Extract PHEC-66 on Melanoma Cell Lines.}, journal = {Cells}, volume = {12}, number = {20}, pages = {}, doi = {10.3390/cells12202450}, pmid = {37887294}, issn = {2073-4409}, support = {Not Applicable//MGC Pharmaceuticals Ltd/ ; }, abstract = {Melanoma, an aggressive form of skin cancer, can be fatal if not diagnosed and treated early. Melanoma is widely recognized to resist advanced cancer treatments, including immune checkpoint inhibitors, kinase inhibitors, and chemotherapy. Numerous studies have shown that various Cannabis sativa extracts exhibit potential anticancer effects against different types of tumours both in vitro and in vivo. This study is the first to report that PHEC-66, a Cannabis sativa extract, displays antiproliferative effects against MM418-C1, MM329 and MM96L melanoma cells. 
Although these findings suggest that PHEC-66 has promising potential as a pharmacotherapeutic agent for melanoma treatment, further research is necessary to evaluate its safety, efficacy, and clinical applications.}, } @article {pmid37884897, year = {2023}, author = {Depuydt, L and Renders, L and Abeel, T and Fostier, J}, title = {Pan-genome de Bruijn graph using the bidirectional FM-index.}, journal = {BMC bioinformatics}, volume = {24}, number = {1}, pages = {400}, pmid = {37884897}, issn = {1471-2105}, support = {1117322N//Fonds Wetenschappelijk Onderzoek/ ; 1SE7822N//Fonds Wetenschappelijk Onderzoek/ ; }, abstract = {BACKGROUND: Pan-genome graphs are gaining importance in the field of bioinformatics as data structures to represent and jointly analyze multiple genomes. Compacted de Bruijn graphs are inherently suited for this purpose, as their graph topology naturally reveals similarity and divergence within the pan-genome. Most state-of-the-art pan-genome graphs are represented explicitly in terms of nodes and edges. Recently, an alternative, implicit graph representation was proposed that builds directly upon the unidirectional FM-index. As such, a memory-efficient graph data structure is obtained that inherits the FM-index' backward search functionality. However, this representation suffers from a number of shortcomings in terms of functionality and algorithmic performance.

RESULTS: We present a data structure for a pan-genome, compacted de Bruijn graph that aims to address these shortcomings. It is built on the bidirectional FM-index, extending the ability of its unidirectional counterpart to navigate and search the graph in both directions. All basic graph navigation steps can be performed in constant time. Based on these features, we implement subgraph visualization as well as lossless approximate pattern matching to the graph using search schemes. We demonstrate that we can retrieve all occurrences corresponding to a read within a certain edit distance in a very efficient manner. Through a case study, we show the potential of exploiting the information embedded in the graph's topology through visualization and sequence alignment.

CONCLUSIONS: We propose a memory-efficient representation of the pan-genome graph that supports subgraph visualization and lossless approximate pattern matching of reads against the graph using search schemes. The C++ source code of our software, called Nexus, is available at https://github.com/biointec/nexus under AGPL-3.0 license.}, } @article {pmid37882557, year = {2023}, author = {Hoover, RL and Keffer, JL and Polson, SW and Chan, CS}, title = {Gallionellaceae pangenomic analysis reveals insight into phylogeny, metabolic flexibility, and iron oxidation mechanisms.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0003823}, doi = {10.1128/msystems.00038-23}, pmid = {37882557}, issn = {2379-5077}, abstract = {The iron-oxidizing Gallionellaceae drive a wide variety of biogeochemical cycles through their metabolisms and biominerals. To better understand the environmental impacts of Gallionellaceae, we need to improve our knowledge of their diversity and metabolisms, especially any novel iron oxidation mechanisms. Here, we used a pangenomic analysis of 103 genomes to resolve Gallionellaceae phylogeny and explore their genomic potential. Using a concatenated ribosomal protein tree and key gene patterns, we determined Gallionellaceae has four genera, divided into two groups: iron-oxidizing bacteria (FeOB) Gallionella, Sideroxydans, and Ferriphaselus with iron oxidation genes (cyc2, mtoA) and nitrite-oxidizing bacteria (NOB) Candidatus Nitrotoga with the nitrite oxidase gene nxr. The FeOB and NOB have similar electron transport chains, including genes for reverse electron transport and carbon fixation. Auxiliary energy metabolisms, including S oxidation, denitrification, and organotrophy, were scattered throughout the FeOB. Within FeOB, we found genes that may represent adaptations for iron oxidation, including a variety of extracellular electron uptake mechanisms. 
FeOB genomes encoded more predicted c-type cytochromes than NOB genomes, notably more multiheme c-type cytochromes (MHCs) with >10 CXXCH motifs. These include homologs of several predicted outer membrane porin-MHC complexes, including MtoAB and Uet. MHCs efficiently conduct electrons across longer distances and function across a wide range of redox potentials that overlap with mineral redox potentials, which can expand the range of usable iron substrates. Overall, the results of pangenome analyses suggest that the Gallionellaceae genera Gallionella, Sideroxydans, and Ferriphaselus have acquired a range of adaptations to succeed in various environments but are primarily iron oxidizers.

IMPORTANCE: Neutrophilic iron-oxidizing bacteria (FeOB) produce copious iron (oxyhydr)oxides that can profoundly influence biogeochemical cycles, notably the fate of carbon and many metals. To fully understand environmental microbial iron oxidation, we need a thorough accounting of iron oxidation mechanisms. In this study, we show the Gallionellaceae FeOB genomes encode both characterized iron oxidases as well as uncharacterized multiheme cytochromes (MHCs). MHCs are predicted to transfer electrons from extracellular substrates and likely confer metabolic capabilities that help Gallionellaceae occupy a range of different iron- and mineral-rich niches. Gallionellaceae appear to specialize in iron oxidation, so it would be advantageous for them to have multiple mechanisms to oxidize various forms of iron, given the many iron minerals on Earth, as well as the physiological and kinetic challenges faced by FeOB. 
The multiple iron/mineral oxidation mechanisms may help drive the widespread ecological success of Gallionellaceae.}, } @article {pmid37882526, year = {2023}, author = {Pérez Castro, S and Peredo, EL and Mason, OU and Vineis, J and Bowen, JL and Mortazavi, B and Ganesh, A and Ruff, SE and Paul, BG and Giblin, AE and Cardon, ZG}, title = {Diversity at single nucleotide to pangenome scales among sulfur cycling bacteria in salt marshes.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0098823}, doi = {10.1128/aem.00988-23}, pmid = {37882526}, issn = {1098-5336}, abstract = {Sulfur-cycling microbial communities in salt marsh rhizosphere sediments mediate a recycling and detoxification system central to plant productivity. Despite the importance of sulfur-cycling microbes, their biogeographic, phylogenetic, and functional diversity remain poorly understood. Here, we use metagenomic data sets from Massachusetts (MA) and Alabama (AL) salt marshes to examine the distribution and genomic diversity of sulfur-cycling plant-associated microbes. Samples were collected from sediments under Sporobolus alterniflorus and Sporobolus pumilus in separate MA vegetation zones, and under S. alterniflorus and Juncus roemerianus co-occurring in AL. We grouped metagenomic data by plant species and site and identified 38 MAGs that included pathways for sulfate reduction or sulfur oxidation. Phylogenetic analyses indicated that 29 of the 38 were affiliated with uncultivated lineages. We showed differentiation in the distribution of MAGs between AL and MA, between S. alterniflorus and S. pumilus vegetation zones in MA, but no differentiation between S. alterniflorus and J. roemerianus in AL. Pangenomic analyses of eight ubiquitous MAGs also detected site- and vegetation-specific genomic features, including varied sulfur-cycling operons, carbon fixation pathways, fixed single-nucleotide variants, and active diversity-generating retroelements. 
This genetic diversity, detected at multiple scales, suggests evolutionary relationships affected by distance and local environment, and demonstrates differential microbial capacities for sulfur and carbon cycling in salt marsh sediments.

IMPORTANCE: Salt marshes are known for their significant carbon storage capacity, and sulfur cycling is closely linked with the ecosystem-scale carbon cycling in these ecosystems. Sulfate reducers are key for the decomposition of organic matter, and sulfur oxidizers remove toxic sulfide, supporting the productivity of marsh plants. To date, the complexity of coastal environments, heterogeneity of the rhizosphere, high microbial diversity, and uncultured majority hindered our understanding of the genomic diversity of sulfur-cycling microbes in salt marshes. Here, we use comparative genomics to overcome these challenges and provide an in-depth characterization of sulfur-cycling microbial diversity in salt marshes. We characterize communities across distinct sites and plant species and uncover extensive genomic diversity at the taxon level and specific genomic features present in MAGs affiliated with uncultivated sulfur-cycling lineages. Our work provides insights into the partnerships in salt marshes and a roadmap for multiscale analyses of diversity in complex biological systems.}, } @article {pmid37876012, year = {2023}, author = {Yu, J and Jiang, C and Yamano, R and Koike, S and Sakai, Y and Mino, S and Sawabe, T}, title = {Unveiling the early life core microbiome of the sea cucumber Apostichopus japonicus and the unexpected abundance of the growth-promoting Sulfitobacter.}, journal = {Animal microbiome}, volume = {5}, number = {1}, pages = {54}, pmid = {37876012}, issn = {2524-4671}, abstract = {BACKGROUND: Microbiome in early life has long-term effects on the host's immunological and physiological development and its disturbance is known to trigger various diseases in host Deuterostome animals. 
The sea cucumber Apostichopus japonicus is one of the most valuable marine Deuterostome invertebrates in Asia and a model animal in regeneration studies. To understand factors that impact on host development and holobiont maintenance, host-microbiome association has been actively studied in the last decade. However, we currently lack knowledge of early life core microbiome during its ontogenesis and how it benefits the host's growth.

RESULTS: We analyzed the microbial community in 28 sea cucumber samples from a laboratory breeding system, designed to replicate aquaculture environments, across six developmental stages (fertilized eggs to the juvenile stage) over a three-year period to examine the microbiomes' dynamics and stability. Microbiome shifts occurred during sea cucumber larval ontogenesis in every case. Application of the most sophisticated core microbiome extraction methodology, a hybrid approach with abundance-occupancy core microbiome analyses (top 75% of total reads and > 70% occupation) and core index calculation, first revealed early life core microbiome consisted of Alteromonadaceae and Rhodobacteraceae, as well as a stage core microbiome consisting of pioneer core microbe Pseudoalteromonadaceae in A. japonicus, suggesting a stepwise establishment of microbiome related to ontogenesis and feeding behavior in A. japonicus. More interestingly, four ASVs affiliated to Alteromonadaceae and Rhodobacteraceae were extracted as early life core microbiome. One of the ASVs (ASV0007) was affiliated to the Sulfitobacter strain BL28 (Rhodobacteraceae), isolated from blastula larvae in the 2019 rearing batch. Unexpectedly, a bioassay revealed the BL28 strain retains a host growth-promoting ability. Further meta-pangenomics approach revealed the BL28 genome reads were abundant in the metagenomic sequence pool, in particular, in that of post-gut development in early life stages of A. japonicus.

CONCLUSION: Repeated rearing efforts of A. japonicus using laboratory aquaculture replicating aquaculture environments and hybrid core microbiome extraction approach first revealed particular ASVs affiliated to Alteromonadaceae and Rhodobacteraceae as the A. japonicus early life core microbiome. Further bioassay revealed the growth promoting ability to the host sea cucumber in one of the core microbes, the Sulfitobacter strain BL28 identified as ASV0007. Genome reads of the BL28 were abundant in post-gut development of A. japonicus, which makes us consider effective probiotic uses of those core microbiome for sea cucumber resource production and conservation. The study also emphasizes the importance of the core microbiome in influencing early life stages in marine invertebrates. Understanding these dynamics could offer pathways to improve growth, immunity, and disease resistance in marine invertebrates.}, } @article {pmid37873245, year = {2023}, author = {Islam, MM and Kolling, GL and Glass, EM and Goldberg, JB and Papin, JA}, title = {Model-driven characterization of functional diversity of Pseudomonas aeruginosa clinical isolates with broadly representative phenotypes.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.10.08.561426}, pmid = {37873245}, abstract = {UNLABELLED: Pseudomonas aeruginosa is a leading cause of infections in immunocompromised individuals and in healthcare settings. This study aims to understand the relationships between phenotypic diversity and the functional metabolic landscape of P. aeruginosa clinical isolates. To better understand the metabolic repertoire of P. aeruginosa in infection, we deeply profiled a representative set from a library of 971 clinical P. aeruginosa isolates with corresponding patient metadata and bacterial phenotypes. 
The genotypic clustering based on whole-genome sequencing of the isolates, multi-locus sequence types, and the phenotypic clustering generated from a multi-parametric analysis were compared to each other to assess the genotype-phenotype correlation. Genome-scale metabolic network reconstructions were developed for each isolate through amendments to an existing PA14 network reconstruction. These network reconstructions show diverse metabolic functionalities and enhance the collective P. aeruginosa pangenome metabolic repertoire. Characterizing this rich set of clinical P. aeruginosa isolates allows for a deeper understanding of the genotypic and metabolic diversity of the pathogen in a clinical setting and lays a foundation for further investigation of the metabolic landscape of this pathogen and host-associated metabolic differences during infection.

IMPACT STATEMENT: Pseudomonas aeruginosa is a leading cause of infections in immunocompromised individuals and in healthcare settings. The treatment of these infections is complicated by the presence of a variety of virulence mechanisms and metabolic uniqueness among clinically relevant strains. This study is an attempt to understand the relationships between isolate phenotypic diversity and the functional metabolic landscape within a representative group of P. aeruginosa clinical isolates. Characterizing this rich set of clinical P. aeruginosa isolates allows for a deeper understanding of genotypic and metabolic diversity of the pathogen in a clinical setting and lays a foundation for further investigation of the metabolic landscape of this pathogen and host-associated metabolic differences in infection.}, } @article {pmid37868321, year = {2023}, author = {Gao, Z and Bian, J and Lu, F and Jiao, Y and He, H}, title = {Corrigendum: Triticeae crop genome biology: an endless frontier.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1280660}, doi = {10.3389/fpls.2023.1280660}, pmid = {37868321}, issn = {1664-462X}, abstract = {[This corrects the article DOI: 10.3389/fpls.2023.1222681.].}, } @article {pmid37864332, year = {2023}, author = {Liang, Y and Han, Y}, title = {Pan-genome brings opportunities to revitalize ancient crop foxtail millet.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100735}, doi = {10.1016/j.xplc.2023.100735}, pmid = {37864332}, issn = {2590-3462}, abstract = {The annual grass, foxtail millet (Setaria italica), was first domesticated ∼11,000 years ago, making it one of the most ancient crops in the world, and it was the mainstay underpinning the development of Asian farming civilization. The looming food shortage crisis aggravated by climate change threatens to make current agriculture unsustainable. 
As a C4 photosynthetic plant, foxtail millet has attracted increasing attention from the scientific and industrial farming communities because of its drought tolerance, good adaptability and nutritional properties. Foxtail millet and green foxtail (Setaria viridis) have been developed into ideal model systems for C4 crops due to their compact diploid genomes, rich genetic diversity, self-pollination, high-throughput transformation, short life cycles and ease of laboratory culture.}, } @article {pmid37858045, year = {2023}, author = {Cumsille, A and Serna-Cardona, N and González, V and Claverías, F and Undabarrena, A and Molina, V and Salvà-Serra, F and Moore, ERB and Cámara, B}, title = {Exploring the biosynthetic gene clusters in Brevibacterium: a comparative genomic analysis of diversity and distribution.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {622}, pmid = {37858045}, issn = {1471-2164}, support = {21191625//Agencia Nacional de Investigación y Desarrollo/ ; 1221264//Agencia Nacional de Investigación y Desarrollo/ ; }, abstract = {Exploring Brevibacterium strains from various ecosystems may lead to the discovery of new antibiotic-producing strains. Brevibacterium sp. H-BE7, a strain isolated from marine sediments from Northern Patagonia, Chile, had its genome sequenced to study the biosynthetic potential to produce novel natural products within the Brevibacterium genus. The genome sequences of 98 Brevibacterium strains, including strain H-BE7, were selected for a genomic analysis. A phylogenomic cladogram was generated, which divided the Brevibacterium strains into four major clades. A total of 25 strains are potentially unique new species according to Average Nucleotide Identity (ANIb) values. These strains were isolated from various environments, emphasizing the importance of exploring diverse ecosystems to discover the full diversity of Brevibacterium. 
Pangenome analysis of Brevibacterium strains revealed that only 2.5% of gene clusters are included within the core genome, and most gene clusters occur either as singletons or as cloud genes present in less than ten strains. Brevibacterium strains from various phylogenomic clades exhibit diverse BGCs. Specific groups of BGCs show clade-specific distribution patterns, such as siderophore BGCs and carotenoid-related BGCs. A group of clade IV-A Brevibacterium strains possess a clade-specific Polyketide synthase (PKS) BGC that connects with phenazine-related BGCs. Within the PKS BGC, five genes, including the biosynthetic PKS gene, participate in the mevalonate pathway and exhibit similarities with the phenazine A BGC. However, additional core biosynthetic phenazine genes were exclusively discovered in nine Brevibacterium strains, primarily isolated from cheese. Evaluating the antibacterial activity of strain H-BE7, it exhibited antimicrobial activity against Salmonella enterica and Listeria monocytogenes. Chemical dereplication identified bioactive compounds, such as 1-methoxyphenazine in the crude extracts of strain H-BE7, which could be responsible for the observed antibacterial activity. While strain H-BE7 lacks the core phenazine biosynthetic genes, it produces 1-methoxyphenazine, indicating the presence of an unknown biosynthetic pathway for this compound. This suggests the existence of alternative biosynthetic pathways or promiscuous enzymes within H-BE7's genome.}, } @article {pmid37854939, year = {2023}, author = {Srivastava, N and Shiburaj, S and Khare, SK}, title = {Pan-genomic comparison of a potential solvent-tolerant alkaline protease-producing Exiguobacterium sp. TBG-PICH-001 isolated from a marine habitat.}, journal = {3 Biotech}, volume = {13}, number = {11}, pages = {371}, pmid = {37854939}, issn = {2190-572X}, abstract = {UNLABELLED: The identification and applicability of bacteria are inconclusive until comprehended with genomic repositories. 
Our isolate, Exiguobacterium sp. TBG-PICH-001 exhibited excellent halo- and organic solvent tolerance with simultaneous production of alkaline protease/s (0.512 IU/mL). The crude protease (1 IU) showed a 43.57% degradation of whey protein. The bulk proteins in the whey were hydrolyzed to smaller peptides which were evident in the SDS-PAGE profile. With such characteristics, the isolate became interesting for its genomic studies. The TBG-PICH-001 genome was found to be 3.14 Mb in size with 17 contigs and 47.33% GC content. The genome showed 3176 coding genes, and 2699 genes were characterized for their functionality. The Next-Generation-Sequencing of the genome identified only the isolate's genus; hence we attempted to delineate its species position. The genomes of the isolate and other representative Exiguobacterium spp. were compared based on orthologous genes (Orthovenn2 server). A pan-genomic analysis revealed the match of TBG-PICH-001 with 15 uncharacterized Exiguobacterium genomes at the species level. All these collectively matched with Exiguobacterium indicum, and the results were reconfirmed through phylogenetic studies. Further, the Exiguobacterium indicum genomes were engaged for homology studies rendering 11 classes of protease genes. Two putative proteases (Zinc metalloprotease and Serine protease) obtained from homology were checked for PCR amplification using genomic DNA of TBG-PICH-001 and other Exiguobacterium genomes. The results showed amplification only in the Exiguobacterium indicum genome. These protease genes, after sequencing, were matched with the TBG-PICH-001 genome. Their presence in its whole genome experimentally validated the study.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-023-03796-5.}, } @article {pmid36945625, year = {2023}, author = {Hadjifrangiskou, M and Reasoner, S and Flores, V and Van Horn, G and Morales, G and Peard, L and Abelson, B and Manuel, C and Lee, J and Baker, B and Williams, T and Schmitz, J and Clayton, D}, title = {Defining the Infant Male Urobiome and Moving Towards Mechanisms in Urobiome Research.}, journal = {Research square}, volume = {}, number = {}, pages = {}, pmid = {36945625}, support = {F30 AI169748/AI/NIAID NIH HHS/United States ; }, abstract = {The urinary bladder harbors a community of microbes termed the urobiome, which remains understudied. In this study, we present the urobiome of healthy infant males from samples collected by transurethral catheterization. Using a combination of extended culture and amplicon sequencing, we identify several common bacterial genera that can be further investigated for their effects on urinary health across the lifespan. Many genera were shared between all samples suggesting a consistent urobiome composition among this cohort. We note that, for this cohort, early life exposures including mode of birth (vaginal vs. Caesarean section), or prior antibiotic exposure did not influence urobiome composition. In addition, we report the isolation of culturable bacteria from the bladders of these infant males, including Actinotignum schaalii, a bacterial species that has been associated with urinary tract infection in older male adults. Herein, we isolate and sequence 9 distinct strains of A. schaalii enhancing the genomic knowledge surrounding this species and opening avenues for delineating the microbiology of this urobiome constituent. 
Furthermore, we present a framework for using the combination of culture-dependent and sequencing methodologies for uncovering mechanisms in the urobiome.}, } @article {pmid37847672, year = {2023}, author = {Connor, CH and Zucoloto, AZ and Munnoch, JT and Yu, IL and Corander, J and Hoskisson, PA and McDonald, B and McNally, A}, title = {Multidrug-resistant E. coli encoding high genetic diversity in carbohydrate metabolism genes displace commensal E. coli from the intestinal tract.}, journal = {PLoS biology}, volume = {21}, number = {10}, pages = {e3002329}, doi = {10.1371/journal.pbio.3002329}, pmid = {37847672}, issn = {1545-7885}, abstract = {Extra-intestinal pathogenic Escherichia coli (ExPEC) can cause a variety of infections outside of the intestine and are a major causative agent of urinary tract infections. Treatment of these infections is increasingly frustrated by antimicrobial resistance (AMR) diminishing the number of effective therapies available to clinicians. Incidence of multidrug resistance (MDR) is not uniform across the phylogenetic spectrum of E. coli. Instead, AMR is concentrated in select lineages, such as ST131, which are MDR pandemic clones that have spread AMR globally. Using a gnotobiotic mouse model, we demonstrate that an MDR E. coli ST131 is capable of out-competing and displacing non-MDR E. coli from the gut in vivo. This is achieved in the absence of antibiotic treatment mediating a selective advantage. In mice colonised with non-MDR E. coli strains, challenge with MDR E. coli either by oral gavage or co-housing with MDR E. coli colonised mice results in displacement and dominant intestinal colonisation by MDR E. coli ST131. To investigate the genetic basis of this superior gut colonisation ability by MDR E. coli, we assayed the metabolic capabilities of our strains using a Biolog phenotypic microarray revealing altered carbon metabolism. Functional pangenomic analysis of 19,571 E. 
coli genomes revealed that carriage of AMR genes is associated with increased diversity in carbohydrate metabolism genes. The data presented here demonstrate that independent of antibiotic selective pressures, MDR E. coli display a competitive advantage to colonise the mammalian gut and points to a vital role of metabolism in the evolution and success of MDR lineages of E. coli via carriage and spread.}, } @article {pmid37847157, year = {2023}, author = {Zhang, Z and Zhao, J and Li, J and Yao, J and Wang, B and Ma, Y and Li, N and Wang, H and Wang, T and Liu, B and Gong, L}, title = {Evolutionary trajectory of organelle-derived nuclear DNAs in the Triticum/Aegilops complex species.}, journal = {Plant physiology}, volume = {}, number = {}, pages = {}, doi = {10.1093/plphys/kiad552}, pmid = {37847157}, issn = {1532-2548}, abstract = {Organelle-derived nuclear DNAs, nuclear plastid DNAs (NUPTs) and nuclear mitochondrial DNAs (NUMTs) have been identified in plants. Most, if not all, genes residing in NUPTs/NUMTs (NUPGs/NUMGs) are known to be inactivated and pseudogenized. However, the role of epigenetic control in silencing NUPGs/NUMGs and the dynamic evolution of NUPTs/NUMTs with respect to organismal phylogeny remain barely explored. Based on the available nuclear and organellar genomic resources of wheat (genus Triticum) and goat grass (genus Aegilops) within Triticum/Aegilops complex species, we investigated the evolutionary fates of NUPTs/NUMTs in terms of their epigenetic silencing and their dynamic occurrence rates in the nuclear diploid genomes and allopolyploid subgenomes. NUPTs and NUMTs possessed similar genomic atlas, including (i) predominantly located in intergenic regions and preferential integration to gene regulation regions and (ii) generating sequence variations in the nuclear genome. 
Unlike nuclear indigenous genes, the alien NUPGs/NUMGs were associated with repressive epigenetic signals, namely high levels of DNA methylation and low levels of active histone modifications. Phylogenomic analyses suggested that the species-specific and gradual accumulation of NUPTs/NUMTs accompanied the speciation processes. Moreover, based on further pan-genomic analyses, we found significant subgenomic asymmetry in the NUPT/NUMT occurrence, which accumulated during allopolyploid wheat evolution. Our findings provide insight into the dynamic evolutionary fates of organelle-derived nuclear DNA in plants.}, } @article {pmid37846049, year = {2023}, author = {Aylward, AJ and Petrus, S and Mamerto, A and Hartwick, NT and Michael, TP}, title = {PanKmer: k-mer based and reference-free pangenome analysis.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btad621}, pmid = {37846049}, issn = {1367-4811}, abstract = {SUMMARY: Pangenomes are replacing single reference genomes as the definitive representation of DNA sequence within a species or clade. Pangenome analysis predominantly leverages graph-based methods that require computationally intensive multiple genome alignments, do not scale to highly complex eukaryotic genomes, limit their scope to identifying structural variants (SVs), or incur bias by relying on a reference genome. Here, we present PanKmer, a toolkit designed for reference-free analysis of pangenome datasets consisting of dozens to thousands of individual genomes. PanKmer decomposes a set of input genomes into a table of observed k-mers and their presence-absence values in each genome. These are stored in an efficient k-mer index data format that encodes SNPs, INDELs, and SVs. It also includes functions for downstream analysis of the k-mer index, such as calculating sequence similarity statistics between individuals at whole-genome or local scales. 
For example, k-mers can be "anchored" in any individual genome to quantify sequence variability or conservation at a specific locus. This facilitates workflows with various biological applications, e.g. identifying cases of hybridization between plant species. PanKmer provides researchers with a valuable and convenient means to explore the full scope of genetic variation in a population, without reference bias.

PanKmer is implemented as a Python package with components written in Rust, released under a BSD license. The source code is available from the Python Package Index (PyPI) at https://pypi.org/project/pankmer/ as well as Gitlab at https://gitlab.com/salk-tm/pankmer. Full documentation is available at https://salk-tm.gitlab.io/pankmer/.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid37841331, year = {2023}, author = {Asif, M and Li-Qun, Z and Zeng, Q and Atiq, M and Ahmad, K and Tariq, A and Al-Ansari, N and Blom, J and Fenske, L and Alodaini, HA and Hatamleh, AA}, title = {Comprehensive genomic analysis of Bacillus paralicheniformis strain BP9, pan-genomic and genetic basis of biocontrol mechanism.}, journal = {Computational and structural biotechnology journal}, volume = {21}, number = {}, pages = {4647--4662}, pmid = {37841331}, issn = {2001-0370}, abstract = {Many Bacillus species are essential antibacterial agents, but their antibiosis potential still needs to be elucidated to its full extent. Here, we isolated a soil bacterium, BP9, which has significant antibiosis activity against fungal and bacterial pathogens. BP9 improved the growth of wheat seedlings via active colonization and demonstrated effective biofilm and swarming activity. BP9 sequenced genome contains 4282 genes with a mean G-C content of 45.94% of the whole genome. A single copy concatenated 802 core genes of 28 genomes, and their calculated average nucleotide identity (ANI) discriminated the strain BP9 from Bacillus licheniformis and classified it as Bacillus paralicheniformis. Furthermore, a comparative pan-genome analysis of 40 B. paralicheniformis strains suggested that the genetic repertoire of BP9 belongs to open-type genome species. A comparative analysis of a pan-genome dataset using the Kyoto Encyclopedia of Genes and Genomes (KEGG) and Cluster of Orthologous Gene groups (COG) revealed the diversity of secondary metabolic pathways, where BP9 distinguishes itself by exhibiting a greater prevalence of loci associated with the metabolism and transportation of organic and inorganic substances, carbohydrate and amino acid for effective inhabitation in diverse environments. 
The primary secondary metabolites and their genes involved in synthesizing bacillibactin, fengycin, bacitracin, and lantibiotics were identified as acquired through a recent Horizontal gene transfer (HGT) event, which contributes to a significant part of the strain's antimicrobial potential. Finally, we report some genes essential for plant-host interaction identified in BP9, which reduce spore germination and virulence of multiple fungal and bacterial species. The effective colonization, diverse predicted metabolic pathways and secondary metabolites (antibiotics) suggest testing the suitability of strain BP9 as a potential bio-preparation in agricultural fields.}, } @article {pmid37835381, year = {2023}, author = {Giguère, A and Raymond-Bouchard, I and Collin, V and Claveau, JS and Hébert, J and LeBlanc, R}, title = {Optical Genome Mapping Reveals the Complex Genetic Landscape of Myeloma.}, journal = {Cancers}, volume = {15}, number = {19}, pages = {}, doi = {10.3390/cancers15194687}, pmid = {37835381}, issn = {2072-6694}, support = {N/A//Canada Research Chairs/ ; N/A//Fonds de Recherche du Québec - Santé/ ; }, abstract = {Fluorescence in situ hybridization (FISH) on enriched CD138 plasma cells is the standard method for identification of clinically relevant genetic abnormalities in multiple myeloma. However, FISH is a targeted analysis that can be challenging due to the genetic complexity of myeloma. The aim of this study was to evaluate the potential of optical genome mapping (OGM) to detect clinically significant cytogenetic abnormalities in myeloma and to provide larger pangenomic information. OGM and FISH analyses were performed on CD138-purified cells of 20 myeloma patients. 
OGM successfully detected structural variants (SVs) (IGH and MYC rearrangements), copy number variants (CNVs) (17p/TP53 deletion, 1p deletion and 1q gain/amplification) and aneuploidy (gains of odd-numbered chromosomes, monosomy 13) classically expected with myeloma and led to a 30% increase in prognosis yield at our institution when compared to FISH. Despite challenges in the interpretation of OGM calls for CNV and aneuploidy losses in non-diploid genomes, OGM has the potential to replace FISH as the standard of care analysis in clinical settings and to efficiently change how we identify prognostic and predictive markers for therapies in the future. To our knowledge, this is the first study highlighting the feasibility and clinical utility of OGM in myeloma.}, } @article {pmid37832344, year = {2023}, author = {Latifi, T and Jalilvand, S and Golsaz-Shirazi, F and Arashkia, A and Kachooei, A and Afchangi, A and Zafarian, S and Roohvand, F and Shoja, Z}, title = {Characterization and immunogenicity of a novel chimeric hepatitis B core-virus like particles (cVLPs) carrying rotavirus VP8*protein in mice model.}, journal = {Virology}, volume = {588}, number = {}, pages = {109903}, doi = {10.1016/j.virol.2023.109903}, pmid = {37832344}, issn = {1096-0341}, abstract = {Given the efficacy and safety issues of the WHO for approved/prequalified live attenuated rotavirus (RV) vaccines, studies on alternative non-replicating modals and proper RV antigens are actively undertaken. Herein, we report the novel chimeric hepatitis B core-virus like particles (VLPs) carrying RV VP8*26-231 protein of a P [8] strain (cVLPVP8*), as a parenteral VLP RV vaccine candidate. SDS-PAGE and Western blotting analyses indicated the expected size of the E. coli-derived HBc-VP8* protein that self-assembled to cVLPVP8* particles. Immunization in mice indicated development of higher levels of IgG and IgA as well as higher IgG1/IgG2a ratios by cVLPVP8* vaccination compared to the VP8* alone. 
Assessment of neutralizing antibodies (nAbs) indicated development of heterotypic nAbs with cross-reactivity to a heterotypic RV strain by cVLPVP8* immunization compared to VP8* alone. The observed anti-VP8* cross-reactivity might indicate the possibility of developing a Pan-genomic RVA vaccine based on the cVLPVP8* formulation that deserves further challenge studies.}, } @article {pmid37829450, year = {2023}, author = {Jiang, ZM and Deng, Y and Han, XF and Su, J and Wang, H and Yu, LY and Zhang, YQ}, title = {Corrigendum: Geminicoccus flavidas sp. nov. and Geminicoccus harenae sp. nov., two IAA-producing novel rare bacterial species inhabiting desert biological soil crusts.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1285950}, doi = {10.3389/fmicb.2023.1285950}, pmid = {37829450}, issn = {1664-302X}, abstract = {[This corrects the article DOI: 10.3389/fmicb.2022.1034816.].}, } @article {pmid37823548, year = {2023}, author = {Baby, V and Ambroset, C and Gaurivaud, P and Falquet, L and Boury, C and Guichoux, E and Jores, J and Lartigue, C and Tardy, F and Sirand-Pugnet, P}, title = {Comparative genomics of Mycoplasma feriruminatoris, a fast-growing pathogen of wild Caprinae.}, journal = {Microbial genomics}, volume = {9}, number = {10}, pages = {}, doi = {10.1099/mgen.0.001112}, pmid = {37823548}, issn = {2057-5858}, abstract = {Mycoplasma feriruminatoris is a fast-growing Mycoplasma species isolated from wild Caprinae and first described in 2013. M. feriruminatoris isolates have been associated with arthritis, kerato conjunctivitis, pneumonia and septicemia, but were also recovered from apparently healthy animals. To better understand what defines this species, we performed a genomic survey on 14 strains collected from free-ranging or zoo-housed animals between 1987 and 2017, mostly in Europe. The average chromosome size of the M. feriruminatoris strains was 1,040±0,024 kbp, with 24 % G+C and 852±31 CDS. 
The core genome and pan-genome of the M. feriruminatoris species contained 628 and 1312 protein families, respectively. The M. feriruminatoris strains displayed a relatively closed pan-genome, with many features and putative virulence factors shared with species from the M. mycoides cluster, including the MIB-MIP Ig cleavage system, a repertoire of DUF285 surface proteins and a complete biosynthetic pathway for galactan. M. feriruminatoris genomes were found to be mostly syntenic, although repertoires of mobile genetic elements, including Mycoplasma Integrative and Conjugative Elements, insertion sequences, and a single plasmid varied. Phylogenetic- and gene content analyses confirmed that M. feriruminatoris was closer to the M. mycoides cluster than to the ruminant species M. yeatsii and M. putrefaciens. Ancestral genome reconstruction showed that the emergence of the M. feriruminatoris species was associated with the gain of 17 gene families, some of which encode defence enzymes and surface proteins, and the loss of 25 others, some of which are involved in sugar transport and metabolism. This comparative study suggests that the M. mycoides cluster could be extended to include M. feriruminatoris. We also find evidence that the specific organization and structure of the DnaA boxes around the oriC of M. 
feriruminatoris may contribute to drive the remarkable fast growth of this minimal bacterium.}, } @article {pmid37817747, year = {2023}, author = {Beard, S and Moya-Beltrán, A and Silva-García, D and Valenzuela, C and Pérez-Acle, T and Loyola, A and Quatrini, R}, title = {Pangenome-level analysis of nucleoid-associated proteins in the Acidithiobacillia class: insights into their functional roles in mobile genetic elements biology.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1271138}, pmid = {37817747}, issn = {1664-302X}, abstract = {Mobile genetic elements (MGEs) are relevant agents in bacterial adaptation and evolutionary diversification. Stable appropriation of these DNA elements depends on host factors, among which are the nucleoid-associated proteins (NAPs). NAPs are highly abundant proteins that bind and bend DNA, altering its topology and folding, thus affecting all known cellular DNA processes from replication to expression. Even though NAP coding genes are found in most prokaryotic genomes, their functions in host chromosome biology and xenogeneic silencing are only known for a few NAP families. Less is known about the occurrence, abundance, and roles of MGE-encoded NAPs in foreign elements establishment and mobility. In this study, we used a combination of comparative genomics and phylogenetic strategies to gain insights into the diversity, distribution, and functional roles of NAPs within the class Acidithiobacillia with a special focus on their role in MGE biology. Acidithiobacillia class members are aerobic, chemolithoautotrophic, acidophilic sulfur-oxidizers, encompassing substantial genotypic diversity attributable to MGEs. Our search for NAP protein families (PFs) in more than 90 genomes of the different species that conform the class, revealed the presence of 1,197 proteins pertaining to 12 different NAP families, with differential occurrence and conservation across species. 
Pangenome-level analysis revealed 6 core NAP PFs that were highly conserved across the class, some of which also existed as variant forms of scattered occurrence, in addition to NAPs of taxa-restricted distribution. Core NAPs identified are reckoned as essential based on the conservation of genomic context and phylogenetic signals. In turn, various highly diversified NAPs pertaining to the flexible gene complement of the class were found to be encoded in known plasmids or larger integrated MGEs, or present in genomic loci associated with MGE-hallmark genes, pointing to their role in the stabilization/maintenance of these elements in strains and species with larger genomes. Both core and flexible NAPs identified proved valuable as markers, the former accurately recapitulating the phylogeny of the class, and the latter, as seed in the bioinformatic identification of novel episomal and integrated mobile elements.}, } @article {pmid37811910, year = {2023}, author = {Le, VV and Ko, SR and Oh, HM and Ahn, CY}, title = {Genomic Insights into Paucibacter aquatile DH15, a Cyanobactericidal Bacterium, and Comparative Genomics of the Genus Paucibacter.}, journal = {Journal of microbiology and biotechnology}, volume = {33}, number = {12}, pages = {1--10}, doi = {10.4014/jmb.2307.07008}, pmid = {37811910}, issn = {1738-8872}, abstract = {Microcystis blooms threaten ecosystem function and cause substantial economic losses. Microorganism-based methods, mainly using cyanobactericidal bacteria, are considered one of the most ecologically sound methods to control Microcystis blooms. This study focused on gaining genomic insights into Paucibacter aquatile DH15 that exhibited excellent cyanobactericidal effects against Microcystis. Additionally, a pan-genome analysis of the genus Paucibacter was conducted to enhance our understanding of the ecophysiological significance of this genus. 
Based on phylogenomic analyses, strain DH15 was classified as a member of the species Paucibacter aquatile. The genome analysis supported that strain DH15 can effectively destroy Microcystis, possibly due to the specific genes involved in the flagellar synthesis, cell wall degradation, and the production of cyanobactericidal compounds. The pan-genome analysis revealed the diversity and adaptability of the genus Paucibacter, highlighting its potential to absorb external genetic elements. Paucibacter species were anticipated to play a vital role in the ecosystem by potentially providing essential nutrients, such as vitamins B7, B12, and heme, to auxotrophic microbial groups. Overall, our findings contribute to understanding the molecular mechanisms underlying the action of cyanobactericidal bacteria against Microcystis and shed light on the ecological significance of the genus Paucibacter.}, } @article {pmid37811774, year = {2023}, author = {Ishaq, Z and Zaheer, T and Waseem, M and Shahwar Awan, H and Ullah, N and AlAsmari, AF and AlAsmari, F and Ali, A}, title = {Immunoinformatics aided designing of a next generation poly-epitope vaccine against uropathogenic Escherichia coli to combat urinary tract infections.}, journal = {Journal of biomolecular structure \& dynamics}, volume = {}, number = {}, pages = {1--21}, doi = {10.1080/07391102.2023.2266018}, pmid = {37811774}, issn = {1538-0254}, abstract = {Urinary tract infections (UTIs) are the second most prevalent bacterial infections and uropathogenic Escherichia coli (UPEC) stands among the primary causative agents of UTIs. The usage of antibiotics is the routine therapy being used in various countries to treat UTIs but becoming ineffective because of increasing antibiotic resistance among UPEC strains. Thus, there must be the development of some alternative treatment strategies such as vaccine development against UPEC. 
In the following study, pan-genomics along with reverse vaccinology approaches is used under the framework of bioinformatics for the identification of core putative vaccine candidates, employing 307 UPEC genomes (complete and draft), available publicly. A total of nine T-cell epitopes (derived from B-cells) of both MHC classes (I and II), were prioritized among three potential protein candidates. These epitopes were then docked together by using linkers (GPGPG and AAY) and an adjuvant (Cholera Toxin B) to form a poly-valent vaccine construct. The chimeric vaccine construct underwent molecular modelling, further refinement and energy minimization. We predicted positive results of the vaccine construct in immune simulations with significantly high levels of immune cells. The protein-protein docking analysis of vaccine construct with toll-like receptors predicted efficient binding, which was further validated by molecular dynamics simulation of vaccine construct with TLR-2 and TLR-4 at 120 ns, resulting in stable complexes' conformation throughout the simulation run. Overall, the vaccine construct demonstrated positive antigenic response. In future, this chimeric vaccine construct or the identified epitopes could be experimentally validated for the development of UPEC vaccines against UTIs. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37808295, year = {2023}, author = {Wang, Z and Liu, Y and Liu, P and Jian, Z and Yan, Q and Tang, B and Yang, A and Liu, W}, title = {Genomic and clinical characterization of Klebsiella pneumoniae carrying the pks island.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1189120}, pmid = {37808295}, issn = {1664-302X}, abstract = {BACKGROUND: The pks island and its production of the bacterial secondary metabolite genotoxin, colibactin, have attracted increasing attention. 
However, genomic articles focusing on pks islands in Klebsiella pneumoniae, as well as comparative genomic studies of mobile genetic elements, such as prophages, plasmids, and insertion sequences, are lacking. In this study, a large-scale analysis was conducted to understand the prevalence and evolution of pks islands, differences in mobile genetic elements between pks-negative and pks-positive K. pneumoniae, and clinical characteristics of infection caused by pks-positive K. pneumoniae.

METHODS: The genomes of 2,709 K. pneumoniae were downloaded from public databases, among which, 1,422 were from NCBI and 1,287 were from the China National GeneBank DataBase (CNGBdb). Screening for virulence and resistance genes, phylogenetic tree construction, and pan-genome analysis were performed. Differences in mobile genetic elements between pks-positive and pks-negative strains were compared. The clinical characteristics of 157 pks-positive and 157 pks-negative K. pneumoniae infected patients were investigated.

RESULTS: Of 2,709 K. pneumoniae genomes, 245 pks-positive genomes were screened. The four siderophores, type VI secretion system, and nutritional factor genes were present in at least 77.9% (191/245), 66.9% (164/245), and 63.3% (155/245) of pks-positive strains, respectively. The number and fragment length of prophage were lower in pks-positive strains than in pks-negative strains (p < 0.05). The prevalence of the IS6 family was higher in pks-negative strains than in pks-positive strains, and the prevalence of multiple plasmid replicon types differed between the pks-positive and pks-negative strains (p < 0.05). The detection rate of pks-positive K. pneumoniae in abscess samples was higher than that of pks-negative K. pneumoniae (p < 0.05).

CONCLUSION: The pks-positive strains had abundant virulence genes. There were differences in the distribution of mobile genetic elements between pks-positive and pks-negative isolates. Further analysis of the evolutionary pattern of pks island and epidemiological surveillance in different populations are needed.}, } @article {pmid37806426, year = {2023}, author = {Rosani, U and Sollitto, M and Fogal, N and Salata, C}, title = {Comparative analysis of Presence-Absence gene Variations in five hard tick species: impact and functional considerations.}, journal = {International journal for parasitology}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.ijpara.2023.08.004}, pmid = {37806426}, issn = {1879-0135}, abstract = {Tick species are vectors of harmful human and animal diseases, and their expansion is raising concerns under the global environmental changes' scenario. Ticks host and transmit bacteria, protozoa and viruses, making the understanding of host-pathogen molecular pathways critical to development of effective disease control strategies. Despite the considerable sizes and repeat contents of tick genomes, individual tick genomics is perhaps the most effective approach to reveal genotypic traits of interest. Presence-Absence gene Variations (PAVs) can contribute to individual differences within species, with dispensable genes carried by subsets of individuals possibly underpinning functional significance at individual or population-levels. We exploited 350 resequencing datasets of Dermacentor silvarum, Haemaphysalis longicornis, Ixodes persulcatus, Rhipicephalus microplus and Rhipicephalus sanguineus hard tick specimens to reveal the extension of PAV and the conservation of dispensable genes among individuals and, comparatively, between species. Overall, we traced 550-3,346 dispensable genes per species and were able to reconstruct 5.3-7 Mb of genomic regions not included in the respective reference genomes, as part of the tick pangenomes. 
Both dispensable genes and de novo predicted genes indicated that PAVs preferentially impacted mobile genetic elements in these tick species.}, } @article {pmid37804413, year = {2023}, author = {Bouznada, K and Belaouni, HA and Meklat, A}, title = {Genome-based reclassification of Kitasatospora niigatensis as a later heterotypic synonym of Kitasatospora cineracea Tajima et al. (2001).}, journal = {Antonie van Leeuwenhoek}, volume = {}, number = {}, pages = {}, pmid = {37804413}, issn = {1572-9699}, abstract = {The present study used genome-based approaches to investigate the taxonomic relationship between Kitasatospora cineracea DSM 44780[T] and Kitasatospora niigatensis DSM 44781[T], two species that were previously described by Tajima et al. (Int J Syst Evol Microbiol 51:1765-1771, 2001). The digital DNA-DNA hybridization (dDDH), average amino acid identity (AAI), and average nucleotide identity (ANI) values between the genomes of the two type strains were 90.3, 98.7, and 99.1%, respectively. These values exceeded the established thresholds of 70% (dDDH) and 95-96% (ANI and AAI) for bacterial species delineation, suggesting that K. cineracea and K. niigatensis should share the same taxonomic position. Furthermore, our analysis using the 'Bacterial Pan Genome Analysis' (BPGA) pipeline and the Maximum Likelihood core-genes tree inferred using FastTree2 consistently demonstrated that K. cineracea DSM 44780[T] and K. niigatensis DSM 44781[T] are closely related, as indicated by the clustering of these strains in the core-genes phylogenomic tree. Based on these findings, we propose that K. niigatensis should be considered a later heterotypic synonym of K. 
cineracea.}, } @article {pmid37803826, year = {2023}, author = {Niu, Y and Liu, Q and He, Z and Raman, R and Wang, H and Long, X and Qin, H and Raman, H and Parkin, IAP and Bancroft, I and Zou, J}, title = {A Brassica carinata pan-genome platform for Brassica crop improvement.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100725}, doi = {10.1016/j.xplc.2023.100725}, pmid = {37803826}, issn = {2590-3462}, } @article {pmid37803772, year = {2023}, author = {You, L and Lv, R and Jin, H and Ma, T and Zhao, Z and Kwok, LY and Sun, Z}, title = {A large-scale comparative genomics study reveals niche-driven and within-sample intra-species functional diversification in Lacticaseibacillus rhamnosus.}, journal = {Food research international (Ottawa, Ont.)}, volume = {173}, number = {Pt 2}, pages = {113446}, doi = {10.1016/j.foodres.2023.113446}, pmid = {37803772}, issn = {1873-7145}, abstract = {Lacticaseibacillus rhamnosus (L. rhamnosus) is widely recognized as a probiotic species, and it exists in a variety of environments including host gut and dairy products. This work aimed at conducting a large-scale comparative genomics analysis of 384 L. rhamnosus genomes (257 whole-sequence or metagenomic-assembled genomes from gut-associated isolates [122 and 135 retrieved from the UHGG and NCBI databases, respectively] and 127 genomes from dairy isolates [34 from the NCBI database; 93 isolated from a cheese sample and sequenced here]). Our results showed that L. rhamnosus had a large and open pan-genome (15,253 pan-genes identified from all 384 genomes; 15,028 pan-genes if the 93 cheese-originated isolates were excluded). The core-gene phylogenetic tree constructed from the 384 L. rhamnosus genomes comprised five phylogenetic branches, with a random distribution of dairy and gut-associated isolates/genomes across the tree. 
No significant difference was identified in the overall profile of metabolism-related genes between dairy and gut-associated genomes; however, notably, the gut-associated strains/isolates contained more genes coding for specific metabolic pathways and carbohydrate-active enzymes, e.g., lacto-N-biosidase (EC 3.2.1.140; GT20) and lacto-N-biose phosphorylase/galacto-N-biose phosphorylase (EC 2.4.1.211; GH112). Further, we found that there was obvious intra-species diversification of the 93 cheese-originated L. rhamnosus isolates, forming three clades (Clades A, B, and C) in the reconstructed core-gene phylogenetic tree. There were numerous single nucleotide variations (over 10,000) across the three clades. Moreover, significant differences were observed in the content of metabolism-related genes across clades (p < 0.05, Adonis test), characterized by the enrichment in glycoside hydrolases in Clade C and the possession of unique metabolic pathways in each clade. These results implicated genomics/functional diversification of L. rhamnosus in a single food matrix and niche-driven adaptive evolution of isolates from dairy and host gut-associated origins. Our study shed insights into the selection of candidate strains for food industry applications.}, } @article {pmid37802986, year = {2023}, author = {Kang, M and Wu, H and Liu, H and Liu, W and Zhu, M and Han, Y and Liu, W and Chen, C and Song, Y and Tan, L and Yin, K and Zhao, Y and Yan, Z and Lou, S and Zan, Y and Liu, J}, title = {The pan-genome and local adaptation of Arabidopsis thaliana.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {6259}, pmid = {37802986}, issn = {2041-1723}, abstract = {Arabidopsis thaliana serves as a model species for investigating various aspects of plant biology. However, the contribution of genomic structural variations (SVs) and their associate genes to the local adaptation of this widely distribute species remains unclear. 
Here, we de novo assemble chromosome-level genomes of 32 A. thaliana ecotypes and determine that variable genes expand the gene pool in different ecotypes and thus assist local adaptation. We develop a graph-based pan-genome and identify 61,332 SVs that overlap with 18,883 genes, some of which are highly involved in ecological adaptation of this species. For instance, we observe a specific 332 bp insertion in the promoter region of the HPCA1 gene in the Tibet-0 ecotype that enhances gene expression, thereby promotes adaptation to alpine environments. These findings augment our understanding of the molecular mechanisms underlying the local adaptation of A. thaliana across diverse habitats.}, } @article {pmid37801223, year = {2023}, author = {Dias, RS and Kremer, FS and da Costa de Avila, LF}, title = {In silico prospection of Lactobacillus acidophilus strains with potential probiotic activity.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {}, number = {}, pages = {}, pmid = {37801223}, issn = {1678-4405}, abstract = {Lactic acid bacteria (LAB) are fermentative microorganisms and perform different roles in biotechnological processes, mainly in the food and pharmaceutical industries. Among the LAB, Lactobacillus acidophilus is a species that deserves to be highlighted for being used both in prophylaxis and in the treatment of pathologies. Most of the metabolites produced by this species are linked to the inhibition of pathogens. In this study, we utilized a pangenomic and metabolic annotation analysis using Roary and BlastKOALA, ML-based probiotic activity prediction with iProbiotic and whole-genome similarity using ANI to identify strains of L. acidophilus with potential probiotic activity. According to the results in BlastKOALA and iProbiotics, L. acidophilus NCTC 13721 had the greatest potential among the 64 strains tested, both in terms of its ability to be a Lactobacillus spp. 
probiotic, when in the amount of genes involved in the metabolism of organic acids and quorum sensing. In addition, DSM 20079 proved to be promising for prospecting new probiotic Lactobacillus from BlastKOALA analyses, as they presented similar results in the number of genes involved in the production of lactic acid, acetic acid, hydrogen peroxide, except for quorum sensing where the NCTC 13721 strain had 14 more genes. L. acidophilus NCTC 13721 and L. acidophilus La-5 strains showed greater ability to be Lactobacillus spp. probiotic capacity, showing 84.8% and 51.9% capacity in the iProbiotics tool, respectively. When analyzed in ANI, none of the evaluated strains showed genomic similarity with NCTC 13721. In contrast, the DSM 20079 strain showed genomic similarity with all evaluated strains except NCTC 13721. Furthermore, eight strains with characteristics with approximately 100% genomic similarity to La-5 were listed: S20_1, LA-5, FSI4, APC2845, LA-G80-111, DS1_1A, LA1, and BCRC 14065. Therefore, according to the findings in iProbiotics and BlastKOALA, among the 64 strains evaluated, NCTC 13721 is the most promising strain to be used for future in vitro studies.}, } @article {pmid37799143, year = {2023}, author = {Fatima, K and Sadaqat, M and Azeem, F and Rao, MJ and Albekairi, NA and Alshammari, A and Tahir Ul Qamar, M}, title = {Integrated omics and machine learning-assisted profiling of cysteine-rich-receptor-like kinases from three peanut spp. revealed their role in multiple stresses.}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1252020}, pmid = {37799143}, issn = {1664-8021}, abstract = {Arachis hypogaea (peanut) is a leading oil and protein-providing crop with a major food source in many countries. It is mostly grown in tropical regions and is largely affected by abiotic and biotic stresses. 
Cysteine-rich receptor-like kinases (CRKs) is a family of transmembrane proteins that play important roles in regulating stress-signaling and defense mechanisms, enabling plants to tolerate stress conditions. However, almost no information is available regarding this gene family in Arachis hypogaea and its progenitors. This study conducts a pangenome-wide investigation of A. hypogaea and its two progenitors, A. duranensis and A. ipaensis CRK genes (AhCRKs, AdCRKs, and AiCRKs). The gene structure, conserved motif patterns, phylogenetic history, chromosomal distribution, and duplication were studied in detail, showing the intraspecies structural conservation and evolutionary patterns. Promoter cis-elements, protein-protein interactions, GO enrichment, and miRNA targets were also predicted, showing their potential functional conservation. Their expression in salt and drought stresses was also comprehensively studied. The CRKs identified were divided into three groups, phylogenetically. The expansion of this gene family in peanuts was caused by both types of duplication: tandem and segmental. Furthermore, positive as well as negative selection pressure directed the duplication process. The peanut CRK genes were also enriched in hormones, light, development, and stress-related elements. MicroRNA (miRNA) also targeted the AhCRK genes, which suggests the regulatory association of miRNAs in the expression of these genes. Transcriptome datasets showed that AhCRKs have varying expression levels under different abiotic stress conditions. Furthermore, the multi-stress responsiveness of the AhCRK genes was evaluated using a machine learning-based method, Random Forest (RF) classifier. The 3D structures of AhCRKs were also predicted. 
Our study can be utilized in developing a detailed understanding of the stress regulatory mechanisms of the CRK gene family in peanuts and its further studies to improve the genetic makeup of peanuts to thrive better under stress conditions.}, } @article {pmid37798879, year = {2023}, author = {Miao, H and Wang, L and Qu, L and Liu, H and Sun, Y and Le, M and Wang, Q and Wei, S and Zheng, Y and Lin, W and Duan, Y and Cao, H and Xiong, S and Wang, X and Wei, L and Li, C and Ma, Q and Ju, M and Zhao, R and Li, G and Mu, C and Tian, Q and Mei, H and Zhang, T and Gao, T and Zhang, H}, title = {Genomic evolution and insights into agronomic trait innovations of Sesamum Species.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100729}, doi = {10.1016/j.xplc.2023.100729}, pmid = {37798879}, issn = {2590-3462}, abstract = {Sesame is an ancient oilseed crop with a high oil content and quality. However, the evolutionary history and genetic mechanisms of the valuable agronomic traits remain unclear. Herein, we reported chromosome-scale genomes for the cultivated and six wild Sesamum species, representing all three karyotypes within this genus. Karyotyping and genome-based phylogenic analysis found the evolution route of Sesamum species from n = 13 to n = 16 and revealed that allotetraploidization occurred in wild species S. radiatum. Moreover, the early divergence and ancient phylogenic position of the Sesamum genus (48.5-19.7 million years ago) was observed within eudicots during the Tertiary period. Pan-genome analysis further revealed 9,164 core gene families in the seven Sesamum species. These families were significantly enriched in various metabolic pathways, including fatty acid (FA) metabolism and FA biosynthesis. Structural variations in SiPT1 and SiDT1 within the PEBP gene family led to the genomic evolution of the plant architecture and inflorescence development phenotypes in Sesamum. 
A genome-wide association study (GWAS) of the interspecific population and comparative genome identified a long terminal repeat insertion in wild S. angustifolium and sequence deletion in cultivated sesame DIR genes, both independently caused high Fusarium wilt disease susceptibility. A GWAS of 560 sesame accessions combined with an overexpression study confirmed NAC1 and PPO genes play an important role in oil content upregulation in sesame. Collectively, our study provides high-quality genomic resources for cultivated and wild Sesamum species revealing insights to improve the molecular breeding strategies of sesame and other oilseed crops.}, } @article {pmid37798615, year = {2023}, author = {Contreras-Moreira, B and Saraf, S and Naamati, G and Casas, AM and Amberkar, SS and Flicek, P and Jones, AR and Dyer, S}, title = {GET_PANGENES: calling pangenes from plant genome alignments confirms presence-absence variation.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {223}, pmid = {37798615}, issn = {1474-760X}, support = {WT222155/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Crop pangenomes made from individual cultivar assemblies promise easy access to conserved genes, but genome content variability and inconsistent identifiers hamper their exploration. To address this, we define pangenes, which summarize a species coding potential and link back to original annotations. The protocol get_pangenes performs whole genome alignments (WGA) to call syntenic gene models based on coordinate overlaps. A benchmark with small and large plant genomes shows that pangenes recapitulate phylogeny-based orthologies and produce complete soft-core gene sets. Moreover, WGAs support lift-over and help confirm gene presence-absence variation. 
Source code and documentation: https://github.com/Ensembl/plant-scripts .}, } @article {pmid37796250, year = {2023}, author = {Jung, H and Lee, D and Lee, S and Kong, HJ and Park, J and Seo, YS}, title = {Comparative genomic analysis of Chryseobacterium species: deep insights into plant-growth-promoting and halotolerant capacities.}, journal = {Microbial genomics}, volume = {9}, number = {10}, pages = {}, doi = {10.1099/mgen.0.001108}, pmid = {37796250}, issn = {2057-5858}, abstract = {Members of the genus Chryseobacterium have attracted great interest as beneficial bacteria that can promote plant growth and biocontrol. Given the recent risks of climate change, it is important to develop tolerance strategies for efficient applications of plant-beneficial bacteria in saline environments. However, the genetic determinants of plant-growth-promoting and halotolerance effects in Chryseobacterium have not yet been investigated at the genomic level. Here, a comparative genomic analysis was conducted with seven Chryseobacterium species. Phylogenetic and phylogenomic analyses revealed niche-specific evolutionary distances between soil and freshwater Chryseobacterium species, consistent with differences in genomic statistics, indicating that the freshwater bacteria have smaller genome sizes and fewer genes than the soil bacteria. Phosphorus- and zinc-cycling genes (required for nutrient acquisition in plants) were universally present in all species, whereas nitrification and sulphite reduction genes (required for nitrogen- and sulphur-cycling, respectively) were distributed only in soil bacteria. A pan-genome containing 6842 gene clusters was constructed, which reflected the general features of the core, accessory and unique genomes. Halotolerant species with an accessory genome shared a Kdp potassium transporter and biosynthetic pathways for branched-chain amino acids and the carotenoid lycopene, which are associated with countermeasures against salt stress. 
Protein-protein interaction network analysis was used to define the genetic determinants of Chryseobacterium salivictor NBC122 that reduce salt damage in bacteria and plants. Sixteen hub genes comprised the aromatic compound degradation and Por secretion systems, which are required to cope with complex stresses associated with saline environments. Horizontal gene transfer and CRISPR-Cas analyses indicated that C. salivictor NBC122 underwent more evolutionary events when interacting with different environments. These findings provide deep insights into genomic adaptation to dynamic interactions between plant-growth-promoting Chryseobacterium and salt stress.}, } @article {pmid37793435, year = {2023}, author = {Raimondeau, P and Bianconi, ME and Pereira, L and Parisod, C and Christin, PA and Dunning, LT}, title = {Lateral gene transfer generates accessory genes that accumulate at different rates within a grass lineage.}, journal = {The New phytologist}, volume = {}, number = {}, pages = {}, doi = {10.1111/nph.19272}, pmid = {37793435}, issn = {1469-8137}, support = {NE/T011025/1//Natural Environment Research Council/ ; NE/V000012/1//Natural Environment Research Council/ ; URF/R/180022//Royal Society/ ; }, abstract = {Lateral gene transfer (LGT) is the movement of DNA between organisms without sexual reproduction. The acquired genes represent genetic novelties that have independently evolved in the donor's genome. Phylogenetic methods have shown that LGT is widespread across the entire grass family, although we know little about the underlying dynamics. We identify laterally acquired genes in five de novo reference genomes from the same grass genus (four Alloteropsis semialata and one Alloteropsis angusta). Using additional resequencing data for a further 40 Alloteropsis individuals, we place the acquisition of each gene onto a phylogeny using stochastic character mapping, and then infer rates of gains and losses. 
We detect 168 laterally acquired genes in the five reference genomes (32-100 per genome). Exponential decay models indicate that the rate of LGT acquisitions (6-28 per Ma) and subsequent losses (11-24% per Ma) varied significantly among lineages. Laterally acquired genes were lost at a higher rate than vertically inherited loci (0.02-0.8% per Ma). This high turnover creates intraspecific gene content variation, with a preponderance of them occurring as accessory genes in the Alloteropsis pangenome. This rapid turnover generates standing variation that can ultimately fuel local adaptation.}, } @article {pmid37791541, year = {2023}, author = {Felgate, H and Sethi, D and Faust, K and Kiy, C and Härtel, C and Rupp, J and Clifford, R and Dean, R and Tremlett, C and Wain, J and Langridge, G and Clarke, P and Page, AJ and Webber, MA}, title = {Characterisation of neonatal Staphylococcus capitis NRCS-A isolates compared with non NRCS-A Staphylococcus capitis from neonates and adults.}, journal = {Microbial genomics}, volume = {9}, number = {10}, pages = {}, doi = {10.1099/mgen.0.001106}, pmid = {37791541}, issn = {2057-5858}, abstract = {Staphylococcus capitis is a frequent cause of late-onset sepsis in neonates admitted to Neonatal Intensive Care Units (NICU). One clone of S. capitis, NRCS-A has been isolated from NICUs globally although the reasons for the global success of this clone are not well understood. We analysed a collection of S. capitis colonising babies admitted to two NICUs, one in the UK and one in Germany as well as corresponding pathological clinical isolates. Genome analysis identified a population structure of three groups; non-NRCS-A isolates, NRCS-A isolates, and a group of 'proto NRCS-A' - isolates closely related to NRCS-A but not associated with neonatal infection. All bloodstream isolates belonged to the NRCS-A group and were indistinguishable from strains carried on the skin or in the gut. 
NRCS-A isolates showed increased tolerance to chlorhexidine and antibiotics relative to the other S. capitis as well as enhanced ability to grow at higher pH values. Analysis of the pangenome of 138 isolates identified characteristic nsr and tarJ genes in both the NRCS-A and proto groups. A CRISPR-cas system was only seen in NRCS-A isolates which also showed enrichment of genes for metal acquisition and transport. We found evidence for transmission of S. capitis NRCS-A within NICU, with related isolates shared between babies and multiple acquisitions by some babies. Our data show NRCS-A strains commonly colonise uninfected babies in NICU representing a potential reservoir for potential infection. This work provides more evidence that adaptation to survive in the gut and on skin facilitates spread of NRCS-A, and that metal acquisition and tolerance may be important to the biology of NRCS-A. Understanding how NRCS-A survives in NICUs can help develop infection control procedures against this clone.}, } @article {pmid37790793, year = {2023}, author = {Shen, F and Hu, C and Huang, X and He, H and Yang, D and Zhao, J and Yang, X}, title = {Advances in alternative splicing identification: deep learning and pantranscriptome.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1232466}, pmid = {37790793}, issn = {1664-462X}, abstract = {In plants, alternative splicing is a crucial mechanism for regulating gene expression at the post-transcriptional level, which leads to diverse proteins by generating multiple mature mRNA isoforms and diversify the gene regulation. Due to the complexity and variability of this process, accurate identification of splicing events is a vital step in studying alternative splicing. This article presents the application of alternative splicing algorithms with or without reference genomes in plants, as well as the integration of advanced deep learning techniques for improved detection accuracy. 
In addition, we also discuss alternative splicing studies in the pan-genomic background and the usefulness of integrated strategies for fully profiling alternative splicing.}, } @article {pmid37790531, year = {2023}, author = {Heumos, S and Guarracino, A and Schmelzle, JM and Li, J and Zhang, Z and Hagmann, J and Nahnsen, S and Prins, P and Garrison, E}, title = {Pangenome graph layout by Path-Guided Stochastic Gradient Descent.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.22.558964}, pmid = {37790531}, abstract = {MOTIVATION: The increasing availability of complete genomes demands for models to study genomic variability within entire populations. Pangenome graphs capture the full genetic diversity between multiple genomes, but their layouts may exhibit complex structures due to common, nonlinear patterns of genome variation and evolution. These structures hamper downstream analyses, visualization, and interpretation.

RESULTS: In response, we introduce a novel graph layout algorithm: the Path-Guided Stochastic Gradient Descent (PG-SGD). PG-SGD uses the genomes, represented in the pangenome graph as paths, to move pairs of nodes in parallel applying a modified HOGWILD! strategy. We show that our implementation efficiently computes the layout of gigabase-scale pangenome graphs, unveiling their biological features.

AVAILABILITY: We integrated PG-SGD in ODGI which is released as free software under the MIT open source license. Source code is available at https://github.com/pangenome/odgi .

CONTACT: egarris5@uthsc.edu.}, } @article {pmid37783780, year = {2023}, author = {Huang, Y and He, J and Xu, Y and Zheng, W and Wang, S and Chen, P and Zeng, B and Yang, S and Jiang, X and Liu, Z and Wang, L and Wang, X and Liu, S and Lu, Z and Liu, Z and Yu, H and Yue, J and Gao, J and Zhou, X and Long, C and Zeng, X and Guo, YJ and Zhang, WF and Xie, Z and Li, C and Ma, Z and Jiao, W and Zhang, F and Larkin, RM and Krueger, RR and Smith, MW and Ming, R and Deng, X and Xu, Q}, title = {Pangenome analysis provides insight into the evolution of the orange subfamily and a key gene for citric acid accumulation in citrus fruits.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {37783780}, issn = {1546-1718}, abstract = {The orange subfamily (Aurantioideae) contains several Citrus species cultivated worldwide, such as sweet orange and lemon. The origin of Citrus species has long been debated and less is known about the Aurantioideae. Here, we compiled the genome sequences of 314 accessions, de novo assembled the genomes of 12 species and constructed a graph-based pangenome for Aurantioideae. Our analysis indicates that the ancient Indian Plate is the ancestral area for Citrus-related genera and that South Central China is the primary center of origin of the Citrus genus. We found substantial variations in the sequence and expression of the PH4 gene in Citrus relative to Citrus-related genera. Gene editing and biochemical experiments demonstrate a central role for PH4 in the accumulation of citric acid in citrus fruits. 
This study provides insights into the origin and evolution of the orange subfamily and a regulatory mechanism underpinning the evolution of fruit taste.}, } @article {pmid37779718, year = {2023}, author = {Yang, W and Yang, H and Bao, X and Hussain, M and Bao, Q and Zeng, Z and Xiao, C and Zhou, L and Qin, X}, title = {Brevibacillus brevis HNCS-1: a biocontrol bacterium against tea plant diseases.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1198747}, pmid = {37779718}, issn = {1664-302X}, abstract = {As a biocontrol bacteria, Brevibacillus has been the subject of extensive research for agricultural applications. Antibacterial peptides (AMPs) are the main antibacterial products of Brevibacillus. This study isolated a strain of Br. brevis HNCS-1 from tea garden soil, and the strain has an antagonistic effect against five types of pathogens of tea diseases, namely Gloeosporium theae-sinensis, Elsinoe leucospira, Phyllosticta theaefolia, Fusarium sp., and Cercospora theae. To determine the genetic characteristics implicated in the biocontrol mechanism, the genome sequence of the HNCS-1 strain was obtained and analyzed further, and the data are deposited in the GenBank repository (No. CP128411). Comparative genomics analyses revealed that the HNCS-1 strain and 17 public Br. brevis share a core genome composed of 3,742 genes. Interestingly, only one non-ribosomal peptide synthetase (NRPS) gene cluster annotated as edeine is present in the core genome. And UHPLC-MS/MS detection results showed that edeine B and edeine A were the principal antibacterial peptides in the HNCS-1 strain. This study proves that edeine is the main antibacterial peptide of Br. 
brevis, and provides a new strategy for the identification of antibacterial products from other biocontrol bacteria.}, } @article {pmid37779688, year = {2023}, author = {González, D and Morales-Olavarria, M and Vidal-Veuthey, B and Cárdenas, JP}, title = {Insights into early evolutionary adaptations of the Akkermansia genus to the vertebrate gut.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1238580}, pmid = {37779688}, issn = {1664-302X}, abstract = {Akkermansia, a relevant mucin degrader from the vertebrate gut microbiota, is a member of the deeply branched Verrucomicrobiota, as well as the only known member of this phylum to be described as inhabitants of the gut. Only a few Akkermansia species have been officially described so far, although there is genomic evidence addressing the existence of more species-level variants for this genus. This niche specialization makes Akkermansia an interesting model for studying the evolution of microorganisms to their adaptation to the gastrointestinal tract environment, including which kind of functions were gained when the Akkermansia genus originated or how the evolutionary pressure functions over those genes. In order to gain more insight into Akkermansia adaptations to the gastrointestinal tract niche, we performed a phylogenomic analysis of 367 high-quality Akkermansia isolates and metagenome-assembled genomes, in addition to other members of Verrucomicrobiota. 
This work was focused on three aspects: the definition of Akkermansia genomic species clusters and the calculation and functional characterization of the pangenome for the most represented species; the evolutionary relationship between Akkermansia and their closest relatives from Verrucomicrobiota, defining the gene families which were gained or lost during the emergence of the last Akkermansia common ancestor (LAkkCA); and the evaluation of the evolutionary pressure metrics for each relevant gene family of main Akkermansia species. This analysis found 25 Akkermansia genomic species clusters distributed in two main clades, divergent from their non-Akkermansia relatives. Pangenome analyses suggest that Akkermansia species have open pangenomes, and the gene gain/loss model indicates that genes associated with mucin degradation (both glycoside hydrolases and peptidases), (micro)aerobic metabolism, surface interaction, and adhesion were part of LAkkCA. Specifically, mucin degradation is a very ancestral innovation involved in the origin of Akkermansia. Horizontal gene transfer detection suggests that Akkermansia could receive genes mostly from unknown sources or from other Gram-negative gut bacteria. Evolutionary metrics suggest that Akkermansia species evolved differently, and even some conserved genes suffered different evolutionary pressures among clades. 
These results suggest a complex evolutionary landscape of the genus and indicate that mucin degradation could be an essential feature in Akkermansia evolution as a symbiotic species.}, } @article {pmid37779189, year = {2023}, author = {Xie, S and Isaacs, K and Becker, G and Murdoch, BM}, title = {A computational framework for improving genetic variants identification from 5,061 sheep sequencing data.}, journal = {Journal of animal science and biotechnology}, volume = {14}, number = {1}, pages = {127}, pmid = {37779189}, issn = {1674-9782}, support = {USDA-NIFA-IDA1566//National Institute of Food and Agriculture/ ; }, abstract = {BACKGROUND: Pan-genomics is a recently emerging strategy that can be utilized to provide a more comprehensive characterization of genetic variation. Joint calling is routinely used to combine identified variants across multiple related samples. However, the improvement of variants identification using the mutual support information from multiple samples remains quite limited for population-scale genotyping.

RESULTS: In this study, we developed a computational framework for joint calling genetic variants from 5,061 sheep by incorporating the sequencing error and optimizing mutual support information from multiple samples' data. The variants were accurately identified from multiple samples by using four steps: (1) Probabilities of variants from two widely used algorithms, GATK and Freebayes, were calculated by Poisson model incorporating base sequencing error potential; (2) The variants with high mapping quality or consistently identified from at least two samples by GATK and Freebayes were used to construct the raw high-confidence identification (rHID) variants database; (3) The high confidence variants identified in single sample were ordered by probability value and controlled by false discovery rate (FDR) using rHID database; (4) To avoid the elimination of potentially true variants from rHID database, the variants that failed FDR were reexamined to rescued potential true variants and ensured high accurate identification variants. The results indicated that the percent of concordant SNPs and Indels from Freebayes and GATK after our new method were significantly improved 12%-32% compared with raw variants and advantageously found low frequency variants of individual sheep involved several traits including nipples number (GPC5), scrapie pathology (PAPSS2), seasonal reproduction and litter size (GRM1), coat color (RAB27A), and lentivirus susceptibility (TMEM154).

CONCLUSION: The new method used the computational strategy to reduce the number of false positives, and simultaneously improve the identification of genetic variants. This strategy did not incur any extra cost by using any additional samples or sequencing data information and advantageously identified rare variants which can be important for practical applications of animal breeding.}, } @article {pmid37777666, year = {2023}, author = {Rahim, MS and Sharma, V and Yadav, P and Parveen, A and Kumar, A and Roy, J and Kumar, V}, title = {Rethinking underutilized cereal crops: pan-omics integration and green system biology.}, journal = {Planta}, volume = {258}, number = {5}, pages = {91}, pmid = {37777666}, issn = {1432-2048}, abstract = {Due to harsh lifestyle changes, in the present era, nutritional security is needed along with food security so it is necessary to include underutilized cereal crops (UCCs) in our daily diet to counteract the rising danger of human metabolic illness. We can attain both the goal of zero hunger and nutritional security by developing improved UCCs using advanced pan-omics (genomics, transcriptomics, proteomics, metabolomics, nutrigenomics, phenomics and ionomics) practices. Plant sciences research progressed profoundly since the last few decades with the introduction of advanced technologies and approaches, addressing issues of food demand of the growing population, nutritional security challenges and climate change. However, throughout the expansion and popularization of commonly consumed major cereal crops such as wheat and rice, other cereal crops such as millet, rye, sorghum, and others were impeded, despite their potential medicinal and nutraceutical qualities. Undoubtedly neglected underutilized cereal crops (UCCs) also have the capability to withstand diverse climate change. To relieve the burden of major crops, it is necessary to introduce the new crops in our diet in the way of UCCs. 
Introgression of agronomically and nutritionally important traits by pan-omics approaches in UCCs could be a defining moment for the population's well-being on the globe. This review discusses the importance of underutilized cereal crops, as well as the application of contemporary omics techniques and advanced bioinformatics tools that could open up new avenues for future study and be valuable assets in the development and usage of UCCs in the perspective of green system biology. The increased and improved use of UCCs is dependent on number of factors that necessitate a concerted research effort in agricultural sciences. The emergence of functional genomics with molecular genetics might gear toward the reawakening of interest in underutilized cereals crops. The need of this era is to focus on potential UCCs in advanced agriculture and breeding programmes. Hence, targeting the UCCs, might provide a bright future for better health and scientific rationale for its use.}, } @article {pmid37775806, year = {2023}, author = {Bonnet, K and Marschall, T and Doerr, D}, title = {Constructing founder sets under allelic and non-allelic homologous recombination.}, journal = {Algorithms for molecular biology : AMB}, volume = {18}, number = {1}, pages = {15}, pmid = {37775806}, issn = {1748-7188}, support = {1U01HG010973/NH/NIH HHS/United States ; }, abstract = {Homologous recombination between the maternal and paternal copies of a chromosome is a key mechanism for human inheritance and shapes population genetic properties of our species. However, a similar mechanism can also act between different copies of the same sequence, then called non-allelic homologous recombination (NAHR). This process can result in genomic rearrangements-including deletion, duplication, and inversion-and is underlying many genomic disorders. Despite its importance for genome evolution and disease, there is a lack of computational models to study genomic loci prone to NAHR. 
In this work, we propose such a computational model, providing a unified framework for both (allelic) homologous recombination and NAHR. Our model represents a set of genomes as a graph, where haplotypes correspond to walks through this graph. We formulate two founder set problems under our recombination model, provide flow-based algorithms for their solution, describe exact methods to characterize the number of recombinations, and demonstrate scalability to problem instances arising in practice.}, } @article {pmid37773075, year = {2023}, author = {Trinh, P and Clausen, DS and Willis, AD}, title = {happi: a hierarchical approach to pangenomics inference.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {214}, pmid = {37773075}, issn = {1474-760X}, support = {R35 GM133420/GM/NIGMS NIH HHS/United States ; R21 AI168679/AI/NIAID NIH HHS/United States ; T32 ES015459/ES/NIEHS NIH HHS/United States ; }, abstract = {Recovering metagenome-assembled genomes (MAGs) from shotgun sequencing data is an increasingly common task in microbiome studies, as MAGs provide deeper insight into the functional potential of both culturable and non-culturable microorganisms. However, metagenome-assembled genomes vary in quality and may contain omissions and contamination. These errors present challenges for detecting genes and comparing gene enrichment across sample types. To address this, we propose happi, an approach to testing hypotheses about gene enrichment that accounts for genome quality. We illustrate the advantages of happi over existing approaches using published Saccharibacteria MAGs, Streptococcus thermophilus MAGs, and via simulation.}, } @article {pmid37772863, year = {2023}, author = {Lanza, A and Mizobata, H and Yonezawa, R and Yoshitake, K and Shigeharu, K and Asakawa, S}, title = {Complete genome sequence of Edwardsiella sp. 
NBRC12716 isolated in 1962 from the liver of diseased eel.}, journal = {Microbiology resource announcements}, volume = {}, number = {}, pages = {e0073723}, doi = {10.1128/MRA.00737-23}, pmid = {37772863}, issn = {2576-098X}, abstract = {We report the complete genome sequence of Edwardsiella sp. NBRC12716 isolated from a diseased eel in 1962. The genome consists of a single, circular chromosome 3,771,060 bp in length with 59.74% GC content and encodes 25 rRNA, 96 tRNA, and 3,182 protein-coding genes.}, } @article {pmid37764993, year = {2023}, author = {Tamayo-Ordóñez, MC and Rosas-García, NM and Ayil-Gutiérrez, BA and Bello-López, JM and Tamayo-Ordóñez, FA and Anguebes-Franseschi, F and Damas-Damas, S and Tamayo-Ordóñez, YJ}, title = {Non-Structural Proteins (Nsp): A Marker for Detection of Human Coronavirus Families.}, journal = {Pathogens (Basel, Switzerland)}, volume = {12}, number = {9}, pages = {}, doi = {10.3390/pathogens12091185}, pmid = {37764993}, issn = {2076-0817}, abstract = {SARS-CoV-2 was the cause of the global pandemic that caused a total of 14.9 million deaths during the years 2020 and 2021, according to the WHO. The virus presents a mutation rate between $10^{-5}$ and $10^{-3}$ substitutions per nucleotide site per cell infection (s/n/c). Due to this, studies aimed at knowing the evolution of this virus could help us to foresee (through the future development of new detection strategies and vaccines that prevent the infection of this virus in human hosts) that a pandemic caused by this virus will be generated again. In this research, we performed a functional annotation and identification of changes in Nsp (non-structural proteins) domains in the coronavirus genome. The comparison of the 13 selected coronavirus pangenomes demonstrated a total of 69 protein families and 57 functions associated with the structural domain's differentials between genomes. A marked evolutionary conservation of non-structural proteins was observed. 
This allowed us to identify and classify highly pathogenic human coronaviruses into alpha, beta, gamma, and delta groups. The designed Nsp cluster provides insight into the trajectory of SARS-CoV-2, demonstrating that it continues to evolve rapidly. An evolutionary marker allows us to discriminate between phylogenetically divergent groups, viral genotypes, and variants between the alpha and betacoronavirus genera. These types of evolutionary studies provide a window of opportunity to use these Nsp as targets of viral therapies.}, } @article {pmid37761974, year = {2023}, author = {Merkushova, AV and Shikov, AE and Nizhnikov, AA and Antonets, KS}, title = {For Someone, You Are the Whole World: Host-Specificity of Salmonella enterica.}, journal = {International journal of molecular sciences}, volume = {24}, number = {18}, pages = {}, doi = {10.3390/ijms241813670}, pmid = {37761974}, issn = {1422-0067}, support = {MD-2302.2022.5//The Grant of the President of the Russian Federation/ ; }, abstract = {Salmonella enterica is a bacterial pathogen known to cause gastrointestinal infections in diverse hosts, including humans and animals. Despite extensive knowledge of virulence mechanisms, understanding the factors driving host specificity remains limited. In this study, we performed a comprehensive pangenome-wide analysis of S. enterica to identify potential loci determining preference towards certain hosts. We used a dataset of high-quality genome assemblies grouped into 300 reference clusters with a special focus on four host groups: humans, pigs, cattle, and birds. The reconstructed pangenome was shown to be open and enriched with the accessory component implying high genetic diversity. Notably, phylogenetic inferences did not correspond to the distribution of affected hosts, as large compact phylogenetic groups were absent. By performing a pangenome-wide association study, we identified potential host specificity determinants. 
These included multiple genes encoding proteins involved in distinct infection stages, e.g., secretion systems, surface structures, transporters, transcription regulators, etc. We also identified antibiotic resistance loci in host-adapted strains. Functional annotation corroborated the results obtained with significant enrichments related to stress response, antibiotic resistance, ion transport, and surface or extracellular localization. We suggested categorizing the revealed specificity factors into three main groups: pathogenesis, resistance to antibiotics, and propagation of mobile genetic elements (MGEs).}, } @article {pmid37759803, year = {2023}, author = {Naithani, S and Deng, CH and Sahu, SK and Jaiswal, P}, title = {Exploring Pan-Genomes: An Overview of Resources and Tools for Unraveling Structure, Function, and Evolution of Crop Genes and Genomes.}, journal = {Biomolecules}, volume = {13}, number = {9}, pages = {}, doi = {10.3390/biom13091403}, pmid = {37759803}, issn = {2218-273X}, abstract = {The availability of multiple sequenced genomes from a single species made it possible to explore intra- and inter-specific genomic comparisons at higher resolution and build clade-specific pan-genomes of several crops. The pan-genomes of crops constructed from various cultivars, accessions, landraces, and wild ancestral species represent a compendium of genes and structural variations and allow researchers to search for the novel genes and alleles that were inadvertently lost in domesticated crops during the historical process of crop domestication or in the process of extensive plant breeding. Fortunately, many valuable genes and alleles associated with desirable traits like disease resistance, abiotic stress tolerance, plant architecture, and nutrition qualities exist in landraces, ancestral species, and crop wild relatives. 
The novel genes from the wild ancestors and landraces can be introduced back to high-yielding varieties of modern crops by implementing classical plant breeding, genomic selection, and transgenic/gene editing approaches. Thus, pan-genomic represents a great leap in plant research and offers new avenues for targeted breeding to mitigate the impact of global climate change. Here, we summarize the tools used for pan-genome assembly and annotations, web-portals hosting plant pan-genomes, etc. Furthermore, we highlight a few discoveries made in crops using the pan-genomic approach and future potential of this emerging field of study.}, } @article {pmid37759383, year = {2023}, author = {Hafez, M and Gourlie, R and McDonald, M and Telfer, M and Carmona, M and Sautua, F and Moffat, C and Moolhuijzen, P and See, PT and Aboukhaddour, R}, title = {Evolution of the ToxB gene in Pyrenophora tritici-repentis and related species.}, journal = {Molecular plant-microbe interactions : MPMI}, volume = {}, number = {}, pages = {}, doi = {10.1094/MPMI-08-23-0114-FI}, pmid = {37759383}, issn = {0894-0282}, abstract = {Pyrenophora tritici-repentis is a destructive pathogen of wheat with global impact. It possesses a highly plastic open pangenome shaped by the gain and loss of effector genes. This study investigated the allelic variations in the chlorosis-encoding gene, ToxB, across 422 isolates representing all identified pathotypes and worldwide origins. To gain better insights into ToxB evolution, we examined its presence and variability in other Pyrenophora spp. A ToxB haplotype network was constructed, revealing the evolutionary relationships of this gene (20 haplotypes) across four Pyrenophora species. Notably, toxb, the homolog of ToxB, was detected for the first time in the barley pathogen Pyrenophora teres. The ToxB/toxb genes display evidence of selection that is characterized by loss of function, duplication, and diverse mutations. 
Among ToxB/toxb open reading frame, 72 mutations were identified, including 14 synonymous, 55 nonsynonymous, and 3 indel mutations. Remarkably, a ~5.6 Kb Copia-like retrotransposon, named Copia-1_Ptr, was found inserted in the toxb gene of a race 3 isolate. This insert disrupted the ToxB gene's function, a first case of effector gene disruption by a transposable element in Ptr. Additionally, a microsatellite with 25-nucleotide repeats (0 to 10) in the upstream region of ToxB suggested a potential mechanism influencing ToxB expression and regulation. Exploring ToxB-like protein distribution in other Ascomycetes revealed their presence in 19 additional species, including the Leotiomycetes class for the first time. The presence/absence pattern of ToxB-like proteins defied species relatedness compared to a phylogenetic tree, suggesting a past horizontal gene transfer event.}, } @article {pmid37754275, year = {2023}, author = {Ma, J and Zhao, H and Mo, S and Li, J and Ma, X and Tang, Y and Li, H and Liu, Z}, title = {Acquisition of Type I methyltransferase via horizontal gene transfer increases the drug resistance of Aeromonas veronii.}, journal = {Microbial genomics}, volume = {9}, number = {9}, pages = {}, doi = {10.1099/mgen.0.001107}, pmid = {37754275}, issn = {2057-5858}, abstract = {Aeromonas veronii is an opportunistic pathogen that affects both fish and mammals, including humans, leading to bacteraemia, sepsis, meningitis and even death. The increasing virulence and drug resistance of A. veronii are of significant concern and pose a severe risk to public safety. The Type I restriction-modification (RM) system, which functions as a bacterial defence mechanism, can influence gene expression through DNA methylation. However, little research has been conducted to explore its origin, evolutionary path, and relationship to virulence and drug resistance in A. veronii. In this study, we analysed the pan-genome of 233 A. 
veronii strains, and the results indicated that it was 'open', meaning that A. veronii has acquired additional genes from other species. This suggested that A. veronii had the potential to adapt and evolve rapidly, which might have contributed to its drug resistance. One Type I methyltransferase (MTase) and two complete Type I RM systems were identified, namely AveC4I, AveC4II and AveC4III in A. veronii strain C4, respectively. Notably, AveC4I was exclusive to A. veronii C4. Phylogenetic analysis revealed that AveC4I was derived from horizontal gene transfer from Thiocystis violascens and exchanged genes with the human pathogen Comamonas kerstersii. Single molecule real-time sequencing was applied to identify the motif methylated by AveC4I, which was unique and not recognized by any reported MTases in the REBASE database. We also annotated the functions and pathways of the genes containing the motif, revealing that AveC4I may control drug resistance in A. veronii C4. Our findings provide new insight on the mechanisms underlying drug resistance in pathogenic bacteria. By identifying the specific genes and pathways affected by AveC4I, this study may aid in the development of new therapeutic approaches to combat A. veronii infections.}, } @article {pmid37752302, year = {2023}, author = {Woolley, SA and Salavati, M and Clark, EL}, title = {Recent advances in the genomic resources for sheep.}, journal = {Mammalian genome : official journal of the International Mammalian Genome Society}, volume = {}, number = {}, pages = {}, pmid = {37752302}, issn = {1432-1777}, support = {BB/S01540X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/D/10002070/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/S01540X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Sheep (Ovis aries) provide a vital source of protein and fibre to human populations. 
In coming decades, as the pressures associated with rapidly changing climates increase, breeding sheep sustainably as well as producing enough protein to feed a growing human population will pose a considerable challenge for sheep production across the globe. High quality reference genomes and other genomic resources can help to meet these challenges by: (1) informing breeding programmes by adding a priori information about the genome, (2) providing tools such as pangenomes for characterising and conserving global genetic diversity, and (3) improving our understanding of fundamental biology using the power of genomic information to link cell, tissue and whole animal scale knowledge. In this review we describe recent advances in the genomic resources available for sheep, discuss how these might help to meet future challenges for sheep production, and provide some insight into what the future might hold.}, } @article {pmid37750924, year = {2023}, author = {Adhikari, T and Olukolu, B and Paudel, R and Pandey, A and Halterman, D and Louws, F}, title = {Genotyping-by-Sequencing Reveals Population Differentiation and Linkage Disequilibrium in Alternaria linariae from Tomato.}, journal = {Phytopathology}, volume = {}, number = {}, pages = {}, doi = {10.1094/PHYTO-07-23-0229-R}, pmid = {37750924}, issn = {0031-949X}, abstract = {Alternaria linariae (Neerg.) Simmons is an economically important foliar pathogen that causes early blight disease in tomatoes. Understanding genetic diversity, population genetic structure, and evolutionary potential is crucial to contemplating effective disease management strategies. We leveraged genotyping-by-sequencing (GBS) technology to compare genome-wide variation in 124 isolates of Alternaria spp. (A. alternata, A. linariae, and A. solani) for comparative genome analysis and to test the hypotheses of genetic differentiation and linkage disequilibrium (LD) in A. linariae collected from tomatoes in western North Carolina. 
We performed a pangenome-aware variant calling and filtering with GBSapp and identified 53,238 variants conserved across the reference genomes of three Alternaria spp. The highest marker density was observed on chromosome 1 (7 Mb). Both discriminant analysis of principal components (DAPC) and Bayesian model-based STRUCTURE analysis of A. linariae isolates revealed three subpopulations with minimal admixture. The genetic differentiation coefficient (FST) within A. linariae subpopulations were similar and high (0.86), indicating that alleles in the subpopulations are fixed and the genetic structure is likely due to restricted recombination. Analysis of molecular variance indicates higher variation among populations (89%) than within the population (11%). We found long-range LD between pairs of loci in A. linariae, supporting the hypothesis of low recombination expected for a fungal pathogen with limited asexual reproduction. Our findings provide evidence of a high level of population genetic differentiation in A. linariae, which reinforces the importance of developing tomato varieties with broad-spectrum resistance to various isolates of A. linariae.}, } @article {pmid37745608, year = {2023}, author = {Lin, MJ and Iyer, S and Chen, NC and Langmead, B}, title = {Measuring, visualizing and diagnosing reference bias with biastools.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.13.557552}, pmid = {37745608}, abstract = {A goal of recent alignment methods is to reduce reference bias, which occurs when reads containing non-reference alleles fail to align to their true point of origin. However, there is a lack of methods for systematically measuring, categorizing, and diagnosing reference bias. We present biastools, which analyzes and categorizes instances of reference bias. Biastools has different sets of functionality tailored to different scenarios, i.e. 
(a) when the donor genome is well-characterized and input reads are simulated, (b) when the donor is well-characterized and reads are real, and (c) when the donor is not well-characterized and reads are real. When possible, biastools divides instances of reference bias into categories according to their cause: bias due to loss, flux, or local misalignment. Biastools's scan mode detects large-scale mapping artifacts due to structural variation and flaws in the reference representation. Our findings confirm that including more variants in a graph genome alignment method results in fewer reference biases. We also find that end-to-end alignment modes are effective in reducing bias at insertions and deletions, compared to local aligners that allow soft clipping. Finally, we use biastools to characterize the ways in which using the new telomere-to-telomere human reference can improve bias at a large scale. In short, biastools is a tool uniquely focused on reference bias, making it a valuable resource as the field continues to develop new aligners and pangenome representations to reduce bias.}, } @article {pmid37744919, year = {2023}, author = {Deng, Y and Mou, T and Wang, J and Su, J and Yan, Y and Zhang, YQ}, title = {Characterization of three rapidly growing novel Mycobacterium species with significant polycyclic aromatic hydrocarbon bioremediation potential.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1225746}, pmid = {37744919}, issn = {1664-302X}, abstract = {Mycobacterium species exhibit high bioremediation potential for the degradation of polycyclic aromatic hydrocarbons (PAHs) that are significant environmental pollutants. In this study, three Gram-positive, rapidly growing strains (YC-RL4[T], MB418[T], and HX176[T]) were isolated from petroleum-contaminated soils and were classified as Mycobacterium within the family Mycobacteriaceae. 
Genomic average nucleotide identity (ANI; < 95%) and digital DNA-DNA hybridization (dDDH; < 70%) values relative to other Mycobacterium spp. indicated that the strains represented novel species. The morphological, physiological, and chemotaxonomic characteristics of the isolates also supported their affiliation with Mycobacterium and their delineation as novel species. The strains were identified as Mycobacterium adipatum sp. nov. (type strain YC-RL4[T] = CPCC 205684[T] = CGMCC 1.62027[T]), Mycobacterium deserti sp. nov. (type strain MB418[T] = CPCC 205710[T] = KCTC 49782[T]), and Mycobacterium hippophais sp. nov. (type strain HX176[T] = CPCC 205372[T] = KCTC 49413[T]). Genes encoding enzymes involved in PAH degradation and metal resistance were present in the genomes of all three strains. Specifically, genes encoding alpha subunits of aromatic ring-hydroxylating dioxygenases were encoded by the genomes. The genes were also identified as core genes in a pangenomic analysis of the three strains along with 70 phylogenetically related mycobacterial strains that were previously classified as Mycolicibacterium. Notably, strain YC-RL4[T] could not only utilize phthalates as their sole carbon source for growth, but also convert di-(2-ethylhexyl) phthalate into phthalic acid. 
These results indicated that strains YC-RL4[T], MB418[T], and HX176[T] were important resources with significant bioremediation potential in soils contaminated by PAHs and heavy metals.}, } @article {pmid37740204, year = {2023}, author = {Bhattacharya, A and Das, S and Bhattacharjee, MJ and Mukherjee, AK and Khan, MR}, title = {Comparative pangenomic analysis of predominant human vaginal lactobacilli strains towards population-specific adaptation: understanding the role in sustaining a balanced and healthy vaginal microenvironment.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {565}, pmid = {37740204}, issn = {1471-2164}, mesh = {Humans ; Female ; *Lactobacillus/genetics ; Phylogeny ; *Bacteriocins/genetics ; Biological Evolution ; Cell Membrane ; }, abstract = {The vaginal microenvironment of healthy women has a predominance of Lactobacillus crispatus, L. iners, L. gasseri, and L. jensenii. The genomic repertoire of the strains of each of the species associated with the key attributes thereby regulating a healthy vaginal environment needs a substantial understanding. We studied all available human strains of the four lactobacilli across different countries, isolated from vaginal and urinal sources through phylogenetic and pangenomic approaches. The findings showed that L. iners has the highest retention of core genes, and L. crispatus has more gene gain in the evolutionary stratum. Interestingly, L. gasseri and L. jensenii demonstrated major population-specific gene-cluster gain/loss associated with bacteriocin synthesis, iron chelating, adherence, zinc and ATP binding proteins, and hydrolase activity. Gene ontology enrichment analysis revealed that L. crispatus strains showed greater enrichment of functions related to plasma membrane integrity, biosurfactant, hydrogen peroxide synthesis, and iron sequestration as an ancestral derived core function, while bacteriocin and organic acid biosynthesis are strain-specific accessory enriched functions. 
L. jensenii showed greater enrichment of functions related to adherence, aggregation, and exopolysaccharide synthesis. Notably, the key functionalities are heterogeneously enriched in some specific strains of L. iners and L. gasseri. This study shed light on the genomic features and their variability that provides advantageous attributes to predominant vaginal Lactobacillus species maintaining vaginal homeostasis. These findings evoke the need to consider region-specific candidate strains of Lactobacillus to formulate prophylactic measures against vaginal dysbiosis for women's health.}, } @article {pmid37738420, year = {2023}, author = {Welgemoed, T and Duong, TA and Barnes, I and Stukenbrock, EH and Berger, DK}, title = {Population genomic analyses suggest recent dispersal events of the pathogen Cercospora zeina into East and Southern African maize cropping systems.}, journal = {G3 (Bethesda, Md.)}, volume = {}, number = {}, pages = {}, doi = {10.1093/g3journal/jkad214}, pmid = {37738420}, issn = {2160-1836}, abstract = {A serious factor hampering global maize production is gray leaf spot disease. Cercospora zeina is one of the causative pathogens, but population genomics analysis of C. zeina is lacking. We conducted whole-genome Illumina sequencing of a representative set of 30 C. zeina isolates from Kenya and Uganda (East Africa) and Zambia, Zimbabwe and South Africa (Southern Africa). Selection of the diverse set was based on microsatellite data from a larger collection of the pathogen. Pangenome analysis of the C. zeina isolates was done by (i) de novo assembly of the reads with SPAdes, (ii) annotation with BRAKER, and (iii) protein clustering with OrthoFinder. A published long-read assembly of C. zeina (CMW25467) from Zambia was included and annotated using the same pipeline. This analysis revealed 790 non-shared accessory and 10,677 shared core orthogroups (genes) between the 31 isolates. 
Accessory gene content was largely shared between isolates from all countries, with a few genes unique to populations from Southern Africa (32) or East Africa (6). There was a significantly higher proportion of effector genes in the accessory secretome (44%) compared to the core secretome (24%). PCA, ADMIXTURE, and phylogenetic analysis using a neighbour-net network indicated a population structure with a geographical subdivision between the East African isolates and the Southern African isolates, although gene flow was also evident. The small pangenome and partial population differentiation indicated recent dispersal of C. zeina into Africa, possibly from two regional founder populations, followed by recurrent gene flow owing to widespread maize production across sub-Saharan Africa.}, } @article {pmid37736763, year = {2023}, author = {Peña-Montenegro, TD and Kleindienst, S and Allen, AE and Eren, AM and McCrow, JP and Sánchez-Calderón, JD and Arnold, J and Joye, SB}, title = {Species-specific responses of marine bacteria to environmental perturbation.}, journal = {ISME communications}, volume = {3}, number = {1}, pages = {99}, pmid = {37736763}, issn = {2730-6151}, support = {ECOGIG-2//Gulf of Mexico Research Initiative (GoMRI)/ ; Ecogig-2//Gulf of Mexico Research Initiative (GoMRI)/ ; Ecogig-2//Gulf of Mexico Research Initiative (GoMRI)/ ; Ecogig-2//Gulf of Mexico Research Initiative (GoMRI)/ ; }, abstract = {Environmental perturbations shape the structure and function of microbial communities. Oil spills are a major perturbation and resolving spills often requires active measures like dispersant application that can exacerbate the initial disturbance. Species-specific responses of microorganisms to oil and dispersant exposure during such perturbations remain largely unknown. 
We merged metatranscriptomic libraries with pangenomes to generate Core-Accessory Metatranscriptomes (CA-Metatranscriptomes) for two microbial hydrocarbon degraders that played important roles in the aftermath of the Deepwater Horizon oil spill. The Colwellia CA-Metatranscriptome illustrated pronounced dispersant-driven acceleration of core (~41%) and accessory gene (~59%) transcription, suggesting an opportunistic strategy. Marinobacter responded to oil exposure by expressing mainly accessory genes (~93%), suggesting an effective hydrocarbon-degrading lifestyle. The CA-Metatranscriptome approach offers a robust way to identify the underlying mechanisms of key microbial functions and highlights differences of specialist-vs-opportunistic responses to environmental disturbance.}, } @article {pmid37732781, year = {2023}, author = {Dong, X and Yu, Y and Liu, J and Cao, D and Xiang, Y and Bi, K and Yuan, X and Li, S and Wu, T and Zhang, Y}, title = {Whole-genome sequencing provides insights into a novel species: Providencia hangzhouensis associated with urinary tract infections.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0122723}, doi = {10.1128/spectrum.01227-23}, pmid = {37732781}, issn = {2165-0497}, abstract = {Providencia rettgeri is a clinically significant opportunistic pathogen that is involved in urinary tract infections. Due to the resolution limitations of identification, distinguishing P. rettgeri from closely related species is challenging by commercial biochemical test systems. Here, we first reported a novel species, Providencia hangzhouensis, which had been misidentified as P. rettgeri. Exhibiting ≤91.97% average nucleotide identity (ANI) and ≤46.10% in silico DNA-DNA hybridization values with all known Providencia species, P. hangzhouensis falls well beneath the established species-defining thresholds. We conducted a population genomics analysis of P. hangzhouensis isolates worldwide. Our study revealed that P. 
hangzhouensis has emerged in many countries and has formed several transmission clusters. We found that P. hangzhouensis shared the highest ANI values (91.54% and 91.97%) with P. rettgeri and P. huaxiensis, respectively. The pan-genome analysis revealed that these three species possessed a similar component of pan-genomes. Two genes associated with metabolism, folE2 and ccmM, were identified to be specific to P. hangzhouensis. Furthermore, we also observed that carbapenem-resistance genes frequently occur in P. hangzhouensis with the blaIMP-27 being the most prevalent (46.15%; 36/78). The emergence of P. hangzhouensis is often accompanied by extended-spectrum β-lactamase and carbapenem-resistance genes, and calls for tailored surveillance of this species as a clinically relevant species in the future. IMPORTANCE Our study has identified and characterized a novel species, Providencia hangzhouensis, which is associated with urinary tract infections and was previously misidentified as Providencia rettgeri. Through this study, we have identified specific genes unique to P. hangzhouensis, which could serve as marker genes for rapid PCR identification. Additionally, our findings suggest that the emergence of P. 
hangzhouensis is often accompanied by extended-spectrum β-lactamase and carbapenem-resistance genes, emphasizing the need for attention to clinical management and the importance of accurate species identification and proper drug use.}, } @article {pmid37728044, year = {2023}, author = {Lin, Y and Zhu, Y and Cui, Y and Qian, H and Yuan, Q and Chen, R and Lin, Y and Chen, J and Zhou, X and Shi, C and He, H and Hu, T and Gu, C and Yu, X and Zhu, X and Wang, Y and Qian, Q and Zhang, C and Wang, F and Shang, L}, title = {Identification of natural allelic variation in TTL1 controlling thermotolerance and grain size by a rice super pan-genome.}, journal = {Journal of integrative plant biology}, volume = {}, number = {}, pages = {}, doi = {10.1111/jipb.13568}, pmid = {37728044}, issn = {1744-7909}, abstract = {Continuously increasing global temperatures present great challenges to food security. Grain size, one of the critical components determining grain yield in rice (Oryza sativa L.), is a prime target for genetic breeding. Thus, there is an immediate need for genetic improvement in rice to maintain grain yield under heat stress. However, quantitative trait loci (QTLs) endowing heat stress tolerance and grain size in rice are extremely rare. Here, we identified a novel negative regulator with pleiotropic effects, Thermo-Tolerance and grain Length 1 (TTL1), from the super pan-genomic and transcriptomic data. Loss-of-function mutations in TTL1 enhanced heat tolerance, and caused an increase in grain size by coordinating cell expansion and proliferation. TTL1 was shown to function as a transcriptional regulator and localized to the nucleus and cell membrane. Furthermore, haplotype analysis showed that hap [L] and hap [S] of TTL1 were obviously correlated with variations of thermotolerance and grain size in a core collection of cultivars. 
Genome evolution analysis of available rice germplasms suggested that TTL1 was selected during domestication of the indica and japonica rice subspecies, but still had much breeding potential for increasing grain length and thermotolerance. These findings provide insights into TTL1 as a novel potential target for development of high-yield and thermotolerant rice varieties. This article is protected by copyright. All rights reserved.}, } @article {pmid37727231, year = {2023}, author = {Rios Galicia, B and Sáenz, JS and Yergaliyev, T and Camarinha-Silva, A and Seifert, J}, title = {Host specific adaptations of Ligilactobacillus aviarius to poultry.}, journal = {Current research in microbial sciences}, volume = {5}, number = {}, pages = {100199}, pmid = {37727231}, issn = {2666-5174}, abstract = {The genus Ligilactobacillus encompasses species adapted to vertebrate hosts and fermented food. Their genomes encode adaptations to the host lifestyle. Reports of gut microbiota from chicken and turkey gastrointestinal tract have shown a high persistence of Ligilactobacillus aviarius along the digestive system compared to other species found in the same host. However, its adaptations to poultry as a host has not yet been described. In this work, the pan-genome of Ligilactobacillus aviarius was explored to describe the functional adaptability to the gastrointestinal environment. The core genome is composed of 1179 gene clusters that are present at least in one copy that codifies to structural, ribosomal and biogenesis proteins. The rest of the identified regions were classified into three different functional clusters of orthologous groups (clusters) that codify carbohydrate metabolism, envelope biogenesis, viral defence mechanisms, and mobilome inclusions. The pan-genome of Ligilactobacillus aviarius is a closed pan-genome, frequently found in poultry and highly prevalent across chicken faecal samples. The genome of L. 
aviarius codifies different clusters of glycoside hydrolases and glycosyltransferases that mediate interactions with the host cells. Accessory features, such as antiviral mechanisms and prophage inclusions, variate amongst strains from different GIT sections. This information provides hints about the interaction of this species with viral particles and other bacterial species. This work highlights functional adaptability traits present in L. aviarius that make it a dominant key member of the poultry gut microbiota and enlightens the convergent ecological relation of this species to the poultry gut environment.}, } @article {pmid37722405, year = {2023}, author = {Low, SJ and O'Neill, MT and Kerry, WJ and Krysiak, M and Papadakis, G and Whitehead, LW and Savic, I and Prestedge, J and Williams, L and Cooney, JP and Tran, T and Lim, CK and Caly, L and Towns, JM and Bradshaw, CS and Fairley, C and Chow, EPF and Chen, MY and Pellegrini, M and Pasricha, S and Williamson, DA}, title = {Rapid detection of monkeypox virus using a CRISPR-Cas12a mediated assay: a laboratory validation and evaluation study.}, journal = {The Lancet. Microbe}, volume = {}, number = {}, pages = {}, doi = {10.1016/S2666-5247(23)00148-9}, pmid = {37722405}, issn = {2666-5247}, abstract = {BACKGROUND: The 2022 outbreak of mpox (formerly known as monkeypox) led to the spread of monkeypox virus (MPXV) in over 110 countries, demanding effective disease management and surveillance. As current diagnostics rely largely on centralised laboratory testing, our objective was to develop a simple rapid point-of-care assay to detect MPXV in clinical samples using isothermal amplification coupled with CRISPR and CRISPR-associated protein (Cas) technology.

METHODS: In this proof-of-concept study, we developed a portable isothermal amplification CRISPR-Cas12a-based assay for the detection of MPXV. We designed a panel of 22 primer-guide RNA sets using pangenome and gene-agnostic approaches, and subsequently shortlisted the three sets producing the strongest signals for evaluation of analytical sensitivity and specificity using a fluorescence-based readout. The set displaying 100% specificity and the lowest limit of detection (LOD) was selected for further assay validation using both a fluorescence-based and lateral-flow readout. Assay specificity was confirmed using a panel of viral and bacterial pathogens. Finally, we did a blind concordance study on genomic DNA extracted from 185 clinical samples, comparing assay results with a gold-standard quantitative PCR (qPCR) assay. We identified the optimal time to detection and analysed the performance of the assay relative to qPCR using receiver operating characteristic (ROC) curves. We also assessed the compatibility with lateral-flow strips, both visually and computationally, where strips were interpreted blinded to the fluorescence results on the basis of the presence or absence of test bands.

FINDINGS: With an optimal run duration of approximately 45 min from isothermal amplification to CRISPR-assay readout, the MPXV recombinase polymerase amplification CRISPR-Cas12a-based assay with the selected primer-guide set had an LOD of 1 copy per μL and 100% specificity against tested viral pathogens. Blinded concordance testing of 185 clinical samples resulted in 100% sensitivity (95% CI 89·3-100) and 99·3% specificity (95% CI 95·7-100) using the fluorescence readout. For optimal time to detection by fluorescence readout, we estimated the areas under the ROC curve to be 0·98 at 2 min and 0·99 at 4 min. Lateral-flow strips had 100% sensitivity (89·3-100) and 98·6% specificity (94·7-100) with both visual and computational assessment. Overall, lateral-flow results were highly concordant with fluorescence-based readouts (179 of 185 tests, 96·8% concordant), with discrepancies associated with low viral load samples.

INTERPRETATION: Our assay for the diagnosis of mpox displayed good performance characteristics compared with qPCR. Although optimisation of the assay will be required before deployment, its usability and versatility present a potential solution to MPXV detection in low-resource and remote settings, as well as a means of community-based, on-site testing.

FUNDING: Victorian Medical Research Accelerator Fund and the Australian Government Department of Health.}, } @article {pmid37714713, year = {2023}, author = {Dai, X and Bian, P and Hu, D and Luo, F and Huang, Y and Jiao, S and Wang, X and Gong, M and Li, R and Cai, Y and Wen, J and Yang, Q and Deng, W and Nanaei, HA and Wang, Y and Wang, F and Zhang, Z and Rosen, BD and Heller, R and Jiang, Y}, title = {A Chinese indicine pangenome reveals a wealth of novel structural variants introgressed from other Bos species.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.277481.122}, pmid = {37714713}, issn = {1549-5469}, abstract = {Chinese indicine cattle harbor a much higher genetic diversity compared with other domestic cattle, but their genome architecture remains uninvestigated. Using PacBio HiFi sequencing data from 10 Chinese indicine cattle across southern China, we assembled 20 high-quality partially phased genomes and integrated them into a multiassembly graph containing 148.5 Mb (5.6%) of novel sequence. We identified 156,009 high-confidence nonredundant structural variants (SVs) and 206 SV hotspots spanning ∼195 Mb of gene-rich sequence. We detected 34,249 archaic introgressed fragments in Chinese indicine cattle covering 1.93 Gb (73.3%) of the genome. We inferred an average of 3.8%, 3.2%, 1.4%, and 0.5% of introgressed sequence originating, respectively, from banteng-like, kouprey-like, gayal-like, and gaur-like Bos species, as well as 0.6% of unknown origin. Introgression from multiple donors might have contributed to the genetic diversity of Chinese indicine cattle. 
Altogether, this study highlights the contribution of interspecies introgression to the genomic architecture of an important livestock population and shows how exotic genomic elements can contribute to the genetic variation available for selection.}, } @article {pmid37710263, year = {2023}, author = {Zhu, Q and Dovletgeldiyev, A and Shen, C and Li, K and Hu, S and He, Z}, title = {Comparative genomic analysis of Fusobacterium nucleatum reveals high intra-species diversity and cgmlst marker construction.}, journal = {Gut pathogens}, volume = {15}, number = {1}, pages = {43}, pmid = {37710263}, issn = {1757-4749}, abstract = {BACKGROUND: Fusobacterium nucleatum is one of the most important anaerobic opportunistic pathogens in the oral and intestinal tracts of human and animals. It can cause various diseases such as infections, Lemierre's syndrome, oral cancer and colorectal cancer. The comparative genomic studies on the population genome level have not been reported.

RESULTS: We analyzed all publicly available Fusobacterium nucleatum genomic data for a comparative genomic study, focusing on the pan-genomic features, virulence genes, plasmid genomes and developed cgmlst molecular markers. We found the pan-genome shows a clear open tendency and most plasmids in Fusobacterium nucleatum are mainly transmitted intraspecifically.

CONCLUSIONS: Our comparative analysis of Fusobacterium nucleatum systematically revealed the open pan-genomic features and phylogenetic tree based on cgmlst molecular markers. What's more, we also identified common plasmid typing among genomes. We hope that our study will provide a theoretical basis for subsequent functional studies.}, } @article {pmid37710174, year = {2023}, author = {Mahboob, S and Ullah, N and Farhan Ul Haque, M and Rauf, W and Iqbal, M and Ali, A and Rahman, M}, title = {Genomic characterization and comparative genomic analysis of HS-associated Pasteurella multocida serotype B:2 strains from Pakistan.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {546}, pmid = {37710174}, issn = {1471-2164}, support = {NRPU-7254//Higher Education Commision, Pakistan/ ; }, mesh = {Animals ; Cattle ; Pakistan ; *Pasteurella multocida/genetics ; Serogroup ; *Hemorrhagic Septicemia/veterinary ; Genomics ; Buffaloes ; }, abstract = {BACKGROUND: Haemorrhagic septicaemia (HS) is a highly fatal and predominant disease in livestock, particularly cattle and buffalo in the tropical regions of the world. Pasteurella multocida (P. multocida), serotypes B:2 and E:2, are reported to be the main causes of HS wherein serotype B:2 is more common in Asian countries including Pakistan and costs heavy financial losses every year. As yet, very little molecular and genomic information related to the HS-associated serotypes of P. multocida isolated from Pakistan is available. Therefore, this study aimed to explore the characteristics of novel bovine isolates of P. multocida serotype B:2 at the genomic level and perform comparative genomic analysis of various P. multocida strains from Pakistan to better understand the genetic basis of pathogenesis and virulence.

RESULTS: To understand the genomic variability and pathogenomics, we characterized three HS-associated P. multocida serotype B:2 strains isolated from the Faisalabad (PM1), Peshawar (PM2) and Okara (PM3) districts of Punjab, Pakistan. Together with the other nine publicly available Pakistani-origin P. multocida strains and a reference strain Pm70, a comparative genomic analysis was performed. The sequenced strains were characterized as serotype B and belong to ST-122. The strains contain no plasmids; however, each strain contains at least two complete prophages. The pan-genome analysis revealed a higher number of core genes indicating a close resemblance to the studied genomes and very few genes (1%) of the core genome serve as a part of virulence, disease, and defense mechanisms. We further identified that studied P. multocida B:2 strains harbor common antibiotic resistance genes, specifically PBP3 and EF-Tu. Remarkably, the distribution of virulence factors revealed that OmpH and plpE were not present in any P. multocida B:2 strains while the presence of these antigens was reported uniformly in all serotypes of P. multocida.

CONCLUSION: This study's findings indicate the absence of OmpH and PlpE in the analyzed P. multocida B:2 strains, which are known surface antigens and provide protective immunity against P. multocida infection. The availability of additional genomic data on P. multocida B:2 strains from Pakistan will facilitate the development of localized therapeutic agents and rapid diagnostic tools specifically targeting HS-associated P. multocida B:2 strains.}, } @article {pmid37695773, year = {2023}, author = {Le Naour-Vernet, M and Charriat, F and Gracy, J and Cros-Arteil, S and Ravel, S and Veillet, F and Meusnier, I and Padilla, A and Kroj, T and Cesari, S and Gladieux, P}, title = {Adaptive evolution in virulence effectors of the rice blast fungus Pyricularia oryzae.}, journal = {PLoS pathogens}, volume = {19}, number = {9}, pages = {e1011294}, pmid = {37695773}, issn = {1553-7374}, mesh = {Virulence/genetics ; Amino Acid Sequence ; *Amino Acids ; *Ascomycota/genetics ; }, abstract = {Plant pathogens secrete proteins called effectors that target host cellular processes to promote disease. Recently, structural genomics has identified several families of fungal effectors that share a similar three-dimensional structure despite remarkably variable amino-acid sequences and surface properties. To explore the selective forces that underlie the sequence variability of structurally-analogous effectors, we focused on MAX effectors, a structural family of effectors that are major determinants of virulence in the rice blast fungus Pyricularia oryzae. Using structure-informed gene annotation, we identified 58 to 78 MAX effector genes per genome in a set of 120 isolates representing seven host-associated lineages. The expression of MAX effector genes was primarily restricted to the early biotrophic phase of infection and strongly influenced by the host plant. 
Pangenome analyses of MAX effectors demonstrated extensive presence/absence polymorphism and identified gene loss events possibly involved in host range adaptation. However, gene knock-in experiments did not reveal a strong effect on virulence phenotypes suggesting that other evolutionary mechanisms are the main drivers of MAX effector losses. MAX effectors displayed high levels of standing variation and high rates of non-synonymous substitutions, pointing to widespread positive selection shaping the molecular diversity of MAX effectors. The combination of these analyses with structural data revealed that positive selection acts mostly on residues located in particular structural elements and at specific positions. By providing a comprehensive catalog of amino acid polymorphism, and by identifying the structural determinants of the sequence diversity, our work will inform future studies aimed at elucidating the function and mode of action of MAX effectors.}, } @article {pmid37695632, year = {2023}, author = {Naveed, M and Mahmood, S and Aziz, T and Azeem, A and Hussain, I and Waseem, M and Ali, A and Alharbi, M and Alshammari, A and Alasmari, AF}, title = {Designing a novel chimeric multi-epitope vaccine subunit against Staphylococcus argenteus through artificial intelligence approach integrating pan-genome analysis, in vitro identification, and immunogenicity profiling.}, journal = {Journal of biomolecular structure \& dynamics}, volume = {}, number = {}, pages = {1--16}, doi = {10.1080/07391102.2023.2256881}, pmid = {37695632}, issn = {1538-0254}, abstract = {Staphylococcus argenteus is a newly identified pathogen that causes respiratory tract infections, skin infections, such as cellulitis, abscesses, and impetigo, and currently, there is no licensed vaccine available against it. To develop a vaccine against S. argenteus, a bacterial pan-genome analysis was applied to identify potential vaccine candidates. 
A total of 4908 core proteins were retrieved and utilized for identifying four proteins, including SG38 Panton-Valentine leukocidin LukS-PV protein, SG62 staphylococcal enterotoxin type A protein, SG39 enterotoxin B protein, and SG43 enterotoxin type C3 protein as potential vaccine candidates. Epitopes were predicted for these proteins using different types of B and T-cell epitope prediction tools, and only those with a non-toxic profile, antigenic, non-allergenic, and immunogenic were selected. The selected epitopes were linked to each other to form a multi-epitope vaccine construct, which was further linked to the PADRE sequence (AKFVAAWTLKAAA) and 50s ribosomal L7/L12 protein to enhance the vaccine's antigenicity. The three-dimensional structure of the vaccine construct was assessed to determine its binding affinity with key Toll-like receptor 9 (TLR-9) and Toll-like receptor 5 (TLR-5) immune cell receptors. Our findings demonstrate that the vaccine exhibits favorable binding interactions with these immune cell receptors, indicating its potential efficacy. Molecular dynamic simulations further confirmed the accessibility of vaccine epitopes to the host immune system, substantiating its ability to elicit protective immune responses. Taken together, this study highlights the promising candidacy of the modeled vaccine construct for future in vivo and in vitro experimental investigations. Communicated by Ramaswamy H. 
Sarma.}, } @article {pmid37692398, year = {2023}, author = {Villacís, JE and Castelán-Sánchez, HG and Rojas-Vargas, J and Rodríguez-Cruz, UE and Albán, V and Reyes, JA and Meza-Rodríguez, PM and Dávila-Ramos, S and Villavicencio, F and Galarza, M and Gestal, MC}, title = {Emergence of Raoultella ornithinolytica in human infections from different hospitals in Ecuador with OXA-48-producing resistance.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1216008}, pmid = {37692398}, issn = {1664-302X}, abstract = {PURPOSE: The purpose of this study was to highlight the clinical and molecular features of 13 Raoultella ornithinolytica strains isolated from clinical environments in Ecuador, and to perform comparative genomics with previously published genomes of Raoultella spp. As Raoultella is primarily found in environmental, clinical settings, we focused our work on identifying mechanisms of resistance that can provide this bacterium an advantage to establish and persist in hospital environments.

METHODS: We analyzed 13 strains of Raoultella ornithinolytica isolated from patients with healthcare associated infections (HAI) in three hospitals in Quito and one in Santo Domingo de Los Tsáchilas, Ecuador, between November 2017 and April 2018. These isolates were subjected to phenotypic antimicrobial susceptibility testing, end-point polymerase chain reaction (PCR) to detect the presence of carbapenemases and whole-genome sequencing.

RESULTS: Polymerase chain reaction revealed that seven isolates were positive isolates for blaOXA-48 and one for blaKPC-2 gene. Of the seven strains that presented the blaOXA-48 gene, six harbored it on an IncFII plasmid, one was inserted into the bacterial chromosome. The blaKPC gene was detected in an IncM2/IncR plasmid. From the bioinformatics analysis, nine genomes had the gene blaOXA-48, originating from Ecuador. Moreover, all R. ornithinolytica strains contained the ORN-1 gene, which confers resistance for β-lactams, such as penicillins and cephalosporins. Comparative genome analysis of the strains showed that the pangenome of R. ornithinolytica is considered an open pangenome, with 27.77% of core genes, which could be explained by the fact that the antibiotic resistance genes in the ancestral reconstruction are relatively new, suggesting that this genome is constantly incorporating new genes.

CONCLUSION: These results reveal the genome plasticity of R. ornithinolytica, particularly in acquiring antibiotic-resistance genes. The genomic surveillance and infectious control of these uncommon species are important since they may contribute to the burden of antimicrobial resistance and human health.}, } @article {pmid37690289, year = {2023}, author = {Sarker, P and Mitro, A and Hoque, H and Hasan, MN and Nurnabi Azad Jewel, GM}, title = {Identification of potential novel therapeutic drug target against Elizabethkingia anophelis by integrative pan and subtractive genomic analysis: An in silico approach.}, journal = {Computers in biology and medicine}, volume = {165}, number = {}, pages = {107436}, doi = {10.1016/j.compbiomed.2023.107436}, pmid = {37690289}, issn = {1879-0534}, abstract = {Elizabethkingia anophelis is a human pathogen responsible for severe nosocomial infections in neonates and immunocompromised patients. The significantly higher mortality rate from E. anophelis infections and the lack of available regimens highlight the critical need to explore novel drug targets. The current study investigated effective novel drug targets by employing a comprehensive in silico subtractive genomic approach integrated with pangenomic analysis of E. anophelis strains. A total of 2809 core genomic proteins were found by pangenomic analysis of non-paralogous proteins. Subsequently, 156 pathogen-specific, 442 choke point, 202 virulence factor, 53 antibiotic resistant and 119 host-pathogen interacting proteins were identified in E. anophelis. By subtractive genomic approach, at first 791 proteins were found to be indispensable for the survival of E. anophelis. 558 and 315 proteins were detected as non-homologous to human and gut microflora respectively. Following that 245 cytoplasmic, 245 novel, and 23 broad-spectrum targets were selected and finally four proteins were considered as potential therapeutic targets of E. 
anophelis based on highest degree score in PPI network. Among those, three proteins were subjected to molecular docking and subsequent MD simulation as one protein did not contain a plausible binding pocket with sufficient surface area and volume. All the complexes were found to be stable and compact in 100 ns molecular dynamics simulation studies as measured by RMSD, RMSF, and Rg. These three short-listed targets identified in this study may lead to the development of novel antimicrobials capable of curing infections and pave the way to prevent and control the disease progression caused by the deadly agent E. anophelis.}, } @article {pmid37684624, year = {2023}, author = {Nageeb, WM and Hetta, HF}, title = {Pangenome analysis of Corynebacterium striatum: insights into a neglected multidrug-resistant pathogen.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {252}, pmid = {37684624}, issn = {1471-2180}, mesh = {*Corynebacterium/genetics ; *Genomics ; Multigene Family ; Anti-Bacterial Agents/pharmacology ; Prophages/genetics ; }, abstract = {BACKGROUND: Over the past two decades, Corynebacterium striatum has been increasingly isolated from clinical cultures with most isolates showing increased antimicrobial resistance (AMR) to last resort agents. Advances in the field of pan genomics would facilitate the understanding of the clinical significance of such bacterial species previously thought to be among commensals paving the way for identifying new drug targets and control strategies.

METHODS: We constructed a pan-genome using 310 genome sequences of C. striatum. Pan-genome analysis was performed using three tools including Roary, PIRATE, and PEPPAN. AMR genes and virulence factors have been studied in relation to core genome phylogeny. Genomic Islands (GIs), Integrons, and Prophage regions have been explored in detail.

RESULTS: The pan-genome ranges between a total of 5253-5857 genes with 2070 - 1899 core gene clusters. Some antimicrobial resistance genes have been identified in the core genome portion, but most of them were located in the dispensable genome. In addition, some well-known virulence factors described in pathogenic Corynebacterium species were located in the dispensable genome. A total of 115 phage species have been identified with only 44 intact prophage regions.

CONCLUSION: This study presents a detailed comparative pangenome report of C. striatum. The species show a very slowly growing pangenome with relatively high number of genes in the core genome contributing to lower genomic variation. Prophage elements carrying AMR and virulence elements appear to be infrequent in the species. GIs appear to offer a prominent role in mobilizing antibiotic resistance genes in the species and integrons occur at a frequency of 50% in the species. Control strategies should be directed against virulence and resistance determinants carried on the core genome and those frequently occurring in the accessory genome.}, } @article {pmid37679681, year = {2023}, author = {Wang, Y and Xu, X and Chen, H and Yang, F and Xu, B and Wang, K and Liu, Q and Liang, G and Zhang, R and Jiao, X and Zhang, Y}, title = {Assessment of beneficial effects and identification of host adaptation-associated genes of Ligilactobacillus salivarius isolated from badgers.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {530}, pmid = {37679681}, issn = {1471-2164}, support = {PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority 
Academic Program Development of Jiangsu Higher Education Institutions/ ; }, mesh = {Animals ; Humans ; *Ligilactobacillus salivarius ; Host Adaptation ; Phylogeny ; Chickens ; Acclimatization ; Animals, Domestic ; }, abstract = {BACKGROUND: Ligilactobacillus salivarius has been frequently isolated from the gut microbiota of humans and domesticated animals and has been studied as a candidate probiotic. Badger (Meles meles) is known as a "generalist" species that consumes complex foods and exhibits tolerance and resistance to certain pathogens, which can be partly attributed to the beneficial microbes such as L. salivarius in the gut microbiota. However, our understanding of the beneficial traits and genomic features of badger-originated L. salivarius remains elusive.

RESULTS: In this study, nine L. salivarius strains were isolated from wild badgers' feces, one of which exhibited good probiotic properties. Complete genomes of the nine L. salivarius strains were generated, and comparative genomic analysis was performed with the publicly available complete genomes of L. salivarius obtained from humans and domesticated animals. The strains originating from badgers harbored a larger genome, a higher number of protein-coding sequences, and functionally annotated genes than those originating from humans and chickens. The pan-genome phylogenetic tree demonstrated that the strains originating from badgers formed a separate clade, and totally 412 gene families (12.6% of the total gene families in the pan-genome) were identified as genes gained by the last common ancestor of the badger group. The badger group harbored significantly more gene families responsible for the degradation of complex carbohydrate substrates and production of polysaccharides than strains from other hosts; many of these were acquired by gene gain events.

CONCLUSIONS: A candidate probiotic and nine L. salivarius complete genomes were obtained from the badgers' gut microbiome, and several beneficial genes were identified to be specifically present in the badger-originated strains that were gained in the evolution. Our study provides novel insights into the adaptation of L. salivarius to the intestinal habitat of wild badgers and provides valuable strain and genome resources for the development of L. salivarius as a probiotic.}, } @article {pmid37679363, year = {2023}, author = {Liu, F and Zhao, J and Sun, H and Xiong, C and Sun, X and Wang, X and Wang, Z and Jarret, R and Wang, J and Tang, B and Xu, H and Hu, B and Suo, H and Yang, B and Ou, L and Li, X and Zhou, S and Yang, S and Liu, Z and Yuan, F and Pei, Z and Ma, Y and Dai, X and Wu, S and Fei, Z and Zou, X}, title = {Genomes of cultivated and wild Capsicum species provide insights into pepper domestication and population differentiation.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {5487}, pmid = {37679363}, issn = {2041-1723}, mesh = {*Capsicum/genetics ; Domestication ; Vegetables ; Fruit/genetics ; Crops, Agricultural/genetics ; Camphor ; Menthol ; *Piper nigrum ; }, abstract = {Pepper (Capsicum spp.) is one of the earliest cultivated crops and includes five domesticated species, C. annuum var. annuum, C. chinense, C. frutescens, C. baccatum var. pendulum and C. pubescens. Here, we report a pepper graph pan-genome and a genome variation map of 500 accessions from the five domesticated Capsicum species and close wild relatives. We identify highly differentiated genomic regions among the domesticated peppers that underlie their natural variations in flowering time, characteristic flavors, and unique resistances to biotic and abiotic stresses. Domestication sweeps detected in C. annuum var. annuum and C. baccatum var. 
pendulum are mostly different, and the common domestication traits, including fruit size, shape and pungency, are achieved mainly through the selection of distinct genomic regions between these two cultivated species. Introgressions from C. baccatum into C. chinense and C. frutescens are detected, including those providing genetic sources for various biotic and abiotic stress tolerances.}, } @article {pmid37676357, year = {2023}, author = {Huang, B and Yan, H and Sun, M and Jin, Y}, title = {Novel discovery in roles of structural variations and RWP-RK transcription factors in heat tolerance for pearl millet.}, journal = {Stress biology}, volume = {3}, number = {1}, pages = {12}, pmid = {37676357}, issn = {2731-0450}, abstract = {Global warming adversely affects crop production worldwide. Massive efforts have been undertaken to study mechanisms regulating heat tolerance in plants. However, the roles of structural variations (SVs) in heat stress tolerance remain unclear. In a recent article, Yan et al. (Nat Genet 1-12, 2023) constructed the first pan-genome of pearl millet (Pennisetum glaucum) and identified key SVs linked to genes involved in regulating plant tolerance to heat stress for an important crop with a superior ability to thrive in extremely hot and arid climates. Through multi-omics analyses integrating pan-genomics, comparative genomics, transcriptomics, population genetics and molecular biological technologies, they found RWP-RK transcription factors cooperating with endoplasmic reticulum-related genes play key roles in heat tolerance in pearl millet. 
The results in this paper provided novel insights to advance the understanding of the genetic and genomic basis of heat tolerance and an exceptional resource for molecular breeding to improve heat tolerance in pearl millet and other crops.}, } @article {pmid37676306, year = {2023}, author = {González-Gómez, JP and Lozano-Aguirre, LF and Medrano-Félix, JA and Chaidez, C and Gerba, CP and Betancourt, WQ and Castro-Del Campo, N}, title = {Evaluation of nuclear and mitochondrial phylogenetics for the subtyping of Cyclospora cayetanensis.}, journal = {Parasitology research}, volume = {}, number = {}, pages = {}, pmid = {37676306}, issn = {1432-1955}, abstract = {Cyclospora cayetanensis is an enteric coccidian parasite responsible for gastrointestinal disease transmitted through contaminated food and water. It has been documented in several countries, mostly with low-socioeconomic levels, although major outbreaks have hit developed countries. Detection methods based on oocyst morphology, staining, and molecular testing have been developed. However, the current MLST panel offers an opportunity for enhancement, as amplification of all molecular markers remains unfeasible in the majority of samples. This study aims to address this challenge by evaluating two approaches for analyzing the genetic diversity of C. cayetanensis and identifying reliable markers for subtyping: core homologous genes and mitochondrial genome analysis. A pangenome was constructed using 36 complete genomes of C. cayetanensis, and a haplotype network and phylogenetic analysis were conducted using 33 mitochondrial genomes. Through the analysis of the pangenome, 47 potential markers were identified, emphasizing the need for more sequence data to achieve comprehensive characterization. Additionally, the analysis of mitochondrial genomes revealed 19 single-nucleotide variations that can serve as characteristic markers for subtyping this parasite. 
These findings not only contribute to the selection of molecular markers for C. cayetanensis subtyping, but they also drive the knowledge toward the potential development of a comprehensive genotyping method for this parasite.}, } @article {pmid37671027, year = {2023}, author = {Lee, H and Greer, SU and Pavlichin, DS and Zhou, B and Urban, AE and Weissman, T and Ji, HP}, title = {Pan-conserved segment tags identify ultra-conserved sequences across assemblies in the human pangenome.}, journal = {Cell reports methods}, volume = {3}, number = {8}, pages = {100543}, pmid = {37671027}, issn = {2667-2375}, support = {K01 MH129758/MH/NIMH NIH HHS/United States ; }, mesh = {Humans ; Conserved Sequence ; Haploidy ; *Neoplasms, Squamous Cell ; Polymorphism, Genetic ; *Skin Neoplasms ; }, abstract = {The human pangenome, a new reference sequence, addresses many limitations of the current GRCh38 reference. The first release is based on 94 high-quality haploid assemblies from individuals with diverse backgrounds. We employed a k-mer indexing strategy for comparative analysis across multiple assemblies, including the pangenome reference, GRCh38, and CHM13, a telomere-to-telomere reference assembly. Our k-mer indexing approach enabled us to identify a valuable collection of universally conserved sequences across all assemblies, referred to as "pan-conserved segment tags" (PSTs). By examining intervals between these segments, we discerned highly conserved genomic segments and those with structurally related polymorphisms. We found 60,764 polymorphic intervals with unique geo-ethnic features in the pangenome reference. In this study, we utilized ultra-conserved sequences (PSTs) to forge a link between human pangenome assemblies and reference genomes. 
This methodology enables the examination of any sequence of interest within the pangenome, using the reference genome as a comparative framework.}, } @article {pmid37668148, year = {2023}, author = {Mentasti, M and David, S and Turton, J and Morgan, M and Turner, L and Westlake, J and Jenkins, J and Williams, C and Rey, S and Watkins, J and Daniel, V and Mitchell, S and Forbes, G and Wootton, M and Jones, L}, title = {Clonal expansion and rapid characterization of Klebsiella pneumoniae ST1788, an otherwise uncommon strain spreading in Wales, UK.}, journal = {Microbial genomics}, volume = {9}, number = {9}, pages = {}, doi = {10.1099/mgen.0.001104}, pmid = {37668148}, issn = {2057-5858}, mesh = {Humans ; *Klebsiella pneumoniae/genetics ; Phylogeny ; Wales/epidemiology ; *Aminoglycosides ; Anti-Bacterial Agents ; }, abstract = {A multidrug-resistant strain of Klebsiella pneumoniae (Kp) sequence type (ST) 1788, an otherwise uncommon ST worldwide, was isolated from 65 patients at 11 hospitals and 11 general practices across South and West Wales, UK, between February 2019 and November 2021. A collection of 97 Kp ST1788 isolates (including 94 from Wales) was analysed to investigate the diversity and spread across Wales and to identify molecular marker(s) to aid development of a strain-specific real-time PCR. Whole genome sequencing (WGS) was performed with Illumina technology and the data were used to perform phylogenetic analyses. Pan-genome analysis of further Kp genome collections was used to identify an ST1788-specific gene target; a real-time PCR was then validated against a panel of 314 strains and 218 broth-enriched screening samples. Low genomic diversity was demonstrated amongst the 94 isolates from Wales. Evidence of spread within and across healthcare facilities was found. 
A yersiniabactin locus and the KL2 capsular locus were identified in 85/94 (90.4 %) and 94/94 (100 %) genomes respectively; bla SHV-232, bla TEM-1, bla CTX-M-15 and bla OXA-1 were simultaneously carried by 86/94 (91.5 %) isolates; 4/94 (4.3 %) isolates also carried bla OXA-48 carbapenemase. Aminoglycoside and fluoroquinolone resistance markers were found in 94/94 (100 %) and 86/94 (91.5 %) isolates respectively. The ST1788-specific real-time PCR was 100 % sensitive and specific. Our analyses demonstrated recent clonal expansion and spread of Kp ST1788 in the community and across healthcare facilities in South and West Wales with isolates carrying well-defined antimicrobial resistance and virulence markers. An ST1788-specific marker was also identified, enabling rapid and reliable preliminary characterization of isolates by real-time PCR. This study confirms the utility of WGS in investigating novel strains and in aiding proactive implementation of molecular tools to assist infection control specialists.}, } @article {pmid37667515, year = {2023}, author = {Baker, JL}, title = {Illuminating the oral microbiome and its host interactions: recent advancements in omics and bioinformatics technologies in the context of oral microbiome research.}, journal = {FEMS microbiology reviews}, volume = {47}, number = {5}, pages = {}, pmid = {37667515}, issn = {1574-6976}, support = {K99 DE029228/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; *Quality of Life ; Computational Biology ; Genomics ; Metabolomics ; *Microbiota/genetics ; }, abstract = {The oral microbiota has an enormous impact on human health, with oral dysbiosis now linked to many oral and systemic diseases. Recent advancements in sequencing, mass spectrometry, bioinformatics, computational biology, and machine learning are revolutionizing oral microbiome research, enabling analysis at an unprecedented scale and level of resolution using omics approaches. 
This review contains a comprehensive perspective of the current state-of-the-art tools available to perform genomics, metagenomics, phylogenomics, pangenomics, transcriptomics, proteomics, metabolomics, lipidomics, and multi-omics analysis on (all) microbiomes, and then provides examples of how the techniques have been applied to research of the oral microbiome, specifically. Key findings of these studies and remaining challenges for the field are highlighted. Although the methods discussed here are placed in the context of their contributions to oral microbiome research specifically, they are pertinent to the study of any microbiome, and the intended audience of this review includes researchers who would simply like to get an introduction to microbial omics and/or an update on the latest omics methods. Continued research of the oral microbiota using omics approaches is crucial and will lead to dramatic improvements in human health, longevity, and quality of life.}, } @article {pmid37662009, year = {2023}, author = {Li, Z and Zhou, X and Liao, D and Liu, R and Zhao, X and Wang, J and Zhong, Q and Zeng, Z and Peng, Y and Tan, Y and Yang, Z}, title = {Comparative genomics and DNA methylation analysis of Pseudomonas aeruginosa clinical isolate PA3 by single-molecule real-time sequencing reveals new targets for antimicrobials.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1180194}, pmid = {37662009}, issn = {2235-2988}, mesh = {*Pseudomonas aeruginosa/genetics ; DNA Methylation ; Phylogeny ; Genomics ; *Anti-Infective Agents ; DNA ; }, abstract = {INTRODUCTION: Pseudomonas aeruginosa (P. aeruginosa) is an important opportunistic pathogen with broad environmental adaptability and complex drug resistance. Single-molecule real-time (SMRT) sequencing technique has longer read-length sequences, more accuracy, and the ability to identify epigenetic DNA alterations.

METHODS: This study applied SMRT technology to sequence a clinical strain P. aeruginosa PA3 to obtain its genome sequence and methylation modification information. Genomic, comparative, pan-genomic, and epigenetic analyses of PA3 were conducted.

RESULTS: General genome annotations of PA3 were discovered, as well as information about virulence factors, regulatory proteins (RPs), secreted proteins, type II toxin-antitoxin (TA) pairs, and genomic islands. A genome-wide comparison revealed that PA3 was comparable to other P. aeruginosa strains in terms of identity, but varied in areas of horizontal gene transfer (HGT). Phylogenetic analysis showed that PA3 was closely related to P. aeruginosa 60503 and P. aeruginosa 8380. P. aeruginosa's pan-genome consists of a core genome of roughly 4,300 genes and an accessory genome of at least 5,500 genes. The results of the epigenetic analysis identified one main methylation site, N6-methyladenosine (m6A), and 1 motif (CATNNNNNNNTCCT/AGGANNNNNNNATG). 16 meaningful methylated sites were picked. Among these, purH, phaZ, and lexA are of great significance playing an important role in the drug resistance and biological environment adaptability of PA3, and the targeting of these genes may benefit further antibacterial studies.

DISCUSSION: This study provided a detailed visualization and DNA methylation information of the PA3 genome and set a foundation for subsequent research into the molecular mechanism of DNA methyltransferase-controlled P. aeruginosa pathogenicity.}, } @article {pmid37659733, year = {2023}, author = {Sharma, N and Raman, H and Wheeler, D and Kalenahalli, Y and Sharma, R}, title = {Data-driven approaches to improve water-use efficiency and drought resistance in crop plants.}, journal = {Plant science : an international journal of experimental plant biology}, volume = {336}, number = {}, pages = {111852}, doi = {10.1016/j.plantsci.2023.111852}, pmid = {37659733}, issn = {1873-2259}, abstract = {With the increasing population, there lies a pressing demand for food, feed and fibre, while the changing climatic conditions pose severe challenges for agricultural production worldwide. Water is the lifeline for crop production; thus, enhancing crop water-use efficiency (WUE) and improving drought resistance in crop varieties are crucial for overcoming these challenges. Genetically-driven improvements in yield, WUE and drought tolerance traits can buffer the worst effects of climate change on crop production in dry areas. While traditional crop breeding approaches have delivered impressive results in increasing yield, the methods remain time-consuming and are often limited by the existing allelic variation present in the germplasm. Significant advances in breeding and high-throughput omics technologies in parallel with smart agriculture practices have created avenues to dramatically speed up the process of trait improvement by leveraging the vast volumes of genomic and phenotypic data. For example, individual genome and pan-genome assemblies, along with transcriptomic, metabolomic and proteomic data from germplasm collections, characterised at phenotypic levels, could be utilised to identify marker-trait associations and superior haplotypes for crop genetic improvement. 
In addition, these omics approaches enable the identification of genes involved in pathways leading to the expression of a trait, thereby providing an understanding of the genetic, physiological and biochemical basis of trait variation. These data-driven gene discoveries and validation approaches are essential for crop improvement pipelines, including genomic breeding, speed breeding and gene editing. Herein, we provide an overview of prospects presented using big data-driven approaches (including artificial intelligence and machine learning) to harness new genetic gains for breeding programs and develop drought-tolerant crop varieties with favourable WUE and high-yield potential traits.}, } @article {pmid37655941, year = {2023}, author = {Meyer, S and Laval, L and Pimenta, M and González-Flores, Y and Gaschet, M and Couvé-Deacon, E and Barraud, O and Dagot, C and Ploy, MC}, title = {[Tracking transfers of resistance-carrying bacteria between animals, humans and the environment].}, journal = {Comptes rendus biologies}, volume = {}, number = {}, pages = {}, doi = {10.5802/crbiol.114}, pmid = {37655941}, issn = {1768-3238}, abstract = {The fight against antibiotic resistance must incorporate the "One Health" concept to be effective. This means having a holistic approach embracing the different ecosystems, human, animal, and environment. Transfers of resistance genes may exist between these three domains and different stresses related to the exposome may influence these transfers. 
Various targeted or pan-genomic molecular biology techniques can be used to better characterise the dissemination of bacterial clones and to identify exchanges of genes and mobile genetic elements between ecosystems.}, } @article {pmid37653687, year = {2023}, author = {Dixon, TA and Walker, RSK and Pretorius, IS}, title = {Visioning synthetic futures for yeast research within the context of current global techno-political trends.}, journal = {Yeast (Chichester, England)}, volume = {}, number = {}, pages = {}, doi = {10.1002/yea.3897}, pmid = {37653687}, issn = {1097-0061}, support = {//Australian Research Council/ ; }, abstract = {Yeast research is entering into a new period of scholarship, with new scientific tools, new questions to ask and new issues to consider. The politics of emerging and critical technology can no longer be separated from the pursuit of basic science in fields, such as synthetic biology and engineering biology. Given the intensifying race for technological leadership, yeast research is likely to attract significant investment from government, and that it offers huge opportunities to the curious minded from a basic research standpoint. This article provides an overview of new directions in yeast research with a focus on Saccharomyces cerevisiae, and places these trends in their geopolitical context. At the highest level, yeast research is situated within the ongoing convergence of the life sciences with the information sciences. This convergent effect is most strongly pronounced in areas of AI-enabled tools for the life sciences, and the creation of synthetic genomes, minimal genomes, pan-genomes, neochromosomes and metagenomes using computer-assisted design tools and methodologies. Synthetic yeast futures encompass basic and applied science questions that will be of intense interest to government and nongovernment funding sources. 
It is essential for the yeast research community to map and understand the context of their research to ensure their collaborations turn global challenges into research opportunities.}, } @article {pmid37646934, year = {2023}, author = {Bayer, PE and Edwards, D}, title = {Investigating Pangenome Graphs Using Wheat Panache.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2703}, number = {}, pages = {23-29}, pmid = {37646934}, issn = {1940-6029}, mesh = {Humans ; *Triticum/genetics ; *Agriculture ; Genomics ; Research Personnel ; Workflow ; }, abstract = {Pangenome graphs quickly become the central data structure representing the diversity of variation we see across related genomes. Pangenome graphs have been published for some species, including plants of agronomic interest. However, visualizing these graphs is not easy as the graphs are large, and variants within these graphs are complex. Tools are needed to visualize graph data structures. Here, we present a workflow to search and visualize a wheat pangenome graph using Wheat Panache. The approach presented assists researchers interested in wheat genomics.}, } @article {pmid37645952, year = {2023}, author = {McLaughlin, M and Fiebig, A and Crosson, S}, title = {XRE Transcription Factors Conserved in Caulobacter and φCbK Modulate Adhesin Development and Phage Production.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37645952}, support = {F32 GM141017/GM/NIGMS NIH HHS/United States ; R35 GM131762/GM/NIGMS NIH HHS/United States ; }, abstract = {Upon infection, transcriptional shifts in both a host bacterium and its invading phage determine host and viral fitness. The xenobiotic response element (XRE) family of transcription factors (TFs), which are commonly encoded by bacteria and phages, regulate diverse features of bacterial cell physiology and impact phage infection dynamics. 
Through a pangenome analysis of Caulobacter species isolated from soil and aquatic ecosystems, we uncovered an apparent radiation of a paralogous XRE TF gene cluster, several of which have established functions in the regulation of holdfast adhesin development and biofilm formation in C. crescentus . We further discovered related XRE TFs across the class Alphaproteobacteria and its phages, including the φCbK Caulophage, suggesting that members of this gene cluster impact host-phage interactions. Here we show that a closely related group of XRE proteins, encoded by both C. crescentus and φCbK, can form heteromeric associations and control the transcription of a common gene set, influencing processes including holdfast development and the production of φCbK virions. The φCbK XRE paralog, tgrL , is highly expressed at the earliest stages of infection and can directly repress transcription of hfiA , a potent holdfast inhibitor, and gafYZ , a transcriptional activator of prophage-like gene transfer agents (GTAs) encoded on the C. crescentus chromosome. XRE proteins encoded from the C. crescentus chromosome also directly repress gafYZ transcription, revealing a functionally redundant set of host regulators that may protect against spurious production of GTA particles and inadvertent cell lysis. Deleting host XRE transcription factors reduced φCbK burst size, while overexpressing these genes or φCbK tgrL rescued this burst defect. We conclude that an XRE TF gene cluster, shared by C. 
crescentus and φCbK, plays an important role in adhesion regulation under phage-free conditions, and influences host-phage dynamics during infection.}, } @article {pmid37645873, year = {2023}, author = {Shivakumar, VS and Ahmed, OY and Kovaka, S and Zakeri, M and Langmead, B}, title = {Sigmoni: classification of nanopore signal with a compressed pangenome index.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37645873}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; U01 CA253481/CA/NCI NIH HHS/United States ; }, abstract = {Improvements in nanopore sequencing necessitate efficient classification methods, including pre-filtering and adaptive sampling algorithms that enrich for reads of interest. Signal-based approaches circumvent the computational bottleneck of basecalling. But past methods for signal-based classification do not scale efficiently to large, repetitive references like pangenomes, limiting their utility to partial references or individual genomes. We introduce Sigmoni: a rapid, multiclass classification method based on the r-index that scales to references of hundreds of Gbps. Sigmoni quantizes nanopore signal into a discrete alphabet of picoamp ranges. It performs rapid, approximate matching using matching statistics, classifying reads based on distributions of picoamp matching statistics and co-linearity statistics. Sigmoni is 10-100X faster than previous methods for adaptive sampling in host depletion experiments with improved accuracy, and can query reads against large microbial or human pangenomes.}, } @article {pmid37644736, year = {2023}, author = {Le, VV and Ko, SR and Kang, M and Jeong, S and Oh, HM and Ahn, CY}, title = {Comparative Genome analysis of the Genus Curvibacter and the Description of Curvibacter microcysteis sp. nov. and Curvibacter cyanobacteriorum sp. 
nov., Isolated from Fresh Water during the Cyanobacterial Bloom Period.}, journal = {Journal of microbiology and biotechnology}, volume = {33}, number = {11}, pages = {1-10}, doi = {10.4014/jmb.2306.06017}, pmid = {37644736}, issn = {1738-8872}, abstract = {The three Gram-negative, catalase- and oxidase-positive bacterial strains RS43[T], HBC28, and HBC61[T], were isolated from fresh water and subjected to a polyphasic study. Comparison of 16S rRNA gene sequence initially indicated that strains RS43[T], HBC28, and HBC61[T] were closely related to species of genus Curvibacter and shared the highest sequence similarity of 98.14%, 98.21%, and 98.76%, respectively, with Curvibacter gracilis 7-1[T]. Phylogenetic analysis based on genome sequences placed all strains within the genus Curvibacter. The average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values between the three strains and related type strains supported their recognition as two novel genospecies in the genus Curvibacter. Comparative genomic analysis revealed that the genus possessed an open pangenome. Based on KEGG BlastKOALA analyses, Curvibacter species have the potential to metabolize benzoate, phenylacetate, catechol, and salicylate, indicating their potential use in the elimination of these compounds from the water systems. The results of polyphasic characterization indicated that strains RS43[T] and HBC61[T] represent two novel species, for which the names Curvibacter microcysteis sp. nov. (type strain RS43[T] =KCTC 92793[T] =LMG 32714[T]) and Curvibacter cyanobacteriorum sp. nov. 
(type strain HBC61[T] =KCTC 92794[T] =LMG 32713[T]) are proposed.}, } @article {pmid37639729, year = {2023}, author = {Prajapati, A and Yogisharadhya, R and Mohanty, NN and Mendem, SK and Chanda, MM and Siddaramappa, S and Shivachandra, SB}, title = {Comparative genome analysis of Pasteurella multocida strains of porcine origin.}, journal = {Genome}, volume = {}, number = {}, pages = {}, doi = {10.1139/gen-2023-0021}, pmid = {37639729}, issn = {1480-3321}, abstract = {Pasteurella multocida causes acute/chronic pasteurellosis in porcine resulting in considerable economic losses globally. The draft genomes of two Indian strains NIVEDIPm17 (serogroup D) and NIVEDIPm36 (serogroup A) were sequenced. A total of 2182-2284 coding sequences (CDSs) were predicted along with 5-6 rRNA and 45-46 tRNA genes in the genomes. Multi locus sequence analysis and LPS genotyping showed the presence of ST50: genotype 07 and ST74: genotype 06 in NIVEDIPm17 and NIVEDIPm36, respectively. Pangenome analysis of 61 strains showed the presence of 1653 core genes, 167 soft core genes, 750 shell genes, and 1820 cloud genes. Analysis of virulence-associated genes in 61 genomes indicated the presence of nanB, exbB, exbD, ptfA, ompA, ompH, fur, plpB, fimA, sodA, sodC, tonB, and omp87 in all strains. The 61 genomes contained genes encoding tetracycline (54%), streptomycin (48%), sulphonamide (28%), tigecycline (25%), chloramphenicol (21%), amikacin (7%), cephalosporin (5%) and trimethoprim (5%) resistance. MLST revealed that ST50 was the most common (34%), followed by ST74 (26%), ST13 (24%), ST287 (5%), ST09 (5%), ST122 (3%), and ST07 (2%). SNP and core genome-based phylogenetic analysis clustered the strains into 3 major clusters. In conclusion, we described the various virulence factors, mobile genetic elements and antimicrobial resistance genes in pangenome of P. 
multocida of porcine origin besides a rare presence of LPS genotype 7 in serogroup D.}, } @article {pmid37636268, year = {2023}, author = {Yang, Z and Guarracino, A and Biggs, PJ and Black, MA and Ismail, N and Wold, JR and Merriman, TR and Prins, P and Garrison, E and de Ligt, J}, title = {Pangenome graphs in infectious disease: a comprehensive genetic variation analysis of Neisseria meningitidis leveraging Oxford Nanopore long reads.}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1225248}, pmid = {37636268}, issn = {1664-8021}, abstract = {Whole genome sequencing has revolutionized infectious disease surveillance for tracking and monitoring the spread and evolution of pathogens. However, using a linear reference genome for genomic analyses may introduce biases, especially when studies are conducted on highly variable bacterial genomes of the same species. Pangenome graphs provide an efficient model for representing and analyzing multiple genomes and their variants as a graph structure that includes all types of variations. In this study, we present a practical bioinformatics pipeline that employs the PanGenome Graph Builder and the Variation Graph toolkit to build pangenomes from assembled genomes, align whole genome sequencing data and call variants against a graph reference. The pangenome graph enables the identification of structural variants, rearrangements, and small variants (e.g., single nucleotide polymorphisms and insertions/deletions) simultaneously. We demonstrate that using a pangenome graph, instead of a single linear reference genome, improves mapping rates and variant calling for both simulated and real datasets of the pathogen Neisseria meningitidis. Overall, pangenome graphs offer a promising approach for comparative genomics and comprehensive genetic variation analysis in infectious disease. 
Moreover, this innovative pipeline, leveraging pangenome graphs, can bridge variant analysis, genome assembly, population genetics, and evolutionary biology, expanding the reach of genomic understanding and applications.}, } @article {pmid37630674, year = {2023}, author = {Aguirre-Sánchez, JR and Quiñones, B and Ortiz-Muñoz, JA and Prieto-Alvarado, R and Vega-López, IF and Martínez-Urtaza, J and Lee, BG and Chaidez, C}, title = {Comparative Genomic Analyses of Virulence and Antimicrobial Resistance in Citrobacter werkmanii, an Emerging Opportunistic Pathogen.}, journal = {Microorganisms}, volume = {11}, number = {8}, pages = {}, pmid = {37630674}, issn = {2076-2607}, support = {CRIS Project Number 2030-42000-055-00D//United States Department of Agriculture (USDA), Agricultural Research Service (ARS)/ ; Laboratorio Nacional para la Investigación en Inocuidad Alimentaria (LANIIA)//Centro de Investigación y Desarrollo A. C. (CIAD) in Culiacán, Sinaloa/ ; }, abstract = {Citrobacter werkmanii is an emerging and opportunistic human pathogen found in developing countries and is a causative agent of wound, urinary tract, and blood infections. The present study conducted comparative genomic analyses of a C. werkmanii strain collection from diverse geographical locations and sources to identify the relevant virulence and antimicrobial resistance genes. Pangenome analyses divided the examined C. werkmanii strains into five distinct clades; the subsequent classification identified genes with functional roles in carbohydrate and general metabolism for the core genome and genes with a role in secretion, adherence, and the mobilome for the shell and cloud genomes. A maximum-likelihood phylogenetic tree with a heatmap, showing the virulence and antimicrobial genes' presence or absence, demonstrated the presence of genes with functional roles in secretion systems, adherence, enterobactin, and siderophore among the strains belonging to the different clades. C. 
werkmanii strains in clade V, predominantly from clinical sources, harbored genes implicated in type II and type Vb secretion systems as well as multidrug resistance to aminoglycoside, beta-lactamase, fluoroquinolone, phenicol, trimethoprim, macrolides, sulfonamide, and tetracycline. In summary, these comparative genomic analyses have demonstrated highly pathogenic and multidrug-resistant genetic profiles in C. werkmanii strains, indicating a virulence potential for this commensal and opportunistic human pathogen.}, } @article {pmid37630640, year = {2023}, author = {van der Lee, TAJ and van Gent-Pelzer, MPE and Jonkheer, EM and Brankovics, B and Houwers, IM and van der Wolf, JM and Bonants, PJM and van Duivenbode, I and Vreeburg, RAM and Nas, M and Smit, S}, title = {An Efficient Triplex TaqMan Quantitative PCR to Detect a Blackleg-Causing Lineage of Pectobacterium brasiliense in Potato Based on a Pangenome Analysis.}, journal = {Microorganisms}, volume = {11}, number = {8}, pages = {}, pmid = {37630640}, issn = {2076-2607}, support = {TU-16022//Dutch Ministry of Agriculture, Nature and Food Safety/ ; }, abstract = {P. brasiliense is an important bacterial pathogen causing blackleg (BL) in potatoes. Nevertheless, P. brasiliense is often detected in seed lots that do not develop any of the typical blackleg symptoms in the potato crop when planted. Field bioassays identified that P. brasiliense strains can be categorized into two distinct classes, some able to cause blackleg symptoms and some unable to do it. A comparative pangenomic approach was performed on 116 P. brasiliense strains, of which 15 were characterized as BL-causing strains and 25 as non-causative. In a genetically homogeneous clade comprising all BL-causing P. brasiliense strains, two genes only present in the BL-causing strains were identified, one encoding a predicted lysozyme inhibitor Lprl (LZI) and one encoding a putative Toll/interleukin-1 receptor (TIR) domain-containing protein. 
TaqMan assays for the specific detection of BL-causing P. brasiliense were developed and integrated with the previously developed generic P. brasiliense assay into a triplex TaqMan assay. This simultaneous detection makes the scoring more efficient as only a single tube is needed, and it is more robust as BL-causing strains of P. brasiliense should be positive for all three assays. Individual P. brasiliense strains were found to be either positive for all three assays or only for the P. brasiliense assay. In potato samples, the mixed presence of BL-causing and not BL-causing P. brasiliense strains was observed as shown by the difference in Ct value of the TaqMan assays. However, upon extension of the number of strains, it became clear that in recent years additional BL-causing lineages of P. brasiliense were detected for which additional assays must be developed.}, } @article {pmid37630590, year = {2023}, author = {Mevada, V and Patel, R and Dudhagara, P and Chaudhari, R and Vohra, M and Khan, V and J H Shyu, D and Chen, YY and Zala, D}, title = {Whole Genome Sequencing and Pan-Genomic Analysis of Multidrug-Resistant Vibrio cholerae VC01 Isolated from a Clinical Sample.}, journal = {Microorganisms}, volume = {11}, number = {8}, pages = {}, pmid = {37630590}, issn = {2076-2607}, abstract = {Cholera, a disease caused by the Vibrio cholerae bacteria, threatens public health worldwide. The organism mentioned above has a significant historical record of being identified as a prominent aquatic environmental pollutant capable of adapting its phenotypic and genotypic traits to react to host patients effectively. This study aims to elucidate the heterogeneity of the sporadic clinical strain of V. cholerae VC01 among patients residing in Silvasa. The study involved conducting whole-genome sequencing of the isolate obtained from patients exhibiting symptoms, including those not commonly observed in clinical practice. 
The strain was initially identified through a combination of biochemical analysis, microscopy, and 16s rRNA-based identification, followed by type strain-based identification. The investigation demonstrated the existence of various genetic alterations and resistance profiles against multiple drugs, particularly chloramphenicol (catB9), florfenicol (floR), oxytetracycline (tet(34)), sulfonamide (sul2), and Trimethoprim (dfrA1). The pan-genomic analysis indicated that 1099 distinct clusters were detected within the genome sequences of recent isolates worldwide. The present study helps to establish a correlation between the mutation and the coexistence of antimicrobial resistance toward current treatment.}, } @article {pmid37628823, year = {2023}, author = {Li, H and Song, K and Zhang, X and Wang, D and Dong, S and Liu, Y and Yang, L}, title = {Application of Multi-Perspectives in Tea Breeding and the Main Directions.}, journal = {International journal of molecular sciences}, volume = {24}, number = {16}, pages = {}, pmid = {37628823}, issn = {1422-0067}, support = {SDAIT-25-01//The Foundation of Innovation Team Project for Modern Agricultural Industrious Technology System of Shandong Province/ ; YDZX2022123//Special Funds for Local Scientific and Technological Development Guided by the Central Government/ ; }, mesh = {*Plant Breeding ; *Camellia sinensis/genetics ; Crops, Agricultural ; Cytoplasm ; Tea ; }, abstract = {Tea plants are an economically important crop and conducting research on tea breeding contributes to enhancing the yield and quality of tea leaves as well as breeding traits that satisfy the requirements of the public. This study reviews the current status of tea plants germplasm resources and their utilization, which has provided genetic material for the application of multi-omics, including genomics and transcriptomics in breeding. 
Various molecular markers for breeding were designed based on multi-omics, and available approaches in the direction of high yield, quality and resistance in tea plants breeding are proposed. Additionally, future breeding of tea plants based on single-cellomics, pangenomics, plant-microbe interactions and epigenetics are proposed and provided as references. This study aims to provide inspiration and guidance for advancing the development of genetic breeding in tea plants, as well as providing implications for breeding research in other crops.}, } @article {pmid37623951, year = {2023}, author = {Pitta, JLLP and Bezerra, MF and Fernandes, DLRDS and Block, T and Novaes, AS and Almeida, AMP and Rezende, AM}, title = {Genomic Analysis of Yersinia pestis Strains from Brazil: Search for Virulence Factors and Association with Epidemiological Data.}, journal = {Pathogens (Basel, Switzerland)}, volume = {12}, number = {8}, pages = {}, pmid = {37623951}, issn = {2076-0817}, abstract = {Yersinia pestis, the etiological agent of the plague, is considered a genetically homogeneous species. Brazil is currently in a period of epidemiological silence but plague antibodies are still detected in sentinel animals, suggesting disease activity in the sylvatic cycle. The present study deployed an in silico approach to analyze virulence factors among 407 Brazilian genomes of Y. pestis belonging to the Fiocruz Collection (1966-1997). The pangenome analysis associated several known virulence factors of Y. pestis in clades according to the presence or absence of genes. Four main strain clades (C, E, G, and H) exhibited the absence of various virulence genes. Notably, clade G displayed the highest number of absent genes, while clade E showed a significant absence of genes related to the T6SS secretion system and clade H predominantly demonstrated the absence of plasmid-related genes. These results suggest attenuation of virulence in these strains over time. 
The cgMLST analysis associated genomic and epidemiological data highlighting evolutionary patterns related to the isolation years and outbreaks of Y. pestis in Brazil. Thus, the results contribute to the understanding of the genetic diversity and virulence within Y. pestis and the potential for utilizing genomic data in epidemiological investigations.}, } @article {pmid37620118, year = {2023}, author = {Horsfield, ST and Tonkin-Hill, G and Croucher, NJ and Lees, JA}, title = {Accurate and fast graph-based pangenome annotation and clustering with ggCaller.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.277733.123}, pmid = {37620118}, issn = {1549-5469}, abstract = {Bacterial genomes differ in both gene content and sequence mutations, which underlies extensive phenotypic diversity, including variation in susceptibility to antimicrobials or vaccine-induced immunity. To identify and quantify important variants, all genes within a population must be predicted, functionally annotated and clustered, representing the pangenome. Despite the volume of genome data available, gene prediction and annotation are currently conducted in isolation on individual genomes, which is computationally inefficient and frequently inconsistent across genomes. Here, we introduce the open-source software graph-gene-caller (ggCaller). ggCaller combines gene prediction, functional annotation, and clustering into a single workflow using population-wide de Bruijn Graphs, removing redundancy in gene annotation, and resulting in more accurate gene predictions and orthologue clustering. We applied ggCaller to simulated and real-world bacterial datasets containing hundreds or thousands of genomes, comparing it to current state-of-the-art tools. ggCaller has considerable speed-ups with equivalent or greater accuracy, particularly with datasets containing complex sources of error, such as assembly contamination or fragmentation. 
ggCaller is also an important extension to bacterial genome-wide association studies, enabling querying of annotated graphs for functional analyses. We highlight this application by functionally annotating DNA sequences with significant associations to tetracycline and macrolide resistance in Streptococcus pneumoniae, identifying key resistance determinants that were missed when using only a single reference genome. ggCaller is a novel bacterial genome analysis tool with applications in bacterial evolution and epidemiology.}, } @article {pmid37612339, year = {2023}, author = {Jang, J and Jung, J and Lee, YH and Lee, S and Baik, M and Kim, H}, title = {Chromosome-level genome assembly of Korean native cattle and pangenome graph of 14 Bos taurus assemblies.}, journal = {Scientific data}, volume = {10}, number = {1}, pages = {560}, pmid = {37612339}, issn = {2052-4463}, support = {NRF-2021R1A2C2094111//National Research Foundation of Korea (NRF)/ ; }, mesh = {Animals ; *Cattle/genetics ; Humans ; *Asian People ; Chromosomes/genetics ; Republic of Korea ; *Tandem Repeat Sequences ; *Genome ; }, abstract = {This study presents the first chromosome-level genome assembly of Hanwoo, an indigenous Korean breed of Bos taurus taurus. This is the first genome assembly of Asian taurus breed. Also, we constructed a pangenome graph of 14 B. taurus genome assemblies. The contig N50 was over 55 Mb, the scaffold N50 was over 89 Mb and a genome completeness of 95.8%, as estimated by BUSCO using the mammalian set, indicated a high-quality assembly. 48.7% of the genome comprised various repetitive elements, including DNAs, tandem repeats, long interspersed nuclear elements, and simple repeats. A total of 27,314 protein-coding genes were identified, including 25,302 proteins with inferred gene names and 2,012 unknown proteins. The pangenome graph of 14 B. taurus autosomes revealed 528.47 Mb non-reference regions in total and 61.87 Mb Hanwoo-specific regions. 
Our Hanwoo assembly and pangenome graph provide valuable resources for studying B. taurus populations.}, } @article {pmid37610465, year = {2023}, author = {Szuhaj, M and Kakuk, B and Wirth, R and Rákhely, G and Kovács, KL and Bagi, Z}, title = {Regulation of the methanogenesis pathways by hydrogen at transcriptomic level in time.}, journal = {Applied microbiology and biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37610465}, issn = {1432-0614}, support = {2020-3.1.2-ZFR-KVG-2020-00009//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; K143198//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; FK123902//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; 2019-2.1.13-TÉT_IN-2020-00016//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; PD 132145//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; }, abstract = {The biomethane formation from 4 H2 + CO2 by pure cultures of two methanogens, Methanocaldococcus fervens and Methanobacterium thermophilum, has been studied. The goal of the study was to understand the regulation of the enzymatic steps associated with biomethane biosynthesis by H2, using metagenomic, pan-genomic, and transcriptomic approaches. Methanogenesis in the autotrophic methanogen M. fervens could be easily "switched off" and "switched on" by H2/CO2 within about an hour. In contrast, the heterotrophic methanogen M. thermophilum was practically insensitive to the addition of the H2/CO2 trigger although this methanogen also converted H2/CO2 to CH4. From practical points of view, the regulatory function of H2/CO2 suggests that in the power-to-gas (P2G) renewable excess electricity conversion and storage systems, the composition of the biomethane-generating methanogenic community is essential for sustainable operation. In addition to managing the specific hydrogenotrophic methanogenesis biochemistry, H2/CO2 affected several, apparently unrelated, metabolic pathways. 
The redox-regulated overall biochemistry and symbiotic relationships in the methanogenic communities should be explored in order to make the P2G technology more efficient. KEY POINTS : • Hydrogenotrophic methanogens may respond distinctly to H2/CO2 in bio-CH4 formation. • H2/CO2 can also activate metabolic routes, which are apparently unrelated to methanogenesis. • Sustainable conversion of the fluctuating renewable electricity to bio-CH4 is an option.}, } @article {pmid37599459, year = {2023}, author = {Alsaiari, AA and Hakami, MA and Alotaibi, BS and Alkhalil, SS and Alkhorayef, N and Khan, K and Jalal, K}, title = {Delineating multi-epitopes vaccine designing from membrane protein CL5 against all monkeypox strains: a pangenome reverse vaccinology approach.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-22}, doi = {10.1080/07391102.2023.2248301}, pmid = {37599459}, issn = {1538-0254}, abstract = {The recently identified monkeypox virus (MPXV or mpox) is a zoonotic orthopox virus that infects humans and causes diseases with traits like smallpox. The world health organization (WHO) estimates that 3-6% of MPXV cases result in death. As it might impact everyone globally, like COVID, and become the next pandemic, the cure for this disease is important for global public health. The high incidence and disease ratio of MPXV necessitates immediate efforts to design a unique vaccine candidate capable of addressing MPXV diseases. Here, we used a computational pan-genome-based vaccine design strategy for all currently reported 19 MPXV strains acquired from different regions of the world. Thus, this study's objective was to develop a new and safe vaccine candidate against MPXV by targeting the membrane CL5 protein; identified after the pangenome analysis. Proteomics and reverse vaccinology have covered up all of the MPXV epitopes that would usually stimulate robust host immune responses. 
Following this, only two mapped (MHC-I, MHC-II, and B-cell) epitopes were observed to be extremely effective that can be used in the construction of CL5 protein vaccine candidates. The suggested vaccine (V5) candidate from eight vaccine models was shown to be antigenic, non-allergenic, and stable (with 213 amino acids). The vaccine's candidate efficacy was evaluated by using many in silico methods to predict, improve, and validate its 3D structure. Molecular docking and molecular dynamics simulations further reveal that the proposed vaccine candidate ensemble has a high interaction energy with the HLAs and TLR2/4 immunological receptors under study. Later, the vaccine sequence was used to generate an expression vector for the E. coli K12 strain. Further study uncovers that V5 was highly immunogenic because it produced robust primary, secondary, and tertiary immune responses. Eventually, the use of computer-aided vaccine designing may significantly reduce costs and speed up the process of developing vaccines. Although the results of this research are promising, more research (experimental; in vivo, and in vitro studies) is needed to verify the biological efficacy of the proposed vaccine against MPXV. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37596715, year = {2023}, author = {Sun, Y and Zheng, C and Zhou, J and Zhen, M and Wei, X and Yan, X and Guo, X and Zheng, L and Shao, M and Li, C and Qin, D and Zhang, J and Xiong, L and Xing, J and Huang, B and Dong, Z and Cheng, P and Yu, G}, title = {Pathogen Profile of Klebsiella variicola, the Causative Agent of Banana Sheath Rot.}, journal = {Plant disease}, volume = {107}, number = {8}, pages = {2325-2334}, doi = {10.1094/PDIS-09-22-2018-RE}, pmid = {37596715}, issn = {0191-2917}, mesh = {Animals ; Humans ; *Musa ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; China ; Klebsiella/genetics ; Endophytes ; }, abstract = {Banana (Musa spp.) is an important fruit and food crop worldwide. 
In recent years, banana sheath rot has become a major problem in banana cultivation, causing plant death and substantial economic losses. Nevertheless, the pathogen profile of this disease has not been fully characterized. Klebsiella variicola is a versatile bacterium capable of colonizing different hosts, such as plants, humans, insects, and animals, and is recognized as an emerging pathogen in various hosts. In this study, we obtained 12 bacterial isolates from 12 different banana samples showing banana sheath rot in Guangdong and Guangxi Provinces, China. Phylogenetic analysis based on 16S rRNA sequences confirmed that all 12 isolates were K. variicola strains. We sequenced the genomes of these strains, performed comparative genomic analysis with other sequenced K. variicola strains, and found a lack of consistency in accessory gene content among these K. variicola strains. However, prediction based on the pan-genome of K. variicola revealed 22 unique virulence factors carried by the 12 pathogenic K. variicola isolates. Microbiome and microbial interaction network analysis of endophytes between the healthy tissues of diseased plants and healthy plants of two cultivars showed that Methanobacterium negatively interacts with Klebsiella in banana plants and that Herbaspirillum might indirectly inhibit Methanobacterium to promote Klebsiella growth. These results suggest that banana sheath rot is caused by the imbalance of plant endophytes and opportunistic pathogenic bacteria, providing an important basis for research and control of this disease. Copyright © 2023 The Author(s). 
This is an open access article distributed under the CC BY-NC-ND 4.0 International license.}, } @article {pmid37596178, year = {2023}, author = {Mertz, P and Hentgen, V and Boursier, G and Delon, J and Georgin-Lavialle, S}, title = {[Monogenic auto-inflammatory diseases associated with actinopathies: A review of the literature].}, journal = {La Revue de medecine interne}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.revmed.2023.06.005}, pmid = {37596178}, issn = {1768-3122}, abstract = {Auto-inflammatory diseases (AIDs) are diseases resulting from an inappropriate activation of innate immunity in the absence of any infection. The field of monogenic AIDs is constantly expanding, with the discovery of new pathologies and pathophysiological mechanisms thanks to pangenomic sequencing. Actinopathies with auto-inflammatory manifestations are a new emerging group of AIDs, linked to defects in the regulation of the actin cytoskeleton dynamics. These diseases most often begin in the neonatal period and combine to varying degrees a more or less severe primary immune deficiency, cytopenias (especially thrombocytopenia), auto-inflammatory manifestations (especially cutaneous and digestive), atopic and auto-immune manifestations. The diagnosis is to be evoked essentially in front of a cutaneous-digestive auto-inflammation picture of early onset, associated with a primary immune deficiency and thrombocytopenia or a tendency to bleed. Some of these diseases have specificities, including a risk of macrophagic activation syndrome or a tendency to atopy or lymphoproliferation. We propose here a review of the literature on these new diseases, with a proposal for a practical approach according to the main associated biological abnormalities and some clinical particularities. However, the diagnosis remains genetic, and several differential diagnoses must be considered. 
The pathophysiology of these diseases is not yet fully elucidated, and studies are needed to better clarify the inherent mechanisms that can guide the choice of therapies. In most cases, the severity of the picture indicates allogeneic marrow transplantation.}, } @article {pmid37594286, year = {2023}, author = {Kim, M and Cha, IT and Lee, KE and Li, M and Park, SJ}, title = {Pangenome analysis provides insights into the genetic diversity, metabolic versatility, and evolution of the genus Flavobacterium.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0100323}, doi = {10.1128/spectrum.01003-23}, pmid = {37594286}, issn = {2165-0497}, abstract = {Members of the genus Flavobacterium are widely distributed and produce various polysaccharide-degrading enzymes. Many species in the genus have been isolated and characterized. However, few studies have focused on marine isolates or fish pathogens, and in-depth genomic analyses, particularly comparative analyses of isolates from different habitat types, are lacking. Here, we isolated 20 strains of the genus from various environments in South Korea and sequenced their full-length genomes. Combined with published sequence data, we examined genomic traits, evolution, environmental adaptation, and putative metabolic functions in total 187 genomes of isolated species in Flavobacterium categorized as marine, host-associated, and terrestrial including freshwater. A pangenome analysis revealed a correlation between genome size and coding or noncoding density. Flavobacterium spp. had high levels of diversity, allowing for novel gene repertories via recombination events. Defense-related genes only accounted for approximately 3% of predicted genes in all Flavobacterium genomes. 
While genes involved in metabolic pathways did not differ with respect to isolation source, there was substantial variation in genomic traits; in particular, the abundances of tRNAs and rRNAs were higher in the host-associated group than in other groups. One genome in the host-associated group contained a Microviridae prophage closely related to an enterobacteria phage. The proteorhodopsin gene was only identified in four terrestrial strains isolated for this study. Furthermore, recombination events clearly influenced genomic diversity and may contribute to the response to environmental stress. These findings shed light on the high genetic variation in Flavobacterium and functional roles in diverse ecosystems as a result of their metabolic versatility. IMPORTANCE The genus Flavobacterium is a diverse group of bacteria that are found in a variety of environments. While most species of this genus are harmless and utilize organic substrates such as proteins and polysaccharides, some members may play a significant role in the cycling of organic substances within their environments. Nevertheless, little is known about the genomic dynamics and/or metabolic capacity of Flavobacterium. Here, we found that Flavobacterium species may have an open pangenome, containing a variety of diverse and novel gene repertoires. Intriguingly, we discovered that one genome (classified into host-associated group) contained a Microviridae prophage closely related to that of enterobacteria. Proteorhodopsin may be expressed under conditions of light or oxygen pressure in some strains isolated for this study. 
Our findings significantly contribute to the understanding of the members of the genus Flavobacterium diversity exploration and will provide a framework for the way for future ecological characterizations.}, } @article {pmid37592233, year = {2023}, author = {Zhang, X and Xiao, L and Liu, J and Tian, Q and Xie, J}, title = {Trade-off in genome turnover events leading to adaptive evolution of Microcystis aeruginosa species complex.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {462}, pmid = {37592233}, issn = {1471-2164}, support = {32101368//National Natural Science Foundation of China/ ; 2022YFE0119600//National Key Research and Development Program of China/ ; }, mesh = {*Microcystis/genetics ; Genome-Wide Association Study ; *Bacteriophages ; Energy Metabolism ; Evolution, Molecular ; }, abstract = {BACKGROUND: Numerous studies in the past have expanded our understanding of the genetic differences of global distributed cyanobacteria that originated around billions of years ago, however, unraveling how gene gain and loss drive the genetic evolution of cyanobacterial species, and the trade-off of these evolutionary forces are still the central but poorly understood issues.

RESULTS: To delineate the contribution of gene flow in mediating the hereditary differentiation and shaping the microbial evolution, a global genome-wide study of bloom-forming cyanobacterium, Microcystis aeruginosa species complex, provided robust evidence for genetic diversity, reflected by enormous variation in gene repertoire among various strains. Mathematical extrapolation showed an 'open' microbial pan-genome of M. aeruginosa species, since novel genes were predicted to be introduced after new genomes were sequenced. Identification of numerous horizontal gene transfer signatures in genome regions of interest suggested that genome expansion via transformation and phage-mediated transduction across bacterial lineage as an evolutionary route may contribute to the differentiation of Microcystis functions (e.g., carbohydrate metabolism, amino acid metabolism, and energy metabolism). Meanwhile, the selective loss of some dispensable genes at the cost of metabolic versatility serves as a means of adaptive evolution that has the potential to increase the biological fitness.

CONCLUSIONS: Now that the recruitment of novel genes was accompanied by a parallel loss of some other ones, a trade-off in gene content may drive the divergent differentiation of M. aeruginosa genomes. Our study provides a genetic framework for the evolution of M. aeruginosa species and illustrates their possible evolutionary patterns.}, } @article {pmid37587248, year = {2023}, author = {Pei, Z and Li, X and Cui, S and Yang, B and Lu, W and Zhao, J and Mao, B and Chen, W}, title = {Population genomics of Lacticaseibacillus paracasei: pan-genome, integrated prophage, antibiotic resistance, and carbohydrate utilization.}, journal = {World journal of microbiology & biotechnology}, volume = {39}, number = {10}, pages = {280}, pmid = {37587248}, issn = {1573-0972}, support = {32172173//National Natural Science Foundation of China/ ; 31972086//National Natural Science Foundation of China/ ; 2021YFD2100700//Key Technologies Research and Development Program/ ; }, mesh = {Humans ; *Lacticaseibacillus paracasei ; Metagenomics ; Lacticaseibacillus ; Prophages/genetics ; Drug Resistance, Microbial ; Carbohydrates ; }, abstract = {Lacticaseibacillus paracasei has beneficial effects on human health and holds promising potential as a probiotic for use in the development of functional foods, especially dairy products. This species can adapt to a variety of ecological niches and presents fundamental carbohydrate metabolism and tolerance to environmental stresses. However, the population structure, ecology, and antibiotic resistance of Lc. paracasei in diverse ecological niches are poorly understood. Reclassification of Lc. paracasei as a separate species of Lacticaseibacillus has stimulated renewed interest in its research, and a deeper interpretation of it will be important for screening strains beneficial to human health. Here, we collected 121 self-isolated and 268 publicly available Lc. 
paracasei genomes and discussed how genomic approaches have advanced our understanding of its taxonomy, ecology, evolution, diversity, integrated prophage-related element distribution, antibiotic resistance, and carbohydrate utilization. Moreover, for the Lc. paracasei strains isolated in this study, we assessed the inducibility of integrated prophages in their genomes and determined the phenotypes that presented tolerance to multiple antibiotics to provide evidence for safety evaluations of Lc. paracasei during the fermentation processes.}, } @article {pmid37580659, year = {2023}, author = {Ma, C and Li, M and Peng, H and Lan, M and Tao, L and Li, C and Wu, C and Bai, H and Zhong, Y and Zhong, S and Qin, R and Li, F and Li, J and He, J}, title = {Mesomycoplasma ovipneumoniae from goats with respiratory infection: pathogenic characteristics, population structure, and genomic features.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {220}, pmid = {37580659}, issn = {1471-2180}, mesh = {Animals ; Sheep ; Goats ; *Mycoplasma ovipneumoniae/genetics ; Phylogeny ; Genome-Wide Association Study ; *Respiratory Tract Infections/veterinary ; Genomics ; *Pneumonia, Mycoplasma/pathology/veterinary ; *Sheep Diseases ; }, abstract = {BACKGROUND: Mycoplasma ovipneumoniae is a critical pathogen that causes respiratory diseases that threaten Caprini health and cause economic damage. A genome-wide study of M. ovipneumoniae will help understand the pathogenic characteristics of this microorganism.

RESULTS: Toxicological pathology and whole-genome sequencing of nine M. ovipneumoniae strains isolated from goats were performed using an epidemiological survey. These strains exhibited anterior ventral lung consolidation, typical of bronchopneumonia in goats. Average nucleotide identity and phylogenetic analysis based on whole-genome sequences showed that all M. ovipneumoniae strains clustered into two clades, largely in accordance with their geographical origins. The pan-genome of the 23 M. ovipneumoniae strains contained 5,596 genes, including 385 core, 210 soft core, and 5,001 accessory genes. Among these genes, two protein-coding genes were annotated as cilium adhesion and eight as paralog surface adhesins when annotated to VFDB, and no antibiotic resistance-related genes were predicted. Additionally, 23 strains carried glucosidase-related genes (ycjT and group_1595) and glucosidase-related genes (atpD_2), indicating that M. ovipneumoniae possesses a wide range of glycoside hydrolase activities.

CONCLUSIONS: The population structure and genomic features identified in this study will facilitate further investigations into the pathogenesis of M. ovipneumoniae and lay the foundation for the development of preventive and therapeutic methods.}, } @article {pmid37580306, year = {2023}, author = {Alexandrov, N and Wang, T and Blair, L and Nadon, B and Sayer, D}, title = {HLA-OLI: A new MHC class I pseudogene and HLA-Y are located on a 60 kb indel in the human MHC between HLA-W and HLA-J.}, journal = {HLA}, volume = {}, number = {}, pages = {}, doi = {10.1111/tan.15180}, pmid = {37580306}, issn = {2059-2310}, abstract = {Analysis of publicly available whole-genome sequence data from the Human Pangenome Project and the 1000 Genomes Project has identified a DNA segment of approximately 60 kb in the major histocompatibility complex (MHC) between HLA-W and HLA-J that is present in some MHC haplotypes but not others. This DNA segment is largely repeat element-rich but includes the pseudogene HLA-Y, thus pinpointing the location of this pseudogene, and a new HLA class I sequence we have called HLA-OLI. HLA-OLI clusters phylogenetically with the HLA class I pseudogenes, HLA-P and HLA-W, and appears to have a similar genetic structure. 
The availability of whole-genome sequence data from diverse populations enables a detailed characterization of the MHC at the population level and will have implications for understanding MHC disease associations and the non-HLA MHC factors that impact unrelated hematopoietic cell transplant outcomes.}, } @article {pmid37578072, year = {2023}, author = {Khan, K and Burki, S and Alsaiari, AA and Alhuthali, HM and Alharthi, NS and Jalal, K}, title = {A therapeutic epitopes-based vaccine engineering against Salmonella enterica XDR strains for typhoid fever: a Pan-vaccinomics approach.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-15}, doi = {10.1080/07391102.2023.2246587}, pmid = {37578072}, issn = {1538-0254}, abstract = {A prevalent food-borne pathogen, Salmonella enterica serotypes Typhi, is responsible for gastrointestinal and systemic infections globally. Salmonella vaccines are the most effective, however, producing a broad-spectrum vaccine remains challenging due to Salmonella's many serotypes. Efforts are urgently required to develop a novel vaccine candidate that can tackle all S. Typhi strains because of their high resistance to multiple kinds of antibiotics (particularly the XDR H58 strain). In this work, we used a computational pangenome-based vaccine design technique on all available (n = 119) S. Typhi reference genomes and identified one TonB-dependent siderophore receptor (WP_001034967.1) as highly conserved and prospective vaccine candidates from the predicted core genome (n = 3,351). The applied pan-proteomics and Immunoinformatic approaches help in the identification of four epitopes that may trigger adequate host body immune responses. Furthermore, the proposed vaccine ensemble demonstrates a stable binding conformation with the examined immunological receptor (HLAs and TLR2/4) and has large interaction energy determined via molecular docking and molecular dynamics simulation techniques. 
Eventually, an expression vector for the Escherichia coli K12 strain was constructed from the vaccine sequence. Additional analysis revealed that the vaccine may help to elicit strong immune responses for typhoid infections, however, experimental analysis is required to verify the vaccine's effectiveness based on these results. Moreover, the applied computer-assisted vaccine design may considerably decrease vaccine development costs and speed up the process. The study's findings are intriguing, but they must be evaluated in the experimental labs to confirm the developed vaccine's biological efficiency against XDR S. Typhi. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37577683, year = {2023}, author = {Yocca, AE and Platts, A and Alger, E and Teresi, S and Mengist, MF and Benevenuto, J and Felipe V Ferrão, L and Jacobs, M and Babinski, M and Magallanes-Lundback, M and Bayer, P and Golicz, A and Humann, JL and Main, D and Espley, RV and Chagné, D and Albert, NW and Montanari, S and Vorsa, N and Polashock, J and Díaz-Garcia, L and Zalapa, J and Bassil, NV and Munoz, PR and Iorizzo, M and Edger, PP}, title = {Blueberry and cranberry pangenomes as a resource for future genetic studies and breeding efforts.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37577683}, support = {T32 GM110523/GM/NIGMS NIH HHS/United States ; }, abstract = {Domestication of cranberry and blueberry began in the United States in the early 1800s and 1900s, respectively, and in part owing to their flavors and health-promoting benefits are now cultivated and consumed worldwide. The industry continues to face a wide variety of production challenges (e.g. disease pressures) as well as a demand for higher-yielding cultivars with improved fruit quality characteristics. Unfortunately, molecular tools to help guide breeding efforts for these species have been relatively limited compared with those for other high-value crops. 
Here, we describe the construction and analysis of the first pangenome for both blueberry and cranberry. Our analysis of these pangenomes revealed both crops exhibit great genetic diversity, including the presence-absence variation of 48.4% genes in highbush blueberry and 47.0% genes in cranberry. Auxiliary genes, those not shared by all cultivars, are significantly enriched with molecular functions associated with disease resistance and the biosynthesis of specialized metabolites, including compounds previously associated with improving fruit quality traits. The discovery of thousands of genes, not present in the previous reference genomes for blueberry and cranberry, will serve as the basis of future research and as potential targets for future breeding efforts. The pangenome, as a multiple-sequence alignment, as well as individual annotated genomes, are publicly available for analysis on the Genome Database for Vaccinium - a curated and integrated web-based relational database. Lastly, the core-gene predictions from the pangenomes will serve useful to develop a community genotyping platform to guide future molecular breeding efforts across the family.}, } @article {pmid37576785, year = {2023}, author = {Qiu, J and Shi, Y and Zhao, F and Xu, Y and Xu, H and Dai, Y and Cao, Y}, title = {The Pan-Genomic Analysis of Corynebacterium striatum Revealed its Genetic Characteristics as an Emerging Multidrug-Resistant Pathogen.}, journal = {Evolutionary bioinformatics online}, volume = {19}, number = {}, pages = {11769343231191481}, pmid = {37576785}, issn = {1176-9343}, abstract = {Corynebacterium striatum is a Gram-positive bacterium that is straight or slightly curved and non-spore-forming. Although it was originally believed to be a part of the normal microbiome of human skin, a growing number of studies have identified it as a cause of various chronic diseases, bacteremia, and respiratory infections. 
However, despite its increasing importance as a pathogen, the genetic characteristics of the pathogen population, such as genomic characteristics and differences, the types of resistance genes and virulence factors carried by the pathogen and their distribution in the population are poorly understood. To address these knowledge gaps, we conducted a pan-genomic analysis of 314 strains of C. striatum isolated from various tissues and geographic locations. Our analysis revealed that C. striatum has an open pan-genome, comprising 5692 gene families, including 1845 core gene families, 2362 accessory gene families, and 1485 unique gene families. We also found that C. striatum exhibits a high degree of diversity across different sources, but strains isolated from skin tissue are more conserved. Furthermore, we identified 53 drug resistance genes and 42 virulence factors by comparing the strains to the drug resistance gene database (CARD) and the pathogen virulence factor database (VFDB), respectively. We found that these genes and factors are widely distributed among C. striatum, with 77.7% of strains carrying 2 or more resistance genes and displaying primary resistance to aminoglycosides, tetracyclines, lincomycin, macrolides, and streptomycin. The virulence factors are primarily associated with pathogen survival within the host, iron uptake, pili, and early biofilm formation. In summary, our study provides insights into the population diversity, resistance genes, and virulence factors of C. striatum from different sources. Our findings could inform future research and clinical practices in the diagnosis, prevention, and treatment of C. 
striatum-associated diseases.}, } @article {pmid37576287, year = {2023}, author = {Liu, L and Yu, W and Cai, K and Ma, S and Wang, Y and Ma, Y and Zhao, H}, title = {Identification of vaccine candidates against rhodococcus equi by combining pangenome analysis with a reverse vaccinology approach.}, journal = {Heliyon}, volume = {9}, number = {8}, pages = {e18623}, pmid = {37576287}, issn = {2405-8440}, abstract = {Rhodococcus equi (R. equi) is a zoonotic opportunistic pathogen that can cause life-threatening infections. The rapid evolution of multidrug-resistant R. equi and the fact that there is no currently licensed effective vaccine against R. equi warrant the need for vaccine development. Reverse vaccinology (RV), which involves screening a pathogen's entire genome and proteome using various web-based prediction tools, is considered one of the most effective approaches for identifying vaccine candidates. Here, we performed a pangenome analysis to determine the core proteins of R. equi. We then used the RV approach to examine the subcellular localization, host and gut flora homology, antigenicity, transmembrane helices, physicochemical properties, and immunogenicity of the core proteins to select potential vaccine candidates. The vaccine candidates were then subjected to epitope mapping to predict the exposed antigenic epitopes that possess the ability to bind with major histocompatibility complex I/II (MHC I/II) molecules. These vaccine candidates and epitopes will form a library of elements for the development of a polyvalent or universal vaccine against R. equi. Sixteen R. equi complete proteomes were found to contain 6,238 protein families, and the core proteins consisted of 3,969 protein families (∼63.63% of the pangenome), reflecting a low degree of intraspecies genomic variability. 
From the pool of core proteins, 483 nonhost homologous membrane and extracellular proteins were screened, and 12 vaccine candidates were finally identified according to their antigenicity, physicochemical properties and other factors. These included four cell wall/membrane/envelope biogenesis proteins; four amino acid transport and metabolism proteins; one cell cycle control, cell division and chromosome partitioning protein; one carbohydrate transport and metabolism protein; one secondary metabolite biosynthesis, transport and catabolism protein; and one defense mechanism protein. All 12 vaccine candidates have an experimentally validated 3D structure available in the protein data bank (PDB). Epitope mapping of the candidates showed that 16 MHC I epitopes and 13 MHC II epitopes with the strongest immunogenicity were exposed on the protein surface, indicating that they could be used to develop a polypeptide vaccine. Thus, we utilized an analytical strategy that combines pangenome analysis and RV to generate a peptide antigen library that simplifies the development of multivalent or universal vaccines against R. equi and can be applied to the development of other vaccines.}, } @article {pmid37575187, year = {2023}, author = {Chao, KH and Chen, PW and Seshia, SA and Langmead, B}, title = {WGT: Tools and algorithms for recognizing, visualizing, and generating Wheeler graphs.}, journal = {iScience}, volume = {26}, number = {8}, pages = {107402}, pmid = {37575187}, issn = {2589-0042}, abstract = {A Wheeler graph represents a collection of strings in a way that is particularly easy to index and query. Such a graph is a practical choice for representing a graph-shaped pangenome, and it is the foundation for current graph-based pangenome indexes. However, there are no practical tools to visualize or to check graphs that may have the Wheeler properties. 
Here, we present Wheelie, an algorithm that combines a renaming heuristic with a permutation solver (Wheelie-PR) or a Satisfiability Modulo Theory (SMT) solver (Wheelie-SMT) to check whether a given graph has the Wheeler properties, a problem that is NP-complete in general. Wheelie can check a variety of random and real-world graphs in far less time than any algorithm proposed to date. It can check a graph with 1,000s of nodes in seconds. We implement these algorithms together with complementary visualization tools in the WGT toolkit, available as open source software at https://github.com/Kuanhao-Chao/Wheeler_Graph_Toolkit.}, } @article {pmid37573136, year = {2023}, author = {Kokate, PP and Bales, E and Joyner, D and Hazen, TC and Techtmann, SM}, title = {Biogeographic patterns in populations of marine Pseudoalteromonas atlantica isolates.}, journal = {FEMS microbiology letters}, volume = {370}, number = {}, pages = {}, doi = {10.1093/femsle/fnad081}, pmid = {37573136}, issn = {1574-6968}, mesh = {Phylogeny ; *Pseudoalteromonas ; Biodiversity ; }, abstract = {Intra-specific genomic diversity is well documented in microbes. The question, however, remains whether natural selection or neutral evolution is the major contributor to this diversity. We undertook this study to estimate genomic diversity in Pseudoalteromonas atlantica populations and whether the diversity, if present, could be attributed to environmental factors or distance effects. We isolated and sequenced twenty-three strains of P. atlantica from three geographically distant deep marine basins and performed comparative genomic analyses to study the genomic diversity of populations among these basins. Average nucleotide identity followed a strictly geographical pattern. In two out of three locations, the strains within the location exhibited >99.5% identity, whereas, among locations, the strains showed <98.11% identity. 
Phylogenetic and pan-genome analysis also reflected the biogeographical separation of the strains. Strains from the same location shared many accessory genes and clustered closely on the phylogenetic tree. Phenotypic diversity between populations was studied in ten out of twenty-three strains testing carbon and nitrogen source utilization and osmotolerance. A genetic basis for phenotypic diversity could be established in most cases but was apparently not influenced by local environmental conditions. Our study suggests that neutral evolution may have a substantial role in the biodiversity of P. atlantica.}, } @article {pmid37571822, year = {2023}, author = {Raza, A and Bohra, A and Garg, V and Varshney, RK}, title = {Back to wild relatives for future breeding through super-pangenome.}, journal = {Molecular plant}, volume = {16}, number = {9}, pages = {1363-1365}, doi = {10.1016/j.molp.2023.08.005}, pmid = {37571822}, issn = {1752-9867}, mesh = {*Genomics ; *Plant Breeding ; }, } @article {pmid37567624, year = {2023}, author = {Rajput, A and Chauhan, SM and Mohite, OS and Hyun, JC and Ardalani, O and Jahn, LJ and Sommer, MO and Palsson, BO}, title = {Pangenome analysis reveals the genetic basis for taxonomic classification of the Lactobacillaceae family.}, journal = {Food microbiology}, volume = {115}, number = {}, pages = {104334}, doi = {10.1016/j.fm.2023.104334}, pmid = {37567624}, issn = {1095-9998}, mesh = {*Lactobacillaceae ; *Genomics ; Phylogeny ; }, abstract = {Lactobacillaceae represent a large family of important microbes that are foundational to the food industry. Many genome sequences of Lactobacillaceae strains are now available, enabling us to conduct a comprehensive pangenome analysis of this family. 
We collected 3591 high-quality genomes from public sources and found that: 1) they contained enough genomes for 26 species to perform a pangenomic analysis, 2) the normalized Heap's coefficient λ (a measure of pangenome openness) was found to have an average value of 0.27 (ranging from 0.07 to 0.37), 3) the pangenome openness was correlated with the abundance and genomic location of transposons and mobilomes, 4) the pangenome for each species was divided into core, accessory, and rare genomes, that highlight the species-specific properties (such as motility and restriction-modification systems), 5) the pangenome of Lactiplantibacillus plantarum (which contained the highest number of genomes found amongst the 26 species studied) contained nine distinct phylogroups, and 6) genome mining revealed a richness of detected biosynthetic gene clusters, with functions ranging from antimicrobial and probiotic to food preservation, but ∼93% were of unknown function. This study provides the first in-depth comparative pangenomics analysis of the Lactobacillaceae family.}, } @article {pmid37556679, year = {2023}, author = {Hill, H and Mitsi, E and Nikolaou, E and Blizard, A and Pojar, S and Howard, A and Hyder-Wright, A and Devin, J and Reiné, J and Robinson, R and Solórzano, C and Jochems, SP and Kenny-Nyazika, T and Ramos-Sevillano, E and Weight, CM and Myerscough, C and McLenaghan, D and Morton, B and Gibbons, E and Farrar, M and Randles, V and Burhan, H and Chen, T and Shandling, AD and Campo, JJ and Heyderman, RS and Gordon, SB and Brown, JS and Collins, AM and Ferreira, DM}, title = {A Randomised Controlled Trial of Nasal Immunisation with Live Virulence Attenuated Streptococcus pneumoniae Strains Using Human Infection Challenge.}, journal = {American journal of respiratory and critical care medicine}, volume = {}, number = {}, pages = {}, doi = {10.1164/rccm.202302-0222OC}, pmid = {37556679}, issn = {1535-4970}, abstract = {RATIONALE: Pneumococcal pneumonia remains a 
global health problem. Pneumococcal colonisation increases local and systemic protective immunity, suggesting nasal administration of live attenuated S. pneumoniae strains could help prevent infections.

OBJECTIVES: We used a controlled human infection model to investigate whether nasopharyngeal colonisation with attenuated S. pneumoniae strains protected against re-colonisation with wild-type (WT) S. pneumoniae (Spn).

METHODS: Healthy adults aged 18-50 years were randomised (1:1:1:1) for nasal administration twice (two weeks interval) with saline, WT Spn6B (BHN418) or one of two genetically modified Spn6B strains - SpnA1 (∆fhs/piaA) or SpnA3 (∆proABC/piaA) (Stage I). After 6 months, participants were challenged with SpnWT to assess protection against the homologous serotype (Stage II).

MEASUREMENTS AND MAIN RESULTS: 125 participants completed both study stages as per intention to treat. No Serious Adverse Events were reported. In Stage I, colonisation rates were similar amongst groups: SpnWT 58.1% (18/31), SpnA1 60% (18/30) and SpnA3 59.4% (19/32). Anti-Spn nasal IgG levels post-colonisation were similar in all groups whilst serum IgG responses were higher in the SpnWT and SpnA1 groups than the SpnA3 group. In colonised individuals, increases in IgG responses were identified against 197 Spn protein antigens and serotype 6 capsular polysaccharide using a pangenome array. Participants given SpnWT or SpnA1 in Stage I were partially protected against homologous challenge with SpnWT (29% and 30% recolonisation rates, respectively) at Stage II, whereas those exposed to SpnA3 achieved a recolonisation rate similar to the control group (50% vs 47%, respectively).

CONCLUSION: Nasal colonisation with genetically modified live attenuated Spn was safe and induced protection against recolonisation, suggesting nasal administration of live attenuated Spn could be an effective strategy for preventing pneumococcal infections.}, } @article {pmid37555725, year = {2023}, author = {Wei, F and Liang, X and Shi, JC and Luo, J and Qiu, LJ and Li, XX and Lu, LJ and Wen, Y and Feng, J}, title = {Pan-genomic Analysis Identifies the Chinese Strain as a New Subspecies of Xanthomonas fragariae.}, journal = {Plant disease}, volume = {}, number = {}, pages = {}, doi = {10.1094/PDIS-05-23-0933-SC}, pmid = {37555725}, issn = {0191-2917}, abstract = {Xanthomonas fragariae (X. fragariae) is classified as a quarantine pathogen by the European and Mediterranean Plant Protection Organization. It commonly induces typical angular leaf spot (ALS) symptoms in strawberry leaves. X. fragariae strains from China (YL19, SHAQP01, and YLX21) exhibit ALS symptoms in leaves and more severe symptoms of dry cavity rot in strawberry crowns. Conversely, strains from other countries do not cause severe dry cavity rot symptoms in strawberries. Employing multilocus sequence analysis (MLSA), average nucleotide identity (ANI), and amino acid identity (AAI), we determined that Chinese strains of X. fragariae are genetically distinct from other strains and can be considered a new subspecies. Subsequent analysis of 63 X. fragariae genomes published at NCBI using IPGA and EDGAR3.0 revealed the pan-genomic profile, with 1680 shared genes present in all 63 strains, including 71 virulence-related genes. Additionally, we identified 123 genes exclusive to all of the Chinese strains, encompassing 12 virulence-related genes. 
The qRT-PCR analysis demonstrated that the expression of XopD, XopG1, CE8, GT2 and GH121, out of 12 virulence-related genes of Chinese strains (YL19) exhibited a constant increase in the early stages (6 hpi, 24 hpi, 54 hpi, and 96 hpi) of strawberry leaf infected by YL19. Hence, the presence of XopD, XopG1, CE8, GT2, and GH121 in Chinese strains may play important roles in the early infection process of Chinese strains. These findings offer novel insights into comprehending the population structure and variation in the pathogenic capacity of X. fragariae.}, } @article {pmid37553643, year = {2023}, author = {Hyun, JC and Palsson, BO}, title = {Reconstruction of the last bacterial common ancestor from 183 pangenomes reveals a versatile ancient core genome.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {183}, pmid = {37553643}, issn = {1474-760X}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Phylogeny ; *Evolution, Molecular ; *Genome ; Gene Frequency ; Bacteria/genetics ; Genome, Bacterial ; }, abstract = {BACKGROUND: Cumulative sequencing efforts have yielded enough genomes to construct pangenomes for dozens of bacterial species and elucidate intraspecies gene conservation. Given the diversity of organisms for which this is achievable, similar analyses for ancestral species are feasible through the integration of pangenomics and phylogenetics, promising deeper insights into the nature of ancient life.

RESULTS: We construct pangenomes for 183 bacterial species from 54,085 genomes and identify their core genomes using a novel statistical model to estimate genome-specific error rates and underlying gene frequencies. The core genomes are then integrated into a phylogenetic tree to reconstruct the core genome of the last bacterial common ancestor (LBCA), yielding three main results: First, the gene content of modern and ancestral core genomes are diverse at the level of individual genes but are similarly distributed by functional category and share several poorly characterized genes. Second, the LBCA core genome is distinct from any individual modern core genome but has many fundamental biological systems intact, especially those involving translation machinery and biosynthetic pathways to all major nucleotides and amino acids. Third, despite this metabolic versatility, the LBCA core genome likely requires additional non-core genes for viability, based on comparisons with the minimal organism, JCVI-Syn3A.

CONCLUSIONS: These results suggest that many cellular systems commonly conserved in modern bacteria were not just present in ancient bacteria but were nearly immutable with respect to short-term intraspecies variation. Extending this analysis to other domains of life will likely provide similar insights into more distant ancestral species.}, } @article {pmid37546276, year = {2023}, author = {Gao, Z and Bian, J and Lu, F and Jiao, Y and He, H}, title = {Triticeae crop genome biology: an endless frontier.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1222681}, pmid = {37546276}, issn = {1664-462X}, abstract = {Triticeae, the wheatgrass tribe, includes several major cereal crops and their wild relatives. Major crops within the Triticeae are wheat, barley, rye, and oat, which are important for human consumption, animal feed, and rangeland protection. Species within this tribe are known for their large genomes and complex genetic histories. Powered by recent advances in sequencing technology, researchers worldwide have made progress in elucidating the genomes of Triticeae crops. In addition to assemblies of high-quality reference genomes, pan-genome studies have just started to capture the genomic diversities of these species, shedding light on our understanding of the genetic basis of domestication and environmental adaptation of Triticeae crops. In this review, we focus on recent signs of progress in genome sequencing, pan-genome analyses, and resequencing analysis of Triticeae crops. 
We also propose future research avenues in Triticeae crop genomes, including identifying genome structure variations, the association of genomic regions with desired traits, mining functions of the non-coding area, introgression of high-quality genes from wild Triticeae resources, genome editing, and integration of genomic resources.}, } @article {pmid37542576, year = {2023}, author = {Hong, H and Yang, SM and Kim, E and Kim, HJ and Park, SH}, title = {Comprehensive metagenomic analysis of stress-resistant and -sensitive Listeria monocytogenes.}, journal = {Applied microbiology and biotechnology}, volume = {107}, number = {19}, pages = {6047-6056}, pmid = {37542576}, issn = {1432-0614}, support = {E0210702-01//Korea Food Research Institute/ ; }, mesh = {*Listeria monocytogenes/genetics ; Food Microbiology ; Virulence/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Listeria monocytogenes is a pathogenic bacterium which can live in adverse environments (low pH, high salinity, and low temperature). Even though there are various whole genome sequencing (WGS) data on L. monocytogenes, investigations on genetic differences between stress-resistant and -sensitive L. monocytogenes grown under stress environments have been not fully examined. This study aims to investigate and compare genetic characteristics between stress-resistant and -sensitive L. monocytogenes using whole genome sequencing (WGS). A total of 47 L. monocytogenes strains (43 stress-resistant and 4 stress-sensitive) were selected based on the stress-resistance tests under pH 3, 5% salt concentration, and 1 °C. The sequencing library for WGS was prepared and sequenced using an Illumina MiSeq. Genetic characteristics of two different L. monocytogenes groups were examined to analyze the pangenome, functionality, virulence, antibiotic resistance, core, and unique genes. The functionality of unique genes in the stress-resistant L. 
monocytogenes was distinct compared to the stress-sensitive L. monocytogenes, such as carbohydrate and nucleotide transport and metabolism. The lisR virulence gene was detected more in the stress-resistant L. monocytogenes than in the stress-sensitive group. Five stress-resistant L. monocytogenes strains possessed tet(M) antibiotic resistance gene. This is the first study suggesting that deep genomic characteristics of L. monocytogenes may have different resistance level under stress conditions. This new insight will aid in understanding the genetic relationship between stress-resistant and -sensitive L. monocytogenes strains isolated from diverse resources. KEY POINTS: • Whole genomes of L. monocytogenes isolated from three different sources were analyzed. • Differences in two L. monocytogenes groups were identified in functionality, virulence, and antibiotic resistance genes. • This study first examines the association between resistances and whole genomes of stress-resistant and -sensitive L. monocytogenes.}, } @article {pmid37538845, year = {2023}, author = {Morales-Olavarría, M and Nuñez-Belmar, J and González, D and Vicencio, E and Rivas-Pardo, JA and Cortez, C and Cárdenas, JP}, title = {Phylogenomic analysis of the Porphyromonas gingivalis - Porphyromonas gulae duo: approaches to the origin of periodontitis.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1226166}, pmid = {37538845}, issn = {1664-302X}, abstract = {Porphyromonas gingivalis is an oral human pathogen associated with the onset and progression of periodontitis, a chronic immune-inflammatory disease characterized by the destruction of the teeth-supporting tissue. P. 
gingivalis belongs to the genus Porphyromonas, which is characterized by being composed of Gram-negative, asaccharolytic, non-spore-forming, non-motile, obligatory anaerobic species, inhabiting niches such as the oral cavity, urogenital tract, gastrointestinal tract and infected wound from different mammals including humans. Among the Porphyromonas genus, P. gingivalis stands out for its specificity in colonizing the human oral cavity and its keystone pathogen role in periodontitis pathogenesis. To understand the evolutionary process behind P. gingivalis in the context of the Porphyromonas genus, in this study, we performed a comparative genomics study with publicly available Porphyromonas genomes, focused on four main objectives: (A) to confirm the phylogenetic position of P. gingivalis in the Porphyromonas genus by phylogenomic analysis; (B) the definition and comparison of the pangenomes of P. gingivalis and its relative P. gulae; (C) the evaluation of the gene family gain/loss events during the divergence of P. gingivalis and P. gulae; and (D) the evaluation of the evolutionary pressure (represented by the calculation of Tajima-D values and dN/dS ratios) comparing gene families of P. gingivalis and P. gulae. Our analysis found 84 high-quality assemblies representing P. gingivalis and 14 P. gulae strains (from a total of 233 Porphyromonas genomes). Phylogenomic analysis confirmed that P. gingivalis and P. gulae are highly related lineages, close to P. loveana. Both organisms harbored open pangenomes, with a strong core-to-accessory ratio for housekeeping genes and a negative ratio for unknown function genes. Our analyses also characterized the gene set differentiating P. gulae from P. gingivalis, mainly associated with unknown functions. Relevant virulence factors, such as the FimA, Mfa1, and the hemagglutinins, are conserved in P. gulae, P. gingivalis, and P. loveana, suggesting that the origin of those factors occurred previous to the P. gulae - P. 
gingivalis divergence. These results suggest an unexpected evolutionary relationship between the P. gulae - P. gingivalis duo and P. loveana, showing more clues about the origin of the role of those organisms in periodontitis.}, } @article {pmid37537691, year = {2023}, author = {Wu, D and Xie, L and Sun, Y and Huang, Y and Jia, L and Dong, C and Shen, E and Ye, CY and Qian, Q and Fan, L}, title = {A syntelog-based pan-genome provides insights into rice domestication and de-domestication.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {179}, pmid = {37537691}, issn = {1474-760X}, mesh = {*Oryza/genetics ; Domestication ; Genome, Plant ; Genes, Plant ; Genomics ; Evolution, Molecular ; }, abstract = {BACKGROUND: Asian rice is one of the world's most widely cultivated crops. Large-scale resequencing analyses have been undertaken to explore the domestication and de-domestication genomic history of Asian rice, but the evolution of rice is still under debate.

RESULTS: Here, we construct a syntelog-based rice pan-genome by integrating and merging 74 high-accuracy genomes based on long-read sequencing, encompassing all ecotypes and taxa of Oryza sativa and Oryza rufipogon. Analyses of syntelog groups illustrate subspecies divergence in gene presence-and-absence and haplotype composition and identify massive genomic regions putatively introgressed from ancient Geng/japonica to ancient Xian/indica or its wild ancestor, including almost all well-known domestication genes and a 4.5-Mbp centromere-spanning block, supporting a single domestication event in main rice subspecies. Genomic comparisons between weedy and cultivated rice highlight the contribution from wild introgression to the emergence of de-domestication syndromes in weedy rice.

CONCLUSIONS: This work highlights the significance of inter-taxa introgression in shaping diversification and divergence in rice evolution and provides an exploratory attempt by utilizing the advantages of pan-genomes in evolutionary studies.}, } @article {pmid37531401, year = {2023}, author = {Burgaya, J and Marin, J and Royer, G and Condamine, B and Gachet, B and Clermont, O and Jaureguy, F and Burdet, C and Lefort, A and de Lastours, V and Denamur, E and Galardini, M and Blanquart, F and , }, title = {The bacterial genetic determinants of Escherichia coli capacity to cause bloodstream infections in humans.}, journal = {PLoS genetics}, volume = {19}, number = {8}, pages = {e1010842}, pmid = {37531401}, issn = {1553-7404}, mesh = {Humans ; Escherichia coli ; *Escherichia coli Infections/genetics/microbiology ; Genes, Bacterial ; Virulence/genetics ; *Sepsis/genetics ; Phylogeny ; }, abstract = {Escherichia coli is both a highly prevalent commensal and a major opportunistic pathogen causing bloodstream infections (BSI). A systematic analysis characterizing the genomic determinants of extra-intestinal pathogenic vs. commensal isolates in human populations, which could inform mechanisms of pathogenesis, diagnostic, prevention and treatment is still lacking. We used a collection of 912 BSI and 370 commensal E. coli isolates collected in France over a 17-year period (2000-2017). We compared their pangenomes, genetic backgrounds (phylogroups, STs, O groups), presence of virulence-associated genes (VAGs) and antimicrobial resistance genes, finding significant differences in all comparisons between commensal and BSI isolates. A machine learning linear model trained on all the genetic variants derived from the pangenome and controlling for population structure reveals similar differences in VAGs, discovers new variants associated with pathogenicity (capacity to cause BSI), and accurately classifies BSI vs. commensal strains. 
Pathogenicity is a highly heritable trait, with up to 69% of the variance explained by bacterial genetic variants. Lastly, complementing our commensal collection with an older collection from 1980, we predict that pathogenicity continuously increased through 1980, 2000, to 2010. Together our findings imply that E. coli exhibit substantial genetic variation contributing to the transition between commensalism and pathogenicity and that this species evolved towards higher pathogenicity.}, } @article {pmid37530223, year = {2023}, author = {Sun, M and Yan, H and Zhang, A and Jin, Y and Lin, C and Luo, L and Wu, B and Fan, Y and Tian, S and Cao, X and Wang, Z and Luo, J and Yang, Y and Jia, J and Zhou, P and Tang, Q and Jones, CS and Varshney, RK and Srivastava, RK and He, M and Xie, Z and Wang, X and Feng, G and Nie, G and Huang, D and Zhang, X and Zhu, F and Huang, L}, title = {Milletdb: a multi-omics database to accelerate the research of functional genomics and molecular breeding of millets.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14136}, pmid = {37530223}, issn = {1467-7652}, support = {CARS-34//CARS/ ; SCCXTD-2021-16//Modern Agricultural Industry System Sichuan Forage Innovation Team/ ; 31771866//National Natural Science Foundation of China/ ; 32071867//National Natural Science Foundation of China/ ; 2021YFYZ0013//Sichuan Province Research Grant/ ; }, abstract = {Millets are a class of nutrient-rich coarse cereals with high resistance to abiotic stress; thus, they guarantee food security for people living in areas with extreme climatic conditions and provide stress-related genetic resources for other crops. However, no platform is available to provide a comprehensive and systematic multi-omics analysis for millets, which seriously hinders the mining of stress-related genes and the molecular breeding of millets. 
Here, a free, web-accessible, user-friendly millets multi-omics database platform (Milletdb, http://milletdb.novogene.com) has been developed. The Milletdb contains six millets and their one related species genomes, graph-based pan-genomics of pearl millet, and stress-related multi-omics data, which enable Milletdb to be the most complete millets multi-omics database available. We stored GWAS (genome-wide association study) results of 20 yield-related trait data obtained under three environmental conditions [field (no stress), early drought and late drought] for 2 years in the database, allowing users to identify stress-related genes that support yield improvement. Milletdb can simplify the functional genomics analysis of millets by providing users with 20 different tools (e.g., 'Gene mapping', 'Co-expression', 'KEGG/GO Enrichment' analysis, etc.). On the Milletdb platform, a gene PMA1G03779.1 was identified through 'GWAS', which has the potential to modulate yield and respond to different environmental stresses. Using the tools provided by Milletdb, we found that the stress-related PLATZs TFs (transcription factors) family expands in 87.5% of millet accessions and contributes to vegetative growth and abiotic stress responses. Milletdb can effectively serve researchers in the mining of key genes, genome editing and molecular breeding of millets.}, } @article {pmid37529582, year = {2023}, author = {Liang, J and Duan, R and Qin, S and Lv, D and He, Z and Zhang, H and Duan, Q and Xi, J and Chun, H and Fu, G and Zheng, X and Tang, D and Wu, W and Han, H and Jing, H and Wang, X}, title = {The complex genomic diversity of Yersinia pestis on the long-term plague foci in Qinghai-Tibet plateau.}, journal = {Ecology and evolution}, volume = {13}, number = {8}, pages = {e10387}, pmid = {37529582}, issn = {2045-7758}, abstract = {Plague is a typical natural focus disease that circulates in different ecology of vectors and reservoir hosts. 
We conducted genomic population and phylogenetic analyses of the Yersinia pestis collected from the 12 natural plague foci in China with more than 20 kinds of hosts and vectors. Different ecological landscapes with specific hosts, vectors, and habitat which shape various niches for Y. pestis. The phylogeographic diversity of Y. pestis in different kinds plague foci in China showed host niches adaptation. Most natural plague foci strains are region-and focus-specific, with one predominant subpopulation; but the isolates from the Qinghai-Tibet plateau harbor a higher genetic diversity than other foci. The Y. pestis from Marmota himalayana plague foci are defined as the ancestors of different populations at the root of the evolutionary tree, suggesting several different evolutionary paths to other foci. It has the largest pan-genome and widest SNP distances with most accessory genes enriched in mobilome functions (prophages, transposons). Geological barriers play an important role in the maintenance of local Y. pestis species and block the introduction of non-native strains. This study provides new insights into the control of plague outbreaks and epidemics, deepened the understanding of the evolutionary history of MHPF (M. himalayana plague focus) in China. The population structure and identify clades among different natural foci of China renewed the space cognition of the plague.}, } @article {pmid37526693, year = {2023}, author = {Campillo-Balderas, JA and Lazcano, A and Cottom-Salas, W and Jácome, R and Becerra, A}, title = {Pangenomic Analysis of Nucleo-Cytoplasmic Large DNA Viruses. 
I: The Phylogenetic Distribution of Conserved Oxygen-Dependent Enzymes Reveals a Capture-Gene Process.}, journal = {Journal of molecular evolution}, volume = {}, number = {}, pages = {}, pmid = {37526693}, issn = {1432-1432}, support = {IN214421//DGAPA-PAPIIT, UNAM/ ; }, abstract = {The Nucleo-Cytoplasmic Large DNA Viruses (NCLDVs) infect a wide range of eukaryotic species, including amoeba, algae, fish, amphibia, arthropods, birds, and mammals. This group of viruses has linear or circular double-stranded DNA genomes whose size spans approximately one order of magnitude, from 100 to 2500 kbp. The ultimate origin of this peculiar group of viruses remains an open issue. Some have argued that NCLDVs' origin may lie in a bacteriophage ancestor that increased its genome size by subsequent recruitment of eukaryotic and bacterial genes. Others have suggested that NCLDVs families originated from cells that underwent an irreversible process of genome reduction. However, the hypothesis that a number of NCLDVs sequences have been recruited from the host genomes has been largely ignored. In the present work, we have performed pangenomic analyses of each of the seven known NCLDVs families. We show that these families' core- and shell genes have cellular homologs, supporting possible escaping-gene events as part of its evolution. 
Furthermore, the detection of sequences that belong to two protein families (small chain ribonucleotide reductase and Erv1/Air) and to one superfamily [2OG-Fe(II) oxygenases] that are for distribution in all NCLDVs core and shell clusters encoding for oxygen-dependent enzymes suggests that the highly conserved core these viruses originated after the Proterozoic Great Oxidation Event that transformed the terrestrial atmosphere 2.4-2.3 Ga ago.}, } @article {pmid37526649, year = {2023}, author = {Rodrigues, JA and Blankenship, HM and Cha, W and Mukherjee, S and Sloup, RE and Rudrik, JT and Soehnlen, M and Manning, SD}, title = {Pangenomic analyses of antibiotic-resistant Campylobacter jejuni reveal unique lineage distributions and epidemiological associations.}, journal = {Microbial genomics}, volume = {9}, number = {8}, pages = {}, pmid = {37526649}, issn = {2057-5858}, support = {U01 CK000510/CK/NCEZID CDC HHS/United States ; U01CK000510/ACL/ACL HHS/United States ; }, mesh = {Animals ; Cattle ; Anti-Bacterial Agents/pharmacology ; *Campylobacter jejuni/genetics ; *Campylobacter Infections/epidemiology ; Phylogeny ; Multilocus Sequence Typing ; }, abstract = {Application of whole-genome sequencing (WGS) to characterize foodborne pathogens has advanced our understanding of circulating genotypes and evolutionary relationships. Herein, we used WGS to investigate the genomic epidemiology of Campylobacter jejuni, a leading cause of foodborne disease. Among the 214 strains recovered from patients with gastroenteritis in Michigan, USA, 85 multilocus sequence types (STs) were represented and 135 (63.1 %) were phenotypically resistant to at least one antibiotic. Horizontally acquired antibiotic resistance genes were detected in 128 (59.8 %) strains and the genotypic resistance profiles were mostly consistent with the phenotypes. 
Core-gene phylogenetic reconstruction identified three sequence clusters that varied in frequency, while a neighbour-net tree detected significant recombination among the genotypes (pairwise homoplasy index P<0.01). Epidemiological analyses revealed that travel was a significant contributor to pangenomic and ST diversity of C. jejuni, while some lineages were unique to rural counties and more commonly possessed clinically important resistance determinants. Variation was also observed in the frequency of lineages over the 4 year period with chicken and cattle specialists predominating. Altogether, these findings highlight the importance of geographically specific factors, recombination and horizontal gene transfer in shaping the population structure of C. jejuni. They also illustrate the usefulness of WGS data for predicting antibiotic susceptibilities and surveillance, which are important for guiding treatment and prevention strategies.}, } @article {pmid37525145, year = {2023}, author = {Safar, HA and Alatar, F and Nasser, K and Al-Ajmi, R and Alfouzan, W and Mustafa, AS}, title = {The impact of applying various de novo assembly and correction tools on the identification of genome characterization, drug resistance, and virulence factors of clinical isolates using ONT sequencing.}, journal = {BMC biotechnology}, volume = {23}, number = {1}, pages = {26}, pmid = {37525145}, issn = {1472-6750}, mesh = {*Virulence Factors/genetics ; Reproducibility of Results ; *Genomics ; Escherichia coli/genetics ; High-Throughput Nucleotide Sequencing ; Drug Resistance ; Sequence Analysis, DNA ; }, abstract = {Oxford Nanopore sequencing technology (ONT) is currently widely used due to its affordability, simplicity, and reliability. Despite the advantage ONT has over next-generation sequencing in detecting resistance genes in mobile genetic elements, its relatively high error rate (10-15%) is still a deterrent. 
Several bioinformatic tools are freely available for raw data processing and obtaining complete and more accurate genome assemblies. In this study, we evaluated the impact of using mix-and-matched read assembly (Flye, Canu, Wtdbg2, and NECAT) and read correction (Medaka, NextPolish, and Racon) tools in generating complete and accurate genome assemblies, and downstream genomic analysis of nine clinical Escherichia coli isolates. Flye and Canu assemblers were the most robust in genome assembly, and Medaka and Racon correction tools significantly improved assembly parameters. Flye functioned well in pan-genome analysis, while Medaka increased the number of core genes detected. Flye, Canu, and NECAT assembler functioned well in detecting antimicrobial resistance genes (AMR), while Wtdbg2 required correction tools for better detection. Flye was the best assembler for detecting and locating both virulence and AMR genes (i.e., chromosomal vs. plasmid). This study provides insight into the performance of several read assembly and read correction tools for analyzing ONT sequencing reads for clinical isolates.}, } @article {pmid37524789, year = {2023}, author = {O'Donnell, S and Yue, JX and Saada, OA and Agier, N and Caradec, C and Cokelaer, T and De Chiara, M and Delmas, S and Dutreux, F and Fournier, T and Friedrich, A and Kornobis, E and Li, J and Miao, Z and Tattini, L and Schacherer, J and Liti, G and Fischer, G}, title = {Telomere-to-telomere assemblies of 142 strains characterize the genome structural landscape in Saccharomyces cerevisiae.}, journal = {Nature genetics}, volume = {55}, number = {8}, pages = {1390--1399}, pmid = {37524789}, issn = {1546-1718}, mesh = {*Saccharomyces cerevisiae/genetics ; Phylogeny ; *Genome ; Genomics ; Telomere/genetics ; }, abstract = {Pangenomes provide access to an accurate representation of the genetic diversity of species, both in terms of sequence polymorphisms and structural variants (SVs). 
Here we generated the Saccharomyces cerevisiae Reference Assembly Panel (ScRAP) comprising reference-quality genomes for 142 strains representing the species' phylogenetic and ecological diversity. The ScRAP includes phased haplotype assemblies for several heterozygous diploid and polyploid isolates. We identified circa (ca.) 4,800 nonredundant SVs that provide a broad view of the genomic diversity, including the dynamics of telomere length and transposable elements. We uncovered frequent cases of complex aneuploidies where large chromosomes underwent large deletions and translocations. We found that SVs can impact gene expression near the breakpoints and substantially contribute to gene repertoire evolution. We also discovered that horizontally acquired regions insert at chromosome ends and can generate new telomeres. Overall, the ScRAP demonstrates the benefit of a pangenome in understanding genome evolution at population scale.}, } @article {pmid37512795, year = {2023}, author = {Jaén-Luchoro, D and Kahnamouei, A and Yazdanshenas, S and Lindblom, A and Samuelsson, E and Åhrén, C and Karami, N}, title = {Comparative Genomic Analysis of ST131 Subclade C2 of ESBL-Producing E. coli Isolates from Patients with Recurrent and Sporadic Urinary Tract Infections.}, journal = {Microorganisms}, volume = {11}, number = {7}, pages = {}, pmid = {37512795}, issn = {2076-2607}, support = {ALFGBG-725361//Region Västra Götaland/ ; VGFOUREG-929979//Region Västra Götaland/ ; 2020-02518//Sahlgrenska University Hospital/ ; }, abstract = {The global emergence of extended-spectrum beta-lactamase-producing Escherichia coli (ESBL-E. coli), mainly causing urinary tract infections (UTI), is a major threat to human health. ESBL-E. coli sequence type (ST) 131 is the dominating clone worldwide, especially its subclade C2. Patients developing recurrent UTI (RUTI) due to ST131 subclade C2 appear to have an increased risk of recurrent infections. 
We have thus compared the whole genome of ST131 subclade C2 isolates from 14 patients with RUTI to those from 14 patients with sporadic UTI (SUTI). We aimed to elucidate if isolates causing RUTI can be associated with specific genomic features. Paired isolates from patients with RUTI were identical, presenting 2-18 single nucleotide polymorphism (SNP) differences for all six patients investigated. Comparative genomic analyses, including virulence factors, antibiotic resistance, pangenome and SNP analyses did not find any pattern associated with isolates causing RUTI. Despite extensive whole genome analyses, an increased risk of recurrences seen in patients with UTI due to ST131 subclade C2 isolates could not be explained by bacterial genetic differences in the two groups of isolates. Hence, additional factors that could aid in identifying bacterial properties contributing to the increased risk of RUTI due to ESBL-E. coli ST131 subclade C2 remains to be explored.}, } @article {pmid37511853, year = {2023}, author = {Panova, VV and Dolinnaya, NG and Novoselov, KA and Savitskaya, VY and Chernykh, IS and Kubareva, EA and Alexeevski, AV and Zvereva, MI}, title = {Conserved G-Quadruplex-Forming Sequences in Mammalian TERT Promoters and Their Effect on Mutation Frequency.}, journal = {Life (Basel, Switzerland)}, volume = {13}, number = {7}, pages = {}, pmid = {37511853}, issn = {2075-1729}, support = {21-14-00161//Russian Science Foundation/ ; }, abstract = {Somatic mutations in the promoter region of the human telomerase reverse transcriptase (hTERT) gene have been identified in many types of cancer. The hTERT promoter is known to be enriched with sequences that enable the formation of G-quadruplex (G4) structures, whose presence is associated with elevated mutagenicity and genome instability. 
Here, we used a bioinformatics tool (QGRS mapper) to search for G4-forming sequences (G4 motifs) in the 1000 bp TERT promoter regions of 141 mammalian species belonging to 20 orders, 5 of which, including primates and predators, contain more than 10 species. Groups of conserved G4 motifs and single-nucleotide variants within these groups were discovered using a block alignment approach (based on the Nucleotide PanGenome explorer). It has been shown that: (i) G4 motifs are predominantly located in the region proximal to the transcription start site (up to 400 bp) and are over-represented on the non-coding strand of the TERT promoters, (ii) 11 to 22% of the G4 motifs found are evolutionarily conserved across the related organisms, and (iii) a statistically significant higher frequency of nucleotide substitutions in the conserved G4 motifs compared to the surrounding regions was confirmed only for the order Primates. These data support the assumption that G4s can interfere with the DNA repair process and affect the evolutionary adaptation of organisms and species.}, } @article {pmid37510288, year = {2023}, author = {Leszczyńska, K and Święcicka, I and Daniluk, T and Lebensztejn, D and Chmielewska-Deptuła, S and Leszczyńska, D and Gawor, J and Kliber, M}, title = {Escherichia albertii as a Potential Enteropathogen in the Light of Epidemiological and Genomic Studies.}, journal = {Genes}, volume = {14}, number = {7}, pages = {}, pmid = {37510288}, issn = {2073-4425}, mesh = {Humans ; Animals ; *Enterobacteriaceae Infections ; *Genome, Bacterial ; Polymorphism, Restriction Fragment Length ; Computational Biology ; Phylogeny ; }, abstract = {Escherichia albertii is a new enteropathogen of humans and animals. The aim of the study was to assess the prevalence and pathogenicity of E. albertii strains isolated in northeastern Poland using epidemiological and genomic studies. 
In 2015-2018, a total of 1154 fecal samples from children and adults, 497 bird droppings, 212 food samples, 92 water samples, and 500 lactose-negative E. coli strains were tested. A total of 42 E. albertii strains were isolated. The PCR method was suitable for their rapid identification. In total, 33.3% of E. albertii isolates were resistant to one antibiotic, and 16.7% to two. Isolates were sensitive to cefepime, imipenem, levofloxacin, gentamicin, trimethoprim/sulfamethoxazole, and did not produce ESBL β-lactamases. High genetic variability of E. albertii has been demonstrated. In the PFGE method, 90.5% of the strains had distinct pulsotypes. In MLST typing, 85.7% of strains were assigned distinct sequence types (STs), of which 64% were novel ST types. Cytolethal distending toxin (CDT) and Paa toxin genes were found in 100% of E. albertii isolates. Genes encoding toxins, IbeA, CdtB type 2, Tsh and Shiga (Stx2f), were found in 26.2%, 9.7%, 1.7%, and 0.4% of E. albertii isolates, respectively. The chromosome size of the tested strains ranged from 4,573,338 to 5,141,010 bp (average 4,784,003 bp), and at least one plasmid was present in all strains. The study contributes to a more accurate assessment of the genetic diversity of E. albertii and the potential threat it poses to public health.}, } @article {pmid37503282, year = {2023}, author = {Joglekar, P and Conlan, S and Lee-Lin, SQ and Deming, C and Kashaf, SS and {{NISC Comparative Sequencing Program}} and Kong, HH and Segre, JA}, title = {Integrated genomic and functional analyses of human skin-associated Staphylococcus reveals extensive inter- and intra-species diversity.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37503282}, abstract = {UNLABELLED: Human skin is stably colonized by a distinct microbiota that functions together with epidermal cells to maintain a protective physical barrier. 
Staphylococcus, a prominent genus of the skin microbiota, participates in colonization resistance, tissue repair, and host immune regulation in strain specific manners. To unlock the potential of engineering skin microbial communities, we aim to fully characterize the functional diversity of this genus within the context of the skin environment. We conducted metagenome and pan-genome analyses of isolates obtained from distinct body sites of healthy volunteers, providing a detailed biogeographic depiction of staphylococcal species that colonize our skin. S. epidermidis, S. capitis, and S. hominis were the most abundant species present in all volunteers and were detected at all body sites. Pan-genome analysis of these three species revealed that the genus-core was dominated by central metabolism genes. Species-specific core genes were enriched in host colonization functions. The majority (∼68%) of genes were detected only in a fraction of isolate genomes, underscoring the immense strain-specific gene diversity. Conspecific genomes grouped into phylogenetic clades, exhibiting body site preference. Each clade was enriched for distinct gene-sets that are potentially involved in site tropism. Finally, we conducted gene expression studies of select isolates showing variable growth phenotypes in skin-like medium. In vitro expression revealed extensive intra- and inter-species gene expression variation, substantially expanding the functional diversification within each species. Our study provides an important resource for future ecological and translational studies to examine the role of shared and strain-specific staphylococcal genes within the skin environment.

SIGNIFICANCE: The bacterial genus Staphylococcus is a prominent member of the human skin microbiome, performing important and diverse functions such as tuning immunity, driving tissue repair, and preventing pathogen colonization. Each of these functions is carried out by a subset of staphylococcal strains, displaying differences in gene content and regulation. Delineating the genomic and functional diversity of Staphylococcus will enable researchers to unlock the potential of engineering skin communities to promote health. Here, we present a comprehensive multi-omics analysis to characterize the inter- and intra-species diversity present in human skin-associated staphylococci. Our study is the first to conduct a detailed pan-genome comparison between prominent skin staphylococcal species giving a valuable insight into gene sharing and provides an important resource.}, } @article {pmid37502876, year = {2023}, author = {Ahmed, NM and Joglekar, P and Deming, C and {{NISC Comparative Sequencing Program}} and Lemon, KP and Kong, HH and Segre, JA and Conlan, S}, title = {Genomic characterization of the C. tuberculostearicum species complex, a ubiquitous member of the human skin microbiome.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37502876}, abstract = {UNLABELLED: Corynebacterium is a predominant genus in the skin microbiome, yet its genetic diversity on skin is incompletely characterized and lacks a comprehensive set of reference genomes. Our work aims to investigate the distribution of Corynebacterium species on the skin, as well as to expand the existing genome reference catalog to enable more complete characterization of skin metagenomes. We used V1-V3 16S rRNA gene sequencing data from 14 body sites of 23 healthy volunteers to characterize Corynebacterium diversity and distribution across healthy human skin. Corynebacterium tuberculostearicum is the predominant species found on human skin and we identified two distinct C. 
tuberculostearicum ribotypes (A & B) that can be distinguished by variation in the 16S rRNA V1-V3 sequence. One is distributed across all body sites and the other found primarily on the feet. We performed whole genome sequencing of 40 C. tuberculostearicum isolates cultured from the skin of five healthy individuals across seven skin sites. We generated five closed genomes of diverse C. tuberculostearicum which revealed that C. tuberculostearicum isolates are largely syntenic and carry a diversity of methylation patterns, plasmids and CRISPR/Cas systems. The pangenome of C. tuberculostearicum is open with a core genome size of 1806 genes and a pangenome size of 5451 total genes. This expanded pangenome enabled the mapping of 24% more C. tuberculostearicum reads from shotgun metagenomic datasets derived from skin body sites. Finally, while the genomes from this study all fall within a C. tuberculostearicum species complex, the ribotype B isolates may constitute a new species.

IMPORTANCE: Amplicon sequencing data combined with isolate whole genome sequencing has expanded our understanding of Corynebacterium on the skin. Human skin is characterized by a diverse collection of Corynebacterium species but C. tuberculostearicum predominates many sites. Our work supports the emerging idea that C. tuberculostearicum is a species complex encompassing several distinct species. We produced a collection of genomes that help define this complex including a potentially new species which we are calling C. hallux based on a preference for sites on the feet, whole-genome average nucleotide identity, pangenomics and growth in skin-like media. This isolate collection and high-quality genome resource sets the stage for developing engineered strains for both basic and translational clinical studies. Microbiomes are shaped by taxa that are both characteristic to those sites and functionally important to that community. The genus Corynebacterium is one such taxa for the human skin and nares. Foundational studies using 16S rRNA gene sequencing and shotgun metagenomics by our lab (1, 2) and others (3) have established Corynebacterium as common members of the skin microbiome. While Corynebacterium have been positively correlated with the resolution of dysbiosis associated with eczema flares (4), the importance of the Corynebacterium spp. is less defined for skin disease severity in primary immune deficient patients (5, 6). Corynebacterium spp. are predominant members of the human aerodigestive tract microbiome (nares, oral cavity and respiratory tract) (3) and participate in microbe-microbe interactions with members of nasal microbiome (7, 8). Corynebacterium have been shown to engage with the host immune system, specifically C. accolens-promoted IL23-dependent inflammation in mice on a high-fat diet (9). C. bovis and C. 
mastiditis have been shown to predominate the microbiome of a ADAM10-deficient mouse model (10) as well as an ADAM17-deficient mouse model of eczema (11). Finally, C. tuberculostearicum has been shown to induce inflammation in human epidermal keratinocyte cell cultures (12). These studies establish Corynebacterium spp. as key members of the skin microbiome capable of both microbe-microbe and microbe-host interactions. A critical resource for understanding the biology of Corynebacterium on the skin is a robust collection of complete reference genomes, including isolates collected from a variety of individuals and body sites. Previously published genome collections from skin- or nares-resident species include Staphylococcus epidermidis (13), Cutibacterium acnes (14) and the recent comparative analysis of Dolosigranulum pigrum (15). Of note, while emerging bioinformatic methods and pipelines are now being employed to extract nearly-complete genomes (MAGs) from metagenomic assemblies of skin samples (16), MAGs are not yet a substitute for genomes from cultured isolates to understand strain level or pangenomic diversity. In addition to functional prediction, comparative genomics is increasingly being used to augment conventional microbiological methods to define or redefine taxonomic boundaries (17, 18), as well as describe the full extent of diversity within these boundaries (19). A pangenome, which encompasses the complete set of genes present within a set of genome sequences, enables the characterization of gene-level heterogeneity within a taxonomic group. The pangenome is commonly subdivided into the 'core' genome, referring to genes present in all strains, and the 'accessory' or 'dispensable' genome, referring to those present in only one or some isolates. (The accessory pangenome can be further subdivided to reflect a wider range of gene uniqueness, e.g. singletons.) 
Thorough characterization of taxa is limited by the availability of representative and high-quality genome assemblies. Unfortunately, with the exceptions of clinically relevant Corynebacterium spp. (e.g., C. diphtheriae, C. striatum and C. pseudotuberculosis), the genus is inadequately sequenced, with 75% of species having fewer than six genomes. This includes common skin-associated species like C. tuberculostearicum with just five unique isolate genomes, only two of which are from skin. This work seeks first to characterize the distribution of Corynebacterium across 14 skin sites from 23 healthy volunteers. The second goal of this work focuses on what we identify as the predominant skin Corynebacterium species, C. tuberculostearicum. We have sequenced 23 distinct C. tuberculostearicum strains (n=40 genomes before dereplication), a five-fold increase in the number of publicly available, unique genomes (n=5). In addition to short-read assemblies, we generated five complete genomes which, along with the type strain (DSM44922), demonstrate that C. tuberculostearicum genomes are largely syntenic and carry a number of methylation systems as well as a CRISPR/Cas system. Genes from the C. tuberculostearicum genomes in our collection fall into 5451 gene clusters comprising the species pangenome. This expanded pangenome, as compared to existing public references, improved the mapping of C. tuberculostearicum metagenomic reads from unrelated healthy volunteers. In addition, we have identified a distinct C. 
tuberculostearicum clade that is highly enriched on the feet that may represent a new species, tentatively designated Corynebacterium hallux.}, } @article {pmid37497030, year = {2023}, author = {Price, C and Russell, JA}, title = {AMAnD: an automated metagenome anomaly detection methodology utilizing DeepSVDD neural networks.}, journal = {Frontiers in public health}, volume = {11}, number = {}, pages = {1181911}, pmid = {37497030}, issn = {2296-2565}, mesh = {Humans ; *Metagenome ; *COVID-19/genetics ; Neural Networks, Computer ; Genomics ; Metagenomics/methods ; }, abstract = {The composition of metagenomic communities within the human body often reflects localized medical conditions such as upper respiratory diseases and gastrointestinal diseases. Fast and accurate computational tools to flag anomalous metagenomic samples from typical samples are desirable to understand different phenotypes, especially in contexts where repeated, long-duration temporal sampling is done. Here, we present Automated Metagenome Anomaly Detection (AMAnD), which utilizes two types of Deep Support Vector Data Description (DeepSVDD) models; one trained on taxonomic feature space output by the Pan-Genomics for Infectious Agents (PanGIA) taxonomy classifier and one trained on kmer frequency counts. AMAnD's semi-supervised one-class approach makes no assumptions about what an anomaly may look like, allowing the flagging of potentially novel anomaly types. Three diverse datasets are profiled. The first dataset is hosted on the National Center for Biotechnology Information's (NCBI) Sequence Read Archive (SRA) and contains nasopharyngeal swabs from healthy and COVID-19-positive patients. The second dataset is also hosted on SRA and contains gut microbiome samples from normal controls and from patients with slow transit constipation (STC). AMAnD can learn a typical healthy nasopharyngeal or gut microbiome profile and reliably flag the anomalous COVID+ or STC samples in both feature spaces. 
The final dataset is a synthetic metagenome created by the Critical Assessment of Metagenome Annotation Simulator (CAMISIM). A control dataset of 50 well-characterized organisms was submitted to CAMISIM to generate 100 synthetic control class samples. The experimental conditions included 12 different spiked-in contaminants that are taxonomically similar to organisms present in the laboratory blank sample ranging from one strain tree branch taxonomic distance away to one family tree branch taxonomic distance away. This experiment was repeated in triplicate at three different coverage levels to probe the dependence on sample coverage. AMAnD was again able to flag the contaminant inserts as anomalous. AMAnD's assumption-free flagging of metagenomic anomalies, the real-time model training update potential of the deep learning approach, and the strong performance even with lightweight models of low sample cardinality would make AMAnD well-suited to a wide array of applied metagenomics biosurveillance use-cases, from environmental to clinical utility.}, } @article {pmid37494467, year = {2023}, author = {Ma, J and Cáceres, M and Salmela, L and Mäkinen, V and Tomescu, AI}, title = {Chaining for accurate alignment of erroneous long reads to acyclic variation graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {8}, pages = {}, pmid = {37494467}, issn = {1367-4811}, mesh = {Humans ; Sequence Analysis, DNA ; *High-Throughput Nucleotide Sequencing ; *Algorithms ; Sequence Alignment ; Computational Biology ; Software ; }, abstract = {MOTIVATION: Aligning reads to a variation graph is a standard task in pangenomics, with downstream applications such as improving variant calling. While the vg toolkit [Garrison et al. (Variation graph toolkit improves read mapping by representing genetic variation in the reference. 
Nat Biotechnol 2018;36:875-9)] is a popular aligner of short reads, GraphAligner [Rautiainen and Marschall (GraphAligner: rapid and versatile sequence-to-graph alignment. Genome Biol 2020;21:253-28)] is the state-of-the-art aligner of erroneous long reads. GraphAligner works by finding candidate read occurrences based on individually extending the best seeds of the read in the variation graph. However, a more principled approach recognized in the community is to co-linearly chain multiple seeds.

RESULTS: We present a new algorithm to co-linearly chain a set of seeds in a string labeled acyclic graph, together with the first efficient implementation of such a co-linear chaining algorithm into a new aligner of erroneous long reads to acyclic variation graphs, GraphChainer. We run experiments aligning real and simulated PacBio CLR reads with average error rates 15% and 5%. Compared to GraphAligner, GraphChainer aligns 12-17% more reads, and 21-28% more total read length, on real PacBio CLR reads from human chromosomes 1, 22, and the whole human pangenome. On both simulated and real data, GraphChainer aligns between 95% and 99% of all reads, and of total read length. We also show that minigraph [Li et al. (The design and construction of reference pangenome graphs with minigraph. Genome Biol 2020;21:265-19.)] and minichain [Chandra and Jain (Sequence to graph alignment using gap-sensitive co-linear chaining. In: Proceedings of the 27th Annual International Conference on Research in Computational Molecular Biology (RECOMB 2023). Springer, 2023, 58-73.)] obtain an accuracy of <60% on this setting.

GraphChainer is freely available at https://github.com/algbio/GraphChainer. The datasets and evaluation pipeline can be reached from the previous address.}, } @article {pmid37492100, year = {2023}, author = {Frazer, KA and Schork, NJ}, title = {The human pangenome reference anticipates equitable and fundamental genomic insights.}, journal = {Cell genomics}, volume = {3}, number = {7}, pages = {100360}, pmid = {37492100}, issn = {2666-979X}, abstract = {For the past few years, researchers in the Human Pangenome Reference Consortium (HPRC) have been working to catalog almost all human genomic diversity. Frazer and Schork preview an article recently published in Nature, "A draft human pangenome reference,"[1] which represents the initial release of 47 fully phased diploid assemblies of genomes of individuals with diverse ancestries.}, } @article {pmid37491415, year = {2023}, author = {Matrishin, CB and Haase, EM and Dewhirst, FE and Mark Welch, JL and Miranda-Sanchez, F and Chen, T and MacFarland, DC and Kauffman, KM}, title = {Phages are unrecognized players in the ecology of the oral pathogen Porphyromonas gingivalis.}, journal = {Microbiome}, volume = {11}, number = {1}, pages = {161}, pmid = {37491415}, issn = {2049-2618}, support = {T32DE023526/DE/NIDCR NIH HHS/United States ; R01DE016937/DE/NIDCR NIH HHS/United States ; R03 DE030987/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; *Bacteriophages/genetics ; Porphyromonas gingivalis/genetics ; Prophages/genetics ; *Periodontal Diseases ; Base Sequence ; }, abstract = {BACKGROUND: Porphyromonas gingivalis (hereafter "Pg") is an oral pathogen that has been hypothesized to act as a keystone driver of inflammation and periodontal disease. Although Pg is most readily recovered from individuals with actively progressing periodontal disease, healthy individuals and those with stable non-progressing disease are also colonized by Pg. 
Insights into the factors shaping the striking strain-level variation in Pg, and its variable associations with disease, are needed to achieve a more mechanistic understanding of periodontal disease and its progression. One of the key forces often shaping strain-level diversity in microbial communities is infection of bacteria by their viral (phage) predators and symbionts. Surprisingly, although Pg has been the subject of study for over 40 years, essentially nothing is known of its phages, and the prevailing paradigm is that phages are not important in the ecology of Pg.

RESULTS: Here we systematically addressed the question of whether Pg are infected by phages—and we found that they are. We found that prophages are common in Pg, they are genomically diverse, and they encode genes that have the potential to alter Pg physiology and interactions. We found that phages represent unrecognized targets of the prevalent CRISPR-Cas defense systems in Pg, and that Pg strains encode numerous additional mechanistically diverse candidate anti-phage defense systems. We also found that phages and candidate anti-phage defense system elements together are major contributors to strain-level diversity and the species pangenome of this oral pathogen. Finally, we demonstrate that prophages harbored by a model Pg strain are active in culture, producing extracellular viral particles in broth cultures.

CONCLUSION: This work definitively establishes that phages are a major unrecognized force shaping the ecology and intra-species strain-level diversity of the well-studied oral pathogen Pg. The foundational phage sequence datasets and model systems that we establish here add to the rich context of all that is already known about Pg, and point to numerous avenues of future inquiry that promise to shed new light on fundamental features of phage impacts on human health and disease broadly. Video Abstract.}, } @article {pmid37491393, year = {2023}, author = {Cho, MK and Fullerton, SM and Hammonds, EM and Lee, SS and Panofsky, A and Reardon, J}, title = {Pangenomics: prioritize diversity in collaborations.}, journal = {Nature}, volume = {619}, number = {7971}, pages = {698}, doi = {10.1038/d41586-023-02248-7}, pmid = {37491393}, issn = {1476-4687}, mesh = {Humans ; *Genomics/methods/trends ; *Health Equity ; *Intersectoral Collaboration ; }, } @article {pmid37490004, year = {2023}, author = {Wu, S and Sun, H and Gao, L and Branham, S and McGregor, C and Renner, SS and Xu, Y and Kousik, C and Wechter, WP and Levi, A and Fei, Z}, title = {A Citrullus genus super-pangenome reveals extensive variations in wild and cultivated watermelons and sheds light on watermelon evolution and domestication.}, journal = {Plant biotechnology journal}, volume = {21}, number = {10}, pages = {1926-1928}, pmid = {37490004}, issn = {1467-7652}, support = {2015-51181-24285//National Institute of Food and Agriculture/ ; 2020-51181-32139//National Institute of Food and Agriculture/ ; 1855585//National Science Foundation/ ; }, mesh = {*Citrullus/genetics ; Domestication ; Genome, Plant/genetics ; Polymorphism, Single Nucleotide ; }, } @article {pmid37487084, year = {2023}, author = {Bozan, I and Achakkagari, SR and Anglin, NL and Ellis, D and Tai, HH and Strömvik, MV}, title = {Pangenome analyses reveal impact of transposable elements and ploidy on the evolution of potato species.}, journal = 
{Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {31}, pages = {e2211117120}, pmid = {37487084}, issn = {1091-6490}, mesh = {*Solanum tuberosum ; DNA Transposable Elements ; Phylogeny ; Ploidies ; *Solanum ; }, abstract = {Potato (Solanum sp., family Solanaceae) is the most important noncereal food crop globally. It has over 100 wild relatives in the Solanum section Petota, which features species with both sexual and asexual reproduction and varying ploidy levels. A pangenome of Solanum section Petota composed of 296 accessions was constructed including diploids and polyploids compared via presence/absence variation (PAV). The Petota core (genes shared by at least 97% of the accessions) and shell genomes (shared by 3 to 97%) are enriched in basic molecular and cellular functions, while the cloud genome (genes present in less than 3% of the member accessions) showed enrichment in transposable elements (TEs). Comparison of PAV in domesticated vs. wild accessions was made, and a phylogenetic tree was constructed based on PAVs, grouping accessions into different clades, similar to previous phylogenies produced using DNA markers. A cladewise pangenome approach identified abiotic stress response among the core genes in clade 1+2 and clade 3, and flowering/tuberization among the core genes in clade 4. The TE content differed between the clades, with clade 1+2, which is composed of species from North and Central America with reproductive isolation from species in other clades, having much lower TE content compared to other clades. In contrast, accessions with in vitro propagation history were identified and found to have high levels of TEs. 
Results indicate a role for TEs in adaptation to new environments, both natural and artificial, for Solanum section Petota.}, } @article {pmid37485508, year = {2023}, author = {Liu, W and Ou, P and Tian, F and Liao, J and Ma, Y and Wang, J and Jin, X}, title = {Anti-Vibrio parahaemolyticus compounds from Streptomyces parvus based on Pan-genome and subtractive proteomics.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1218176}, pmid = {37485508}, issn = {1664-302X}, abstract = {INTRODUCTION: Vibrio parahaemolyticus is a foodborne pathogen commonly found in seafood, and drug resistance poses significant challenges to its control. This study aimed to identify novel drug targets for antibacterial drug discovery.

METHODS: To identify drug targets, we performed a pan-genome analysis on 58 strains of V. parahaemolyticus genomes to obtain core genes. Subsequently, subtractive proteomics and physiochemical checks were conducted on the core proteins to identify potential therapeutic targets. Molecular docking was then employed to screen for anti-V. parahaemolyticus compounds using an in-house compound library of Streptomyces parvus, chosen based on binding energy. The anti-V. parahaemolyticus efficacy of the identified compounds was further validated through a series of experimental tests.

RESULTS AND DISCUSSION: Pangenome analysis of 58 V. parahaemolyticus genomes revealed that there were 1,392 core genes. After Subtractive proteomics and physiochemical checks, Flagellar motor switch protein FliN was selected as a therapeutic target against V. parahaemolyticus. FliN was modeled and docked with Streptomyces parvus source compounds, and Actinomycin D was identified as a potential anti-V. parahaemolyticus agent with a strong binding energy. Experimental verification confirmed its effectiveness in killing V. parahaemolyticus and significantly inhibiting biofilm formation and motility. This study is the first to use pan-genome and subtractive proteomics to identify new antimicrobial targets for V. parahaemolyticus and to identify the anti-V. parahaemolyticus effect of Actinomycin D. These findings suggest potential avenues for the development of new antibacterial drugs to control V. parahaemolyticus infections.}, } @article {pmid37480395, year = {2023}, author = {Tanuku, SNR and Pinnaka, AK and Behera, S and Singh, A and Pydi, S and Vasudeva, G and Vaidya, B and Sharma, G and Ganta, SK and Garbhapu, NS}, title = {Marinobacterium lacunae sp. nov. isolated from estuarine sediment.}, journal = {Archives of microbiology}, volume = {205}, number = {8}, pages = {294}, pmid = {37480395}, issn = {1432-072X}, support = {GAP3195//Ministry of Earth Sciences/ ; }, mesh = {Phylogeny ; RNA, Ribosomal, 16S/genetics ; Agar ; *Alteromonadaceae ; Cardiolipins ; }, abstract = {A novel motile bacterium was isolated from a sediment sample collected in Kochi backwaters, Kerala, India. This bacterium is Gram negative, rod shaped, 1.0-1.5 µm wide, and 2.0-3.0 µm long. It was designated as strain AK27[T]. Colonies were grown on marine agar displayed circular, off-white, shiny, moist, translucent, flat, margin entire, 1-2 mm in diameter. The major fatty acids identified in this strain were C18:1 ω7c, C16:0, and summed in feature 3. 
The composition of polar lipids in the strain AK27[T] included phosphatidylglycerol, phosphatidylethanolamine, diphosphatidylglycerol, one unidentified amino lipid, two unidentified aminophospholipids, two unidentified phospholipids, and six unidentified lipids. The genomic DNA of strain AK27[T] exhibited a G+C content of 56.4 mol%. Based on the analysis of 16S rRNA gene sequence, strain AK27[T] showed sequence similarity to M. ramblicola D7[T] and M. zhoushanense WM3[T] as 98.99% and 98.58%, respectively. Compared to other type strains of the Marinobacterium genus, strain AK27[T] exhibited sequence similarities ranging from 91.7% to 96.4%. When compared to Marinobacterium zhoushanense WM3[T] and Marinobacterium ramblicola D7[T], strain AK27[T] exhibited average nucleotide identity values of 80.25% and 79.97%, and dDDH values of 22.9% and 22.6%, respectively. The genome size of the strain AK27[T] was 4.55 Mb, with 4,229 coding sequences. Based on the observed phenotypic and chemotaxonomic features, and the results of phylogenetic and phylogenomic analysis, this study proposes the classification of strain AK27[T] as a novel species within the genus Marinobacterium. The proposed name for this novel species is Marinobacterium lacunae sp. 
nov.}, } @article {pmid37477947, year = {2023}, author = {Lyu, X and Xia, Y and Wang, C and Zhang, K and Deng, G and Shen, Q and Gao, W and Zhang, M and Liao, N and Ling, J and Bo, Y and Hu, Z and Yang, J and Zhang, M}, title = {Pan-genome analysis sheds light on structural variation-based dissection of agronomic traits in melon crops.}, journal = {Plant physiology}, volume = {193}, number = {2}, pages = {1330-1348}, doi = {10.1093/plphys/kiad405}, pmid = {37477947}, issn = {1532-2548}, support = {2018YFD1000800//National Key Research and Development Program of China/ ; CARS-25-17//Earmarked Fund for China Agriculture Research System/ ; 2021R51007//Special Support Plan for high-level talents of Zhejiang Province/ ; 2021Z057//Major science and technology project of Ningbo City/ ; }, mesh = {Chromosome Mapping ; *Cucurbitaceae/genetics/metabolism ; Genome-Wide Association Study ; Plant Breeding ; Genes, Plant ; *Cucumis melo/genetics ; Fruit/genetics/metabolism ; }, abstract = {Sweetness and appearance of fresh fruits are key palatable and preference attributes for consumers and are often controlled by multiple genes. However, fine-mapping the key loci or genes of interest by single genome-based genetic analysis is challenging. Herein, we present the chromosome-level genome assembly of 1 landrace melon accession (Cucumis melo ssp. agrestis) with wild morphologic features and thus construct a melon pan-genome atlas via integrating sequenced melon genome datasets. Our comparative genomic analysis reveals a total of 3.4 million genetic variations, of which the presence/absence variations (PAVs) are mainly involved in regulating the function of genes for sucrose metabolism during melon domestication and improvement. We further resolved several loci that are accountable for sucrose contents, flesh color, rind stripe, and suture using a structural variation (SV)-based genome-wide association study. 
Furthermore, via bulked segregation analysis (BSA)-seq and map-based cloning, we uncovered that a single gene, (CmPIRL6), determines the edible or inedible characteristics of melon fruit exocarp. These findings provide important melon pan-genome information and provide a powerful toolkit for future pan-genome-informed cultivar breeding of melon.}, } @article {pmid37476668, year = {2023}, author = {Agarwal, V and Stubits, R and Nassrullah, Z and Dillon, MM}, title = {Pangenome insights into the diversification and disease specificity of worldwide Xanthomonas outbreaks.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1213261}, pmid = {37476668}, issn = {1664-302X}, abstract = {The bacterial genus Xanthomonas is responsible for disease outbreaks in several hundred plant species, many of them economically important crops. In the era of next-generation sequencing, thousands of strains from this genus have now been sequenced as part of isolated studies that focus on outbreak characterization, host range, diversity, and virulence factor identification. However, these data have not been synthesized and we lack a comprehensive phylogeny for the genus, with some species designations in public databases still relying on phenotypic similarities and representative sequence typing. The extent of genetic cohesiveness among Xanthomonas strains, the distribution of virulence factors across strains, and the impact of evolutionary history on host range across the genus are also poorly understood. In this study, we present a pangenome analysis of 1,910 diverse Xanthomonas genomes, highlighting their evolutionary relationships, the distribution of virulence-associated genes across strains, and rates of horizontal gene transfer. We find a number of broadly conserved classes of virulence factors and considerable diversity in the Type 3 Secretion Systems (T3SSs) and Type 3 Secreted Effector (T3SE) repertoires of different Xanthomonas species. 
We also use these data to re-assign incorrectly classified strains to phylogenetically informed species designations and find evidence of both monophyletic host specificity and convergent evolution of phylogenetically distant strains to the same host. Finally, we explore the role of recombination in maintaining genetic cohesion within the Xanthomonas genus as a result of both ancestral and recent recombination events. Understanding the evolutionary history of Xanthomonas species and the relationship of key virulence factors with host-specificity provides valuable insight into the mechanisms through which Xanthomonas species shift between hosts and will enable us to develop more robust resistance strategies against these highly virulent pathogens.}, } @article {pmid37474912, year = {2023}, author = {Ortega-Sanz, I and Barbero-Aparicio, JA and Canepa-Oneto, A and Rovira, J and Melero, B}, title = {CamPype: an open-source workflow for automated bacterial whole-genome sequencing analysis focused on Campylobacter.}, journal = {BMC bioinformatics}, volume = {24}, number = {1}, pages = {291}, pmid = {37474912}, issn = {1471-2105}, support = {LCF/PR/PR18/51130007//"la Caixa" Foundation/ ; }, mesh = {*Campylobacter/genetics ; Genome, Bacterial ; Workflow ; Bacteria/genetics ; Genomics ; }, abstract = {BACKGROUND: The rapid expansion of Whole-Genome Sequencing has revolutionized the fields of clinical and food microbiology. However, its implementation as a routine laboratory technique remains challenging due to the growth of data at a faster rate than can be effectively analyzed and critical gaps in bioinformatics knowledge.

RESULTS: To address both issues, CamPype was developed as a new bioinformatics workflow for the genomics analysis of sequencing data of bacteria, especially Campylobacter, which is the main cause of gastroenteritis worldwide making a negative impact on the economy of the public health systems. CamPype allows full customization of stages to run and tools to use, including read quality control filtering, read contamination, reads extension and assembly, bacterial typing, genome annotation, searching for antibiotic resistance genes, virulence genes and plasmids, pangenome construction and identification of nucleotide variants. All results are processed and summarized in an interactive HTML report for best data visualization and interpretation.

CONCLUSIONS: The minimal user intervention of CamPype makes of this workflow an attractive resource for microbiology laboratories with no expertise in bioinformatics as a first line method for bacterial typing and epidemiological analyses, that would help to reduce the costs of disease outbreaks, or for comparative genomic analyses. CamPype is publicly available at https://github.com/JoseBarbero/CamPype .}, } @article {pmid37474911, year = {2023}, author = {Huff, M and Hulse-Kemp, AM and Scheffler, BE and Youngblood, RC and Simpson, SA and Babiker, E and Staton, M}, title = {Long-read, chromosome-scale assembly of Vitis rotundifolia cv. Carlos and its unique resistance to Xylella fastidiosa subsp. fastidiosa.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {409}, pmid = {37474911}, issn = {1471-2164}, support = {6062-21000-010-013//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-013//USDA-ARS/ ; }, mesh = {*Vitis/microbiology ; Disease Resistance/genetics ; *Xylella/genetics ; Chromosomes ; Plant Diseases/genetics/microbiology ; }, abstract = {BACKGROUND: Muscadine grape (Vitis rotundifolia) is resistant to many of the pathogens that negatively impact the production of common grape (V. vinifera), including the bacterial pathogen Xylella fastidiosa subsp. fastidiosa (Xfsf), which causes Pierce's Disease (PD). Previous studies in common grape have indicated Xfsf delays host immune response with a complex O-chain antigen produced by the wzy gene. Muscadine cultivars range from tolerant to completely resistant to Xfsf, but the mechanism is unknown.

RESULTS: We assembled and annotated a new, long-read genome assembly for 'Carlos', a cultivar of muscadine that exhibits tolerance, to build upon the existing genetic resources available for muscadine. We used these resources to construct an initial pan-genome for three cultivars of muscadine and one cultivar of common grape. This pan-genome contains a total of 34,970 synteny-constrained entries containing genes of similar structure. Comparison of resistance gene content between the 'Carlos' and common grape genomes indicates an expansion of resistance (R) genes in 'Carlos.' We further identified genes involved in Xfsf response by transcriptome sequencing 'Carlos' plants inoculated with Xfsf. We observed 234 differentially expressed genes with functions related to lipid catabolism, oxidation-reduction signaling, and abscisic acid (ABA) signaling as well as seven R genes. Leveraging public data from previous experiments of common grape inoculated with Xfsf, we determined that most differentially expressed genes in the muscadine response were not found in common grape, and three of the R genes identified as differentially expressed in muscadine do not have an ortholog in the common grape genome.

CONCLUSIONS: Our results support the utility of a pan-genome approach to identify candidate genes for traits of interest, particularly disease resistance to Xfsf, within and between muscadine and common grape.}, } @article {pmid37465028, year = {2023}, author = {Thieringer, PH and Boyd, ES and Templeton, AS and Spear, JR}, title = {Metapangenomic investigation provides insight into niche differentiation of methanogenic populations from the subsurface serpentinizing environment, Samail Ophiolite, Oman.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1205558}, pmid = {37465028}, issn = {1664-302X}, abstract = {Serpentinization reactions produce highly reduced waters that have hyperalkaline pH and that can have high concentrations of H2 and CH4. Putatively autotrophic methanogenic archaea have been identified in the subsurface waters of the Samail Ophiolite, Sultanate of Oman, though the strategies to overcome hyperalkaline pH and dissolved inorganic carbon limitation remain to be fully understood. Here, we recovered metagenome assembled genomes (MAGs) and applied a metapangenomic approach to three different Methanobacterium populations to assess habitat-specific functional gene distribution. A Type I population was identified in the fluids with neutral pH, while a Type II and "Mixed" population were identified in the most hyperalkaline fluids (pH 11.63). The core genome of all Methanobacterium populations highlighted potential DNA scavenging techniques to overcome phosphate or nitrogen limitation induced by environmental conditions. With particular emphasis on the Mixed and Type II population found in the most hyperalkaline fluids, the accessory genomes unique to each population reflected adaptation mechanisms suggesting lifestyles that minimize niche overlap. 
In addition to previously reported metabolic capability to utilize formate as an electron donor and generate intracellular CO2, the Type II population possessed genes relevant to defense against antimicrobials and assimilating potential osmoprotectants to provide cellular stability. The accessory genome of the Mixed population was enriched in genes for multiple glycosyltransferases suggesting reduced energetic costs by adhering to mineral surfaces or to other microorganisms, and fostering a non-motile lifestyle. These results highlight the niche differentiation of distinct Methanobacterium populations to circumvent the challenges of serpentinization impacted fluids through coexistence strategies, supporting our ability to understand controls on methanogenic lifestyles and adaptations within the serpentinizing subsurface fluids of the Samail Ophiolite.}, } @article {pmid37464310, year = {2023}, author = {Singh, RP and Kumari, K and Sharma, PK and Ma, Y}, title = {Characterization and in-depth genome analysis of a halotolerant probiotic bacterium Paenibacillus sp. S-12, a multifarious bacterium isolated from Rauvolfia serpentina.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {192}, pmid = {37464310}, issn = {1471-2180}, mesh = {*Rauwolfia/genetics ; *Paenibacillus/genetics ; Base Composition ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; DNA, Bacterial/genetics ; Sequence Analysis, DNA ; Fatty Acids ; Soil Microbiology ; }, abstract = {BACKGROUND: Members of Paenibacillus genus from diverse habitats have attracted great attention due to their multifarious properties. Considering that members of this genus are mostly free-living in soil, we characterized the genome of a halotolerant environmental isolate belonging to the genus Paenibacillus. The genome mining unravelled the presence of CAZymes, probiotic, and stress-protected genes that suggested strain S-12 for industrial and agricultural purposes.

RESULTS: Molecular identification by 16S rRNA gene sequencing showed its closest match to other Paenibacillus species. The complete genome size of S-12 was 5.69 Mb, with a GC content of 46.5%. The genome analysis of S-12 unravelled the presence of an open reading frame (ORF) encoding the functions related to environmental stress tolerance, adhesion processes, multidrug efflux systems, and heavy metal resistance. Genome annotation identified the various genes for chemotaxis, flagellar motility, and biofilm production, illustrating its strong colonization ability.

CONCLUSION: The current findings provides the in-depth investigation of a probiotic Paenibacillus bacterium that possessed various genome features that enable the bacterium to survive under diverse conditions. The strain shows the strong ability for probiotic application purposes.}, } @article {pmid37461539, year = {2023}, author = {Steenwyk, JL and Knowles, S and Bastos, RW and Balamurugan, C and Rinker, D and Mead, ME and Roberts, CD and Raja, HA and Li, Y and Colabardini, AC and de Castro, PA and Dos Reis, TF and Canóvas, D and Sanchez, RL and Lagrou, K and Torrado, E and Rodrigues, F and Oberlies, NH and Zhou, X and Goldman, GH and Rokas, A}, title = {Evolutionary origin, population diversity, and diagnostics for a cryptic hybrid pathogen.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37461539}, abstract = {Cryptic fungal pathogens pose significant identification and disease management challenges due to their morphological resemblance to known pathogenic species while harboring genetic and (often) infection-relevant trait differences. The cryptic fungal pathogen Aspergillus latus , an allodiploid hybrid originating from Aspergillus spinulosporus and an unknown close relative of Aspergillus quadrilineatus within section Nidulantes , remains poorly understood. The absence of accurate diagnostics for A. latus has led to misidentifications, hindering epidemiological studies and the design of effective treatment plans. We conducted an in-depth investigation of the genomes and phenotypes of 44 globally distributed isolates (41 clinical isolates and three type strains) from Aspergillus section Nidulantes . We found that 21 clinical isolates were A. latus ; notably, standard methods of pathogen identification misidentified all A. latus isolates. The remaining isolates were identified as A. spinulosporus (8), A. quadrilineatus (1), or A. nidulans (11). Phylogenomic analyses shed light on the origin of A. 
latus , indicating one or two hybridization events gave rise to the species during the Miocene, approximately 15.4 to 8.8 million years ago. Characterizing the A. latus pangenome uncovered substantial genetic diversity within gene families and biosynthetic gene clusters. Transcriptomic analysis revealed that both parental genomes are actively expressed in nearly equal proportions and respond to environmental stimuli. Further investigation into infection-relevant chemical and physiological traits, including drug resistance profiles, growth under oxidative stress conditions, and secondary metabolite biosynthesis, highlight distinct phenotypic profiles of the hybrid A. latus compared to its parental and closely related species. Leveraging our comprehensive genomic and phenotypic analyses, we propose five genomic and phenotypic markers as diagnostics for A. latus species identification. These findings provide valuable insights into the evolutionary origin, genomic outcome, and phenotypic implications of hybridization in a cryptic fungal pathogen, thus enhancing our understanding of the underlying processes contributing to fungal pathogenesis. Furthermore, our study underscores the effectiveness of extensive genomic and phenotypic analyses as a promising approach for developing diagnostics applicable to future investigations of cryptic and emerging pathogens.}, } @article {pmid37460717, year = {2023}, author = {Kumari, K and Sharma, PK and Shikha, S and Singh, RP}, title = {Molecular characterization and in-depth genome analysis of Enterobacter sp. 
S-16.}, journal = {Functional & integrative genomics}, volume = {23}, number = {3}, pages = {245}, pmid = {37460717}, issn = {1438-7948}, mesh = {Humans ; *Enterobacter/genetics ; RNA, Ribosomal, 16S ; *Genome, Bacterial ; Genomics ; Carbohydrates ; Phylogeny ; }, abstract = {Enterobacter species are considered to be an opportunistic human pathogen owing to the existence of antibiotic-resistant strains and drug resides; however, the detailed analysis of the antibiotic resistance and virulence features in environmental isolates is poorly characterized. Here, in the study, we characterized the biochemical characteristics, and genome, pan-genome, and comparative genome analyses of an environmental isolate Enterobacter sp. S-16. The strain was identified as Enterobacter spp. by using 16S rRNA gene sequencing. To unravel genomic features, whole genome of Enterobacter sp. S-16 was sequenced using a hybrid assembly approach and genome assembly was performed using the Unicycler tool. The assembled genome contained the single contig size 5.3 Mbp, GC content 55.43%, and 4500 protein-coding genes. The genome analysis revealed the various gene clusters associated with virulence, antibiotic resistance, type VI secretion system (T6SS), and many stress tolerant genes, which may provide important insight for adapting to changing environment conditions. Moreover, different metabolic pathways were identified that potentially contribute to environmental survival. Various hydrolytic enzymes and motility functions equipped the strain S-16 as an active colonizer. The genome analysis confirms the presence of carbohydrate-active enzymes (CAZymes), and non-enzymatic carbohydrate-binding modules (CBMs) involved in the hydrolysis of complex carbohydrate polymers. Moreover, the pan-genome analysis provides detailed information about the core genes and shared genes with the closest related Enterobacter species. 
The present study is the first report showing the presence of YdhE/NorM in Enterobacter spp. Thus, the elucidation of genome sequencing will increase our understanding of the pathogenic nature of environmental isolate, supporting the One Health Concept.}, } @article {pmid37449094, year = {2023}, author = {Buzzanca, D and Kerkhof, PJ and Alessandria, V and Rantsiou, K and Houf, K}, title = {Arcobacteraceae comparative genome analysis demonstrates genome heterogeneity and reduction in species isolated from animals and associated with human illness.}, journal = {Heliyon}, volume = {9}, number = {7}, pages = {e17652}, pmid = {37449094}, issn = {2405-8440}, abstract = {The Arcobacteraceae family groups Gram-negative bacterial species previously included in the family Campylobacteraceae. These species of which some are considered foodborne pathogens, have been isolated from different environmental niches and hosts. They have been isolated from various types of foods, though predominantly from food of animal origin, as well as from stool of humans with enteritis. Their different abilities to survive in different hosts and environments suggest an evolutionary pressure with consequent variation in their genome content. Moreover, their different physiological and genomic characteristics led to the recent proposal to create new genera within this family, which is however criticized due to the lack of discriminatory features and biological and clinical relevance. Aims of the present study were to assess the Arcobacteraceae pangenome, and to characterize existing similarities and differences in 20 validly described species. For this, analysis has been conducted on the genomes of the corresponding type strains obtained by Illumina sequencing, applying several bioinformatic tools. 
Results of the present study do not support the proposed division into different genera and revealed the presence of pangenome partitions with numbers comparable to other Gram-negative bacteria genera, such as Campylobacter. Different gene class compositions in animal and human-associated species are present, including a higher percentage of virulence-related gene classes such as cell motility genes. The adaptation to environmental and/or host conditions of some species was identified by the presence of specific genes. Furthermore, a division into pathogenic and non-pathogenic species is suggested, which can support future research on food safety and public health.}, } @article {pmid37446042, year = {2023}, author = {Arifuzzaman, M and Jost, M and Wang, M and Chen, X and Perovic, D and Park, RF and Rouse, M and Forrest, K and Hayden, M and Khan, GA and Dracatos, PM}, title = {Mining the Australian Grains Gene Bank for Rust Resistance in Barley.}, journal = {International journal of molecular sciences}, volume = {24}, number = {13}, pages = {}, pmid = {37446042}, issn = {1422-0067}, mesh = {Chromosome Mapping ; *Hordeum/genetics/microbiology ; Disease Resistance/genetics ; Australia ; Phenotype ; *Basidiomycota/genetics ; Plant Diseases/genetics/microbiology ; }, abstract = {Global barley production is threatened by plant pathogens, especially the rusts. In this study we used a targeted genotype-by-sequencing (GBS) assisted GWAS approach to identify rust resistance alleles in a collection of 287 genetically distinct diverse barley landraces and historical cultivars available in the Australian Grains Genebank (AGG) and originally sourced from Eastern Europe. The accessions were challenged with seven US-derived cereal rust pathogen races including Puccinia hordei (Ph-leaf rust) race 17VA12C, P. coronata var. hordei (Pch-crown rust) race 91NE9305 and five pathogenically diverse races of P. striiformis f. sp. 
hordei (Psh-stripe rust) (PSH-33, PSH-48, PSH-54, PSH-72 and PSH-100) and phenotyped quantitatively at the seedling stage. Novel resistance factors were identified on chromosomes 1H, 2H, 4H and 5H in response to Pch, whereas a race-specific QTL on 7HS was identified that was effective only to Psh isolates PSH-72 and PSH-100. A major effect QTL on chromosome 5HL conferred resistance to all Psh races including PSH-72, which is virulent on all 12 stripe rust differential tester lines. The same major effect QTL was also identified in response to leaf rust (17VA12C) suggesting this locus contains several pathogen specific rust resistance genes or the same gene is responsible for both leaf rust and stripe rust resistance. Twelve accessions were highly resistant to both leaf and stripe rust diseases and also carried the 5HL QTL. We subsequently surveyed the physical region at the 5HL locus for across the barley pan genome variation in the presence of known resistance gene candidates and identified a rich source of high confidence protein kinase and antifungal genes in the QTL region.}, } @article {pmid37435610, year = {2023}, author = {Deverka, P and Geary, J and Mathews, C and Cohen, M and Hooker, G and Majumder, M and Skvarkova, Z and Cook-Deegan, R}, title = {Payer reimbursement practices and incentives for improving interpretation of germline genetic testing.}, journal = {Journal of law and the biosciences}, volume = {10}, number = {2}, pages = {lsad020}, doi = {10.1093/jlb/lsad020}, pmid = {37435610}, issn = {2053-9711}, abstract = {Germline genetic testing for inherited cancer risk has shifted to multi-gene panel tests (MGPTs). While MGPTs detect more pathogenic variants, they also detect more variants of uncertain significance (VUSs) that increase the possibility of harms such as unnecessary surgery. Data sharing by laboratories is critical to addressing the VUS problem. 
However, barriers to sharing and an absence of incentives have limited laboratory contributions to the ClinVar database. Payers can play a crucial role in the expansion of knowledge and effectiveness of genetic testing. Current policies affecting MGPT reimbursement are complex and create perverse incentives. Trends in utilization and coverage for private payers and Medicare illustrate opportunities and challenges for data sharing to close knowledge gaps and improve clinical utility. Policy options include making data sharing (i) a condition of payment, and (ii) a metric of laboratory quality in payment contracts, yielding preferred coverage or enhanced reimbursement. Mandating data sharing sufficient to verify interpretations and resolve discordance among labs under Medicare and federal health programs is an option for the US Congress. Such policies can reduce the current waste of valuable data needed for precision oncology and improved patient outcomes, enabling a learning health system.}, } @article {pmid37434713, year = {2023}, author = {Batarseh, TN and Batarseh, SN and Morales-Cruz, A and Gaut, BS}, title = {Comparative genomics of the Liberibacter genus reveals widespread diversity in genomic content and positive selection history.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1206094}, pmid = {37434713}, issn = {1664-302X}, abstract = {'Candidatus Liberibacter' is a group of bacterial species that are obligate intracellular plant pathogens and cause Huanglongbing disease of citrus trees and Zebra Chip in potatoes. Here, we examined the extent of intra- and interspecific genetic diversity across the genus using comparative genomics. Our approach examined a wide set of Liberibacter genome sequences including five pathogenic species and one species not known to cause disease. 
By performing comparative genomics analyses, we sought to understand the evolutionary history of this genus and to identify genes or genome regions that may affect pathogenicity. With a set of 52 genomes, we performed comparative genomics, measured genome rearrangement, and completed statistical tests of positive selection. We explored markers of genetic diversity across the genus, such as average nucleotide identity across the whole genome. These analyses revealed the highest intraspecific diversity amongst the 'Ca. Liberibacter solanacearum' species, which also has the largest plant host range. We identified sets of core and accessory genes across the genus and within each species and measured the ratio of nonsynonymous to synonymous mutations (dN/dS) across genes. We identified ten genes with evidence of a history of positive selection in the Liberibacter genus, including genes in the Tad complex, which have been previously implicated as being highly divergent in the 'Ca. L. capsica' species based on high values of dN.}, } @article {pmid37433982, year = {2023}, author = {Attwaters, M}, title = {A diverse and inclusive human pangenome.}, journal = {Nature reviews. 
Genetics}, volume = {24}, number = {9}, pages = {585}, pmid = {37433982}, issn = {1471-0064}, } @article {pmid37431308, year = {2023}, author = {Amas, JC and Bayer, PE and Hong Tan, W and Tirnaz, S and Thomas, WJW and Edwards, D and Batley, J}, title = {Comparative pangenome analyses provide insights into the evolution of Brassica rapa resistance gene analogues (RGAs).}, journal = {Plant biotechnology journal}, volume = {21}, number = {10}, pages = {2100--2112}, pmid = {37431308}, issn = {1467-7652}, support = {DP200100762//Australian Research Council/ ; DP210100296//Australian Research Council/ ; UWA1905- 006RTX//Grains Research and Development Corporation/ ; }, mesh = {*Brassica rapa/genetics ; Genes, Plant/genetics ; Disease Resistance/genetics ; Leucine ; Plant Breeding ; *Brassica napus/genetics ; }, abstract = {Brassica rapa is grown worldwide as an economically important vegetable and oilseed crop. However, its production is challenged by yield-limiting pathogens. The sustainable control of these pathogens mainly relies on the deployment of genetic resistance primarily driven by resistance gene analogues (RGAs). While several studies have identified RGAs in B. rapa, these were mainly based on a single genome reference and do not represent the full range of RGA diversity in B. rapa. In this study, we utilized the B. rapa pangenome, constructed from 71 lines encompassing 12 morphotypes, to describe a comprehensive repertoire of RGAs in B. rapa. We show that 309 RGAs were affected by presence-absence variation (PAV) and 223 RGAs were missing from the reference genome. The transmembrane leucine-rich repeat (TM-LRR) RGA class had more core gene types than variable genes, while the opposite was observed for nucleotide-binding site leucine-rich repeats (NLRs). Comparative analysis with the B. napus pangenome revealed significant RGA conservation (93%) between the two species. We identified 138 candidate RGAs located within known B. 
rapa disease resistance QTL, of which the majority were under negative selection. Using blackleg gene homologues, we demonstrated how these genes in B. napus were derived from B. rapa. This further clarifies the genetic relationship of these loci, which may be useful in narrowing-down candidate blackleg resistance genes. This study provides a novel genomic resource towards the identification of candidate genes for breeding disease resistance in B. rapa and its relatives.}, } @article {pmid37430957, year = {2022}, author = {Rani, A and Dike, CC and Mantri, N and Ball, A}, title = {Point-of-Care Lateral Flow Detection of Viable Escherichia coli O157:H7 Using an Improved Propidium Monoazide-Recombinase Polymerase Amplification Method.}, journal = {Foods (Basel, Switzerland)}, volume = {11}, number = {20}, pages = {}, pmid = {37430957}, issn = {2304-8158}, abstract = {The detection of both viable and viable but non-culturable (VBNC) Escherichia coli O157:H7 is a crucial part of food safety. Traditional culture-dependent methods are lengthy, expensive, laborious, and unable to detect VBNC. Hence, there is a need to develop a rapid, simple, and cost-effective detection method to differentiate between viable/dead E. coli O157:H7 and detect VBNC cells. In this work, recombinase polymerase amplification (RPA) was developed for the detection of viable E. coli O157:H7 through integration with propidium monoazide (PMAxx). Initially, two primer sets, targeting two different genes (rfbE and stx) were selected, and DNA amplification by RPA combined with PMAxx treatment and the lateral flow assay (LFA) was carried out. Subsequently, the rfbE gene target was found to be more effective in inhibiting the amplification from dead cells and detecting only viable E. coli O157:H7. The assay's detection limit was found to be 10[2] CFU/mL for VBNC E. coli O157:H7 when applied to spiked commercial beverages including milk, apple juice, and drinking water. 
pH values from 3 to 11 showed no significant effect on the efficacy of the assay. The PMAxx-RPA-LFA was completed at 39 °C within 40 min. This study introduces a rapid, robust, reliable, and reproducible method for detecting viable bacterial counts. In conclusion, the optimised assay has the potential to be used by the food and beverage industry in quality assurance related to E. coli O157:H7.}, } @article {pmid37429841, year = {2023}, author = {Tisza, MJ and Smith, DDN and Clark, AE and Youn, JH and {NISC Comparative Sequencing Program} and Khil, PP and Dekker, JP}, title = {Roving methyltransferases generate a mosaic epigenetic landscape and influence evolution in Bacteroides fragilis group.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {4082}, pmid = {37429841}, issn = {2041-1723}, mesh = {Humans ; *Methyltransferases/genetics ; Bacteroides fragilis/genetics ; Epigenomics ; DNA Methylation/genetics ; *Bacteriophages/genetics ; Bacteroides ; Epigenesis, Genetic ; }, abstract = {Three types of DNA methyl modifications have been detected in bacterial genomes, and mechanistic studies have demonstrated roles for DNA methylation in physiological functions ranging from phage defense to transcriptional control of virulence and host-pathogen interactions. Despite the ubiquity of methyltransferases and the immense variety of possible methylation patterns, epigenomic diversity remains unexplored for most bacterial species. Members of the Bacteroides fragilis group (BFG) reside in the human gastrointestinal tract as key players in symbiotic communities but also can establish anaerobic infections that are increasingly multi-drug resistant. In this work, we utilize long-read sequencing technologies to perform pangenomic (n = 383) and panepigenomic (n = 268) analysis of clinical BFG isolates cultured from infections seen at the NIH Clinical Center over four decades. 
Our analysis reveals that single BFG species harbor hundreds of DNA methylation motifs, with most individual motif combinations occurring uniquely in single isolates, implying immense unsampled methylation diversity within BFG epigenomes. Mining of BFG genomes identified more than 6000 methyltransferase genes, approximately 1000 of which were associated with intact prophages. Network analysis revealed substantial gene flow among disparate phage genomes, implying a role for genetic exchange between BFG phages as one of the ultimate sources driving BFG epigenome diversity.}, } @article {pmid37424551, year = {2023}, author = {Narayanan, S and Couger, B and Bates, H and Gupta, SK and Malayer, J and Ramachandran, A}, title = {Characterization of three Francisella tularensis genomes from Oklahoma, USA.}, journal = {Access microbiology}, volume = {5}, number = {6}, pages = {acmi000451}, pmid = {37424551}, issn = {2516-8290}, abstract = {Francisella tularensis , the causative agent for tularaemia, is a Tier 1 select agent, and a pan-species pathogen of global significance due to its zoonotic potential. Consistent genome characterization of the pathogen is essential to identify novel genes, virulence factors, antimicrobial resistance genes, for studying phylogenetics and other features of interest. This study was conducted to understand the genetic variations among genomes of F. tularensis isolated from two felines and one human source. Pan-genome analysis revealed that 97.7 % of genes were part of the core genome. All three F. tularensis isolates were assigned to sequence type A based on single nucleotide polymorphisms (SNPs) in sdhA. Most of the virulence genes were part of the core genome. An antibiotic resistance gene coding for class A beta-lactamase was detected in all three isolates. Phylogenetic analysis showed that these isolates clustered with other isolates reported from Central and South-Central USA. Assessment of large sets of the F. 
tularensis genome sequences is essential in understanding pathogen dynamics, geographical distribution and potential zoonotic implications.}, } @article {pmid37423939, year = {2023}, author = {Priyamvada, P and Ramaiah, S}, title = {Pan-genome and reverse vaccinology approaches to design multi-epitope vaccine against Epstein-Barr virus associated with colorectal cancer.}, journal = {Immunologic research}, volume = {}, number = {}, pages = {}, pmid = {37423939}, issn = {1559-0755}, abstract = {Epstein-Barr virus (EBV) is a global lymphotropic virus and has been associated with various malignancies, among which colorectal cancer (CRC) is the prevalent one causing mortality worldwide. In the recent past, numerous research efforts have been made to develop a potential vaccine against this virus; however, none is effective possibly due to their low throughput, laboriousness, and lack of sensitivity. In this study, we designed a multi-epitope subunit vaccine that targets latent membrane protein (LMP-2B) of EBV using pan-genome and reverse vaccinology approaches. Twenty-three major histocompatibility complex (MHC) epitopes (five class-I and eighteen class-II) and eight B-cell epitopes, which have been found to be antigenic, immunogenic, and non-toxic, were selected for the vaccine construction. Furthermore, 24 vaccine constructs (VCs) were designed from the predicted epitopes and out of which VC1 was selected and finalized based on its structural parameters. The functionality of VC1 was validated through molecular docking with different immune receptors (MHC class-I, MHC class-II, and TLRs). The binding affinity, molecular and immune simulation revealed that the VC1 had more stable interaction and is believed to elicit good immune responses against EBV. HIGHLIGHTS: Pan-genome and reverse vaccinology approaches were used to design a multi-epitope subunit vaccine against LMP-2B protein of EBV. 
Epitopes were selected based on the antigenic, immunogenic, and non-toxic properties. Twenty-four vaccine constructs (VCs) were designed from the predicted epitopes. Designed vaccine VC1 has shown good binding affinity and molecular and immune simulation. VC1 was validated using molecular docking with different immune receptors.}, } @article {pmid37409285, year = {2023}, author = {Luo, M and Sarnowski, TJ and Libault, M and Ríos, G and Charron, JB and Mantri, N and Zhang, S}, title = {Editorial: New insights into mechanisms of epigenetic modifiers in plant growth and development, volume II.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1213511}, pmid = {37409285}, issn = {1664-462X}, } @article {pmid37408640, year = {2023}, author = {Kumari, K and Rawat, V and Shadan, A and Sharma, PK and Deb, S and Singh, RP}, title = {In-depth genome and pan-genome analysis of a metal-resistant bacterium Pseudomonas parafulva OS-1.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1140249}, pmid = {37408640}, issn = {1664-302X}, abstract = {A metal-resistant bacterium Pseudomonas parafulva OS-1 was isolated from waste-contaminated soil in Ranchi City, India. The isolated strain OS-1 showed its growth at 25-45°C, pH 5.0-9.0, and in the presence of ZnSO4 (up to 5 mM). Phylogenetic analysis based on 16S rRNA gene sequences revealed that strain OS-1 belonged to the genus Pseudomonas and was most closely related to parafulva species. To unravel the genomic features, we sequenced the complete genome of P. parafulva OS-1 using Illumina HiSeq 4000 sequencing platform. The results of average nucleotide identity (ANI) analysis indicated the closest similarity of OS-1 to P. parafulva PRS09-11288 and P. parafulva DTSP2. The metabolic potential of P. 
parafulva OS-1 based on Clusters of Orthologous Genes (COG) and Kyoto Encyclopedia of Genes and Genomes (KEGG) indicated a high number of genes related to stress protection, metal resistance, and multiple drug-efflux, etc., which is relatively rare in P. parafulva strains. Compared with other parafulva strains, P. parafulva OS-1 was found to have the unique β-lactam resistance and type VI secretion system (T6SS) gene. Additionally, its genomes encode various CAZymes such as glycoside hydrolases and other genes associated with lignocellulose breakdown, suggesting that strain OS-1 has strong biomass degradation potential. The presence of genomic complexity in the OS-1 genome indicates that horizontal gene transfer (HGT) might happen during evolution. Therefore, genomic and comparative genome analysis of parafulva strains is valuable for further understanding the mechanism of resistance to metal stress and opens a perspective to exploit a newly isolated bacterium for biotechnological applications.}, } @article {pmid37401440, year = {2023}, author = {Glick, L and Mayrose, I}, title = {The Effect of Methodological Considerations on the Construction of Gene-Based Plant Pan-genomes.}, journal = {Genome biology and evolution}, volume = {15}, number = {7}, pages = {}, pmid = {37401440}, issn = {1759-6653}, support = {//Edmond J. Safra Center for Bioinformatics at Tel-Aviv University/ ; US-5089-18//BARD US-Israel Agricultural Research and Development Fund/ ; }, mesh = {*Genomics/methods ; Genome, Plant ; Sequence Analysis, DNA ; Molecular Sequence Annotation ; Plants/genetics ; *Arabidopsis/genetics ; }, abstract = {Pan-genomics is an emerging approach for studying the genetic diversity within plant populations. 
In contrast to common resequencing studies that compare whole genome sequencing data with a single reference genome, the construction of a pan-genome (PG) involves the direct comparison of multiple genomes to one another, thereby enabling the detection of genomic sequences and genes not present in the reference, as well as the analysis of gene content diversity. Although multiple studies describing PGs of various plant species have been published in recent years, a better understanding regarding the effect of the computational procedures used for PG construction could guide researchers in making more informed methodological decisions. Here, we examine the effect of several key methodological factors on the obtained gene pool and on gene presence-absence detections by constructing and comparing multiple PGs of Arabidopsis thaliana and cultivated soybean, as well as conducting a meta-analysis on published PGs. These factors include the construction method, the sequencing depth, and the extent of input data used for gene annotation. We observe substantial differences between PGs constructed using three common procedures (de novo assembly and annotation, map-to-pan, and iterative assembly) and that results are dependent on the extent of the input data. Specifically, we report low agreement between the gene content inferred using different procedures and input data. 
Our results should increase the awareness of the community to the consequences of methodological decisions made during the process of PG construction and emphasize the need for further investigation of commonly applied methodologies.}, } @article {pmid37397999, year = {2023}, author = {Raghuram, V and Gunoskey, JJ and Hofstetter, KS and Jacko, NF and Shumaker, MJ and Hu, YJ and Read, TD and David, MZ}, title = {Comparison of genomic diversity between single and pooled Staphylococcus aureus colonies isolated from human colonisation cultures.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37397999}, support = {R01 AI139188/AI/NIAID NIH HHS/United States ; R01 AI158452/AI/NIAID NIH HHS/United States ; }, abstract = {The most common approach to sampling the bacterial populations within an infected or colonised host is to sequence genomes from a single colony obtained from a culture plate. However, it is recognized that this method does not capture the genetic diversity in the population. An alternative is to sequence a mixture containing multiple colonies ("pool-seq"), but this has the disadvantage that it is a non-homogeneous sample, making it difficult to perform specific experiments. We compared differences in measures of genetic diversity between eight single-colony isolates (singles) and pool-seq on a set of 2286 S. aureus culture samples. The samples were obtained by swabbing three body sites on 85 human participants quarterly for a year, who initially presented with a methicillin-resistant S. aureus skin and soft-tissue infection (SSTI). We compared parameters such as sequence quality, contamination, allele frequency, nucleotide diversity and pangenome diversity in each pool to the corresponding singles. Comparing singles from the same culture plate, we found that 18% of sample collections contained mixtures of multiple Multilocus sequence types (MLSTs or STs). 
We showed that pool-seq data alone could predict the presence of multi-ST populations with 95% accuracy. We also showed that pool-seq could be used to estimate the number of polymorphic sites in the population. Additionally, we found that the pool may contain clinically relevant genes such as antimicrobial resistance markers that may be missed when only examining singles. These results highlight the potential advantage of analysing genome sequences of total populations obtained from clinical cultures rather than single colonies.}, } @article {pmid37396358, year = {2023}, author = {Simpson, AC and Eedara, VVR and Singh, NK and Damle, N and Parker, CW and Karouia, F and Mason, CE and Venkateswaran, K}, title = {Comparative genomic analysis of Cohnella hashimotonis sp. nov. isolated from the International Space Station.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1166013}, pmid = {37396358}, issn = {1664-302X}, abstract = {A single strain from the family Paenibacillaceae was isolated from the wall behind the Waste Hygiene Compartment aboard the International Space Station (ISS) in April 2018, as part of the Microbial Tracking mission series. This strain was identified as a gram-positive, rod-shaped, oxidase-positive, catalase-negative motile bacterium in the genus Cohnella, designated as F6_2S_P_1[T]. The 16S sequence of the F6_2S_P_1[T] strain places it in a clade with C. rhizosphaerae and C. ginsengisoli, which were originally isolated from plant tissue or rhizosphere environments. The closest 16S and gyrB matches to strain F6_2S_P_1[T] are to C. rhizosphaerae with 98.84 and 93.99% sequence similarity, while a core single-copy gene phylogeny from all publicly available Cohnella genomes places it as more closely related to C. ginsengisoli. Average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values to any described Cohnella species are <89 and <22%, respectively. 
The major fatty acids for strain F6_2S_P_1[T] are anteiso-C15:0 (51.7%), iso-C16:0 (23.1%), and iso-C15:0 (10.5%), and it is able to metabolize a wide range of carbon compounds. Given the results of the ANI and dDDH analyses, this ISS strain is a novel species within the genus Cohnella for which we propose the name Cohnella hashimotonis, with the type strain F6_2S_P_1[T] (=NRRL B-65657[T] and DSMZ 115098[T]). Because no closely related Cohnella genomes were available, this study generated the whole-genome sequences (WGSs) of the type strains for C. rhizosphaerae and C. ginsengisoli. Phylogenetic and pangenomic analysis reveals that F6_2S_P_1[T], C. rhizosphaerae, and C. ginsengisoli, along with two uncharacterized Cohnella strains, possess a shared set of 332 gene clusters which are not shared with any other WGS of Cohnella species, and form a distinct clade branching off from C. nanjingensis. Functional traits were predicted for the genomes of strain F6_2S_P_1[T] and other members of this clade.}, } @article {pmid37395662, year = {2023}, author = {Moreno, E and Middlebrook, EA and Altamirano-Silva, P and Al Dahouk, S and Araj, GF and Arce-Gorvel, V and Arenas-Gamboa, Á and Ariza, J and Barquero-Calvo, E and Battelli, G and Bertu, WJ and Blasco, JM and Bosilkovski, M and Cadmus, S and Caswell, CC and Celli, J and Chacón-Díaz, C and Chaves-Olarte, E and Comerci, DJ and Conde-Álvarez, R and Cook, E and Cravero, S and Dadar, M and De Boelle, X and De Massis, F and Díaz, R and Escobar, GI and Fernández-Lago, L and Ficht, TA and Foster, JT and Garin-Bastuji, B and Godfroid, J and Gorvel, JP and Güler, L and Erdenliğ-Gürbilek, S and Gusi, AM and Guzmán-Verri, C and Hai, J and Hernández-Mora, G and Iriarte, M and Jacob, NR and Keriel, A and Khames, M and Köhler, S and Letesson, JJ and Loperena-Barber, M and López-Goñi, I and McGiven, J and Melzer, F and Mora-Cartin, R and Moran-Gilad, J and Muñoz, PM and Neubauer, H and O'Callaghan, D and Ocholi, R and Oñate, Á and 
Pandey, P and Pappas, G and Pembroke, JT and Roop, M and Ruiz-Villalonos, N and Ryan, MP and Salcedo, SP and Salvador-Bescós, M and Sangari, FJ and de Lima Santos, R and Seimenis, A and Splitter, G and Suárez-Esquivel, M and Tabbaa, D and Trangoni, MD and Tsolis, RM and Vizcaíno, N and Wareth, G and Welburn, SC and Whatmore, A and Zúñiga-Ripa, A and Moriyón, I}, title = {If You're Not Confused, You're Not Paying Attention: Ochrobactrum Is Not Brucella.}, journal = {Journal of clinical microbiology}, volume = {61}, number = {8}, pages = {e0043823}, pmid = {37395662}, issn = {1098-660X}, support = {001/WHO_/World Health Organization/International ; R01 AI158372/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Humans ; *Brucella/genetics ; *Ochrobactrum/genetics ; Phylogeny ; *Brucellosis/diagnosis/microbiology ; Attention ; Confusion ; }, abstract = {Bacteria of the genus Brucella are facultative intracellular parasites that cause brucellosis, a severe animal and human disease. Recently, a group of taxonomists merged the brucellae with the primarily free-living, phylogenetically related Ochrobactrum spp. in the genus Brucella. This change, founded only on global genomic analysis and the fortuitous isolation of some opportunistic Ochrobactrum spp. from medically compromised patients, has been automatically included in culture collections and databases. 
We argue that clinical and environmental microbiologists should not accept this nomenclature, and we advise against its use because (i) it was presented without in-depth phylogenetic analyses and did not consider alternative taxonomic solutions; (ii) it was launched without the input of experts in brucellosis or Ochrobactrum; (iii) it applies a non-consensus genus concept that disregards taxonomically relevant differences in structure, physiology, population structure, core-pangenome assemblies, genome structure, genomic traits, clinical features, treatment, prevention, diagnosis, genus description rules, and, above all, pathogenicity; and (iv) placing these two bacterial groups in the same genus creates risks for veterinarians, medical doctors, clinical laboratories, health authorities, and legislators who deal with brucellosis, a disease that is particularly relevant in low- and middle-income countries. Based on all this information, we urge microbiologists, bacterial collections, genomic databases, journals, and public health boards to keep the Brucella and Ochrobactrum genera separate to avoid further bewilderment and harm.}, } @article {pmid37395647, year = {2023}, author = {Queiroz, VF and Carvalho, JVRP and de Souza, FG and Lima, MT and Santos, JD and Rocha, KLS and de Oliveira, DB and Araújo, JP and Ullmann, LS and Rodrigues, RAL and Abrahão, JS}, title = {Analysis of the Genomic Features and Evolutionary History of Pithovirus-Like Isolates Reveals Two Major Divergent Groups of Viruses.}, journal = {Journal of virology}, volume = {97}, number = {7}, pages = {e0041123}, pmid = {37395647}, issn = {1098-5514}, mesh = {Humans ; *Genome, Viral/genetics ; Genomics ; *Giant Viruses/classification/genetics ; *Phylogeny ; Genetic Variation ; Evolution, Molecular ; }, abstract = {New representatives of the phylum Nucleocytoviricota have been rapidly described in the last decade. 
Despite this, not all viruses of this phylum are allocated to recognized taxonomic families, as is the case for orpheovirus, pithovirus, and cedratvirus, which form the proposed family Pithoviridae. In this study, we performed comprehensive comparative genomic analyses of 8 pithovirus-like isolates, aiming to understand their common traits and evolutionary history. Structural and functional genome annotation was performed de novo for all the viruses, which served as a reference for pangenome construction. The synteny analysis showed substantial differences in genome organization between these viruses, with very few and short syntenic blocks shared between orpheovirus and its relatives. It was possible to observe an open pangenome with a significant increase in the slope when orpheovirus was added, alongside a decrease in the core genome. Network analysis placed orpheovirus as a distant and major hub with a large fraction of unique clusters of orthologs, indicating a distant relationship between this virus and its relatives, with only a few shared genes. Additionally, phylogenetic analyses of strict core genes shared with other viruses of the phylum reinforced the divergence of orpheovirus from pithoviruses and cedratviruses. Altogether, our results indicate that although pithovirus-like isolates share common features, this group of ovoid-shaped giant viruses presents substantial differences in gene contents, genomic architectures, and the phylogenetic history of several core genes. Our data indicate that orpheovirus is an evolutionarily divergent viral entity, suggesting its allocation to a different viral family, Orpheoviridae. IMPORTANCE Giant viruses that infect amoebae form a monophyletic group named the phylum Nucleocytoviricota. Despite being genomically and morphologically very diverse, the taxonomic categories of some clades that form this phylum are not yet well established. 
With advances in isolation techniques, the speed at which new giant viruses are described has increased, escalating the need to establish criteria to define the emerging viral taxa. In this work, we performed a comparative genomic analysis of representatives of the putative family Pithoviridae. Based on the dissimilarity of orpheovirus from the other viruses of this putative family, we propose that orpheovirus be considered a member of an independent family, Orpheoviridae, and suggest criteria to demarcate families consisting of ovoid-shaped giant viruses.}, } @article {pmid37395521, year = {2023}, author = {Meng, PQ and Zhang, Q and Ding, Y and Lin, JX and Chen, F}, title = {Evolutionary and Pan-genome Analysis of Three Important Black-pigmented Periodontal Pathogens.}, journal = {The Chinese journal of dental research}, volume = {26}, number = {2}, pages = {93--104}, doi = {10.3290/j.cjdr.b4128023}, pmid = {37395521}, issn = {1867-5646}, mesh = {*Prevotella/genetics/metabolism ; Phylogeny ; Prevotella intermedia/genetics/metabolism ; *Porphyromonas gingivalis/genetics/metabolism ; Prevotella nigrescens/genetics ; }, abstract = {OBJECTIVE: To analyse the pan-genome of three black-pigmented periodontal pathogens: Porphyromonas gingivalis, Prevotella intermedia and Prevotella nigrescens.

METHODS: Pan-genome analyses of 66, 33 and 5 publicly available whole-genome sequences of P. gingivalis, P. intermedia and P. nigrescens, respectively, were performed using Pan-genome Analysis Pipeline software (version 1.2.1; Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, PR China). Phylogenetic trees were constructed based on the entire pan-genome and single nucleotide polymorphisms within the core genome. The distribution and abundance of virulence genes in the core and dispensable genomes were also compared in the three species.

RESULTS: All three species possess an open pan-genome. The core genome of P. gingivalis, P. intermedia and P. nigrescens included 1001, 1514 and 1745 orthologous groups, respectively, which were mainly related to basic cellular functions such as metabolism. The dispensable genome of P. gingivalis, P. intermedia and P. nigrescens was composed of 2814, 2689 and 906 orthologous groups, respectively, and it was enriched in genes involved in pathogenicity or with unknown functions. Phylogenetic trees presented a clear separation of P. gingivalis, P. intermedia and P. nigrescens, verifying the reclassification of the black-pigmented species. Furthermore, the three species shared almost the same virulence factors involved in adhesion, proteolysis and evasion of host defences. Some of these virulence genes were conserved across species whereas others belonged to the dispensable genome, which might be acquired through horizontal gene transfer.

CONCLUSION: This study highlighted the usefulness of pan-genome analysis to infer evolutionary cues for black-pigmented species, indicating their homology and phylogenomic diversity.}, } @article {pmid37393724, year = {2023}, author = {He, Y and Pan, J and Huang, D and Sanford, RA and Peng, S and Wei, N and Sun, W and Shi, L and Jiang, Z and Jiang, Y and Hu, Y and Li, S and Li, Y and Li, M and Dong, Y}, title = {Distinct microbial structure and metabolic potential shaped by significant environmental gradient impacted by ferrous slag weathering.}, journal = {Environment international}, volume = {178}, number = {}, pages = {108067}, doi = {10.1016/j.envint.2023.108067}, pmid = {37393724}, issn = {1873-6750}, mesh = {Humans ; *Bacteria/genetics ; Metagenome ; *Microbiota ; Weather ; Carbon/metabolism ; }, abstract = {Alkaline ferrous slags pose global environmental issues and long-term risks to ambient environments. To explore the under-investigated microbial structure and biogeochemistry in such unique ecosystems, combined geochemical, microbial, ecological and metagenomic analyses were performed in the areas adjacent to a ferrous slag disposal plant in Sichuan, China. Different levels of exposure to ultrabasic slag leachate had resulted in a significant geochemical gradient of pH (8.0-12.4), electric potential (-126.9 to 437.9 mV), total organic carbon (TOC, 1.5-17.3 mg/L), and total nitrogen (TN, 0.17-1.01 mg/L). Distinct microbial communities were observed depending on their exposure to the strongly alkaline leachate. High pH and Ca[2+] concentrations were associated with low microbial diversity and enrichment of bacterial classes Gamma-proteobacteria and Deinococci in the microbial communities exposed to the leachate. Combined metagenomic analyses of 4 leachate-unimpacted and 2-impacted microbial communities led to the assembly of one Serpentinomonas pangenome and 81 phylogenetically diversified metagenome assembled genomes (MAGs). 
The prevailing taxa in the leachate-impacted habitats (e.g., Serpentinomonas and Meiothermus spp.) were phylogenetically related to those in active serpentinizing ecosystems, suggesting the analogous processes between the man-made and natural systems. More importantly, they accounted for significant abundance of most functional genes associated with environmental adaptation and major element cycling. Their metabolic potential (e.g., cation/H[+] antiporters, carbon fixation on lithospheric carbon source, and respiration coupling sulfur oxidization and oxygen or nitrate reduction) may support these taxa to survive and prosper in these unique geochemical niches. This study provides fundamental understandings of the adaptive strategies of microorganisms in response to the strong environmental perturbation by alkali tailings. It also contributes to a better comprehension of how to remediate environments affected by alkaline industrial material.}, } @article {pmid37389215, year = {2023}, author = {Conte, AL and Brunetti, F and Marazzato, M and Longhi, C and Maurizi, L and Raponi, G and Palamara, AT and Grassi, S and Conte, MP}, title = {Atopic dermatitis-derived {Staphylococcus} aureus strains: what makes them special in the interplay with the host.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1194254}, pmid = {37389215}, issn = {2235-2988}, mesh = {Humans ; *Dermatitis, Atopic ; Staphylococcus aureus/genetics ; Multilocus Sequence Typing ; Genotype ; Skin ; }, abstract = {BACKGROUND: Atopic dermatitis (AD) is a chronic inflammatory skin condition whose pathogenesis involves genetic predisposition, epidermal barrier dysfunction, alterations in the immune responses and microbial dysbiosis. Clinical studies have shown a link between Staphylococcus aureus and the pathogenesis of AD, although the origins and genetic diversity of S. aureus colonizing patients with AD is poorly understood. 
The aim of the study was to investigate if specific clones might be associated with the disease.

METHODS: WGS analyses were performed on 38 S. aureus strains, deriving from AD patients and healthy carriers. Genotypes (i.e. MLST, spa-, agr- and SCCmec-typing), genomic content (e.g. virulome and resistome), and the pan-genome structure of strains have been investigated. Phenotypic analyses were performed to determine the antibiotic susceptibility, the biofilm production and the invasiveness within the investigated S. aureus population.

RESULTS: Strains isolated from AD patients revealed a high degree of genetic heterogeneity and a shared set of virulence factors and antimicrobial resistance genes, suggesting that no genotype and genomic content are uniquely associated with AD. The same strains were characterized by a lower variability in terms of gene content, indicating that the inflammatory conditions could exert a selective pressure leading to the optimization of the gene repertoire. Furthermore, genes related to specific mechanisms, like post-translational modification, protein turnover and chaperones as well as intracellular trafficking, secretion and vesicular transport, were significantly more enriched in AD strains. Phenotypic analysis revealed that all of our AD strains were strong or moderate biofilm producers, while less than half showed invasive capabilities.

CONCLUSIONS: We conclude that in AD skin, the functional role played by S. aureus may depend on differential gene expression patterns and/or on post-translational modification mechanisms rather than being associated with peculiar genetic features.}, } @article {pmid37386186, year = {2023}, author = {Ahsan, MU and Liu, Q and Perdomo, JE and Fang, L and Wang, K}, title = {A survey of algorithms for the detection of genomic structural variants from long-read sequencing data.}, journal = {Nature methods}, volume = {20}, number = {8}, pages = {1143--1158}, pmid = {37386186}, issn = {1548-7105}, support = {GM132713/GM/NIGMS NIH HHS/United States ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Algorithms ; *Genome ; Genomic Structural Variation ; High-Throughput Nucleotide Sequencing/methods ; Genome, Human ; }, abstract = {As long-read sequencing technologies are becoming increasingly popular, a number of methods have been developed for the discovery and analysis of structural variants (SVs) from long reads. Long reads enable detection of SVs that could not be previously detected from short-read sequencing, but computational methods must adapt to the unique challenges and opportunities presented by long-read sequencing. 
Here, we summarize over 50 long-read-based methods for SV detection, genotyping and visualization, and discuss how new telomere-to-telomere genome assemblies and pangenome efforts can improve the accuracy and drive the development of SV callers in the future.}, } @article {pmid37382545, year = {2023}, author = {Chen, X and Zhang, H and Feng, J and Zhang, L and Zheng, M and Luo, H and Zhuo, H and Xu, N and Zhang, X and Chen, C and Qu, P and Li, Y}, title = {Comparative Genomic Analysis Reveals Genetic Diversity and Pathogenic Potential of {Haemophilus} seminalis and Emended Description of {Haemophilus} seminalis.}, journal = {Microbiology spectrum}, volume = {11}, number = {4}, pages = {e0477222}, pmid = {37382545}, issn = {2165-0497}, mesh = {Humans ; RNA, Ribosomal, 16S/genetics ; *Hemin ; *Haemophilus/genetics ; Haemophilus influenzae ; Genomics ; Phylogeny ; Genetic Variation ; }, abstract = {Haemophilus seminalis is a newly proposed species that is phylogenetically related to Haemophilus haemolyticus. The distribution of H. seminalis in the human population, its genomic diversity, and its pathogenic potential are still unclear. This study reports the finding of our comparative genomic analyses of four newly isolated Haemophilus strains (SZY H8, SZY H35, SZY H36, and SZY H68) from human sputum specimens (Guangzhou, China) along with the publicly available genomes of other phylogenetically related Haemophilus species. Based on pairwise comparisons of the 16S rRNA gene sequences, the four isolates showed <98.65% sequence identity to the type strains of all known Haemophilus species but were identified as belonging to H. seminalis, based on comparable phenotypic and genotypic features. Additionally, the four isolates showed high genome-genome relatedness indices (>95% ANI values) with 17 strains that were previously identified as either "Haemophilus intermedius" or hemin (X-factor)-independent H. haemolyticus and therefore required a more detailed classification study. 
Phylogenetically, these isolates, along with the two previously described H. seminalis isolates (a total of 23 isolates), shared a highly homologous lineage that is distinct from the clades of the main H. haemolyticus and Haemophilus influenzae strains. These isolates present an open pangenome with multiple virulence genes. Notably, all 23 isolates have a functional heme biosynthesis pathway that is similar to that of Haemophilus parainfluenzae. The phenotype of hemin (X-factor) independence and the analysis of the ispD, pepG, and moeA genes can be used to distinguish these isolates from H. haemolyticus and H. influenzae. Based on the above findings, we propose a reclassification for all "H. intermedius" and two H. haemolyticus isolates belonging to H. seminalis with an emended description of H. seminalis. This study provides a more accurate identification of Haemophilus isolates for use in the clinical laboratory and a better understanding of the clinical significance and genetic diversity in human environments. IMPORTANCE As a versatile opportunistic pathogen, the accurate identification of Haemophilus species is a challenge in clinical practice. In this study, we characterized the phenotypic and genotypic features of four H. seminalis strains that were isolated from human sputum specimens and propose the "H. intermedius" and hemin (X-factor)-independent H. haemolyticus isolates as belonging to H. seminalis. The prediction of virulence-related genes indicates that H. seminalis isolates carry several virulence genes that are likely to play an important role in its pathogenicity. In addition, we depict that the genes ispD, pepG, and moeA can be used as biomarkers for distinguishing H. seminalis from H. haemolyticus and H. influenzae. Our findings provide some insights into the identification, epidemiology, genetic diversity, pathogenic potential, and antimicrobial resistance of the newly proposed H. 
seminalis.}, } @article {pmid37382302, year = {2023}, author = {Puente-Sánchez, F and Hoetzinger, M and Buck, M and Bertilsson, S}, title = {Exploring environmental intra-species diversity through non-redundant pangenome assemblies.}, journal = {Molecular ecology resources}, volume = {23}, number = {7}, pages = {1724--1736}, doi = {10.1111/1755-0998.13826}, pmid = {37382302}, issn = {1755-0998}, support = {892961//H2020 Marie Skłodowska-Curie Actions/ ; 2019-02336//Svenska Forskningsrådet Formas/ ; 2017-04422//Vetenskapsrådet/ ; 2018-05973//Vetenskapsrådet/ ; }, mesh = {Phylogeny ; *Bacteria/genetics ; Metagenome ; Algorithms ; *Microbiota ; Metagenomics/methods ; }, abstract = {At the genome level, microorganisms are highly adaptable both in terms of allele and gene composition. Such heritable traits emerge in response to different environmental niches and can have a profound influence on microbial community dynamics. As a consequence, any individual genome or population will contain merely a fraction of the total genetic diversity of any operationally defined "species", whose ecological potential can thus be only fully understood by studying all of their genomes and the genes therein. This concept, known as the pangenome, is valuable for studying microbial ecology and evolution, as it partitions genomes into core (present in all the genomes from a species, and responsible for housekeeping and species-level niche adaptation among others) and accessory regions (present only in some, and responsible for intra-species differentiation). Here we present SuperPang, an algorithm producing pangenome assemblies from a set of input genomes of varying quality, including metagenome-assembled genomes (MAGs). SuperPang runs in linear time and its results are complete, non-redundant, preserve gene ordering and contain both coding and non-coding regions. 
Our approach provides a modular view of the pangenome, identifying operons and genomic islands, and allowing to track their prevalence in different populations. We illustrate this by analysing intra-species diversity in Polynucleobacter, a bacterial genus ubiquitous in freshwater ecosystems, characterized by their streamlined genomes and their ecological versatility. We show how SuperPang facilitates the simultaneous analysis of allelic and gene content variation under different environmental pressures, allowing us to study the drivers of microbial diversification at unprecedented resolution.}, } @article {pmid37379037, year = {2023}, author = {Madhusoodanan, J}, title = {A More Diverse and Complete Reference Human Genome Is Poised to Change Medicine.}, journal = {JAMA}, volume = {330}, number = {3}, pages = {205--206}, doi = {10.1001/jama.2023.9498}, pmid = {37379037}, issn = {1538-3598}, mesh = {Humans ; *Genome, Human/genetics ; *Genomics/standards ; *Medicine/trends ; }, } @article {pmid37377491, year = {2023}, author = {Karanth, S and Patel, J and Shirmohammadi, A and Pradhan, AK}, title = {Machine learning to predict foodborne salmonellosis outbreaks based on genome characteristics and meteorological trends.}, journal = {Current research in food science}, volume = {6}, number = {}, pages = {100525}, pmid = {37377491}, issn = {2665-9271}, abstract = {Several studies have shown a correlation between outbreaks of Salmonella enterica and meteorological trends, especially related to temperature and precipitation. Additionally, current studies based on outbreaks are performed on data for the species Salmonella enterica, without considering its intra-species and genetic heterogeneity. In this study, we analyzed the effect of differential gene expression and a suite of meteorological factors on salmonellosis outbreak scale (typified by case numbers) using a combination of machine learning and count-based modeling methods. 
Elastic Net regularization model was used to identify significant genes from a Salmonella pan-genome, and a multi-variable Poisson regression developed to fit the individual and mixed effects data. The best-fit Elastic Net model (α = 0.50; λ = 2.18) identified 53 significant gene features. The final multi-variable Poisson regression model (χ[2] = 5748.22; pseudo R[2] = 0.669; probability > χ[2] = 0) identified 127 significant predictor terms (p < 0.10), comprising 45 gene-only predictors, average temperature, average precipitation, and average snowfall, and 79 gene-meteorological interaction terms. The significant genes ranged in functionality from cellular signaling and transport, virulence, metabolism, and stress response, and included gene variables not considered as significant by the baseline model. This study presents a holistic approach towards evaluating multiple data sources (such as genomic and environmental data) to predict outbreak scale, which could help in revising the estimates for human health risk.}, } @article {pmid37375105, year = {2023}, author = {Myintzaw, P and Pennone, V and McAuliffe, O and Begley, M and Callanan, M}, title = {Association of Virulence, Biofilm, and Antimicrobial Resistance Genes with Specific Clonal Complex Types of {Listeria} monocytogenes.}, journal = {Microorganisms}, volume = {11}, number = {6}, pages = {}, pmid = {37375105}, issn = {2076-2607}, support = {15F604, 2019R495//Department of Agriculture Food and the Marine/ ; }, abstract = {Precise classification of foodborne pathogen Listeria monocytogenes is a necessity in efficient foodborne disease surveillance, outbreak detection, and source tracking throughout the food chain. In this study, a total of 150 L. monocytogenes isolates from various food products, food processing environments, and clinical sources were investigated for variations in virulence, biofilm formation, and the presence of antimicrobial resistance genes based on their Whole-Genome Sequences. 
Clonal complex (CC) determination based on Multi-Locus Sequence Typing (MLST) revealed twenty-eight CC-types including eight isolates representing novel CC-types. The eight isolates comprising the novel CC-types share the majority of the known (cold and acid) stress tolerance genes and are all genetic lineage II, serogroup 1/2a-3a. Pan-genome-wide association analysis by Scoary using Fisher's exact test identified eleven genes specifically associated with clinical isolates. Screening for the presence of antimicrobial and virulence genes using the ABRicate tool uncovered variations in the presence of Listeria Pathogenicity Islands (LIPIs) and other known virulence genes. Specifically, the distributions of actA, ecbA, inlF, inlJ, lapB, LIPI-3, and vip genes across isolates were found to be significantly CC-dependent while the presence of ami, inlF, inlJ, and LIPI-3 was associated with clinical isolates specifically. In addition, Roary-derived phylogenetic grouping based on Antimicrobial-Resistant Genes (AMRs) revealed that the thiol transferase (FosX) gene was present in all lineage I isolates, and the presence of the lincomycin resistance ABC-F-type ribosomal protection protein (lmo0919_fam) was also genetic-lineage-dependent. More importantly, the genes found to be specific to CC-type were consistent when a validation analysis was performed with fully assembled, high-quality complete L. monocytogenes genome sequences (n = 247) extracted from the National Centre for Biotechnology Information (NCBI) microbial genomes database. 
This work highlights the usefulness of MLST-based CC typing using the Whole-Genome Sequence as a tool in classifying isolates.}, } @article {pmid37374997, year = {2023}, author = {Negrete-Paz, AM and Vázquez-Marrufo, G and Gutiérrez-Moraga, A and Vázquez-Garcidueñas, MS}, title = {Pangenome Reconstruction of {Mycobacterium} tuberculosis as a Guide to Reveal Genomic Features Associated with Strain Clinical Phenotype.}, journal = {Microorganisms}, volume = {11}, number = {6}, pages = {}, pmid = {37374997}, issn = {2076-2607}, support = {PICIR-021//Instituto de Ciencia, Tecnología e Innovación de Michoacán/ ; }, abstract = {Tuberculosis (TB) is one of the leading causes of human deaths worldwide caused by infectious diseases. TB infection by Mycobacterium tuberculosis can occur in the lungs, causing pulmonary tuberculosis (PTB), or in any other organ of the body, resulting in extrapulmonary tuberculosis (EPTB). There is no consensus on the genetic determinants of this pathogen that may contribute to EPTB. In this study, we constructed the M. tuberculosis pangenome and used it as a tool to seek genomic signatures associated with the clinical presentation of TB based on its accessory genome differences. The analysis carried out in the present study includes the raw reads of 490 M. tuberculosis genomes (PTB n = 245, EPTB n = 245) retrieved from public databases that were assembled, as well as ten genomes from Mexican strains (PTB n = 5, EPTB n = 5) that were sequenced and assembled. All genomes were annotated and then used to construct the pangenome with Roary and Panaroo. The pangenome obtained using Roary consisted of 2231 core genes and 3729 accessory genes. On the other hand, the pangenome resulting from Panaroo consisted of 2130 core genes and 5598 accessory genes. Associations between the distribution of accessory genes and the PTB/EPTB phenotypes were examined using the Scoary and Pyseer tools. 
Both tools found a significant association between the hspR, plcD, Rv2550c, pe_pgrs5, pe_pgrs25, and pe_pgrs57 genes and the PTB genotype. In contrast, the deletion of the aceA, esxR, plcA, and ppe50 genes was significantly associated with the EPTB phenotype. Rv1759c and Rv3740 were found to be associated with the PTB phenotype according to Scoary; however, these associations were not observed when using Pyseer. The robustness of the constructed pangenome and the gene-phenotype associations is supported by several factors, including the analysis of a large number of genomes, the inclusion of the same number of PTB/EPTB genomes, and the reproducibility of results thanks to the different bioinformatic tools used. Such characteristics surpass most of previous M. tuberculosis pangenomes. Thus, it can be inferred that the deletion of these genes can lead to changes in the processes involved in stress response and fatty acid metabolism, conferring phenotypic advantages associated with pulmonary or extrapulmonary presentation of TB. This study represents the first attempt to use the pangenome to seek gene-phenotype associations in M. tuberculosis.}, } @article {pmid37374927, year = {2023}, author = {Uljanovas, D and Gölz, G and Fleischmann, S and Kudirkiene, E and Kasetiene, N and Grineviciene, A and Tamuleviciene, E and Aksomaitiene, J and Alter, T and Malakauskas, M}, title = {Genomic Characterization of {Arcobacter} butzleri Strains Isolated from Various Sources in {Lithuania}.}, journal = {Microorganisms}, volume = {11}, number = {6}, pages = {}, pmid = {37374927}, issn = {2076-2607}, support = {01KI1712//Federal Ministry of Education and Research/ ; }, abstract = {Arcobacter (A.) butzleri, the most widespread species within the genus Arcobacter, is considered as an emerging pathogen causing gastroenteritis in humans. Here, we performed a comparative genome-wide analysis of 40 A. 
butzleri strains from Lithuania to determine the genetic relationship, pangenome structure, putative virulence, and potential antimicrobial- and heavy-metal-resistance genes. Core genome single nucleotide polymorphism (cgSNP) analysis revealed low within-group variability (≤4 SNPs) between three milk strains (RCM42, RCM65, RCM80) and one human strain (H19). Regardless of the type of input (i.e., cgSNPs, accessory genome, virulome, resistome), these strains showed a recurrent phylogenetic and hierarchical grouping pattern. A. butzleri demonstrated a relatively large and highly variable accessory genome (comprising of 6284 genes with around 50% of them identified as singletons) that only partially correlated to the isolation source. Downstream analysis of the genomes resulted in the detection of 115 putative antimicrobial- and heavy-metal-resistance genes and 136 potential virulence factors that are associated with the induction of infection in host (e.g., cadF, degP, iamA), survival and environmental adaptation (e.g., flagellar genes, CheA-CheY chemotaxis system, urease cluster). This study provides additional knowledge for a better A. butzleri-related risk assessment and highlights the need for further genomic epidemiology studies in Lithuania and other countries.}, } @article {pmid37374141, year = {2023}, author = {Abondio, P and Cilli, E and Luiselli, D}, title = {Human Pangenomics: Promises and Challenges of a Distributed Genomic Reference.}, journal = {Life (Basel, Switzerland)}, volume = {13}, number = {6}, pages = {}, pmid = {37374141}, issn = {2075-1729}, abstract = {A pangenome is a collection of the common and unique genomes that are present in a given species. It combines the genetic information of all the genomes sampled, resulting in a large and diverse range of genetic material. Pangenomic analysis offers several advantages compared to traditional genomic research. 
For example, a pangenome is not bound by the physical constraints of a single genome, so it can capture more genetic variability. Thanks to the introduction of the concept of pangenome, it is possible to use exceedingly detailed sequence data to study the evolutionary history of two different species, or how populations within a species differ genetically. In the wake of the Human Pangenome Project, this review aims at discussing the advantages of the pangenome around human genetic variation, which are then framed around how pangenomic data can inform population genetics, phylogenetics, and public health policy by providing insights into the genetic basis of diseases or determining personalized treatments, targeting the specific genetic profile of an individual. Moreover, technical limitations, ethical concerns, and legal considerations are discussed.}, } @article {pmid37372961, year = {2023}, author = {Abdul Aziz, M and Masmoudi, K}, title = {Insights into the Transcriptomics of Crop Wild Relatives to Unravel the Salinity Stress Adaptive Mechanisms.}, journal = {International journal of molecular sciences}, volume = {24}, number = {12}, pages = {}, pmid = {37372961}, issn = {1422-0067}, support = {12F041//United Arab Emirates University/ ; }, mesh = {*Transcriptome ; *Plant Breeding ; Gene Expression Profiling ; Genomics ; Salt Tolerance/genetics ; Salinity ; }, abstract = {The narrow genomic diversity of modern cultivars is a major bottleneck for enhancing the crop's salinity stress tolerance. The close relatives of modern cultivated plants, crop wild relatives (CWRs), can be a promising and sustainable resource to broaden the diversity of crops. Advances in transcriptomic technologies have revealed the untapped genetic diversity of CWRs that represents a practical gene pool for improving the plant's adaptability to salt stress. Thus, the present study emphasizes the transcriptomics of CWRs for salinity stress tolerance. 
In this review, the impacts of salt stress on the plant's physiological processes and development are overviewed, and the transcription factors (TFs) regulation of salinity stress tolerance is investigated. In addition to the molecular regulation, a brief discussion on the phytomorphological adaptation of plants under saline environments is provided. The study further highlights the availability and use of transcriptomic resources of CWR and their contribution to pangenome construction. Moreover, the utilization of CWRs' genetic resources in the molecular breeding of crops for salinity stress tolerance is explored. Several studies have shown that cytoplasmic components such as calcium and kinases, and ion transporter genes such as Salt Overly Sensitive 1 (SOS1) and High-affinity Potassium Transporters (HKTs) are involved in the signaling of salt stress, and in mediating the distribution of excess Na[+] ions within the plant cells. Recent comparative analyses of transcriptomic profiling through RNA sequencing (RNA-Seq) between the crops and their wild relatives have unraveled several TFs, stress-responsive genes, and regulatory proteins for generating salinity stress tolerance. This review specifies that the use of CWRs transcriptomics in combination with modern breeding experimental approaches such as genomic editing, de novo domestication, and speed breeding can accelerate the CWRs utilization in the breeding programs for enhancing the crop's adaptability to saline conditions. 
The transcriptomic approaches optimize the crop genomes with the accumulation of favorable alleles that will be indispensable for designing salt-resilient crops.}, } @article {pmid37369325, year = {2023}, author = {Thorwall, S and Trivedi, V and Ottum, E and Wheeldon, I}, title = {Population genomics-guided engineering of phenazine biosynthesis in {Pseudomonas} chlororaphis.}, journal = {Metabolic engineering}, volume = {78}, number = {}, pages = {223--234}, doi = {10.1016/j.ymben.2023.06.008}, pmid = {37369325}, issn = {1096-7184}, mesh = {*Pseudomonas chlororaphis/genetics/metabolism ; Metagenomics ; Genome-Wide Association Study ; Pseudomonas/genetics/metabolism ; Phenazines/metabolism ; Bacterial Proteins/genetics/metabolism ; }, abstract = {The emergence of next-generation sequencing (NGS) technologies has made it possible to not only sequence entire genomes, but also identify metabolic engineering targets across the pangenome of a microbial population. This study leverages NGS data as well as existing molecular biology and bioinformatics tools to identify and validate genomic signatures for improving phenazine biosynthesis in Pseudomonas chlororaphis. We sequenced a diverse collection of 34 Pseudomonas isolates using short- and long-read sequencing techniques and assembled whole genomes using the NGS reads. In addition, we assayed three industrially relevant phenotypes (phenazine production, biofilm formation, and growth temperature) for these isolates in two different media conditions. We then provided the whole genomes and phenazine production data to a unitig-based microbial genome-wide association study (mGWAS) tool to identify novel genomic signatures responsible for phenazine production in P. chlororaphis. Post-processing of the mGWAS analysis results yielded 330 significant hits influencing the biosynthesis of one or more phenazine compounds. 
Based on a quantitative metric (called the phenotype score), we elucidated the most influential hits for phenazine production and experimentally validated them in vivo in the most optimal phenazine producing strain. Two genes significantly increased phenazine-1-carboxamide (PCN) production: a histidine transporter (ProY_1), and a putative carboxypeptidase (PS__04251). A putative MarR-family transcriptional regulator decreased PCN titer when overexpressed in a high PCN producing isolate. Overall, this work seeks to demonstrate the utility of a population genomics approach as an effective strategy in enabling the identification of targets for metabolic engineering of bioproduction hosts.}, } @article {pmid37365340, year = {2023}, author = {Chin, CS and Behera, S and Khalak, A and Sedlazeck, FJ and Sudmant, PH and Wagner, J and Zook, JM}, title = {Multiscale analysis of pangenomes enables improved representation of genomic diversity for repetitive and clinically relevant genes.}, journal = {Nature methods}, volume = {20}, number = {8}, pages = {1213-1221}, pmid = {37365340}, issn = {1548-7105}, support = {R35 GM142916/GM/NIGMS NIH HHS/United States ; R35GM142916/GM/NIGMS NIH HHS/United States ; 1U01HG011758-01/HG/NHGRI NIH HHS/United States ; UM1 HG008898/HG/NHGRI NIH HHS/United States ; U01 HG011758/HG/NHGRI NIH HHS/United States ; }, mesh = {Male ; Humans ; *Genomics ; *Genome, Human ; Major Histocompatibility Complex ; }, abstract = {Advancements in sequencing technologies and assembly methods enable the regular production of high-quality genome assemblies characterizing complex regions. However, challenges remain in efficiently interpreting variation at various scales, from smaller tandem repeats to megabase rearrangements, across many human genomes. We present a PanGenome Research Tool Kit (PGR-TK) enabling analyses of complex pangenome structural and haplotype variation at multiple scales. 
We apply the graph decomposition methods in PGR-TK to the class II major histocompatibility complex demonstrating the importance of the human pangenome for analyzing complicated regions. Moreover, we investigate the Y-chromosome genes, DAZ1/DAZ2/DAZ3/DAZ4, of which structural variants have been linked to male infertility, and X-chromosome genes OPN1LW and OPN1MW linked to eye disorders. We further showcase PGR-TK across 395 complex repetitive medically important genes. This highlights the power of PGR-TK to resolve complex variation in regions of the genome that were previously too complex to analyze.}, } @article {pmid37364097, year = {2023}, author = {Fayyaz, A and Robinson, G and Chang, PL and Bekele, D and Yimer, S and Carrasquilla-Garcia, N and Negash, K and Surendrarao, A and von Wettberg, EJB and Kemal, SA and Tesfaye, K and Fikre, A and Farmer, AD and Cook, DR}, title = {Hiding in plain sight: Genome-wide recombination and a dynamic accessory genome drive diversity in Fusarium oxysporum f.sp. ciceris.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {27}, pages = {e2220570120}, pmid = {37364097}, issn = {1091-6490}, mesh = {*Fusarium/genetics ; Host Specificity ; Genomics ; Agriculture ; Plant Diseases/genetics ; }, abstract = {Understanding the origins of variation in agricultural pathogens is of fundamental interest and practical importance, especially for diseases that threaten food security. Fusarium oxysporum is among the most important of soil-borne pathogens, with a global distribution and an extensive host range. The pathogen is considered to be asexual, with horizontal transfer of chromosomes providing an analog of assortment by meiotic recombination. 
Here, we challenge those assumptions based on the results of population genomic analyses, describing the pathogen's diversity and inferring its origins and functional consequences in the context of a single, long-standing agricultural system. We identify simultaneously low nucleotide distance among strains, and unexpectedly high levels of genetic and genomic variability. We determine that these features arise from a combination of genome-scale recombination, best explained by widespread sexual reproduction, and presence-absence variation consistent with chromosomal rearrangement. Pangenome analyses document an accessory genome more than twice the size of the core genome, with contrasting evolutionary dynamics. The core genome is stable, with low diversity and high genetic differentiation across geographic space, while the accessory genome is paradoxically more diverse and unstable but with lower genetic differentiation and hallmarks of contemporary gene flow at local scales. We suggest a model in which episodic sexual reproduction generates haplotypes that are selected and then maintained through clone-like dynamics, followed by contemporary genomic rearrangements that reassort the accessory genome among sympatric strains. Taken together, these processes contribute unique genome content, including reassortment of virulence determinants that may explain observed variation in pathogenic potential.}, } @article {pmid37361319, year = {2023}, author = {Torres-Morales, J and Mark Welch, JL and Dewhirst, FE and Borisy, GG}, title = {Site-specialization of human oral Gemella species.}, journal = {Journal of oral microbiology}, volume = {15}, number = {1}, pages = {2225261}, pmid = {37361319}, issn = {2000-2297}, abstract = {Gemella species are core members of the human oral microbiome in healthy subjects and are regarded as commensals, although they can cause opportunistic infections. 
Our objective was to evaluate the site-specialization of Gemella species among various habitats within the mouth by combining pangenomics and metagenomics. With pangenomics, we identified genome relationships and categorized genes as core and accessory to each species. With metagenomics, we identified the primary oral habitat of individual genomes. Our results establish that the genomes of three species, G. haemolysans, G. sanguinis and G. morbillorum, are abundant and prevalent in human mouths at different oral sites: G. haemolysans on buccal mucosa and keratinized gingiva; G. sanguinis on tongue dorsum, throat, and tonsils; and G. morbillorum in dental plaque. The gene-level basis of site-specificity was investigated by identifying genes that were core to Gemella genomes at a specific oral site but absent from other Gemella genomes. The riboflavin biosynthesis pathway was present in G. haemolysans genomes associated with buccal mucosa but absent from the rest of the genomes. Overall, metapangenomics show that Gemella species have clear ecological preferences in the oral cavity of healthy humans and provides an approach to identifying gene-level drivers of site specificity.}, } @article {pmid37359562, year = {2023}, author = {Touray, BJB and Hanafy, M and Phanse, Y and Hildebrand, R and Talaat, AM}, title = {Protective RNA nanovaccines against Mycobacterium avium subspecies hominissuis.}, journal = {Frontiers in immunology}, volume = {14}, number = {}, pages = {1188754}, pmid = {37359562}, issn = {1664-3224}, mesh = {Animals ; Mice ; *Mycobacterium avium/physiology ; *Mycobacterium tuberculosis ; Interleukin-2 ; RNA ; RNA, Messenger/genetics ; }, abstract = {The induction of an effective immune response is critical for the success of mRNA-based therapeutics. Here, we developed a nanoadjuvant system compromised of Quil-A and DOTAP (dioleoyl 3 trimethylammonium propane), hence named QTAP, for the efficient delivery of mRNA vaccine constructs into cells. 
Electron microscopy indicated that the complexation of mRNA with QTAP forms nanoparticles with an average size of 75 nm and which have ~90% encapsulation efficiency. The incorporation of pseudouridine-modified mRNA resulted in higher transfection efficiency and protein translation with lower cytotoxicity than unmodified mRNA. When QTAP-mRNA or QTAP alone transfected macrophages, pro-inflammatory pathways (e.g., NLRP3, NF-kb, and MyD88) were upregulated, an indication of macrophage activation. In C57Bl/6 mice, QTAP nanovaccines encoding Ag85B and Hsp70 transcripts (QTAP-85B+H70) were able to elicit robust IgG antibody and IFN-γ, TNF-α, IL-2, and IL-17 cytokine responses. Following aerosol challenge with a clinical isolate of M. avium ss. hominissuis (M. ah), a significant reduction of mycobacterial counts was observed in lungs and spleens of only immunized animals at both 4- and 8-weeks post-challenge. As expected, reduced levels of M. ah were associated with diminished histological lesions and robust cell-mediated immunity. Interestingly, polyfunctional T-cells expressing IFN-γ, IL-2, and TNF-α were detected at 8 but not 4 weeks post-challenge. Overall, our analysis indicated that QTAP is a highly efficient transfection agent and could improve the immunogenicity of mRNA vaccines against pulmonary M. 
ah, an infection of significant public health importance, especially to the elderly and to those who are immune compromised.}, } @article {pmid37358412, year = {2023}, author = {Li, T and Huang, J and Yang, S and Chen, J and Yao, Z and Zhong, M and Zhong, X and Ye, X}, title = {Pan-Genome-Wide Association Study of Serotype 19A Pneumococci Identifies Disease-Associated Genes.}, journal = {Microbiology spectrum}, volume = {11}, number = {4}, pages = {e0407322}, pmid = {37358412}, issn = {2165-0497}, mesh = {Humans ; *Streptococcus pneumoniae ; Serogroup ; Genome-Wide Association Study ; *Pneumococcal Infections ; Pneumococcal Vaccines/genetics ; Serotyping ; }, abstract = {Despite the widespread implementation of pneumococcal vaccines, hypervirulent Streptococcus pneumoniae serotype 19A is endemic worldwide. It is still unclear whether specific genetic elements contribute to complex pathogenicity of serotype 19A isolates. We performed a large-scale pan-genome-wide association study (pan-GWAS) of 1,292 serotype 19A isolates sampled from patients with invasive disease and asymptomatic carriers. To address the underlying disease-associated genotypes, a comprehensive analysis using three methods (Scoary, a linear mixed model, and random forest) was performed to compare disease and carriage isolates to identify genes consistently associated with disease phenotype. By using three pan-GWAS methods, we found consensus on statistically significant associations between genotypes and disease phenotypes (disease or carriage), with a subset of 30 consistently significant disease-associated genes. The results of functional annotation revealed that these disease-associated genes had diverse predicted functions, including those that participated in mobile genetic elements, antibiotic resistance, virulence, and cellular metabolism. 
Our findings suggest the multifactorial pathogenicity nature of this hypervirulent serotype and provide important evidence for the design of novel protein-based vaccines to prevent and control pneumococcal disease. IMPORTANCE It is important to understand the genetic and pathogenic characteristics of S. pneumoniae serotype 19A, which may provide important information for the prevention and treatment of pneumococcal disease. This global large-sample pan-GWAS study has identified a subset of 30 consistently significant disease-associated genes that are involved in mobile genetic elements, antibiotic resistance, virulence, and cellular metabolism. These findings suggest the multifactorial pathogenicity nature of hypervirulent S. pneumoniae serotype 19A isolates and provide implications for the design of novel protein-based vaccines.}, } @article {pmid37356834, year = {2023}, author = {Prakash, JAJ and Jacob, JJ and Rachel, T and Vasudevan, K and Amladi, A and Iyadurai, R and Manesh, A and Veeraraghavan, B}, title = {Genomic analysis of Brucella melitensis reveals new insights into phylogeny and evolutionary divergence.}, journal = {Indian journal of medical microbiology}, volume = {44}, number = {}, pages = {100360}, doi = {10.1016/j.ijmmb.2023.02.003}, pmid = {37356834}, issn = {1998-3646}, mesh = {Humans ; *Brucella melitensis/genetics ; Phylogeny ; Genome-Wide Association Study ; *Brucellosis/epidemiology ; Genomics ; Genotype ; }, abstract = {PURPOSE: Brucellosis is a bacterial zoonotic disease caused by genus Brucella. The disease is often transmitted to humans by direct or indirect contact with infected livestock or from laboratory exposure. In this study two clinical isolates of Brucella melitensis were subjected to whole genome sequencing (WGS) using Ion Torrent PGM and Oxford Nanopore MinIon platform.

METHODS: The two hybrid complete genomes were subjected to core gene SNP analysis to identify the relative evolutionary position. To distinguish between the various lineages of B. melitensis, pangenome analysis was carried out.

RESULTS: Phylogenetic analysis revealed that both the study isolates (ST8) clustered along with the other Asian isolates that formed genotype II. Genome-wide analyses of 326 B. melitensis isolates suggest that 2171 gene clusters were shared across all the genomes while 3552 gene clusters were considered as accessory genes.

CONCLUSION: Here we attempted to provide the gain and loss of six unique genes that defined the phylogenetic lineages and complex evolutionary process. As the severity and prevalence of human brucellosis is increasing a better understanding of Brucella genomics and transmission dynamics is needed.}, } @article {pmid37356030, year = {2023}, author = {Gupta, RK and Tikariha, H and Purohit, HJ and Khardenavis, AA}, title = {Pangenome-driven insights into nitrogen metabolic characteristics of Citrobacter portucalensis strain AAK_AS5 associated with wastewater nitrogen removal.}, journal = {Archives of microbiology}, volume = {205}, number = {7}, pages = {270}, pmid = {37356030}, issn = {1432-072X}, mesh = {*Wastewater ; *Denitrification ; Nitrates ; Ammonia ; Nitrogen/metabolism ; Nitrification ; Citrobacter/genetics/metabolism ; Heterotrophic Processes ; Aerobiosis ; Nitrites/metabolism ; }, abstract = {Nitrogen metabolism in the genus Citrobacter is very poorly studied despite its several implications in wastewater treatment. In the current study, Citrobacter portucalensis strain AAK_AS5 was assessed for remediation of simulated wastewater supplemented with different inorganic nitrogen sources. Combination of (NH4)2SO4 with KNO3 was the most preferred for achieving high growth density followed by (NH4)2SO4 and KNO3 alone. This was in agreement with highest ammonical nitrogen removal of 92.9% in the presence of combined nitrogen sources and the corresponding nitrate nitrogen removal of 93% in the presence of KNO3. Furthermore, these removal capacities were validated by investigating the uniqueness and the spread of metabolic features through pan-genomic approach that revealed the largest number of unique genes (2097) and accessory genes (705) in strain AAK_AS5. 
Of the total 44 different types of nitrogen metabolism-related genes, 39 genes were associated with the core genome, while 5 genes such as gltI, nasA, nasR, nrtA, and ntrC uniquely belonged to the accessory genome. Strain AAK_AS5 possessed three major nitrate removal pathways viz., assimilatory and dissimilatory nitrate reduction to ammonia (ANRA & DNRA), and denitrification; however, the absence of nitrification was compensated by ammonia assimilation catalyzed by gene products of the GDH and GS-GOGAT pathways. narGHIJ encoding the respiratory nitrate reductase was commonly identified in all the studied genomes, while genes such as nirK, norB, and nosZ were uniquely present in the strain AAK_AS5 only. A markedly different genetic content and metabolic diversity between the strains reflected their adaptive evolution in the environment thus highlighting the significance of C. portucalensis AAK_AS5 for potential application in nitrogen removal from wastewater.}, } @article {pmid37354526, year = {2023}, author = {Masutani, B and Suzuki, Y and Suzuki, Y and Morishita, S}, title = {JTK: targeted diploid genome assembler.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {7}, pages = {}, pmid = {37354526}, issn = {1367-4811}, mesh = {*Diploidy ; Sequence Analysis, DNA ; *High-Throughput Nucleotide Sequencing ; Genome ; Genomics ; Haplotypes ; }, abstract = {MOTIVATION: Diploid assembly, or determining sequences of homologous chromosomes separately, is essential to elucidate genetic differences between haplotypes. One approach is to call and phase single nucleotide variants (SNVs) on a reference sequence. However, this approach becomes unstable on large segmental duplications (SDs) or structural variations (SVs) because the alignments of reads deriving from these regions tend to be unreliable. Another approach is to use highly accurate PacBio HiFi reads to output diploid assembly directly. 
Nonetheless, HiFi reads cannot phase homozygous regions longer than their length and require Oxford Nanopore Technologies (ONT) reads or Hi-C to produce a fully phased assembly. Is a single long-read sequencing technology sufficient to create an accurate diploid assembly?

RESULTS: Here, we present JTK, a megabase-scale diploid genome assembler. It first randomly samples kilobase-scale sequences (called 'chunks') from the long reads, phases variants found on them, and produces two haplotypes. The novel idea of JTK is to utilize chunks to capture SNVs and SVs simultaneously. From 60-fold ONT reads on the HG002 and a Japanese sample, it fully assembled two haplotypes with approximately 99.9% accuracy on the major histocompatibility complex (MHC) and the leukocyte receptor complex (LRC) regions, which was impossible by the reference-based approach. In addition, in the LRC region on a Japanese sample, JTK output an assembly of better contiguity than those built from high-coverage HiFi+Hi-C. In the coming age of pan-genomics, JTK would complement the reference-based phasing method to assemble the difficult-to-assemble but medically important regions.

JTK is available at https://github.com/ban-m/jtk, and the datasets are available at https://doi.org/10.5281/zenodo.7790310 or JGAS000580 in DDBJ.}, } @article {pmid37353434, year = {2023}, author = {Wang, B and Dang, N and Yang, X and Xu, S and Ye, K}, title = {The human pangenome reference: the beginning of a new era for genomics.}, journal = {Science bulletin}, volume = {68}, number = {14}, pages = {1484-1487}, doi = {10.1016/j.scib.2023.06.014}, pmid = {37353434}, issn = {2095-9281}, mesh = {Humans ; *Genomics ; *Software ; }, } @article {pmid37349950, year = {2023}, author = {Pei, Z and Liu, Y and Yi, Z and Liao, J and Wang, H and Zhang, H and Chen, W and Lu, W}, title = {Diversity within the species Clostridium butyricum: pan-genome, phylogeny, prophage, carbohydrate utilization, and antibiotic resistance.}, journal = {Journal of applied microbiology}, volume = {134}, number = {7}, pages = {}, doi = {10.1093/jambio/lxad127}, pmid = {37349950}, issn = {1365-2672}, support = {32021005//National Natural Science Foundation of China/ ; BK20221070//Natural Science Foundation of Jiangsu Province/ ; }, mesh = {Humans ; *Clostridium butyricum/genetics ; Prophages/genetics ; Phylogeny ; Drug Resistance, Microbial/genetics ; Carbohydrates ; }, abstract = {AIMS: Clostridium butyricum has been recognized as a strong candidate for the "next generation of probiotics" due to its beneficial roles on humans. Because our current understanding of this species is limited, it is imperative to unveil the genetic variety and biological properties of C. butyricum on sufficient strains.

METHODS AND RESULTS: We isolated 53 C. butyricum strains and collected 25 publicly available genomes to comprehensively assess the genomic and phenotypic diversity of this species. Average nucleotide identity and phylogeny suggested that multiple C. butyricum strains might share the same niche. Clostridium butyricum genomes were replete with prophage elements, but the CRISPR-positive strain efficiently inhibited prophage integration. Clostridium butyricum utilizes cellulose, alginate, and soluble starch universally, and shows general resistance to aminoglycoside antibiotics.

CONCLUSIONS: Clostridium butyricum exhibited a broad genetic diversity from the extraordinarily open pan-genome, extremely convergent core genome, and ubiquitous prophages. In carbohydrate utilization and antibiotic resistance, partial genotypes have a certain guiding significance for phenotypes.}, } @article {pmid37349608, year = {2023}, author = {Manivannan, A and Cheeran Amal, T}, title = {Deciphering the complex cotton genome for improving fiber traits and abiotic stress resilience in sustainable agriculture.}, journal = {Molecular biology reports}, volume = {50}, number = {8}, pages = {6937-6953}, pmid = {37349608}, issn = {1573-4978}, mesh = {*Genome, Plant/genetics ; *Plant Breeding ; Polyploidy ; Agriculture ; Stress, Physiological/genetics ; Gossypium/genetics ; Cotton Fiber ; }, abstract = {BACKGROUND: Understanding the complex cotton genome is of paramount importance in devising a strategy for sustainable agriculture. Cotton is probably the most economically important cash crop known for its cellulose-rich fiber content. The cotton genome has become an ideal model for deciphering polyploidization due to its polyploidy, setting it apart from other major crops. However, the main challenge in understanding the functional and regulatory functions of many genes in cotton is still the complex cotton polyploidy genome, which is not limited to a single role. Cotton production is vulnerable to the sensitive effects of climate change, which can alter or aggravate soil, pests, and diseases. Thus, conventional plant breeding coupled with advanced technologies has led to substantial progress being made in cotton production.

GENOMICS APPROACHES IN COTTON: In the frontier areas of genomics research, cotton genomics has gained momentum accomplished by robust high-throughput sequencing platforms combined with novel computational tools to make the cotton genome more tractable. Advances in long-read sequencing have allowed for the generation of the complete set of cotton gene transcripts giving incisive scientific knowledge in cotton improvement. In contrast, the integration of the latest sequencing platforms has been used to generate multiple high-quality reference genomes in diploid and tetraploid cotton. While pan-genome and 3D genomic studies are still in the early stages in cotton, it is anticipated that rapid advances in sequencing, assembly algorithms, and analysis pipelines will have a greater impact on advanced cotton research.

CONCLUSIONS: This review article briefly compiles substantial contributions in different areas of the cotton genome, which include genome sequencing, genes, and their molecular regulatory networks in fiber development and stress tolerance mechanism. This will greatly help us in understanding the robust genomic organization which in turn will help unearth candidate genes for functionally important agronomic traits.}, } @article {pmid37341494, year = {2023}, author = {Potter, RF and Zhang, K and Reimler, B and Marino, J and Muenks, CE and Alvarado, K and Wallace, MA and Westblade, LF and McElvania, E and Yarbrough, ML and Hunstad, DA and Dantas, G and Burnham, CD}, title = {Uncharacterized and lineage-specific accessory genes within the Proteus mirabilis pan-genome landscape.}, journal = {mSystems}, volume = {8}, number = {4}, pages = {e0015923}, pmid = {37341494}, issn = {2379-5077}, support = {R01 AI158418/AI/NIAID NIH HHS/United States ; R01 HS027621/HS/AHRQ HHS/United States ; R01 AI155893/AI/NIAID NIH HHS/United States ; U01 AI123394/AI/NIAID NIH HHS/United States ; }, mesh = {Humans ; *Proteus mirabilis/genetics ; *Proteomics ; Phylogeny ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Proteus mirabilis is a Gram-negative bacterium recognized for its unique swarming motility and urease activity. A previous proteomic report on four strains hypothesized that, unlike other Gram-negative bacteria, P. mirabilis may not exhibit significant intraspecies variation in gene content. However, there has not been a comprehensive analysis of large numbers of P. mirabilis genomes from various sources to support or refute this hypothesis. We performed comparative genomic analysis on 2,060 Proteus genomes. We sequenced the genomes of 893 isolates recovered from clinical specimens from three large US academic medical centers, combined with 1,006 genomes from NCBI Assembly and 161 genomes assembled from Illumina reads in the public domain. 
We used average nucleotide identity (ANI) to delineate species and subspecies, core genome phylogenetic analysis to identify clusters of highly related P. mirabilis genomes, and pan-genome annotation to identify genes of interest not present in the model P. mirabilis strain HI4320. Within our cohort, Proteus is composed of 10 named species and 5 uncharacterized genomospecies. P. mirabilis can be subdivided into three subspecies; subspecies 1 represented 96.7% (1,822/1,883) of all genomes. The P. mirabilis pan-genome includes 15,399 genes outside of HI4320, and 34.3% (5,282/15,399) of these genes have no putative assigned function. Subspecies 1 is composed of several highly related clonal groups. Prophages and gene clusters encoding putatively extracellular-facing proteins are associated with clonal groups. Uncharacterized genes not present in the model strain P. mirabilis HI4320 but with homology to known virulence-associated operons can be identified within the pan-genome. IMPORTANCE Gram-negative bacteria use a variety of extracellular facing factors to interact with eukaryotic hosts. Due to intraspecies genetic variability, these factors may not be present in the model strain for a given organism, potentially providing incomplete understanding of host-microbial interactions. In contrast to previous reports on P. mirabilis, but similar to other Gram-negative bacteria, P. mirabilis has a mosaic genome with a linkage between phylogenetic position and accessory genome content. P. mirabilis encodes a variety of genes that may impact host-microbe dynamics beyond what is represented in the model strain HI4320. 
The diverse, whole-genome characterized strain bank from this work can be used in conjunction with reverse genetic and infection models to better understand the impact of accessory genome content on bacterial physiology and pathogenesis of infection.}, } @article {pmid37337218, year = {2023}, author = {Smith, TPL and Bickhart, DM and Boichard, D and Chamberlain, AJ and Djikeng, A and Jiang, Y and Low, WY and Pausch, H and Demyda-Peyrás, S and Prendergast, J and Schnabel, RD and Rosen, BD and others}, title = {The Bovine Pangenome Consortium: democratizing production and accessibility of genome assemblies for global cattle breeds and other bovine species.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {139}, pmid = {37337218}, issn = {1474-760X}, mesh = {Cattle/genetics ; Animals ; *Polymorphism, Single Nucleotide ; *Genomics ; Genome ; }, abstract = {The Bovine Pangenome Consortium (BPC) is an international collaboration dedicated to the assembly of cattle genomes to develop a more complete representation of cattle genomic diversity. The goal of the BPC is to provide genome assemblies and a community-agreed pangenome representation to replace breed-specific reference assemblies for cattle genomics. The BPC invites partners sharing our vision to participate in the production of these assemblies and the development of a common, community-approved, pangenome reference as a public resource for the research community (https://bovinepangenome.github.io/). 
This community-driven resource will provide the context for comparison between studies and the future foundation for cattle genomic selection.}, } @article {pmid37333201, year = {2023}, author = {Tran, TH and Roberts, AQ and F Escapa, I and Gao, W and Segre, JA and Kong, HH and Conlan, S and Kelly, MS and Lemon, KP}, title = {Metabolic capabilities are highly conserved among human nasal-associated Corynebacterium species in pangenomic analyses.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37333201}, support = {K23 AI135090/AI/NIAID NIH HHS/United States ; R01 GM117174/GM/NIGMS NIH HHS/United States ; R35 GM141806/GM/NIGMS NIH HHS/United States ; }, abstract = {UNLABELLED: Corynebacterium species are globally ubiquitous in human nasal microbiota across the lifespan. Moreover, nasal microbiota profiles typified by higher relative abundances of Corynebacterium are often positively associated with health. Among the most common human nasal Corynebacterium species are C. propinquum, C. pseudodiphtheriticum, C. accolens, and C. tuberculostearicum. Based on the prevalence of these species, at least two likely coexist in the nasal microbiota of 82% of adults. To gain insight into the functions of these four species, we identified genomic, phylogenomic, and pangenomic properties and estimated the functional protein repertoire and metabolic capabilities of 87 distinct human nasal Corynebacterium strain genomes: 31 from Botswana and 56 from the U.S. C. pseudodiphtheriticum had geographically distinct clades consistent with localized strain circulation, whereas some strains from the other species had wide geographic distribution across Africa and North America. All four species had similar genomic and pangenomic structures. 
Gene clusters assigned to all COG metabolic categories were overrepresented in the persistent (core) compared to the accessory genome of each species indicating limited strain-level variability in metabolic capacity. Moreover, core metabolic capabilities were highly conserved among the four species indicating limited species-level metabolic variation. Strikingly, strains in the U.S. clade of C. pseudodiphtheriticum lacked genes for assimilatory sulfate reduction present in the Botswanan clade and in the other studied species, indicating a recent, geographically related loss of assimilatory sulfate reduction. Overall, the minimal species and strain variability in metabolic capacity implies coexisting strains might have limited ability to occupy distinct metabolic niches.

IMPORTANCE: Pangenomic analysis with estimation of functional capabilities facilitates our understanding of the full biologic diversity of bacterial species. We performed systematic genomic, phylogenomic, and pangenomic analyses with qualitative estimation of the metabolic capabilities of four common human nasal Corynebacterium species generating a foundational resource. The prevalence of each species in human nasal microbiota is consistent with the common coexistence of at least two species. We identified a notably high level of metabolic conservation within and among species indicating limited options for species to occupy distinct metabolic niches and pointing to the importance of investigating interactions among nasal Corynebacterium species. Comparing strains from two continents, C. pseudodiphtheriticum had restricted geographic strain distribution characterized by an evolutionarily recent loss of assimilatory sulfate reduction in North American strains. Our findings contribute to understanding the functions of Corynebacterium within human nasal microbiota and to evaluating their potential for future use as biotherapeutics.}, } @article {pmid37323942, year = {2023}, author = {Awori, RM and Waturu, CN and Pidot, SJ and Amugune, NO and Bode, HB}, title = {Draft genomes, phylogenomic reconstruction and comparative genome analysis of three Xenorhabdus strains isolated from soil-dwelling nematodes in Kenya.}, journal = {Access microbiology}, volume = {5}, number = {5}, pages = {}, pmid = {37323942}, issn = {2516-8290}, abstract = {As a proven source of potent and selective antimicrobials, Xenorhabdus bacteria are important to an age plagued with difficult-to-treat microbial infections. Yet, only 27 species have been described to date. In this study, a novel Xenorhabdus species was discovered through genomic studies on three isolates from Kenyan soils. 
Soils in Western Kenya were surveyed for steinernematids and Steinernema isolates VH1 and BG5 were recovered from red volcanic loam soils from cultivated land in Vihiga and clay soils from riverine land in Bungoma respectively. From the two nematode isolates, Xenorhabdus sp. BG5 and Xenorhabdus sp. VH1 were isolated. The genomes of these two, plus that of X. griffiniae XN45 - this was previously isolated from Steinernema sp. scarpo that also originated from Kenyan soils - were sequenced and assembled. Nascent genome assemblies of the three isolates were of good quality with over 70 % of their proteome having known functions. These three isolates formed the X. griffiniae clade in a phylogenomic reconstruction of the genus. Their species were delineated using three overall genome relatedness indices: an unnamed species of the genus, Xenorhabdus sp. BG5, X. griffiniae VH1 and X. griffiniae XN45. A pangenome analysis of this clade revealed that over 70 % of species-specific genes encoded unknown functions. Transposases were linked to genomic islands in Xenorhabdus sp. BG5. Thus, overall genome-related indices sufficiently delineated species of two new Xenorhabdus isolates from Kenya, both of which were closely related to X. griffiniae. The functions encoded by most species-specific genes in the X. griffiniae clade remain unknown.}, } @article {pmid37323913, year = {2023}, author = {Huang, W and Hu, S and Zhu, Y and Liu, S and Zhou, X and Fang, Y and Lu, Y and Wang, R}, title = {Metagenomic surveillance and comparative genomic analysis of Chlamydia psittaci in patients with pneumonia.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1157888}, pmid = {37323913}, issn = {1664-302X}, abstract = {Chlamydia psittaci, a strictly intracellular bacterium, is an underestimated etiologic agent leading to infections in a broad range of animals and mild illness or pneumonia in humans. 
In this study, the metagenomes of bronchoalveolar lavage fluids from the patients with pneumonia were sequenced and highly abundant C. psittaci was found. The target-enriched metagenomic reads were recruited to reconstruct draft genomes with more than 99% completeness. Two C. psittaci strains from novel sequence types were detected and these were closely related to the animal-borne isolates derived from the lineages of ST43 and ST28, indicating the zoonotic transmissions of C. psittaci would benefit its prevalence worldwide. Comparative genomic analysis combined with public isolate genomes revealed that the pan-genome of C. psittaci possessed a more stable gene repertoire than those of other extracellular bacteria, with ~90% of the genes per genome being conserved core genes. Furthermore, the evidence for significantly positive selection was identified in 20 virulence-associated gene products, particularly bacterial membrane-embedded proteins and type three secretion machines, which may play important roles in the pathogen-host interactions. This survey uncovered novel strains of C. psittaci causing pneumonia and the evolutionary analysis characterized prominent gene candidates involved in bacterial adaptation to immune pressures. The metagenomic approach is of significance to the surveillance of difficult-to-culture intracellular pathogens and the research into molecular epidemiology and evolutionary biology of C. psittaci.}, } @article {pmid37323667, year = {2023}, author = {Yang, MR and Su, SF and Wu, YW}, title = {Using bacterial pan-genome-based feature selection approach to improve the prediction of minimum inhibitory concentration (MIC).}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1054032}, pmid = {37323667}, issn = {1664-8021}, abstract = {Background: Predicting the resistance profiles of antimicrobial resistance (AMR) pathogens is becoming more and more important in treating infectious diseases. 
Various attempts have been made to build machine learning models to classify resistant or susceptible pathogens based on either known antimicrobial resistance genes or the entire gene set. However, the phenotypic annotations are translated from minimum inhibitory concentration (MIC), which is the lowest concentration of antibiotic drugs in inhibiting certain pathogenic strains. Since the MIC breakpoints that classify a strain to be resistant or susceptible to specific antibiotic drug may be revised by governing institutes, we refrained from translating these MIC values into the categories "susceptible" or "resistant" but instead attempted to predict the MIC values using machine learning approaches. Results: By applying a machine learning feature selection approach on a Salmonella enterica pan-genome, in which the protein sequences were clustered to identify highly similar gene families, we showed that the selected features (genes) performed better than known AMR genes, and that models built on the selected genes achieved very accurate MIC prediction. Functional analysis revealed that about half of the selected genes were annotated as hypothetical proteins (i.e., with unknown functional roles), and that only a small portion of known AMR genes were among the selected genes, indicating that applying feature selection on the entire gene set has the potential of uncovering novel genes that may be associated with and may contribute to pathogenic antimicrobial resistances. Conclusion: The application of the pan-genome-based machine learning approach was indeed capable of predicting MIC values with very high accuracy. 
The feature selection process may also identify novel AMR genes for inferring bacterial antimicrobial resistance phenotypes.}, } @article {pmid37322453, year = {2023}, author = {Sun, Y and Kou, DR and Li, Y and Ni, JP and Wang, J and Zhang, YM and Wang, QN and Jiang, B and Wang, X and Sun, YX and Xu, XT and Tan, XJ and Zhang, YJ and Kong, XD}, title = {Pan-genome of Citrullus genus highlights the extent of presence/absence variation during domestication and selection.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {332}, pmid = {37322453}, issn = {1471-2164}, support = {31871964,31401753,32100352//National Natural Science Foundation of China/ ; 202003a06020009//Major Science and Technology Projects in Anhui Province/ ; 6111612//Foundation project of Jiangsu Academy of Agricultural Sciences/ ; }, mesh = {*Citrullus/genetics ; Domestication ; Plant Breeding ; Genome, Plant ; Sequence Analysis, DNA ; }, abstract = {The rich genetic diversity in Citrullus lanatus and the other six species in the Citrullus genus provides important sources in watermelon breeding. Here, we present the Citrullus genus pan-genome based on the 400 Citrullus genus resequencing data, showing that 477 Mb contigs and 6249 protein-coding genes were absent in the Citrullus lanatus reference genome. In the Citrullus genus pan-genome, there are a total of 8795 (30.5%) genes that exhibit presence/absence variations (PAVs). Presence/absence variation (PAV) analysis showed that a lot of gene PAV were selected during the domestication and improvement, such as 53 favorable genes and 40 unfavorable genes were identified during the C. mucosospermus to C. lanatus landrace domestication. We also identified 661 resistance gene analogs (RGAs) in the Citrullus genus pan-genome, which contains 90 RGAs (89 variable and 1 core gene) located on the pangenome additional contigs. By gene PAV-based GWAS, 8 gene presence/absence variations were found associated with flesh color. 
Finally, based on the results of gene PAV selection analysis between watermelon populations with different fruit colors, we identified four non-reference candidate genes associated with carotenoid accumulation, which had a significantly higher frequency in the white flesh. These results will provide an important source for watermelon breeding.}, } @article {pmid37318846, year = {2023}, author = {Zang, X and Lv, H and Huang, P and Sun, Z and Gu, C and Ding, W and Jiao, X and Huang, J}, title = {Genomic Insights into Pangenome and Antimicrobial Resistance in Campylobacter spp. Isolated from Chickens at Specific Growth Stages.}, journal = {Foodborne pathogens and disease}, volume = {20}, number = {7}, pages = {303-312}, doi = {10.1089/fpd.2023.0008}, pmid = {37318846}, issn = {1556-7125}, mesh = {Animals ; Chickens ; Anti-Bacterial Agents/pharmacology ; *Campylobacter jejuni ; *Campylobacter Infections/veterinary ; Phylogeny ; Drug Resistance, Bacterial/genetics ; *Campylobacter ; *Anti-Infective Agents ; Genomics ; }, abstract = {Improved understanding of the genetic basis of Campylobacter spp. colonization of poultry at specific growth stage is the key to developing a farm-based strategy to prevent flock colonization. In this study, 39 Campylobacter spp. strains (chicken isolates, n = 29; environmental isolates, n = 10) were collected from six marked chickens at the growth stage from week 7 to week 13. Then, we use comparative genomics techniques to analyze the temporal genomic characteristics of Campylobacter spp. in individual chickens across a production cycle. Genotype, average nucleotide identity (ANI), and phylogenetic trees all indicated the evolutionary relationships between the strains from different sampling weeks. The clustering of isolates was not dependent on sampling time and sample source, indicating that strains could persist over several weeks in a flock. 
Notably, 10 antimicrobial resistance (AMR) genes were identified in the genome of Campylobacter coli isolates, and the genomes of isolates sampled at week 11 harbored fewer AMR genes and insertion sequences (IS) than the isolates from other weeks. Consistent with this, pangenome-wide association analysis demonstrated that gene acquisition and loss could happen at week 11 and week 13. These genes were mainly associated with cell membrane biogenesis, ion metabolism, and DNA replication, suggesting that genomic change may be related to Campylobacter adaptive response. This is a novel study focused on the genetic changes occurring in Campylobacter spp. isolates in a particular space and time; it highlights that accessory genes and AMR genes were overall stable at chicken farm, which will help us understand the survival and the transmission route of Campylobacter spp. better, and have the potential to inform the strategy on the safety control of market-ready chickens.}, } @article {pmid37317256, year = {2023}, author = {Stone, NE and McDonough, RF and Hamond, C and LeCount, K and Busch, JD and Dirsmith, KL and Rivera-Garcia, S and Soltero, F and Arnold, LM and Weiner, Z and Galloway, RL and Schlater, LK and Nally, JE and Sahl, JW and Wagner, DM}, title = {DNA Capture and Enrichment: A Culture-Independent Approach for Characterizing the Genomic Diversity of Pathogenic Leptospira Species.}, journal = {Microorganisms}, volume = {11}, number = {5}, pages = {}, pmid = {37317256}, issn = {2076-2607}, abstract = {Because they are difficult to culture, obtaining genomic information from Leptospira spp. is challenging, hindering the overall understanding of leptospirosis. We designed and validated a culture-independent DNA capture and enrichment system for obtaining Leptospira genomic information from complex human and animal samples. 
It can be utilized with a variety of complex sample types and diverse species as it was designed using the pan-genome of all known pathogenic Leptospira spp. This system significantly increases the proportion of Leptospira DNA contained within DNA extracts obtained from complex samples, oftentimes reaching >95% even when some estimated starting proportions were <1%. Sequencing enriched extracts results in genomic coverage similar to sequenced isolates, thereby enabling enriched complex extracts to be analyzed together with whole genome sequences from isolates, which facilitates robust species identification and high-resolution genotyping. The system is flexible and can be readily updated when new genomic information becomes available. Implementation of this DNA capture and enrichment system will improve efforts to obtain genomic data from unculturable Leptospira-positive human and animal samples. This, in turn, will lead to a better understanding of the overall genomic diversity and gene content of Leptospira spp. 
that cause leptospirosis, aiding epidemiology and the development of improved diagnostics and vaccines.}, } @article {pmid37316739, year = {2023}, author = {Fudge, JB}, title = {Combining 47 human genomes into a single pangenome.}, journal = {Nature biotechnology}, volume = {41}, number = {6}, pages = {766}, doi = {10.1038/s41587-023-01842-4}, pmid = {37316739}, issn = {1546-1696}, } @article {pmid37316654, year = {2023}, author = {Gao, Y and Yang, X and Chen, H and Tan, X and Yang, Z and Deng, L and Wang, B and Kong, S and Li, S and Cui, Y and Lei, C and Wang, Y and Pan, Y and Ma, S and Sun, H and Zhao, X and Shi, Y and Yang, Z and Wu, D and Wu, S and Zhao, X and Shi, B and Jin, L and Hu, Z and {Chinese Pangenome Consortium} and Lu, Y and Chu, J and Ye, K and Xu, S}, title = {A pangenome reference of 36 Chinese populations.}, journal = {Nature}, volume = {619}, number = {7968}, pages = {112-121}, pmid = {37316654}, issn = {1476-4687}, mesh = {Humans ; *East Asian People/classification/genetics ; *Ethnicity/genetics ; *Genome, Human/genetics ; Sequence Analysis, DNA ; Ultraviolet Rays ; *Human Genetics/standards ; *Minority Groups ; Ethnic and Racial Minorities ; Reference Standards ; Haplotypes/genetics ; Euchromatin/genetics ; *Genetic Variation ; Alleles ; DNA Repair/genetics ; Keratins/genetics/metabolism ; Longevity/genetics ; Immunity/genetics ; }, abstract = {Human genomics is witnessing an ongoing paradigm shift from a single reference sequence to a pangenome form, but populations of Asian ancestry are underrepresented. Here we present data from the first phase of the Chinese Pangenome Consortium, including a collection of 116 high-quality and haplotype-phased de novo assemblies based on 58 core samples representing 36 minority Chinese ethnic groups. 
With an average 30.65× high-fidelity long-read sequence coverage, an average contiguity N50 of more than 35.63 megabases and an average total size of 3.01 gigabases, the CPC core assemblies add 189 million base pairs of euchromatic polymorphic sequences and 1,367 protein-coding gene duplications to GRCh38. We identified 15.9 million small variants and 78,072 structural variants, of which 5.9 million small variants and 34,223 structural variants were not reported in a recently released pangenome reference[1]. The Chinese Pangenome Consortium data demonstrate a remarkable increase in the discovery of novel and missing sequences when individuals are included from underrepresented minority ethnic groups. The missing reference sequences were enriched with archaic-derived alleles and genes that confer essential functions related to keratinization, response to ultraviolet radiation, DNA repair, immunological responses and lifespan, implying great potential for shedding new light on human evolution and recovering missing heritability in complex disease mapping.}, } @article {pmid37316594, year = {2023}, author = {}, title = {A pangenome reference representative of 36 minority Chinese ethnic groups.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {37316594}, issn = {1476-4687}, } @article {pmid37313509, year = {2022}, author = {Reeves, PA and Richards, CM}, title = {A pan-genome data structure induced by pooled sequencing facilitates variant mining in heterogeneous germplasm.}, journal = {Molecular breeding : new strategies in plant improvement}, volume = {42}, number = {7}, pages = {36}, pmid = {37313509}, issn = {1572-9788}, abstract = {UNLABELLED: Valuable genetic variation lies unused in gene banks due to the difficulty of exploiting heterogeneous germplasm accessions. Advances in molecular breeding, including transgenics and genome editing, present the opportunity to exploit hidden sequence variation directly. 
Here we describe the pan-genome data structure induced by whole-genome sequencing of pooled individuals from wild populations of Patellifolia spp., a source of disease resistance genes for the related crop species sugar beet (Beta vulgaris). We represent the pan-genome as a map of reads from pooled sequencing of a heterogeneous population sample to a reference genome, plus a BLAST data base of the mapped reads. We show that this basic data structure can be queried by reference genome position or homology to identify sequence variants present in the wild relative, at genes of agronomic interest in the crop, a process known as allele or variant mining. Further we demonstrate the possibility of cataloging variants in all Patellifolia genomic regions that have corresponding single copy orthologous regions in sugar beet. The data structure, termed a "pooled read archive," can be produced, altered, and queried using standard tools to facilitate discovery of agronomically-important sequence variation.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s11032-022-01308-6.}, } @article {pmid37313015, year = {2022}, author = {Seyum, EG and Bille, NH and Abtew, WG and Munyengwa, N and Bell, JM and Cros, D}, title = {Genomic selection in tropical perennial crops and plantation trees: a review.}, journal = {Molecular breeding : new strategies in plant improvement}, volume = {42}, number = {10}, pages = {58}, pmid = {37313015}, issn = {1572-9788}, abstract = {UNLABELLED: To overcome the multiple challenges currently faced by agriculture, such as climate change and soil deterioration, more efficient plant breeding strategies are required. Genomic selection (GS) is crucial for the genetic improvement of quantitative traits, as it can increase selection intensity, shorten the generation interval, and improve selection accuracy for traits that are difficult to phenotype. Tropical perennial crops and plantation trees are of major economic importance and have consequently been the subject of many GS articles. In this review, we discuss the factors that affect GS accuracy (statistical models, linkage disequilibrium, information concerning markers, relatedness between training and target populations, the size of the training population, and trait heritability) and the genetic gain expected in these species. The impact of GS will be particularly strong in tropical perennial crops and plantation trees as they have long breeding cycles and constrained selection intensity. Future GS prospects are also discussed. High-throughput phenotyping will allow constructing of large training populations and implementing of phenomic selection. Optimized modeling is needed for longitudinal traits and multi-environment trials. The use of multi-omics, haploblocks, and structural variants will enable going beyond single-locus genotype data. 
Innovative statistical approaches, like artificial neural networks, are expected to efficiently handle the increasing amounts of heterogeneous multi-scale data. Targeted recombinations on sites identified from profiles of marker effects have the potential to further increase genetic gain. GS can also aid re-domestication and introgression breeding. Finally, GS consortia will play an important role in making the best of these opportunities.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s11032-022-01326-4.}, } @article {pmid37310928, year = {2023}, author = {Li, R and Gong, M and Zhang, X and Wang, F and Liu, Z and Zhang, L and Yang, Q and Xu, Y and Xu, M and Zhang, H and Zhang, Y and Dai, X and Gao, Y and Zhang, Z and Fang, W and Yang, Y and Fu, W and Cao, C and Yang, P and Ghanatsaman, ZA and Negari, NJ and Nanaei, HA and Yue, X and Song, Y and Lan, X and Deng, W and Wang, X and Pan, C and Xiang, R and Ibeagha-Awemu, EM and Heslop-Harrison, PJS and Rosen, BD and Lenstra, JA and Gan, S and Jiang, Y}, title = {A sheep pangenome reveals the spectrum of structural variations and their effects on tail phenotypes.}, journal = {Genome research}, volume = {33}, number = {3}, pages = {463-477}, pmid = {37310928}, issn = {1549-5469}, mesh = {Animals ; Sheep/genetics ; *Genome-Wide Association Study ; *Tail ; 5' Untranslated Regions ; Alleles ; Phenotype ; }, abstract = {Structural variations (SVs) are a major contributor to genetic diversity and phenotypic variations, but their prevalence and functions in domestic animals are largely unexplored. Here we generated high-quality genome assemblies for 15 individuals from genetically diverse sheep breeds using Pacific Biosciences (PacBio) high-fidelity sequencing, discovering 130.3 Mb nonreference sequences, from which 588 genes were annotated. A total of 149,158 biallelic insertions/deletions, 6531 divergent alleles, and 14,707 multiallelic variations with precise breakpoints were discovered. The SV spectrum is characterized by an excess of derived insertions compared to deletions (94,422 vs. 33,571), suggesting recent active LINE expansions in sheep. Nearly half of the SVs display low to moderate linkage disequilibrium with surrounding single-nucleotide polymorphisms (SNPs) and most SVs cannot be tagged by SNP probes from the widely used ovine 50K SNP chip. 
We identified 865 population-stratified SVs including 122 SVs possibly derived in the domestication process among 690 individuals from sheep breeds worldwide. A novel 168-bp insertion in the 5' untranslated region (5' UTR) of HOXB13 is found at high frequency in long-tailed sheep. Further genome-wide association study and gene expression analyses suggest that this mutation is causative for the long-tail trait. In summary, we have developed a panel of high-quality de novo assemblies and present a catalog of structural variations in sheep. Our data capture abundant candidate functional variations that were previously unexplored and provide a fundamental resource for understanding trait biology in sheep.}, } @article {pmid37303795, year = {2023}, author = {Zhang, Z and Cui, M and Chen, P and Li, J and Mao, Z and Mao, Y and Li, Z and Guo, Q and Wang, C and Liao, X and Liu, H}, title = {Insight into the phylogeny and metabolic divergence of Monascus species (M. pilosus, M. ruber, and M. purpureus) at the genome level.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1199144}, pmid = {37303795}, issn = {1664-302X}, abstract = {BACKGROUND: Species of the genus Monascus are economically important and widely used in the production of food colorants and monacolin K. However, they have also been known to produce the mycotoxin citrinin. Currently, taxonomic knowledge of this species at the genome level is insufficient.

METHODS: This study presents genomic similarity analyses through the analysis of the average nucleic acid identity of the genomic sequence and the whole genome alignment. Subsequently, the study constructed a pangenome of Monascus by reannotating all the genomes and identifying a total of 9,539 orthologous gene families. Two phylogenetic trees were constructed based on 4,589 single copy orthologous protein sequences and all the 5,565 orthologous proteins, respectively. In addition, carbohydrate active enzymes, secretome, allergic proteins, as well as secondary metabolite gene clusters were compared among the included 15 Monascus strains.

RESULTS: The results clearly revealed a high homology between M. pilosus and M. ruber, and their distant relationship with M. purpureus. Accordingly, all the included 15 Monascus strains should be classified into two distinctly evolutionary clades, namely the M. purpureus clade and the M. pilosus-M. ruber clade. Moreover, gene ontology enrichment showed that the M. pilosus-M. ruber clade had more orthologous genes involved with environmental adaptation than the M. purpureus clade. Compared to Aspergillus oryzae, all the Monascus species had a substantial gene loss of carbohydrate active enzymes. Potential allergenic and fungal virulence factor proteins were also found in the secretome of Monascus. Furthermore, this study identified the pigment synthesis gene clusters present in all included genomes, but with multiple nonessential genes inserted in the gene cluster of M. pilosus and M. ruber compared to M. purpureus. The citrinin gene cluster was found to be intact and highly conserved only among M. purpureus genomes. The monacolin K gene cluster was found only in the genomes of M. pilosus and M. ruber, but the sequence was more conserved in M. ruber.

CONCLUSION: This study provides a paradigm for phylogenetic analysis of the genus Monascus, and it is believed that this report will lead to a better understanding of these food microorganisms in terms of classification, metabolic differentiation, and safety.}, } @article {pmid37298462, year = {2023}, author = {Wekesa, C and Kiprotich, K and Okoth, P and Asudi, GO and Muoma, JO and Furch, ACU and Oelmüller, R}, title = {Molecular Characterization of Indigenous Rhizobia from Kenyan Soils Nodulating with Common Beans.}, journal = {International journal of molecular sciences}, volume = {24}, number = {11}, pages = {}, pmid = {37298462}, issn = {1422-0067}, mesh = {*Rhizobium/genetics ; Kenya ; *Phaseolus/microbiology ; Soil ; Symbiosis/genetics ; Nitrogen ; }, abstract = {Kenya is the seventh most prominent producer of common beans globally and the second leading producer in East Africa. However, the annual national productivity is low due to insufficient quantities of vital nutrients and nitrogen in the soils. Rhizobia are symbiotic bacteria that fix nitrogen through their interaction with leguminous plants. Nevertheless, inoculating beans with commercial rhizobia inoculants results in sparse nodulation and low nitrogen supply to the host plants because these strains are poorly adapted to the local soils. Several studies describe native rhizobia with much better symbiotic capabilities than commercial strains, but only a few have conducted field studies. This study aimed to test the competence of new rhizobia strains that we isolated from Western Kenya soils and for which the symbiotic efficiency was successfully determined in greenhouse experiments. Furthermore, we present and analyze the whole-genome sequence for a promising candidate for agricultural application, which has high nitrogen fixation features and promotes common bean yields in field studies. 
Plants inoculated with the rhizobial isolate S3 or with a consortium of local isolates (COMB), including S3, produced a significantly higher number of seeds and seed dry weight when compared to uninoculated control plants at two study sites. The performance of plants inoculated with commercial isolate CIAT899 was not significantly different from uninoculated plants (p > 0.05), indicating tight competition from native rhizobia for nodule occupancy. Pangenome analysis and the overall genome-related indices showed that S3 is a member of R. phaseoli. However, synteny analysis revealed significant differences in the gene order, orientation, and copy numbers between S3 and the reference R. phaseoli. Isolate S3 is phylogenomically similar to R. phaseoli. However, it has undergone significant genome rearrangements (global mutagenesis) to adapt to harsh conditions in Kenyan soils. Its high nitrogen fixation ability shows optimal adaptation to Kenyan soils, and the strain can potentially replace nitrogenous fertilizer application. We recommend that extensive fieldwork in other parts of the country over a period of five years be performed on S3 to check on how the yield changes with varying weather conditions.}, } @article {pmid37296461, year = {2023}, author = {Schmidt, S and Khan, S and Alanko, JN and Pibiri, GE and Tomescu, AI}, title = {Matchtigs: minimum plain text representation of k-mer sets.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {136}, pmid = {37296461}, issn = {1474-760X}, support = {851093//H2020 European Research Council/ ; 322595//Academy of Finland/ ; 328877//Academy of Finland/ ; 101006879//Horizon 2020 Framework Programme/ ; }, mesh = {*Software ; Sequence Analysis, DNA ; *Algorithms ; Bacteria ; }, abstract = {We propose a polynomial algorithm computing a minimum plain-text representation of k-mer sets, as well as an efficient near-minimum greedy heuristic. 
When compressing read sets of large model organisms or bacterial pangenomes, with only a minor runtime increase, we shrink the representation by up to 59% over unitigs and 26% over previous work. Additionally, the number of strings is decreased by up to 97% over unitigs and 90% over previous work. Finally, a small representation has advantages in downstream applications, as it speeds up SSHash-Lite queries by up to 4.26× over unitigs and 2.10× over previous work.}, } @article {pmid37291196, year = {2023}, author = {He, Q and Tang, S and Zhi, H and Chen, J and Zhang, J and Liang, H and Alam, O and Li, H and Zhang, H and Xing, L and Li, X and Zhang, W and Wang, H and Shi, J and Du, H and Wu, H and Wang, L and Yang, P and Xing, L and Yan, H and Song, Z and Liu, J and Wang, H and Tian, X and Qiao, Z and Feng, G and Guo, R and Zhu, W and Ren, Y and Hao, H and Li, M and Zhang, A and Guo, E and Yan, F and Li, Q and Liu, Y and Tian, B and Zhao, X and Jia, R and Feng, B and Zhang, J and Wei, J and Lai, J and Jia, G and Purugganan, M and Diao, X}, title = {A graph-based genome and pan-genome variation of the model plant Setaria.}, journal = {Nature genetics}, volume = {55}, number = {7}, pages = {1232-1242}, pmid = {37291196}, issn = {1546-1718}, mesh = {Chromosome Mapping ; *Setaria Plant/genetics/metabolism ; Plant Breeding ; Phenotype ; Quantitative Trait Loci ; Genome, Plant/genetics ; Phylogeny ; Plant Proteins/genetics ; }, abstract = {Setaria italica (foxtail millet), a founder crop of East Asian agriculture, is a model plant for C4 photosynthesis and developing approaches to adaptive breeding across multiple climates. Here we established the Setaria pan-genome by assembling 110 representative genomes from a worldwide collection. The pan-genome is composed of 73,528 gene families, of which 23.8%, 42.9%, 29.4% and 3.9% are core, soft core, dispensable and private genes, respectively; 202,884 nonredundant structural variants were also detected. 
The characterization of pan-genomic variants suggests their importance during foxtail millet domestication and improvement, as exemplified by the identification of the yield gene SiGW3, where a 366-bp presence/absence promoter variant accompanies gene expression variation. We developed a graph-based genome and performed large-scale genetic studies for 68 traits across 13 environments, identifying potential genes for millet improvement at different geographic sites. These can be used in marker-assisted breeding, genomic selection and genome editing to accelerate crop improvement under different climatic conditions.}, } @article {pmid37291142, year = {2023}, author = {Eché, C and Iampietro, C and Birbes, C and Dréau, A and Kuchly, C and Di Franco, A and Klopp, C and Faraut, T and Djebali, S and Castinel, A and Zytnicki, M and Denis, E and Boussaha, M and Grohs, C and Boichard, D and Gaspin, C and Milan, D and Donnadieu, C}, title = {A Bos taurus sequencing methods benchmark for assembly, haplotyping, and variant calling.}, journal = {Scientific data}, volume = {10}, number = {1}, pages = {369}, pmid = {37291142}, issn = {2052-4463}, mesh = {Animals ; Cattle ; Female ; Benchmarking ; Genome ; *Genomics ; *High-Throughput Nucleotide Sequencing ; Sequence Analysis, DNA ; }, abstract = {Inspired by the production of reference data sets in the Genome in a Bottle project, we sequenced one Charolais heifer with different technologies: Illumina paired-end, Oxford Nanopore, Pacific Biosciences (HiFi and CLR), 10X Genomics linked-reads, and Hi-C. In order to generate haplotypic assemblies, we also sequenced both parents with short reads. From these data, we built two haplotyped trio high quality reference genomes and a consensus assembly, using up-to-date software packages. The assemblies obtained using PacBio HiFi reaches a size of 3.2 Gb, which is significantly larger than the 2.7 Gb ARS-UCD1.2 reference. 
The BUSCO score of the consensus assembly reaches a completeness of 95.8%, among highly conserved mammal genes. We also identified 35,866 structural variants larger than 50 base pairs. This assembly is a contribution to the bovine pangenome for the "Charolais" breed. These datasets will prove to be useful resources enabling the community to gain additional insight on sequencing technologies for applications such as SNP, indel or structural variant calling, and de novo assembly.}, } @article {pmid37289488, year = {2023}, author = {Mossop, M and Robinson, L and Jiang, JH and Peleg, AY and Blakeway, LV and Macesic, N and Perry, A and Bourke, S and Ulhuq, FR and Palmer, T}, title = {Characterisation of key genotypic and phenotypic traits of clinical cystic fibrosis Staphylococcus aureus isolates.}, journal = {Journal of medical microbiology}, volume = {72}, number = {6}, pages = {}, doi = {10.1099/jmm.0.001703}, pmid = {37289488}, issn = {1473-5644}, mesh = {Animals ; Sheep ; Staphylococcus aureus ; *Cystic Fibrosis/complications/microbiology ; *Coinfection/microbiology ; Agar ; Phenotype ; *Staphylococcal Infections/microbiology ; Anti-Bacterial Agents/pharmacology ; }, abstract = {Introduction. One third of people with CF in the UK are co-infected by both Staphylococcus aureus and Pseudomonas aeruginosa. Chronic bacterial infection in CF contributes to the gradual destruction of lung tissue, and eventually respiratory failure in this group. Gap Statement. The contribution of S. aureus to cystic fibrosis (CF) lung decline in the presence or absence of P. aeruginosa is unclear. Defining the molecular and phenotypic characteristics of a range of S. aureus clinical isolates will help further understand its pathogenic capabilities. Aim. Our objective was to use molecular and phenotypic tools to characterise twenty-five clinical S. aureus isolates collected from mono- and coinfection with P. 
aeruginosa from people with CF at the Royal Victoria Infirmary, Newcastle upon Tyne. Methodology. Genomic DNA was extracted and sequenced. Multilocus sequence typing was used to construct phylogeny from the seven housekeeping genes. A pangenome was calculated using Roary, and Clusters of Orthologous Groups (COGs) were assigned using eggNOG-mapper, which were used to determine differences within core, accessory, and unique genomes. Characterisation of sequence type, clonal complex, agr and spa types was carried out using PubMLST, eBURST, AgrVATE and spaTyper, respectively. Antibiotic resistance was determined using Kirby-Bauer disc diffusion tests. Phenotypic testing of haemolysis was carried out using ovine red blood cell agar plates and mucoid phenotypes visualised using Congo red agar. Results. Clinical strains clustered closely based on agr type, sequence type and clonal complex. COG analysis revealed statistically significant enrichment of COG families between core, accessory and unique pangenome groups. The unique genome was significantly enriched for replication, recombination and repair, and defence mechanisms. The presence of known virulence genes and toxins was high within this group, and unique genes were identified in 11 strains. Strains which were isolated from the same patient all surpassed average nucleotide identity thresholds, however, differed in phenotypic traits. Antimicrobial resistance to macrolides was significantly higher in the coinfection group. Conclusion. There is huge variation in genetic and phenotypic capabilities of S. aureus strains. 
Further studies on how these may differ in relation to other species in the CF lung may give insight into inter-species interactions.}, } @article {pmid37285390, year = {2023}, author = {Rubin, JD and Vogel, NA and Gopalakrishnan, S and Sackett, PW and Renaud, G}, title = {HaploCart: Human mtDNA haplogroup classification using a pangenomic reference graph.}, journal = {PLoS computational biology}, volume = {19}, number = {6}, pages = {e1011148}, pmid = {37285390}, issn = {1553-7358}, mesh = {Humans ; *DNA, Mitochondrial/genetics ; Bayes Theorem ; Haplotypes/genetics ; *Mitochondria/genetics ; Mutation ; }, abstract = {Current mitochondrial DNA (mtDNA) haplogroup classification tools map reads to a single reference genome and perform inference based on the detected mutations to this reference. This approach biases haplogroup assignments towards the reference and prohibits accurate calculations of the uncertainty in assignment. We present HaploCart, a probabilistic mtDNA haplogroup classifier which uses a pangenomic reference graph framework together with principles of Bayesian inference. We demonstrate that our approach significantly outperforms available tools by being more robust to lower coverage or incomplete consensus sequences and producing phylogenetically-aware confidence scores that are unbiased towards any haplogroup. HaploCart is available both as a command-line tool and through a user-friendly web interface. The C++ program accepts as input consensus FASTA, FASTQ, or GAM files, and outputs a text file with the haplogroup assignments of the samples along with the level of confidence in the assignments. 
Our work considerably reduces the amount of data required to obtain a confident mitochondrial haplogroup assignment.}, } @article {pmid37285209, year = {2023}, author = {Liu, R and Ma, L and Wang, H and Liu, D and Lu, X and Huang, X and Huang, S and Liu, X}, title = {Comparative genomics reveals intraspecific divergence of Acidithiobacillus ferrooxidans: insights from evolutionary adaptation.}, journal = {Microbial genomics}, volume = {9}, number = {6}, pages = {}, pmid = {37285209}, issn = {2057-5858}, mesh = {*Acidithiobacillus/genetics/metabolism ; Genomics/methods ; Iron/metabolism ; Adaptation, Physiological/genetics ; }, abstract = {Acidithiobacillus ferrooxidans serves as a model chemolithoautotrophic organism in extremely acidic environments, which has attracted much attention due to its unique metabolism and strong adaptability. However, little was known about the divergences along the evolutionary process based on whole genomes. Herein, we isolated six strains of A. ferrooxidans from mining areas in China and Zambia, and used comparative genomics to investigate the intra-species divergences. The results indicated that A. ferrooxidans diverged into three groups from a common ancestor, and the pan-genome is 'open'. The ancestral reconstruction of A. ferrooxidans indicated that genome sizes experienced a trend of increase in the very earliest days before a decreasing tendency during the evolutionary process, suggesting that both gene gain and gene loss played crucial roles in A. ferrooxidans genome flexibility. Meanwhile, 23 single-copy orthologous groups (OGs) were under positive selection. The differences of rusticyanin (Rus) sequences (the key protein in the iron oxidation pathway) and type IV secretion system (T4SS) composition in the A. ferrooxidans were both related to their group divergences, which contributed to their intraspecific diversity. This study improved our understanding of the divergent evolution and environmental adaptation of A. 
ferrooxidans at the genome level in extreme conditions, which provided theoretical support for the survival mechanism of living creatures at the extreme.}, } @article {pmid37278719, year = {2023}, author = {Noll, N and Molari, M and Shaw, LP and Neher, RA}, title = {PanGraph: scalable bacterial pan-genome graph construction.}, journal = {Microbial genomics}, volume = {9}, number = {6}, pages = {}, pmid = {37278719}, issn = {2057-5858}, support = {/WT_/Wellcome Trust/United Kingdom ; 220422/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {*Genomics ; *Genome, Bacterial ; }, abstract = {The genomic diversity of microbes is commonly parameterized as SNPs relative to a reference genome of a well-characterized, but arbitrary, isolate. However, any reference genome contains only a fraction of the microbial pangenome, the total set of genes observed in a given species. Reference-based approaches are thus blind to the dynamics of the accessory genome, as well as variation within gene order and copy number. With the widespread usage of long-read sequencing, the number of high-quality, complete genome assemblies has increased dramatically. In addition to pangenomic approaches that focus on the variation in the sets of genes present in different genomes, complete assemblies allow investigations of the evolution of genome structure and gene order. This latter problem, however, is computationally demanding with few tools available that shed light on these dynamics. Here, we present PanGraph, a Julia-based library and command line interface for aligning whole genomes into a graph. Each genome is represented as a path along vertices, which in turn encapsulate homologous multiple sequence alignments. 
The resultant data structure succinctly summarizes population-level nucleotide and structural polymorphisms and can be exported into several common formats for either downstream analysis or immediate visualization.}, } @article {pmid37275147, year = {2023}, author = {Salvà-Serra, F and Pérez-Pantoja, D and Donoso, RA and Jaén-Luchoro, D and Fernández-Juárez, V and Engström-Jakobsson, H and Moore, ERB and Lalucat, J and Bennasar-Figueras, A}, title = {Comparative genomics of Stutzerimonas balearica (Pseudomonas balearica): diversity, habitats, and biodegradation of aromatic compounds.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1159176}, pmid = {37275147}, issn = {1664-302X}, abstract = {Stutzerimonas balearica (Pseudomonas balearica) has been found principally in oil-polluted environments. The capability of S. balearica to thrive from the degradation of pollutant compounds makes it a species of interest for potential bioremediation applications. However, little has been reported about the diversity of S. balearica. In this study, genome sequences of S. balearica strains from different origins were analyzed, revealing that it is a diverse species with an open pan-genome that will continue revealing new genes and functionalities as the genomes of more strains are sequenced. The nucleotide signatures and intra- and inter-species variation of the 16S rRNA genes of S. balearica were reevaluated. A strategy of screening 16S rRNA gene sequences in public databases enabled the detection of 158 additional strains, of which only 23% were described as S. balearica. The species was detected from a wide range of environments, although mostly from aquatic and polluted environments, predominantly related to petroleum oil. Genomic and phenotypic analyses confirmed that S. balearica possesses varied inherent capabilities for aromatic compounds degradation. This study increases the knowledge of the biology and diversity of S. 
balearica and will serve as a basis for future work with the species.}, } @article {pmid37274318, year = {2023}, author = {Lopez, MES and Gontijo, MTP and Cardoso, RR and Batalha, LS and Eller, MR and Bazzolli, DMS and Vidigal, PMP and Mendonça, RCS}, title = {Complete genome analysis of Tequatrovirus ufvareg1, a Tequatrovirus species inhibiting Escherichia coli O157:H7.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1178248}, pmid = {37274318}, issn = {2235-2988}, mesh = {Humans ; *Escherichia coli O157/genetics ; *Bacteriophages/genetics ; Genome ; Genomics ; Base Sequence ; }, abstract = {INTRODUCTION: Bacteriophages infecting human pathogens have been considered potential biocontrol agents, and studying their genetic content is essential to their safe use in the food industry. Tequatrovirus ufvareg1 is a bacteriophage named UFV-AREG1, isolated from cowshed wastewater and previously tested for its ability to inhibit Escherichia coli O157:H7.

METHODS: T. ufvareg1 was previously isolated using E. coli O157:H7 (ATCC 43895) as a bacterial host. The same strain was used for bacteriophage propagation and the one-step growth curve. The genome of the T. ufvareg1 was sequenced using 305 Illumina HiSeq, and the genome comparison was calculated by VIRIDIC and VIPTree.

RESULTS: Here, we characterize its genome and compare it to other Tequatrovirus. T. ufvareg1 virions have an icosahedral head (114 x 86 nm) and a contracted tail (117 x 23 nm), with a latent period of 25 min, and an average burst size of 18 phage particles per infected E. coli cell. The genome of the bacteriophage T. ufvareg1 contains 268 coding DNA sequences (CDS) and ten tRNA genes distributed in both negative and positive strands. T. ufvareg1 genome also contains 40 promoters on its regulatory regions and two rho-independent terminators. T. ufvareg1 shares an average intergenomic similarity (VIRIDIC) of 88.77% and an average genomic similarity score (VipTree) of 88.91% with eighty-four reference genomes for Tequatrovirus available in the NCBI RefSeq database. The pan-genomic analysis confirmed the high conservation of Tequatrovirus genomes. Among all CDS annotated in the T. ufvareg1 genome, there are 123 core genes, 38 softcore genes, 94 shell genes, and 13 cloud genes. None of the 268 CDS was classified as being exclusive of T. ufvareg1.

CONCLUSION: The results in this paper, combined with other previously published findings, indicate that T. ufvareg1 bacteriophage is a potential candidate for food protection against E. coli O157:H7 in foods.}, } @article {pmid37267130, year = {2023}, author = {van den Brandt, AVD and Jonkheer, EM and van Workum, DM and van de Wetering, H and Smit, S and Vilanova, A}, title = {PanVA: Pangenomic Variant Analysis.}, journal = {IEEE transactions on visualization and computer graphics}, volume = {PP}, number = {}, pages = {}, doi = {10.1109/TVCG.2023.3282364}, pmid = {37267130}, issn = {1941-0506}, abstract = {Genomics researchers increasingly use multiple reference genomes to comprehensively explore genetic variants underlying differences in detectable characteristics between organisms. Pangenomes allow for an efficient data representation of multiple related genomes and their associated metadata. However, current visual analysis approaches for exploring these complex genotype-phenotype relationships are often based on single reference approaches or lack adequate support for interpreting the variants in the genomic context with heterogeneous (meta)data. This design study introduces PanVA, a visual analytics design for pangenomic variant analysis developed with the active participation of genomics researchers. The design uniquely combines tailored visual representations with interactions such as sorting, grouping, and aggregation, allowing users to navigate and explore different perspectives on complex genotype-phenotype relations. 
Through evaluation in the context of plants and pathogen research, we show that PanVA helps researchers explore variants in genes and generate hypotheses about their role in phenotypic variation.}, } @article {pmid37261234, year = {2023}, author = {Fatima, S and Ishaq, Z and Irfan, M and AlAsmari, AF and Achakzai, JK and Zaheer, T and Ali, A and Akbar, A}, title = {Whole-genome sequencing of multidrug resistance Salmonella Typhi clinical strains isolated from Balochistan, Pakistan.}, journal = {Frontiers in public health}, volume = {11}, number = {}, pages = {1151805}, pmid = {37261234}, issn = {2296-2565}, mesh = {Humans ; *Salmonella typhi/genetics ; Multilocus Sequence Typing ; Pakistan ; *Anti-Bacterial Agents/pharmacology ; Virulence Factors/genetics ; Whole Genome Sequencing ; Drug Resistance, Multiple ; }, abstract = {INTRODUCTION: Salmonella enterica serovar Typhi (S. Typhi) is a major cause of morbidity and mortality in developing countries, contributing significantly to the global disease burden.

METHODS: In this study, S. Typhi strains were isolated from 100 patients exhibiting symptoms of typhoid fever at a tertiary care hospital in Pakistan. Antimicrobial testing of all isolates was performed to determine the sensitivity and resistance pattern. Three MDR strains, namely QS194, QS430, and QS468, were subjected to whole genome sequencing for genomic characterization.

RESULTS AND DISCUSSION: MLST analysis showed that QS194 belonged to ST19, which is commonly associated with Salmonella enterica serovar Typhimurium. In contrast, QS430 and QS468 belonged to ST1, a sequence type frequently associated with S. Typhi. PlasmidFinder identified the presence of IncFIB(S) and IncFII(S) plasmids in QS194, while IncQ1 was found in QS468. No plasmid was detected in QS430. CARD-based analysis showed that the strains were largely resistant to a variety of antibiotics and disinfecting agents/antiseptics, including fluoroquinolones, cephalosporins, monobactams, cephamycins, penams, phenicols, tetracyclines, rifamycins, aminoglycosides, etc. The S. Typhi strains possessed various virulence factors, such as Vi antigen, Agf/Csg, Bcf, Fim, Pef, etc. The sequencing data indicated that the strains had antibiotic resistance determinants and shared common virulence factors. Pangenome analysis of the selected S. Typhi strains identified 13,237 genes, with 3,611 being core genes, 2,093 shell genes, and 7,533 cloud genes. Genome-based typing and horizontal gene transfer analysis revealed that the strains had different evolutionary origins and may have adapted to distinct environments or host organisms. These findings provide important insights into the genetic characteristics of S. 
Typhi strains and their potential association with various ecological niches and host organisms.}, } @article {pmid37258301, year = {2023}, author = {Ahmed, O and Rossi, M and Boucher, C and Langmead, B}, title = {Efficient taxa identification using a pangenome index.}, journal = {Genome research}, volume = {33}, number = {7}, pages = {1069-1077}, doi = {10.1101/gr.277642.123}, pmid = {37258301}, issn = {1549-5469}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R35 GM139602/GM/NIGMS NIH HHS/United States ; T32 GM119998/GM/NIGMS NIH HHS/United States ; }, mesh = {*Algorithms ; Sequence Analysis ; *Bacteria/genetics ; }, abstract = {Tools that classify sequencing reads against a database of reference sequences require efficient index data-structures. The r-index is a compressed full-text index that answers substring presence/absence, count, and locate queries in space proportional to the amount of distinct sequence in the database: $O(r)$ space, where r is the number of Burrows-Wheeler runs. To date, the r-index has lacked the ability to quickly classify matches according to which reference sequences (or sequence groupings, i.e., taxa) a match overlaps. We present new algorithms and methods for solving this problem. Specifically, given a collection D of d documents, $D = \{T_1, T_2, \ldots, T_d\}$ over an alphabet of size σ, we extend the r-index with $O(rd)$ additional words to support document listing queries for a pattern $S[1..m]$ that occurs in $\mathit{ndoc}$ documents in D in $O(m \log\log_w \sigma + \mathit{ndoc})$ time and $O(rd)$ space, where w is the machine word size. Applied in a bacterial mock community experiment, our method is up to three times faster than a comparable method that uses the standard r-index locate queries. We show that our method classifies both simulated and real nanopore reads at the strain level with higher accuracy compared with other approaches. 
Finally, we present strategies for compacting this structure in applications in which read lengths or match lengths can be bounded.}, } @article {pmid37256057, year = {2023}, author = {Zhao, W and Zeng, W and Pang, B and Luo, M and Peng, Y and Xu, J and Kan, B and Li, Z and Lu, X}, title = {Oxford nanopore long-read sequencing enables the generation of complete bacterial and plasmid genomes without short-read sequencing.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1179966}, pmid = {37256057}, issn = {1664-302X}, abstract = {INTRODUCTION: Genome-based analysis is crucial in monitoring antibiotic-resistant bacteria (ARB) and antibiotic-resistance genes (ARGs). Short-read sequencing is typically used to obtain incomplete draft genomes, while long-read sequencing can obtain genomes of multidrug resistance (MDR) plasmids and track the transmission of plasmid-borne antimicrobial resistance genes in bacteria. However, long-read sequencing suffers from low-accuracy base calling, and short-read sequencing is often required to improve genome accuracy. This increases costs and turnaround time.

METHODS: In this study, a novel ONT sequencing method is described, which uses the latest ONT chemistry with improved accuracy to assemble genomes of MDR strains and plasmids from long-read sequencing data only. Three strains of Salmonella carrying MDR plasmids were sequenced using the ONT SQK-LSK114 kit with flow cell R10.4.1, and de novo genome assembly was performed with average read accuracy (Q > 10) of 98.9%.

RESULTS AND DISCUSSION: For a 5-Mb-long bacterial genome, finished genome sequences with accuracy of >99.99% could be obtained at 75× sequencing coverage depth using Flye and Medaka software. Thus, this new ONT method greatly improves base-calling accuracy, allowing for the de novo assembly of high-quality finished bacterial or plasmid genomes without the need for short-read sequencing. This saves both money and time and supports the application of ONT data in critical genome-based epidemiological analyses. The novel ONT approach described in this study can take the place of traditional combination genome assembly based on short- and long-read sequencing, enabling pangenomic analyses based on high-quality complete bacterial and plasmid genomes to monitor the spread of antibiotic-resistant bacteria and antibiotic resistance genes.}, } @article {pmid37250090, year = {2023}, author = {Zhang, JX and Xu, JH and Yuan, B and Wang, XD and Mao, XH and Wang, JL and Zhang, XL and Yuan, Y}, title = {Detection of Burkholderia pseudomallei with CRISPR-Cas12a based on specific sequence tags.}, journal = {Frontiers in public health}, volume = {11}, number = {}, pages = {1153352}, pmid = {37250090}, issn = {2296-2565}, mesh = {Humans ; *Burkholderia pseudomallei/genetics ; *Melioidosis/diagnosis/genetics/microbiology ; CRISPR-Cas Systems ; }, abstract = {Melioidosis is a bacterial infection caused by Burkholderia pseudomallei (B. pseudomallei), posing a significant threat to public health. Rapid and accurate detection of B. pseudomallei is crucial for preventing and controlling melioidosis. However, identifying B. pseudomallei is challenging due to its high similarity to other species in the same genus. To address this issue, this study proposed a dual-target method that can specifically identify B. pseudomallei in less than 40 min. We analyzed 1722 B. 
pseudomallei genomes to construct large-scale pan-genomes and selected specific sequence tags in their core genomes that effectively distinguish B. pseudomallei from its closely related species. Specifically, we selected two specific tags, LC1 and LC2, which we combined with the Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR)-CRISPR associated proteins (Cas12a) system and recombinase polymerase amplification (RPA) pre-amplification. Our analysis showed that the dual-target RPA-CRISPR/Cas12a assay has a sensitivity of approximately 0.2 copies/reaction and 10 fg genomic DNA for LC1, and 2 copies/reaction and 20 fg genomic DNA for LC2. Additionally, our method can accurately and rapidly detect B. pseudomallei in human blood and moist soil samples using the specific sequence tags mentioned above. In conclusion, the dual-target RPA-CRISPR/Cas12a method is a valuable tool for the rapid and accurate identification of B. pseudomallei in clinical and environmental samples, aiding in the prevention and control of melioidosis.}, } @article {pmid37249320, year = {2023}, author = {}, title = {New Genomic Sequencing Resource Could Improve Care.}, journal = {Cancer discovery}, volume = {13}, number = {7}, pages = {1506-1507}, doi = {10.1158/2159-8290.CD-NB2023-0042}, pmid = {37249320}, issn = {2159-8290}, mesh = {Humans ; Base Sequence ; *Genomics ; }, abstract = {The first draft of a human pangenomic reference, which includes 47 individuals selected to maximize biogeographic diversity, offers a path to more accurate and effective screening for disease. 
This broader and more complete view of genetic diversity could lead to new targets for cancer therapies.}, } @article {pmid37249052, year = {2023}, author = {Chen, Y and Guo, Y and Xie, X and Wang, Z and Miao, L and Yang, Z and Jiao, Y and Xie, C and Liu, J and Hu, Z and Xin, M and Yao, Y and Ni, Z and Sun, Q and Peng, H and Guo, W}, title = {Pangenome-based trajectories of intracellular gene transfers in Poaceae unveil high cumulation in Triticeae.}, journal = {Plant physiology}, volume = {193}, number = {1}, pages = {578-594}, pmid = {37249052}, issn = {1532-2548}, mesh = {*Poaceae/genetics ; Triticum/genetics ; Genome, Plant/genetics ; *Oryza/genetics ; Zea mays/genetics ; Evolution, Molecular ; }, abstract = {Intracellular gene transfers (IGTs) between the nucleus and organelles, including plastids and mitochondria, constantly reshape the nuclear genome during evolution. Despite the substantial contribution of IGTs to genome variation, the dynamic trajectories of IGTs at the pangenomic level remain elusive. Here, we developed an approach, IGTminer, that maps the evolutionary trajectories of IGTs using collinearity and gene reannotation across multiple genome assemblies. We applied IGTminer to create a nuclear organellar gene (NOG) map across 67 genomes covering 15 Poaceae species, including important crops. The resulting NOGs were verified by experiments and sequencing data sets. Our analysis revealed that most NOGs were recently transferred and lineage specific and that Triticeae species tended to have more NOGs than other Poaceae species. Wheat (Triticum aestivum) had a higher retention rate of NOGs than maize (Zea mays) and rice (Oryza sativa), and the retained NOGs were likely involved in photosynthesis and translation pathways. Large numbers of NOG clusters were aggregated in hexaploid wheat during 2 rounds of polyploidization, contributing to the genetic diversity among modern wheat accessions. 
We implemented an interactive web server to facilitate the exploration of NOGs in Poaceae. In summary, this study provides resources and insights into the roles of IGTs in shaping interspecies and intraspecies genome variation and driving plant genome evolution.}, } @article {pmid37246787, year = {2023}, author = {Qian, C and Xu, M and Huang, Z and Tan, M and Fu, C and Zhou, T and Cao, J and Zhou, C}, title = {Complete genome sequence of the emerging pathogen Cysteiniphilum spp. and comparative genomic analysis with genus Francisella: Insights into its genetic diversity and potential virulence traits.}, journal = {Virulence}, volume = {14}, number = {1}, pages = {2214416}, pmid = {37246787}, issn = {2150-5608}, mesh = {Humans ; Virulence/genetics ; *Genome, Bacterial ; *Francisella tularensis/genetics ; Phylogeny ; Genomics ; DNA Transposable Elements ; Genetic Variation ; }, abstract = {Cysteiniphilum is a newly discovered genus in 2017 and is phylogenetically closely related to highly pathogenic Francisella tularensis. Recently, it has become an emerging pathogen in humans. However, the complete genome sequence of genus Cysteiniphilum is lacking, and the genomic characteristics of genetic diversity, evolutionary dynamics, and pathogenicity have not been characterized. In this study, the complete genome of the first reported clinical isolate QT6929 of genus Cysteiniphilum was sequenced, and comparative genomics analyses to Francisella genus were conducted to unveil the genomic landscape and diversity of the genus Cysteiniphilum. Our results showed that the complete genome of QT6929 consists of one 2.61 Mb chromosome and a 76,819 bp plasmid. The calculated average nucleotide identity and DNA-DNA hybridization values revealed that two clinical isolates QT6929 and JM-1 should be reclassified as two novel species in genus Cysteiniphilum. Pan-genome analysis revealed genomic diversity within the genus Cysteiniphilum and an open pan-genome state. 
Genomic plasticity analysis exhibited abundant mobile genetic elements including genome islands, insertion sequences, prophages, and plasmids on Cysteiniphilum genomes, which facilitated the broad exchange of genetic material between Cysteiniphilum and other genera like Francisella and Legionella. Several potential virulence genes associated with lipopolysaccharide/lipooligosaccharide, capsule, and haem biosynthesis specific to clinical isolates were predicted and might contribute to their pathogenicity in humans. Incomplete Francisella pathogenicity island was identified in most Cysteiniphilum genomes. Overall, our study provides an updated phylogenomic relationship of members of the genus Cysteiniphilum and comprehensive genomic insights into this rare emerging pathogen.}, } @article {pmid37243202, year = {2023}, author = {Lobb, B and Shapter, A and Doxey, AC and Nissimov, JI}, title = {Functional Profiling and Evolutionary Analysis of a Marine Microalgal Virus Pangenome.}, journal = {Viruses}, volume = {15}, number = {5}, pages = {}, pmid = {37243202}, issn = {1999-4915}, mesh = {*Phycodnaviridae/genetics ; Genomics ; Phylogeny ; }, abstract = {Phycodnaviridae are large double-stranded DNA viruses, which facilitate studies of host-virus interactions and co-evolution due to their prominence in algal infection and their role in the life cycle of algal blooms. However, the genomic interpretation of these viruses is hampered by a lack of functional information, stemming from the surprising number of hypothetical genes of unknown function. It is also unclear how many of these genes are widely shared within the clade. Using one of the most extensively characterized genera, Coccolithovirus, as a case study, we combined pangenome analysis, multiple functional annotation tools, AlphaFold structural modeling, and literature analysis to compare the core and accessory pangenome and assess support for novel functional predictions. 
We determined that the Coccolithovirus pangenome shares 30% of its genes with all 14 strains, making up the core. Notably, 34% of its genes were found in at most three strains. Core genes were enriched in early expression based on a transcriptomic dataset of Coccolithovirus EhV-201 algal infection, were more likely to be similar to host proteins than the non-core set, and were more likely to be involved in vital functions such as replication, recombination, and repair. In addition, we generated and collated annotations for the EhV representative EhV-86 from 12 different annotation sources, building up information for 142 previously hypothetical and putative membrane proteins. AlphaFold was further able to predict structures for 204 EhV-86 proteins with a modelling accuracy of good-high. These functional clues, combined with generated AlphaFold structures, provide a foundational framework for the future characterization of this model genus (and other giant viruses) and a further look into the evolution of the Coccolithovirus proteome.}, } @article {pmid37240287, year = {2023}, author = {Xia, L and Wang, H and Zhao, X and Obel, HO and Yu, X and Lou, Q and Chen, J and Cheng, C}, title = {Chloroplast Pan-Genomes and Comparative Transcriptomics Reveal Genetic Variation and Temperature Adaptation in the Cucumber.}, journal = {International journal of molecular sciences}, volume = {24}, number = {10}, pages = {}, pmid = {37240287}, issn = {1422-0067}, support = {2021YFD1200200//National Key R&D Program of China/ ; PZCZ201719//Jiangsu Agricultural Innovation of New Cultivars/ ; }, mesh = {Phylogeny ; *Cucumis sativus/genetics ; *Genome, Chloroplast ; Temperature ; Transcriptome ; Chloroplasts/genetics ; Gene Expression Profiling ; Genetic Variation ; }, abstract = {Although whole genome sequencing, genetic variation mapping, and pan-genome studies have been done on a large group of cucumber nuclear genomes, organelle genome information is largely unclear. 
As an important component of the organelle genome, the chloroplast genome is highly conserved, which makes it a useful tool for studying plant phylogeny, crop domestication, and species adaptation. Here, we have constructed the first cucumber chloroplast pan-genome based on 121 cucumber germplasms, and investigated the genetic variations of the cucumber chloroplast genome through comparative genomic, phylogenetic, haplotype, and population genetic structure analysis. Meanwhile, we explored the changes in expression of cucumber chloroplast genes under high- and low-temperature stimulation via transcriptome analysis. As a result, a total of 50 complete chloroplast genomes were successfully assembled from 121 cucumber resequencing data, ranging in size from 156,616-157,641 bp. The 50 cucumber chloroplast genomes have typical quadripartite structures, consisting of a large single copy (LSC, 86,339-86,883 bp), a small single copy (SSC, 18,069-18,363 bp), and two inverted repeats (IRs, 25,166-25,797 bp). Comparative genomic, haplotype, and population genetic structure results showed that there is more genetic variation in Indian ecotype cucumbers compared to other cucumber cultivars, which means that many genetic resources remain to be explored in Indian ecotype cucumbers. Phylogenetic analysis showed that the 50 cucumber germplasms could be classified into 3 types: East Asian, Eurasian + Indian, and Xishuangbanna + Indian. The transcriptomic analysis showed that matK were significantly up-regulated under high- and low-temperature stresses, further demonstrating that cucumber chloroplasts respond to temperature adversity by regulating lipid metabolism and ribosome metabolism. Further, accD has higher editing efficiency under high-temperature stress, which may contribute to the heat tolerance. 
These studies provide useful insight into genetic variation in the chloroplast genome, and established the foundation for exploring the mechanisms of temperature-stimulated chloroplast adaptation.}, } @article {pmid37239397, year = {2023}, author = {Dey, S and Gaur, M and Sykes, EME and Prusty, M and Elangovan, S and Dixit, S and Pati, S and Kumar, A and Subudhi, E}, title = {Unravelling the Evolutionary Dynamics of High-Risk Klebsiella pneumoniae ST147 Clones: Insights from Comparative Pangenome Analysis.}, journal = {Genes}, volume = {14}, number = {5}, pages = {}, pmid = {37239397}, issn = {2073-4425}, mesh = {Humans ; *beta-Lactamases/genetics ; Klebsiella pneumoniae/genetics ; Phylogeny ; Bayes Theorem ; *Klebsiella Infections/epidemiology/genetics/drug therapy ; }, abstract = {BACKGROUND: The high prevalence and rapid emergence of antibiotic resistance in high-risk Klebsiella pneumoniae (KP) ST147 clones is a global health concern and warrants molecular surveillance.

METHODS: A pangenome analysis was performed using publicly available ST147 complete genomes. The characteristics and evolutionary relationships among ST147 members were investigated through a Bayesian phylogenetic analysis.

RESULTS: The large number of accessory genes in the pangenome indicates genome plasticity and openness. Seventy-two antibiotic resistance genes were found to be linked with antibiotic inactivation, efflux, and target alteration. The exclusive detection of the blaOXA-232 gene within the ColKp3 plasmid of KP_SDL79 suggests its acquisition through horizontal gene transfer. The association of seventy-six virulence genes with the acrAB efflux pump, T6SS system and type I secretion system describes its pathogenicity. The presence of Tn6170, a putative Tn7-like transposon in KP_SDL79 with an insertion at the flanking region of the tnsB gene, establishes its transmission ability. The Bayesian phylogenetic analysis estimates ST147's initial divergence in 1951 and the most recent common ancestor for the entire KP population in 1621.

CONCLUSIONS: Present study highlights the genetic diversity and evolutionary dynamics of high-risk clones of K. pneumoniae. Further inter-clonal diversity studies will help us understand its outbreak more precisely and pave the way for therapeutic interventions.}, } @article {pmid37229109, year = {2023}, author = {Jha, UC and Nayyar, H and Chattopadhyay, A and Beena, R and Lone, AA and Naik, YD and Thudi, M and Prasad, PVV and Gupta, S and Dixit, GP and Siddique, KHM}, title = {Major viral diseases in grain legumes: designing disease resistant legumes from plant breeding and OMICS integration.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1183505}, pmid = {37229109}, issn = {1664-462X}, abstract = {Grain legumes play a crucial role in human nutrition and as a staple crop for low-income farmers in developing and underdeveloped nations, contributing to overall food security and agroecosystem services. Viral diseases are major biotic stresses that severely challenge global grain legume production. In this review, we discuss how exploring naturally resistant grain legume genotypes within germplasm, landraces, and crop wild relatives could be used as promising, economically viable, and eco-environmentally friendly solution to reduce yield losses. Studies based on Mendelian and classical genetics have enhanced our understanding of key genetic determinants that govern resistance to various viral diseases in grain legumes. Recent advances in molecular marker technology and genomic resources have enabled us to identify genomic regions controlling viral disease resistance in various grain legumes using techniques such as QTL mapping, genome-wide association studies, whole-genome resequencing, pangenome and 'omics' approaches. These comprehensive genomic resources have expedited the adoption of genomics-assisted breeding for developing virus-resistant grain legumes. 
Concurrently, progress in functional genomics, especially transcriptomics, has helped unravel underlying candidate gene(s) and their roles in viral disease resistance in legumes. This review also examines the progress in genetic engineering-based strategies, including RNA interference, and the potential of synthetic biology techniques, such as synthetic promoters and synthetic transcription factors, for creating viral-resistant grain legumes. It also elaborates on the prospects and limitations of cutting-edge breeding technologies and emerging biotechnological tools (e.g., genomic selection, rapid generation advances, and CRISPR/Cas9-based genome editing tool) in developing virus-disease-resistant grain legumes to ensure global food security.}, } @article {pmid37228750, year = {2023}, author = {Groza, C and Chen, X and Pacis, A and Simon, MM and Pramatarova, A and Aracena, KA and Pastinen, T and Barreiro, LB and Bourque, G}, title = {Genome graphs detect human polymorphisms in active epigenomic state during influenza infection.}, journal = {Cell genomics}, volume = {3}, number = {5}, pages = {100294}, pmid = {37228750}, issn = {2666-979X}, abstract = {Genetic variants, including mobile element insertions (MEIs), are known to impact the epigenome. We hypothesized that genome graphs, which encapsulate genetic diversity, could reveal missing epigenomic signals. To test this, we sequenced the epigenome of monocyte-derived macrophages from 35 ancestrally diverse individuals before and after influenza infection, allowing us to investigate the role of MEIs in immunity. We characterized genetic variants and MEIs using linked reads and built a genome graph. Mapping epigenetic data revealed 2.3%-3% novel peaks for H3K4me1, H3K27ac chromatin immunoprecipitation sequencing (ChIP-seq), and ATAC-seq. Additionally, the use of a genome graph modified some quantitative trait loci estimates and revealed 375 polymorphic MEIs in an active epigenomic state. 
Among these is an AluYh3 polymorphism whose chromatin state changed after infection and was associated with the expression of TRIM25, a gene that restricts influenza RNA synthesis. Our results demonstrate that graph genomes can reveal regulatory regions that would have been overlooked by other approaches.}, } @article {pmid37227251, year = {2023}, author = {Tonkin-Hill, G and Corander, J and Parkhill, J}, title = {Challenges in prokaryote pangenomics.}, journal = {Microbial genomics}, volume = {9}, number = {5}, pages = {}, pmid = {37227251}, issn = {2057-5858}, mesh = {Phylogeny ; *Evolution, Molecular ; *Prokaryotic Cells ; Bacteria/genetics ; Gene Transfer, Horizontal ; }, abstract = {Horizontal gene transfer (HGT) and the resulting patterns of gene gain and loss are a fundamental part of bacterial evolution. Investigating these patterns can help us to understand the role of selection in the evolution of bacterial pangenomes and how bacteria adapt to a new niche. Predicting the presence or absence of genes can be a highly error-prone process that can confound efforts to understand the dynamics of horizontal gene transfer. This review discusses both the challenges in accurately constructing a pangenome and the potential consequences errors can have on downstream analyses. 
We hope that by summarizing these issues researchers will be able to avoid potential pitfalls, leading to improved bacterial pangenome analyses.}, } @article {pmid37224809, year = {2023}, author = {Wisecaver, JH and Auber, RP and Pendleton, AL and Watervoort, NF and Fallon, TR and Riedling, OL and Manning, SR and Moore, BS and Driscoll, WW}, title = {Extreme genome diversity and cryptic speciation in a harmful algal-bloom-forming eukaryote.}, journal = {Current biology : CB}, volume = {33}, number = {11}, pages = {2246--2259.e8}, pmid = {37224809}, issn = {1879-0445}, support = {F32 ES032276/ES/NIEHS NIH HHS/United States ; R21 ES032056/ES/NIEHS NIH HHS/United States ; }, mesh = {Harmful Algal Bloom/physiology ; Phylogeny ; *Haptophyta/genetics ; *Toxins, Biological ; DNA/genetics ; }, abstract = {Harmful algal blooms of the toxic haptophyte Prymnesium parvum are a recurrent problem in many inland and estuarine waters around the world. Strains of P. parvum vary in the toxins they produce and in other physiological traits associated with harmful algal blooms, but the genetic basis for this variation is unknown. To investigate genome diversity in this morphospecies, we generated genome assemblies for 15 phylogenetically and geographically diverse strains of P. parvum, including Hi-C guided, near-chromosome-level assemblies for two strains. Comparative analysis revealed considerable DNA content variation between strains, ranging from 115 to 845 Mbp. Strains included haploids, diploids, and polyploids, but not all differences in DNA content were due to variation in genome copy number. Haploid genome size between strains of different chemotypes differed by as much as 243 Mbp. Syntenic and phylogenetic analyses indicate that UTEX 2797, a common laboratory strain from Texas, is a hybrid that retains two phylogenetically distinct haplotypes. 
Investigation of gene families variably present across the strains identified several functional categories associated with metabolic and genome size variation in P. parvum, including genes for the biosynthesis of toxic metabolites and proliferation of transposable elements. Together, our results indicate that P. parvum comprises multiple cryptic species. These genomes provide a robust phylogenetic and genomic framework for investigations into the eco-physiological consequences of the intra- and inter-specific genetic variation present in P. parvum and demonstrate the need for similar resources for other harmful algal-bloom-forming morphospecies.}, } @article {pmid37222600, year = {2023}, author = {Tchan, BGO and Ngazoa-Kakou, S and Aka, N and Apia, NKB and Hammoudi, N and Drancourt, M and Saad, J}, title = {PPE Barcoding Identifies Biclonal Mycobacterium ulcerans Buruli Ulcer, Côte d'Ivoire.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0034223}, pmid = {37222600}, issn = {2165-0497}, support = {Fondation Méditerranée Infection//Aix-Marseille Université (AMU)/ ; }, mesh = {Humans ; *Buruli Ulcer/microbiology ; *Mycobacterium ulcerans/genetics ; Cote d'Ivoire ; Real-Time Polymerase Chain Reaction ; Personal Protective Equipment ; }, abstract = {Mycobacterium ulcerans, an environmental opportunistic pathogen, causes necrotic cutaneous and subcutaneous lesions, named Buruli ulcers, in tropical countries. PCR-derived tests used to detect M. ulcerans in environmental and clinical samples do not allow one-shot detection, identification, and typing of M. ulcerans among closely related Mycobacterium marinum complex mycobacteria. We established a 385-member M. marinum/M. ulcerans complex whole-genome sequence database by assembling and annotating 341 M. marinum/M. ulcerans complex genomes and added 44 M. marinum/M. ulcerans complex whole-genome sequences already deposited in the NCBI database. 
Pangenome, core genome, and single-nucleotide polymorphism (SNP) distance-based comparisons sorted the 385 strains into 10 M. ulcerans taxa and 13 M. marinum taxa, correlating with the geographic origin of strains. Aligning conserved genes identified one PPE (proline-proline-glutamate) gene sequence to be species and intraspecies specific, thereby genotyping the 23 M. marinum/M. ulcerans complex taxa. PCR sequencing of the PPE gene correctly genotyped nine M. marinum/M. ulcerans complex isolates among one M. marinum taxon and three M. ulcerans taxa in the African taxon (T2.4). Further, successful PPE gene PCR sequencing in 15/21 (71.4%) swabs collected from suspected Buruli ulcer lesions in Côte d'Ivoire exhibited positive M. ulcerans IS2404 real-time PCR and identified the M. ulcerans T2.4.1 genotype in eight swabs and M. ulcerans T2.4.1/T2.4.2 mixed genotypes in seven swabs. PPE gene sequencing could be used as a proxy for whole-genome sequencing for the one-shot detection, identification, and typing of clinical M. ulcerans strains, offering an unprecedented tool for identifying M. ulcerans mixed infections. IMPORTANCE We describe a new targeted sequencing approach that characterizes the PPE gene to disclose the simultaneous presence of different variants of a single pathogenic microorganism. 
This approach has direct implications on the understanding of pathogen diversity and natural history and potential therapeutic implications when dealing with obligate and opportunistic pathogens, such as Mycobacterium ulcerans presented here as a prototype.}, } @article {pmid37221394, year = {2023}, author = {Drott, MT and Park, SC and Wang, YW and Harrow, L and Keller, NP and Pringle, A}, title = {Pangenomics of the death cap mushroom Amanita phalloides, and of Agaricales, reveals dynamic evolution of toxin genes in an invasive range.}, journal = {The ISME journal}, volume = {17}, number = {8}, pages = {1236--1246}, pmid = {37221394}, issn = {1751-7370}, support = {R01 GM112739/GM/NIGMS NIH HHS/United States ; T32 ES007015/ES/NIEHS NIH HHS/United States ; }, mesh = {*Amanita/genetics ; *Agaricales/genetics ; Computational Biology ; }, abstract = {The poisonous European mushroom Amanita phalloides (the "death cap") is invading California. Whether the death caps' toxic secondary metabolites are evolving as it invades is unknown. We developed a bioinformatic pipeline to identify the MSDIN genes underpinning toxicity and probed 88 death cap genomes from an invasive Californian population and from the European range, discovering a previously unsuspected diversity of MSDINs made up of both core and accessory elements. Each death cap individual possesses a unique suite of MSDINs, and toxin genes are significantly differentiated between Californian and European samples. MSDIN genes are maintained by strong natural selection, and chemical profiling confirms MSDIN genes are expressed and result in distinct phenotypes; our chemical profiling also identified a new MSDIN peptide. Toxin genes are physically clustered within genomes. We contextualize our discoveries by probing for MSDINs in genomes from across the order Agaricales, revealing MSDIN diversity originated in independent gene family expansions among genera. 
We also report the discovery of an MSDIN in an Amanita outside the "lethal Amanitas" clade. Finally, the identification of an MSDIN gene and its associated processing gene (POPB) in Clavaria fumosa suggest the origin of MSDINs is older than previously suspected. The dynamic evolution of MSDINs underscores their potential to mediate ecological interactions, implicating MSDINs in the ongoing invasion. Our data change the understanding of the evolutionary history of poisonous mushrooms, emphasizing striking parallels to convergently evolved animal toxins. Our pipeline provides a roadmap for exploring secondary metabolites in other basidiomycetes and will enable drug prospecting.}, } @article {pmid37217946, year = {2023}, author = {Leonard, AS and Crysnanto, D and Mapel, XM and Bhati, M and Pausch, H}, title = {Graph construction method impacts variation representation and analyses in a bovine super-pangenome.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {124}, pmid = {37217946}, issn = {1474-760X}, mesh = {Animals ; *Cattle/genetics ; *Genome ; Minisatellite Repeats ; *Sequence Analysis, DNA/methods ; }, abstract = {BACKGROUND: Several models and algorithms have been proposed to build pangenomes from multiple input assemblies, but their impact on variant representation, and consequently downstream analyses, is largely unknown.

RESULTS: We create multi-species super-pangenomes using pggb, cactus, and minigraph with the Bos taurus taurus reference sequence and eleven haplotype-resolved assemblies from taurine and indicine cattle, bison, yak, and gaur. We recover 221 k nonredundant structural variations (SVs) from the pangenomes, of which 135 k (61%) are common to all three. SVs derived from assembly-based calling show high agreement with the consensus calls from the pangenomes (96%), but validate only a small proportion of variations private to each graph. Pggb and cactus, which also incorporate base-level variation, have approximately 95% exact matches with assembly-derived small variant calls, which significantly improves the edit rate when realigning assemblies compared to minigraph. We use the three pangenomes to investigate 9566 variable number tandem repeats (VNTRs), finding 63% have identical predicted repeat counts in the three graphs, while minigraph can over or underestimate the count given its approximate coordinate system. We examine a highly variable VNTR locus and show that repeat unit copy number impacts the expression of proximal genes and non-coding RNA.

CONCLUSIONS: Our findings indicate good consensus between the three pangenome methods but also show their individual strengths and weaknesses that need to be considered when analysing different types of variants from multiple input assemblies.}, } @article {pmid37217755, year = {2023}, author = {}, title = {Combining reference genomes into a pangenome graph improves accuracy and reduces bias.}, journal = {Nature biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37217755}, issn = {1546-1696}, } @article {pmid37216590, year = {2023}, author = {Geoffroy, V and Lamouche, JB and Guignard, T and Nicaise, S and Kress, A and Scheidecker, S and Le Béchec, A and Muller, J}, title = {The AnnotSV webserver in 2023: updated visualization and ranking.}, journal = {Nucleic acids research}, volume = {51}, number = {W1}, pages = {W39--W45}, pmid = {37216590}, issn = {1362-4962}, mesh = {Humans ; Genome, Human ; High-Throughput Nucleotide Sequencing ; *INDEL Mutation ; *Polymorphism, Single Nucleotide ; Restriction Mapping ; Sequence Analysis, DNA ; Whole Genome Sequencing ; Disease/genetics ; *Software ; }, abstract = {Much of the human genetics variant repertoire is composed of single nucleotide variants (SNV) and small insertion/deletions (indel) but structural variants (SV) remain a major part of our modified DNA. SV detection has often been a complex question to answer either because of the necessity to use different technologies (array CGH, SNP array, Karyotype, Optical Genome Mapping…) to detect each category of SV or to get an appropriate resolution (Whole Genome Sequencing). Thanks to the deluge of pangenomic analysis, Human geneticists are accumulating SV and their interpretation remains time consuming and challenging. 
The AnnotSV webserver (https://www.lbgi.fr/AnnotSV/) aims at being an efficient tool to (i) annotate and interpret SV potential pathogenicity in the context of human diseases, (ii) recognize potential false positive variants from all the SV identified and (iii) visualize the patient variants repertoire. The most recent developments in the AnnotSV webserver are: (i) updated annotations sources and ranking, (ii) three novel output formats to allow diverse utilization (analysis, pipelines), as well as (iii) two novel user interfaces including an interactive circos view.}, } @article {pmid37214944, year = {2023}, author = {Fan, J and Khan, J and Singh, NP and Pibiri, GE and Patro, R}, title = {Fulgor: A fast and compact k-mer index for large-scale matching and color queries.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37214944}, abstract = {The problem of sequence identification or matching - determining the subset of references from a given collection that are likely to contain a query nucleotide sequence - is relevant for many important tasks in Computational Biology, such as metagenomics and pan-genome analysis. Due to the complex nature of such analyses and the large scale of the reference collections a resource efficient solution to this problem is of utmost importance. The reference collection should therefore be pre-processed into an index for fast queries. This poses the threefold challenge of designing an index that is efficient to query, has light memory usage, and scales well to large collections. To solve this problem, we describe how recent advancements in associative, order-preserving, k-mer dictionaries can be combined with a compressed inverted index to implement a fast and compact colored de Bruijn graph data structure. 
This index takes full advantage of the fact that unitigs in the colored de Bruijn graph are monochromatic (all k-mers in a unitig have the same set of references of origin, or "color"), leveraging the order-preserving property of its dictionary. In fact, k-mers are kept in unitig order by the dictionary, thereby allowing for the encoding of the map from k-mers to their inverted lists in as little as 1 + o(1) bits per unitig. Hence, one inverted list per unitig is stored in the index with almost no space/time overhead. By combining this property with simple but effective compression methods for inverted lists, the index achieves very small space. We implement these methods in a tool called Fulgor. Compared to Themisto, the prior state of the art, Fulgor indexes a heterogeneous collection of 30,691 bacterial genomes in 3.8× less space, a collection of 150,000 Salmonella enterica genomes in approximately 2× less space, is at least twice as fast for color queries, and is 2 - 6× faster to construct.}, } @article {pmid37214799, year = {2023}, author = {Ferrero-Serrano, Á and Chakravorty, D and Kirven, KJ and Assmann, SM}, title = {Oryza CLIMtools: An Online Portal for Investigating Genome-Environment Associations in Rice.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37214799}, support = {R01 GM126079/GM/NIGMS NIH HHS/United States ; T32 GM102057/GM/NIGMS NIH HHS/United States ; }, abstract = {Elite crop varieties display an evident mismatch between their current distributions and the suitability of the local climate for their productivity. To this end, we present Oryza CLIMtools (https://gramene.org/CLIMtools/oryza_v1.0/), the first resource for pan-genome prediction of climate-associated genetic variants in a crop species. 
Oryza CLIMtools consists of interactive web-based databases that allow the user to: i) explore the local environments of traditional rice varieties (landraces) in South-Eastern Asia, and; ii) investigate the environment × genome associations for 658 Indica and 283 Japonica rice landrace accessions collected from geo-referenced local environments and included in the 3K Rice Genomes Project. We exemplify the value of these resources, identifying an interplay between flowering time and temperature in the local environment that is facilitated by adaptive natural variation in OsHD2 and disrupted by maladaptive variation in OsSOC1. Prior QTL analysis has suggested the importance of heterotrimeric G proteins in the control of agronomic traits. Accordingly, we analyzed the climate associations of natural variants in the different heterotrimeric G protein subunits. We identified a coordinated role of G proteins in adaptation to the prevailing Potential Evapotranspiration gradient and their regulation of key agronomic traits including plant height and seed and panicle length. We conclude by highlighting the prospect of targeting heterotrimeric G proteins to produce crops that are climate-change-ready.}, } @article {pmid37213867, year = {2023}, author = {Zachariasen, T and Petersen, AØ and Brejnrod, A and Vestergaard, GA and Eklund, A and Nielsen, HB}, title = {Identification of representative species-specific genes for abundance measurements.}, journal = {Bioinformatics advances}, volume = {3}, number = {1}, pages = {vbad060}, pmid = {37213867}, issn = {2635-0041}, abstract = {MOTIVATION: Metagenomic binning facilitates the reconstruction of genomes and identification of Metagenomic Species Pan-genomes or Metagenomic Assembled Genomes. We propose a method for identifying a set of de novo representative genes, termed signature genes, which can be used to measure the relative abundance and used as markers of each metagenomic species with high accuracy.

RESULTS: An initial set of the 100 genes that correlate with the median gene abundance profile of the entity is selected. A variant of the coupon collector's problem was utilized to evaluate the probability of identifying a certain number of unique genes in a sample. This allows us to reject the abundance measurements of strains exhibiting a significantly skewed gene representation. A rank-based negative binomial model is employed to assess the performance of different gene sets across a large set of samples, facilitating identification of an optimal signature gene set for the entity. When benchmarked the method on a synthetic gene catalog, our optimized signature gene sets estimate relative abundance significantly closer to the true relative abundance compared to the starting gene sets extracted from the metagenomic species. The method was able to replicate results from a study with real data and identify around three times as many metagenomic entities.

The code used for the analysis is available on GitHub: https://github.com/trinezac/SG_optimization.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics Advances online.}, } @article {pmid37213168, year = {2023}, author = {Youngblom, MA and Shockey, AC and Callaghan, MM and Dillard, JP and Pepperell, CS}, title = {The Gonococcal Genetic Island defines distinct sub-populations of Neisseria gonorrhoeae.}, journal = {Microbial genomics}, volume = {9}, number = {5}, pages = {}, pmid = {37213168}, issn = {2057-5858}, support = {R01 AI047958/AI/NIAID NIH HHS/United States ; R01 AI113287/AI/NIAID NIH HHS/United States ; }, mesh = {Humans ; *Neisseria gonorrhoeae/genetics ; DNA ; *Gonorrhea ; Type IV Secretion Systems/genetics ; Genomics ; }, abstract = {The incidence of gonorrhoea is increasing at an alarming pace, and therapeutic options continue to narrow as a result of worsening drug resistance. Neisseria gonorrhoeae is naturally competent, allowing the organism to adapt rapidly to selection pressures including antibiotics. A sub-population of N. gonorrhoeae carries the Gonococcal Genetic Island (GGI), which encodes a type IV secretion system (T4SS) that secretes chromosomal DNA. Previous research has shown that the GGI increases transformation efficiency in vitro, but the extent to which it contributes to horizontal gene transfer (HGT) during infection is unknown. Here we analysed genomic data from clinical isolates of N. gonorrhoeae to better characterize GGI+ and GGI- sub-populations and to delineate patterns of variation at the locus itself. We found the element segregating at an intermediate frequency (61%), and it appears to act as a mobile genetic element with examples of gain, loss, exchange and intra-locus recombination within our sample. We further found evidence suggesting that GGI+ and GGI- sub-populations preferentially inhabit distinct niches with different opportunities for HGT. 
Previously, GGI+ isolates were reported to be associated with more severe clinical infections, and our results suggest this could be related to metal-ion trafficking and biofilm formation. The co-segregation of GGI+ and GGI- isolates despite mobility of the element suggests that both niches inhabited by N. gonorrhoeae remain important to its overall persistence as has been demonstrated previously for cervical- and urethral-adapted sub-populations. These data emphasize the complex population structure of N. gonorrhoeae and its capacity to adapt to diverse niches.}, } @article {pmid37207930, year = {2023}, author = {Qanmber, G and You, Q and Yang, Z and Fan, L and Zhang, Z and Chai, M and Gao, B and Li, F and Yang, Z}, title = {Transcriptional and translational landscape fine-tune genome annotation and explores translation control in cotton.}, journal = {Journal of advanced research}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jare.2023.05.004}, pmid = {37207930}, issn = {2090-1224}, abstract = {INTRODUCTION: The unavailability of intergenic region annotation in whole genome sequencing and pan-genomics hinders efforts to enhance crop improvement.

OBJECTIVES: Despite advances in research, the impact of post-transcriptional regulation on fiber development and translatome profiling at different stages of fiber growth in cotton (G. hirsutum) remains unexplored.

METHODS: We utilized a combination of reference-guided de novo transcriptome assembly and ribosome profiling techniques to uncover the hidden mechanisms of translational control in eight distinct tissues of upland cotton.

RESULTS: Our study identified P-site distribution at three-nucleotide periodicity and dominant ribosome footprint at 27 nucleotides. Specifically, we have detected 1,589 small open reading frames (sORFs), including 1,376 upstream ORFs (uORFs) and 213 downstream ORFs (dORFs), as well as 552 long non-coding RNAs (lncRNAs) with potential coding functions, which fine-tune the annotation of the cotton genome. Further, we have identified novel genes and lncRNAs with strong translation efficiency (TE), while sORFs were found to affect mRNA transcription levels during fiber elongation. The reliability of these findings was confirmed by the high consistency in correlation and synergetic fold change between RNA-sequencing (RNA-seq) and Ribosome-sequencing (Ribo-seq) analyses. Additionally, integrated omics analysis of the normal fiber ZM24 and short fiber pag1 cotton mutant revealed several differentially expressed genes (DEGs), and fiber-specific expressed (high/low) genes associated with sORFs (uORFs and dORFs). These findings were further supported by the overexpression and knockdown of GhKCS6, a gene associated with sORFs in cotton, and demonstrated the potential regulation of the mechanism governing fiber elongation on both the transcriptional and post-transcriptional levels.

CONCLUSION: Reference-guided transcriptome assembly and the identification of novel transcripts fine-tune the annotation of the cotton genome and predicted the landscape of fiber development. Our approach provided a high-throughput method, based on multi-omics, for discovering unannotated ORFs, hidden translational control, and complex regulatory mechanisms in crop plants.}, } @article {pmid37202927, year = {2023}, author = {Zhang, B and Huang, H and Tibbs-Cortes, LE and Vanous, A and Zhang, Z and Sanguinet, K and Garland-Campbell, KA and Yu, J and Li, X}, title = {Streamline unsupervised machine learning to survey and graph indel-based haplotypes from pan-genomes.}, journal = {Molecular plant}, volume = {16}, number = {6}, pages = {975--978}, doi = {10.1016/j.molp.2023.05.005}, pmid = {37202927}, issn = {1752-9867}, mesh = {*Unsupervised Machine Learning ; Haplotypes/genetics ; *Genome ; Algorithms ; INDEL Mutation/genetics ; }, } @article {pmid37202771, year = {2023}, author = {Ahmed, OY and Rossi, M and Gagie, T and Boucher, C and Langmead, B}, title = {SPUMONI 2: improved classification using a pangenome index of minimizer digests.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {122}, pmid = {37202771}, issn = {1474-760X}, support = {R01HG011392/HG/NHGRI NIH HHS/United States ; }, mesh = {*Algorithms ; *Genomics ; Metagenomics ; Databases, Factual ; Sequence Analysis, DNA ; }, abstract = {Genomics analyses use large reference sequence collections, like pangenomes or taxonomic databases. SPUMONI 2 is an efficient tool for sequence classification of both short and long reads. It performs multi-class classification using a novel sampled document array. By incorporating minimizers, SPUMONI 2's index is 65 times smaller than minimap2's for a mock community pangenome. SPUMONI 2 achieves a speed improvement of 3-fold compared to SPUMONI and 15-fold compared to minimap2. 
We show SPUMONI 2 achieves an advantageous mix of accuracy and efficiency in practical scenarios such as adaptive sampling, contamination detection and multi-class metagenomics classification.}, } @article {pmid37202587, year = {2023}, author = {Anbazhagan, S and Himani, KM and Karthikeyan, R and Prakasan, L and Dinesh, M and Nair, SS and Lalsiamthara, J and {Abhishek} and Ramachandra, SG and Chaturvedi, VK and Chaudhuri, P and Thomas, P}, title = {Comparative genomics of Brucella abortus and Brucella melitensis unravels the gene sharing, virulence factors and SNP diversity among the standard, vaccine and field strains.}, journal = {International microbiology : the official journal of the Spanish Society for Microbiology}, volume = {}, number = {}, pages = {}, pmid = {37202587}, issn = {1618-1905}, abstract = {Brucella abortus and Brucella melitensis are the primary etiological agents of brucellosis in large and small ruminants, respectively. There are limited comparative genomic studies involving Brucella strains that explore the relatedness among both species. In this study, we involved strains (n=44) representing standard, vaccine and Indian field origin for pangenome, single nucleotide polymorphism (SNP) and phylogenetic analysis. Both species shared a common gene pool representing 2884 genes out of a total 3244 genes. SNP-based phylogenetic analysis indicated higher SNP diversity among B. melitensis (3824) strains in comparison to B. abortus (540) strains, and a clear demarcation was identified between standard/vaccine and field strains. The analysis for virulence genes revealed that virB3, virB7, ricA, virB5, ipx5, wbkC, wbkB, and acpXL genes were highly conserved in most of the Brucella strains. Interestingly, virB10 gene was found to have high variability among the B. abortus strains. The cgMLST analysis revealed distinct sequence types for the standard/vaccine and field strains. B. 
abortus strains from north-eastern India fall within similar sequence type differing from other strains. In conclusion, the analysis revealed a highly shared core genome among two Brucella species. SNP analysis revealed B. melitensis strains exhibit high diversity as compared to B. abortus strains. Strains with absence or high polymorphism of virulence genes can be exploited for the development of novel vaccine candidates effective against both B. abortus and B. melitensis.}, } @article {pmid37196842, year = {2023}, author = {Tian, R and Xu, S and Li, P and Li, M and Liu, Y and Wang, K and Liu, G and Li, Y and Dai, L and Zhang, W}, title = {Characterization of G-type Clostridium perfringens bacteriophages and their disinfection effect on chicken meat.}, journal = {Anaerobe}, volume = {81}, number = {}, pages = {102736}, doi = {10.1016/j.anaerobe.2023.102736}, pmid = {37196842}, issn = {1095-8274}, mesh = {Animals ; Humans ; Clostridium perfringens/genetics ; *Bacteriophages/genetics ; Chickens ; Disinfection ; Phylogeny ; Anti-Bacterial Agents/pharmacology ; *Clostridium Infections/prevention & control/veterinary ; *Poultry Diseases ; *Enteritis ; Meat ; }, abstract = {OBJECTIVE: Clostridium perfringens is one of most important bacterial pathogens in the poultry industry and mainly causes necrotizing enteritis (NE). This pathogen and its toxins can cause foodborne diseases in humans through the food chain. In China, with the rise of antibiotic resistance and the banning of antibiotic growth promoters (AGPs) in poultry farming, food contamination and NE are becoming more prevalent. Bacteriophages are a viable technique to control C. perfringens as an alternative to antibiotics. We isolated Clostridium phage from the environment, providing a new method for the prevention of NE and C. perfringens contamination in meat.

METHODS: In this study, we selected C. perfringens strains from various regions and animal sources in China for phage isolation. The biological characteristics of Clostridium phage were studied in terms of host range, MOI, one-step curve, temperature and pH stability. We sequenced and annotated the genome of the Clostridium phage and performed phylogenetic and pangenomic analyses. Finally, we studied its antibacterial activity against bacterial culture and its disinfection effect against C. perfringens in meat.

RESULTS: A Clostridium phage, named ZWPH-P21 (P21), was isolated from chicken farm sewage in Jiangsu, China. P21 has been shown to specifically lyse C. perfringens type G. Further analysis of basic biological characteristics showed that P21 was stable under the conditions of pH 4-11 and temperature 4-60 °C, and the optimal multiple severity of infection (MOI) was 0.1. In addition, P21 could form a "halo" on agar plates, suggesting that the phage may encode depolymerase. Genome sequence analysis showed that P21 was the most closely related to Clostridium phage CPAS-15 belonging to the Myoviridae family, with a recognition rate of 97.24% and a query coverage rate of 98%. No virulence factors or drug resistance genes were found in P21. P21 showed promising antibacterial activity in vitro and in chicken disinfection experiments. In conclusion, P21 has the potential to be used for preventing and controlling C. perfringens in chicken food production.}, } @article {pmid37195730, year = {2023}, author = {Tanwar, AS and Shruptha, P and Jnana, A and Brand, A and Ballal, M and Satyamoorthy, K and Murali, TS}, title = {Emerging Pathogens in Planetary Health and Lessons from Comparative Genome Analyses of Three Clostridia Species.}, journal = {Omics : a journal of integrative biology}, volume = {27}, number = {6}, pages = {247--259}, doi = {10.1089/omi.2023.0034}, pmid = {37195730}, issn = {1557-8100}, mesh = {Aged ; Humans ; *Clostridioides difficile/genetics ; Proteomics ; *Clostridium Infections ; Virulence/genetics ; Genomics ; }, abstract = {Clostridioides difficile (CD) is a major planetary health burden. A Gram-positive opportunistic pathogen, CD, colonizes the large intestine and is implicated in sepsis, pseudomembranous colitis, and colorectal cancer. C. difficile infection typically following antibiotic exposure results in dysbiosis of the gut microbiome, and is one of the leading causes of diarrhea in the elderly population. 
While several studies have focused on the toxigenic strains of CD, gut commensals such as Clostridium butyricum (CB) and Clostridium tertium (CT) could harbor toxin/virulence genes, and thus pose a threat to human health. In this study, we sequenced and characterized three isolates, namely, CT (MALS001), CB (MALS002), and CD (MALS003) for their antimicrobial, cytotoxic, antiproliferative, genomic, and proteomic profiles. Although in vitro cytotoxic and antiproliferative potential were observed predominantly in CD MALS003, genome analysis revealed pathogenic potential of CB MALS002 and CT MALS001. Pangenome analysis revealed the presence of several accessory genes typically involved in fitness, virulence, and resistance characteristics in the core genomes of sequenced strains. The presence of an array of virulence and antimicrobial resistance genes in CB MALS002 and CT MALS001 suggests their potential role as emerging pathogens with significant impact on planetary health.}, } @article {pmid37195188, year = {2023}, author = {Murik, O and Zeevi, DA and Mann, T and Kashat, L and Assous, MV and Megged, O and Yagupsky, P}, title = {Whole-Genome Sequencing Reveals Differences among Kingella kingae Strains from Carriers and Patients with Invasive Infections.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0389522}, pmid = {37195188}, issn = {2165-0497}, mesh = {Humans ; Child, Preschool ; *Kingella kingae/genetics ; Virulence/genetics ; Virulence Factors/genetics ; *Endocarditis ; *Bacteremia/pathology ; }, abstract = {As a result of the increasing use of sensitive nucleic acid amplification tests, Kingella kingae is being recognized as a common pathogen of early childhood, causing medical conditions ranging from asymptomatic oropharyngeal colonization to bacteremia, osteoarthritis, and life-threatening endocarditis. However, the genomic determinants associated with the different clinical outcomes are unknown. 
Employing whole-genome sequencing, we studied 125 international K. kingae isolates derived from 23 healthy carriers and 102 patients with invasive infections, including bacteremia (n = 23), osteoarthritis (n = 61), and endocarditis (n = 18). We compared their genomic structures and contents to identify genomic determinants associated with the different clinical conditions. The mean genome size of the strains was 2,024,228 bp, and the pangenome comprised 4,026 predicted genes, of which 1,460 (36.3%) were core genes shared by >99% of the isolates. No single gene discriminated between carried and invasive strains; however, 43 genes were significantly more frequent in invasive isolates, compared to asymptomatically carried organisms, and a few showed a significant differential distribution among isolates from skeletal system infections, bacteremia, and endocarditis. The gene encoding the iron-regulated protein FrpC was uniformly absent in all 18 endocarditis-associated strains but was present in one-third of other invasive isolates. Similar to other members of the Neisseriaceae family, the K. kingae differences in invasiveness and tropism for specific body tissues appear to depend on combinations of multiple virulence-associated determinants that are widely distributed throughout the genome. The potential role of the absence of the FrpC protein in the pathogenesis of endocardial invasion deserves further investigation. IMPORTANCE The wide range of clinical severities exhibited by invasive Kingella kingae infections strongly suggests that isolates differ in their genomic contents, and strains associated with life-threatening endocarditis may harbor distinct genomic determinants that result in cardiac tropism and severe tissue damage. The results of the present study show that no single gene discriminated between asymptomatically carried isolates and invasive strains. 
However, 43 putative genes were significantly more frequent among invasive isolates than among pharyngeal colonizers. In addition, several genes displayed a significant differential distribution among isolates from bacteremia, skeletal system infections, and endocarditis, suggesting that the virulence and tissue tropism of K. kingae are multifactorial and polygenic, depending on changes in the allele content and genomic organization. Further analysis of these putative genes may identify genomic determinants of the invasiveness of K. kingae and its affinity for specific body tissues and potential targets for a future protective vaccine.}, } @article {pmid37193328, year = {2023}, author = {Kalaivanan, NS and Ghoshal, T and Lakshmi, MA and Mondal, KK and Kulshreshtha, A and Singh, KBM and Thakur, JK and Supriya, P and Bhatnagar, S and Mani, C}, title = {Complete genome resource unravels the close relation of an Indian Xanthomonas oryzae pv. oryzae strain IXOBB0003 with Philippines strain causing bacterial blight of rice.}, journal = {3 Biotech}, volume = {13}, number = {6}, pages = {187}, pmid = {37193328}, issn = {2190-572X}, abstract = {UNLABELLED: Xanthomonas oryzae pv. oryzae (Xoo) is a pathogen of concern for rice growers as it limits the production potential of rice varieties worldwide. Due to their high genomic plasticity, the pathogen continues to evolve, nullifying the deployed resistance mechanisms. It is pertinent to monitor the evolving Xoo population for the virulent novel stains, and the affordable sequencing technologies made the task feasible with an in-depth understanding of their pathogenesis arsenals. We present the complete genome of a highly virulent Indian Xoo strain IXOBB0003, predominantly found in northwestern parts of India, by employing next-generation sequencing and single-molecule sequencing in real-time technologies. The final genome assembly comprises 4,962,427 bp and has 63.96% GC content. 
The pan genome analysis reveals that strain IXOBB0003 houses total of 3655 core genes, 1276 accessory genes and 595 unique genes. Comparative analysis of the predicted gene clusters of coding sequences and protein count of strain IXOBB0003 depicts 3687 of almost 90% gene clusters shared by other Asian strains, 17 unique to IXOBB0003 and 139 CDSs of IXOBB0003 are shared with PXO99[A]. AnnoTALE-based studies revealed 16 TALEs conferred from the whole genome sequence. Prominent TALEs of our strain are found orthologous to TALEs of the Philippines strain PXO99[A]. The genomic features of Indian Xoo strain IXOBB0003 and in comparison with other Asian strains would certainly contribute significantly while formulating novel strategies for BB management.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-023-03596-x.}, } @article {pmid37192177, year = {2023}, author = {Price, RJ and Davik, J and Fernandéz Fernandéz, F and Bates, HJ and Lynn, S and Nellist, CF and Buti, M and Røen, D and Šurbanovski, N and Alsheikh, M and Harrison, RJ and Sargent, DJ}, title = {Chromosome-scale genome sequence assemblies of the 'Autumn Bliss' and 'Malling Jewel' cultivars of the highly heterozygous red raspberry (Rubus idaeus L.) derived from long-read Oxford Nanopore sequence data.}, journal = {PloS one}, volume = {18}, number = {5}, pages = {e0285756}, pmid = {37192177}, issn = {1932-6203}, mesh = {*Rubus/genetics ; *Nanopores ; Genome ; Genomics ; Sequence Analysis, DNA ; Centromere ; }, abstract = {Red raspberry (Rubus idaeus L.) is an economically valuable soft-fruit species with a relatively small (~300 Mb) but highly heterozygous diploid (2n = 2x = 14) genome. Chromosome-scale genome sequences are a vital tool in unravelling the genetic complexity controlling traits of interest in crop plants such as red raspberry, as well as for functional genomics, evolutionary studies, and pan-genomics diversity studies. In this study, we developed genome sequences of a primocane fruiting variety ('Autumn Bliss') and a floricane variety ('Malling Jewel'). The use of long-read Oxford Nanopore Technologies sequencing data yielded long read lengths that permitted well resolved genome sequences for the two cultivars to be assembled. The de novo assemblies of 'Malling Jewel' and 'Autumn Bliss' contained 79 and 136 contigs respectively, and 263.0 Mb of the 'Autumn Bliss' and 265.5 Mb of the 'Malling Jewel' assembly could be anchored unambiguously to a previously published red raspberry genome sequence of the cultivar 'Anitra'. 
Single copy ortholog analysis (BUSCO) revealed high levels of completeness in both genomes sequenced, with 97.4% of sequences identified in 'Autumn Bliss' and 97.7% in 'Malling Jewel'. The density of repetitive sequence contained in the 'Autumn Bliss' and 'Malling Jewel' assemblies was significantly higher than in the previously published assembly and centromeric and telomeric regions were identified in both assemblies. A total of 42,823 protein coding regions were identified in the 'Autumn Bliss' assembly, whilst 43,027 were identified in the 'Malling Jewel' assembly. These chromosome-scale genome sequences represent an excellent genomics resource for red raspberry, particularly around the highly repetitive centromeric and telomeric regions of the genome that are less complete in the previously published 'Anitra' genome sequence.}, } @article {pmid37186225, year = {2023}, author = {Kuzmanović, N and diCenzo, GC and Bunk, B and Spröer, C and Frühling, A and Neumann-Schaal, M and Overmann, J and Smalla, K}, title = {Genomics of the "tumorigenes" clade of the family Rhizobiaceae and description of Rhizobium rhododendri sp. nov.}, journal = {MicrobiologyOpen}, volume = {12}, number = {2}, pages = {e1352}, pmid = {37186225}, issn = {2045-8827}, mesh = {Phylogeny ; DNA, Bacterial/genetics ; *Rhizobiaceae ; *Rhizobium/genetics ; Agrobacterium/genetics ; Genomics ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Fatty Acids ; Bacterial Typing Techniques ; }, abstract = {Tumorigenic members of the family Rhizobiaceae, known as agrobacteria, are responsible for crown and cane gall diseases of various crops worldwide. Tumorigenic agrobacteria are commonly found in the genera Agrobacterium, Allorhizobium, and Rhizobium. In this study, we analyzed a distinct "tumorigenes" clade of the genus Rhizobium, which includes the tumorigenic species Rhizobium tumorigenes, as well as strains causing crown gall disease on rhododendron. 
Here, high-quality, closed genomes of representatives of the "tumorigenes" clade were generated, followed by comparative genomic and phylogenomic analyses. Additionally, the phenotypic characteristics of representatives of the "tumorigenes" clade were analyzed. Our results showed that the tumorigenic strains isolated from rhododendron represent a novel species of the genus Rhizobium for which the name Rhizobium rhododendri sp. nov. is proposed. This species also includes additional strains originating from blueberry and Himalayan blackberry in the United States, whose genome sequences were retrieved from GenBank. Both R. tumorigenes and R. rhododendri contain multipartite genomes, including a chromosome, putative chromids, and megaplasmids. Synteny and phylogenetic analyses indicated that a large putative chromid of R. rhododendri resulted from the cointegration of an ancestral megaplasmid and two putative chromids, following its divergence from R. tumorigenes. Moreover, gene clusters specific for both species of the "tumorigenes" clade were identified, and their biological functions and roles in the ecological diversification of R. rhododendri and R. tumorigenes were predicted and discussed.}, } @article {pmid37180381, year = {2023}, author = {Pham, HH and Kim, DH and Nguyen, TL}, title = {Wide-genome selection of lactic acid bacteria harboring genes that promote the elimination of antinutritional factors.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1145041}, pmid = {37180381}, issn = {1664-462X}, abstract = {Anti-nutritional factors (ANFs) substances in plant products, such as indigestible non-starchy polysaccharides (α-galactooligosaccharides, α-GOS), phytate, tannins, and alkaloids can impede the absorption of many critical nutrients and cause major physiological disorders. To enhance silage quality and its tolerance threshold for humans as well as other animals, ANFs must be reduced. 
This study aims to identify and compare the bacterial species/strains that are potential use for industrial fermentation and ANFs reduction. A pan-genome study of 351 bacterial genomes was performed, and binary data was processed to quantify the number of genes involved in the removal of ANFs. Among four pan-genomes analysis, all 37 tested Bacillus subtilis genomes had one phytate degradation gene, while 91 out of 150 Enterobacteriacae genomes harbor at least one genes (maximum three). Although, no gene encoding phytase detected in genomes of Lactobacillus and Pediococcus species, they have genes involving indirectly in metabolism of phytate-derivatives to produce Myo-inositol, an important compound in animal cells physiology. In contrast, genes related to production of lectin, tannase and saponin degrading enzyme did not include in genomes of B. subtilis and Pediococcus species. Our findings suggest a combination of bacterial species and/or unique strains in fermentation, for examples, two Lactobacillus strains (DSM 21115 and ATCC 14869) with B. subtilis SRCM103689, would maximize the efficiency in reducing the ANFs concentration. In conclusion, this study provides insights into bacterial genomes analysis for maximizing nutritional value in plant-based food. 
Further investigations of gene numbers and repertories correlated to metabolism of different ANFs will help clarifying the efficiency of time consuming and food qualities.}, } @article {pmid37180261, year = {2023}, author = {Meng, X and Chen, F and Xiong, M and Hao, H and Wang, KJ}, title = {A new pathogenic isolate of Kocuria kristinae identified for the first time in the marine fish Larimichthys crocea.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1129568}, pmid = {37180261}, issn = {1664-302X}, abstract = {In recent years, new emerging pathogenic microorganisms have frequently appeared in animals, including marine fish, possibly due to climate change, anthropogenic activities, and even cross-species transmission of pathogenic microorganisms among animals or between animals and humans, which poses a serious issue for preventive medicine. In this study, a bacterium was clearly characterized among 64 isolates from the gills of diseased large yellow croaker Larimichthys crocea that were raised in marine aquaculture. This strain was identified as K. kristinae by biochemical tests with a VITEK 2.0 analysis system and 16S rRNA sequencing and named K. kristinae_LC. The potential genes that might encode virulence-factors were widely screened through sequence analysis of the whole genome of K. kristinae_LC. Many genes involved in the two-component system and drug-resistance were also annotated. In addition, 104 unique genes in K. kristinae_LC were identified by pan genome analysis with the genomes of this strain from five different origins (woodpecker, medical resource, environment, and marine sponge reef) and the analysis results demonstrated that their predicted functions might be associated with adaptation to living conditions such as higher salinity, complex marine biomes, and low temperature. A significant difference in genomic organization was found among the K. 
kristinae strains that might be related to their hosts living in different environments. The animal regression test for this new bacterial isolate was carried out using L. crocea, and the results showed that this bacterium could cause the death of L. crocea and that the fish mortality was dose-dependent within 5 days post infection, indicating the pathogenicity of K. kristinae_LC to marine fish. Since K. kristinae has been reported as a pathogen for humans and bovines, in our study, we revealed a new isolate of K. kristinae_LC from marine fish for the first time, suggesting the potentiality of cross-species transmission among animals or from marine animals to humans, from which we would gain insight to help in future public prevention strategies for new emerging pathogens.}, } @article {pmid37175750, year = {2023}, author = {An, B and Cai, H and Li, B and Zhang, S and He, Y and Wang, R and Jiao, C and Guo, Y and Xu, L and Xu, Y}, title = {Molecular Evolution of Histone Methylation Modification Families in the Plant Kingdom and Their Genome-Wide Analysis in Barley.}, journal = {International journal of molecular sciences}, volume = {24}, number = {9}, pages = {}, pmid = {37175750}, issn = {1422-0067}, support = {2021CFA064//Hubei Outstanding Youth Fund/ ; 2021BBA225//Hubei Key Research and Development Program/ ; 2021-620-000-001-01//Hubei Agricultural Science and Tech-nology Innovation Center Innovation Team Project/ ; }, mesh = {Humans ; *Hordeum/metabolism ; Histones/genetics/metabolism ; Methylation ; Plants/metabolism ; Phylogeny ; Evolution, Molecular ; Gene Expression Regulation, Plant ; Plant Proteins/genetics/metabolism ; Genome, Plant ; }, abstract = {In this study, based on the OneKP database and through comparative genetic analysis, we found that HMT and HDM may originate from Chromista and are highly conserved in green plants, and that during the evolution from algae to land plants, histone methylation modifications gradually became complex and diverse, 
which is more conducive to the adaptation of plants to complex and variable environments. We also characterized the number of members, genetic similarity, and phylogeny of HMT and HDM families in barley using the barley pangenome and the Tibetan Lasa Goumang genome. The results showed that HMT and HDM were highly conserved in the domestication of barley, but there were some differences in the Lasa Goumang SDG subfamily. Expression analysis showed that HvHMTs and HvHDMs were highly expressed in specific tissues and had complex expression patterns under multiple stress treatments. In summary, the amplification and variation of HMT and HDM facilitate plant adaptation to complex terrestrial environments, while they are highly conserved in barley and play an important role in barley growth and development with abiotic stresses. In brief, our findings provide a novel perspective on the origin and evolutionary history of plant HvHMTs and HvHDMs, and lay a foundation for further investigation of their functions in barley.}, } @article {pmid37173388, year = {2023}, author = {Abdella, B and Abozahra, NA and Shokrak, NM and Mohamed, RA and El-Helow, ER}, title = {Whole spectrum of Aeromonas hydrophila virulence determinants and the identification of novel SNPs using comparative pathogenomics.}, journal = {Scientific reports}, volume = {13}, number = {1}, pages = {7712}, pmid = {37173388}, issn = {2045-2322}, mesh = {Animals ; Humans ; Aeromonas hydrophila ; Virulence/genetics ; Polymorphism, Single Nucleotide ; Virulence Factors/genetics ; *Aeromonas ; Fishes ; *Fish Diseases/microbiology ; *Gram-Negative Bacterial Infections/veterinary/microbiology ; }, abstract = {Aeromonas hydrophila is a ubiquitous fish pathogen and an opportunistic human pathogen. It is mostly found in aquatic habitats, but it has also been isolated from food and bottled mineral waters. 
It causes hemorrhagic septicemia, ulcerative disease, and motile Aeromonas septicemia (MAS) in fish and other aquatic animals. Moreover, it might cause gastroenteritis, wound infections, and septicemia in humans. Different variables influence A. hydrophila virulence, including the virulence genes expressed, host susceptibility, and environmental stresses. The identification of virulence factors for a bacterial pathogen will help in the development of preventive and control measures. 95 Aeromonas spp. genomes were examined in the current study, and 53 strains were determined to be valid A. hydrophila. These genomes were examined for pan- and core-genomes using a comparative genomics technique. A. hydrophila has an open pan-genome with 18,306 total genes and 1620 genes in its core-genome. In the pan-genome, 312 virulence genes have been detected. The effector delivery system category had the largest number of virulence genes (87), followed by immunological modulation and motility genes (69 and 46, respectively). This provides new insight into the pathogenicity of A. hydrophila. In the pan-genome, a few distinctive single-nucleotide polymorphisms (SNPs) have been identified in four genes, namely: D-glycero-beta-D-manno-heptose-1,7-bisphosphate 7-phosphatase, chemoreceptor glutamine deamidase, Spermidine N (1)-acetyltransferase, and maleylpyruvate isomerase, which are present in all A. hydrophila genomes, which make them molecular marker candidates for precise identification of A. hydrophila. 
Therefore, for precise diagnostic and discrimination results, we suggest these genes be considered when designing primers and probes for sequencing, multiplex-PCR, or real-time PCR.}, } @article {pmid37173271, year = {2023}, author = {Raza, A and Bohra, A and Varshney, RK}, title = {Pan-genome for pearl millet that beats the heat.}, journal = {Trends in plant science}, volume = {28}, number = {8}, pages = {857--860}, doi = {10.1016/j.tplants.2023.04.016}, pmid = {37173271}, issn = {1878-4372}, mesh = {*Pennisetum/genetics ; Hot Temperature ; Plant Breeding ; }, abstract = {A better understanding of crop genomes reveals that structural variations (SVs) are crucial for genetic improvement. A graph-based pan-genome by Yan et al. uncovered 424 085 genomic SVs and provided novel insights into heat tolerance of pearl millet. We discuss how these SVs can fast-track pearl millet breeding under harsh environments.}, } @article {pmid37171844, year = {2023}, author = {Büchler, T and Olbrich, J and Ohlebusch, E}, title = {Efficient short read mapping to a pangenome that is represented by a graph of ED strings.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {5}, pages = {}, pmid = {37171844}, issn = {1367-4811}, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Software ; *Genome, Human ; Algorithms ; }, abstract = {MOTIVATION: A pangenome represents many diverse genome sequences of the same species. In order to cope with small variations as well as structural variations, recent research focused on the development of graph-based models of pangenomes. Mapping is the process of finding the original location of a DNA read in a reference sequence, typically a genome. Using a pangenome instead of a (linear) reference genome can, e.g. reduce mapping bias, the tendency to incorrectly map sequences that differ from the reference genome. Mapping reads to a graph, however, is more complex and needs more resources than mapping to a reference genome. 
Reducing the complexity of the graph by encoding simple variations like SNPs in a simple way can accelerate read mapping and reduce the memory requirements at the same time.

RESULTS: We introduce graphs based on elastic-degenerate strings (ED strings, EDS) and the linearized form of these EDS graphs as a new representation for pangenomes. In this representation, small variations are encoded directly in the sequence. Structural variations are encoded in a graph structure. This reduces the size of the representation in comparison to sequence graphs. In the linearized form, mapping techniques that are known from ordinary strings can be applied with appropriate adjustments. Since most variations are expressed directly in the sequence, the mapping process rarely has to take edges of the EDS graph into account. We developed a prototypical software tool GED-MAP that uses this representation together with a minimizer index to map short reads to the pangenome. Our experiments show that the new method works on a whole human genome scale, taking structural variants properly into account. The advantage of GED-MAP, compared with other pangenomic short read mappers, is that the new representation allows for a simple indexing method. This makes GED-MAP fast and memory efficient.

Sources are available at: https://github.com/thomas-buechler-ulm/gedmap.}, } @article {pmid37167256, year = {2023}, author = {Riborg, A and Gulla, S and Fiskebeck, EZ and Ryder, D and Verner-Jeffreys, DW and Colquhoun, DJ and Welch, TJ}, title = {Pan-genome survey of the fish pathogen Yersinia ruckeri links accessory- and amplified genes to virulence.}, journal = {PloS one}, volume = {18}, number = {5}, pages = {e0285257}, pmid = {37167256}, issn = {1932-6203}, mesh = {Animals ; Yersinia ruckeri/genetics ; Virulence/genetics ; *Yersinia Infections ; Serogroup ; *Oncorhynchus mykiss ; *Fish Diseases ; }, abstract = {While both virulent and putatively avirulent Yersinia ruckeri strains exist in aquaculture environments, the relationship between the distribution of virulence-associated factors and de facto pathogenicity in fish remains poorly understood. Pan-genome analysis of 18 complete genomes, representing established virulent and putatively avirulent lineages of Y. ruckeri, revealed the presence of a number of accessory genetic determinants. Further investigation of 68 draft genome assemblies revealed that the distribution of certain putative virulence factors correlated well with virulence and host-specificity. The inverse-autotransporter invasin locus yrIlm was, however, the only gene present in all virulent strains, while absent in lineages regarded as avirulent. Strains known to be associated with significant mortalities in salmonid aquaculture display a combination of serotype O1-LPS and yrIlm, with the well-documented highly virulent lineages, represented by MLVA clonal complexes 1 and 2, displaying duplication of the yrIlm locus. 
Duplication of the yrIlm locus was further found to have evolved over time in clonal complex 1, where some modern, highly virulent isolates display up to three copies.}, } @article {pmid37165242, year = {2023}, author = {Liao, WW and Asri, M and Ebler, J and Doerr, D and Haukness, M and Hickey, G and Lu, S and Lucas, JK and Monlong, J and Abel, HJ and Buonaiuto, S and Chang, XH and Cheng, H and Chu, J and Colonna, V and Eizenga, JM and Feng, X and Fischer, C and Fulton, RS and Garg, S and Groza, C and Guarracino, A and Harvey, WT and Heumos, S and Howe, K and Jain, M and Lu, TY and Markello, C and Martin, FJ and Mitchell, MW and Munson, KM and Mwaniki, MN and Novak, AM and Olsen, HE and Pesout, T and Porubsky, D and Prins, P and Sibbesen, JA and Sirén, J and Tomlinson, C and Villani, F and Vollger, MR and Antonacci-Fulton, LL and Baid, G and Baker, CA and Belyaeva, A and Billis, K and Carroll, A and Chang, PC and Cody, S and Cook, DE and Cook-Deegan, RM and Cornejo, OE and Diekhans, M and Ebert, P and Fairley, S and Fedrigo, O and Felsenfeld, AL and Formenti, G and Frankish, A and Gao, Y and Garrison, NA and Giron, CG and Green, RE and Haggerty, L and Hoekzema, K and Hourlier, T and Ji, HP and Kenny, EE and Koenig, BA and Kolesnikov, A and Korbel, JO and Kordosky, J and Koren, S and Lee, H and Lewis, AP and Magalhães, H and Marco-Sola, S and Marijon, P and McCartney, A and McDaniel, J and Mountcastle, J and Nattestad, M and Nurk, S and Olson, ND and Popejoy, AB and Puiu, D and Rautiainen, M and Regier, AA and Rhie, A and Sacco, S and Sanders, AD and Schneider, VA and Schultz, BI and Shafin, K and Smith, MW and Sofia, HJ and Abou Tayoun, AN and Thibaud-Nissen, F and Tricomi, FF and Wagner, J and Walenz, B and Wood, JMD and Zimin, AV and Bourque, G and Chaisson, MJP and Flicek, P and Phillippy, AM and Zook, JM and Eichler, EE and Haussler, D and Wang, T and Jarvis, ED and Miga, KH and Garrison, E and Marschall, T and Hall, IM and Li, H and Paten, B}, title = {A draft 
human pangenome reference.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {312-324}, pmid = {37165242}, issn = {1476-4687}, support = {U41 HG010972/HG/NHGRI NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U24 HG007497/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; Diploidy ; *Genome, Human/genetics ; Haplotypes/genetics ; Sequence Analysis, DNA ; *Genomics/standards ; Reference Standards ; Cohort Studies ; Alleles ; Genetic Variation ; }, abstract = {Here the Human Pangenome Reference Consortium presents a first draft of the human pangenome reference. The pangenome contains 47 phased, diploid assemblies from a cohort of genetically diverse individuals[1]. These assemblies cover more than 99% of the expected sequence in each genome and are more than 99% accurate at the structural and base pair levels. Based on alignments of the assemblies, we generate a draft pangenome that captures known variants and haplotypes and reveals new alleles at structurally complex loci. We also add 119 million base pairs of euchromatic polymorphic sequences and 1,115 gene duplications relative to the existing reference GRCh38. Roughly 90 million of the additional base pairs are derived from structural variation. 
Using our draft pangenome to analyse short-read data reduced small variant discovery errors by 34% and increased the number of structural variants detected per haplotype by 104% compared with GRCh38-based workflows, which enabled the typing of the vast majority of structural variant alleles per sample.}, } @article {pmid37165241, year = {2023}, author = {Guarracino, A and Buonaiuto, S and de Lima, LG and Potapova, T and Rhie, A and Koren, S and Rubinstein, B and Fischer, C and {Human Pangenome Reference Consortium} and Gerton, JL and Phillippy, AM and Colonna, V and Garrison, E}, title = {Recombination between heterologous human acrocentric chromosomes.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {335-343}, pmid = {37165241}, issn = {1476-4687}, support = {R01 CA266339/CA/NCI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Centromere/genetics ; *Chromosomes, Human/genetics ; DNA, Ribosomal/genetics ; *Recombination, Genetic/genetics ; Translocation, Genetic/genetics ; Cytogenetics ; Telomere/genetics ; }, abstract = {The short arms of the human acrocentric chromosomes 13, 14, 15, 21 and 22 (SAACs) share large homologous regions, including ribosomal DNA repeats and extended segmental duplications[1,2]. Although the resolution of these regions in the first complete assembly of a human genome-the Telomere-to-Telomere Consortium's CHM13 assembly (T2T-CHM13)-provided a model of their homology[3], it remained unclear whether these patterns were ancestral or maintained by ongoing recombination exchange. Here we show that acrocentric chromosomes contain pseudo-homologous regions (PHRs) indicative of recombination between non-homologous sequences. Utilizing an all-to-all comparison of the human pangenome from the Human Pangenome Reference Consortium[4] (HPRC), we find that contigs from all of the SAACs form a community. 
A variation graph[5] constructed from centromere-spanning acrocentric contigs indicates the presence of regions in which most contigs appear nearly identical between heterologous acrocentric chromosomes in T2T-CHM13. Except on chromosome 15, we observe faster decay of linkage disequilibrium in the pseudo-homologous regions than in the corresponding short and long arms, indicating higher rates of recombination[6,7]. The pseudo-homologous regions include sequences that have previously been shown to lie at the breakpoint of Robertsonian translocations[8], and their arrangement is compatible with crossover in inverted duplications on chromosomes 13, 14 and 21. The ubiquity of signals of recombination between heterologous acrocentric chromosomes seen in the HPRC draft pangenome suggests that these shared sequences form the basis for recurrent Robertsonian translocations, providing sequence and population-based confirmation of hypotheses first developed from cytogenetic studies 50 years ago[9].}, } @article {pmid37165237, year = {2023}, author = {Vollger, MR and Dishuck, PC and Harvey, WT and DeWitt, WS and Guitart, X and Goldberg, ME and Rozanski, AN and Lucas, J and Asri, M and {Human Pangenome Reference Consortium} and Munson, KM and Lewis, AP and Hoekzema, K and Logsdon, GA and Porubsky, D and Paten, B and Harris, K and Hsieh, P and Eichler, EE}, title = {Increased mutation and gene conversion within human segmental duplications.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {325-334}, pmid = {37165237}, issn = {1476-4687}, support = {R01 HG002385/HG/NHGRI NIH HHS/United States ; R35 GM133428/GM/NIGMS NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Gene Conversion/genetics ; Genome, Human/genetics ; *Mutation ; *Segmental Duplications, Genomic ; Polymorphism, Single Nucleotide/genetics ; Haplotypes/genetics ; Exons/genetics ; Cytosine/chemistry ; Guanine/chemistry ; CpG Islands/genetics ; }, 
abstract = {Single-nucleotide variants (SNVs) in segmental duplications (SDs) have not been systematically assessed because of the limitations of mapping short-read sequencing data[1,2]. Here we constructed 1:1 unambiguous alignments spanning high-identity SDs across 102 human haplotypes and compared the pattern of SNVs between unique and duplicated regions[3,4]. We find that human SNVs are elevated 60% in SDs compared to unique regions and estimate that at least 23% of this increase is due to interlocus gene conversion (IGC) with up to 4.3 megabase pairs of SD sequence converted on average per human haplotype. We develop a genome-wide map of IGC donors and acceptors, including 498 acceptor and 454 donor hotspots affecting the exons of about 800 protein-coding genes. These include 171 genes that have 'relocated' on average 1.61 megabase pairs in a subset of human haplotypes. Using a coalescent framework, we show that SD regions are slightly evolutionarily older when compared to unique sequences, probably owing to IGC. SNVs in SDs, however, show a distinct mutational spectrum: a 27.1% increase in transversions that convert cytosine to guanine or the reverse across all triplet contexts and a 7.6% reduction in the frequency of CpG-associated mutations when compared to unique DNA. 
We reason that these distinct mutational properties help to maintain an overall higher GC content of SD DNA compared to that of unique DNA, probably driven by GC-biased conversion between paralogous sequences[5,6].}, } @article {pmid37165235, year = {2023}, author = {Massarat, A and Gymrek, M and McStay, B and Jónsson, H}, title = {Human pangenome supports analysis of complex genomic regions.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {256-258}, pmid = {37165235}, issn = {1476-4687}, mesh = {Humans ; *Genomics ; *Genome ; }, } @article {pmid37165229, year = {2023}, author = {Liverpool, L}, title = {First human 'pangenome' aims to catalogue genetic diversity.}, journal = {Nature}, volume = {617}, number = {7961}, pages = {444-445}, pmid = {37165229}, issn = {1476-4687}, mesh = {Humans ; *Genetic Variation/genetics ; *Genome, Human/genetics ; *Genomics/trends ; }, } @article {pmid37165225, year = {2023}, author = {Petrić Howe, N and Bundell, S}, title = {'Pangenome' aims to capture the breadth of human diversity.}, journal = {Nature}, volume = {}, number = {}, pages = {}, doi = {10.1038/d41586-023-01579-9}, pmid = {37165225}, issn = {1476-4687}, } @article {pmid37165083, year = {2023}, author = {Hickey, G and Monlong, J and Ebler, J and Novak, AM and Eizenga, JM and Gao, Y and {Human Pangenome Reference Consortium} and Marschall, T and Li, H and Paten, B}, title = {Pangenome graph construction from genome alignments with Minigraph-Cactus.}, journal = {Nature biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37165083}, issn = {1546-1696}, support = {U01 HG010971/HG/NHGRI NIH HHS/United States ; }, abstract = {Pangenome references address biases of reference genomes by storing a representative set of diverse haplotypes and their alignment, usually as a graph. Alternate alleles determined by variant callers can be used to construct pangenome graphs, but advances in long-read sequencing are leading to widely available, high-quality phased assemblies. 
Constructing a pangenome graph directly from assemblies, as opposed to variant calls, leverages the graph's ability to represent variation at different scales. Here we present the Minigraph-Cactus pangenome pipeline, which creates pangenomes directly from whole-genome alignments, and demonstrate its ability to scale to 90 human haplotypes from the Human Pangenome Reference Consortium. The method builds graphs containing all forms of genetic variation while allowing use of current mapping and genotyping tools. We measure the effect of the quality and completeness of reference genomes used for analysis within the pangenomes and show that using the CHM13 reference from the Telomere-to-Telomere Consortium improves the accuracy of our methods. We also demonstrate construction of a Drosophila melanogaster pangenome.}, } @article {pmid37164484, year = {2023}, author = {Porubsky, D and Vollger, MR and Harvey, WT and Rozanski, AN and Ebert, P and Hickey, G and Hasenfeld, P and Sanders, AD and Stober, C and {Human Pangenome Reference Consortium} and Korbel, JO and Paten, B and Marschall, T and Eichler, EE}, title = {Gaps and complex structurally variant loci in phased genome assemblies.}, journal = {Genome research}, volume = {33}, number = {4}, pages = {496-510}, pmid = {37164484}, issn = {1549-5469}, support = {R01 HG002385/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *DNA, Satellite/genetics ; *Polymorphism, Genetic ; Haplotypes ; Segmental Duplications, Genomic ; Sequence Analysis, DNA ; }, abstract = {There has been tremendous progress in phased genome assembly production by combining long-read data with parental information or linked-read data. Nevertheless, a typical phased genome assembly generated by trio-hifiasm still generates more than 140 gaps. 
We perform a detailed analysis of gaps, assembly breaks, and misorientations from 182 haploid assemblies obtained from a diversity panel of 77 unique human samples. Although trio-based approaches using HiFi are the current gold standard, chromosome-wide phasing accuracy is comparable when using Strand-seq instead of parental data. Importantly, the majority of assembly gaps cluster near the largest and most identical repeats (including segmental duplications [35.4%], satellite DNA [22.3%], or regions enriched in GA/AT-rich DNA [27.4%]). Consequently, 1513 protein-coding genes overlap assembly gaps in at least one haplotype, and 231 are recurrently disrupted or missing from five or more haplotypes. Furthermore, we estimate that 6-7 Mbp of DNA are misorientated per haplotype irrespective of whether trio-free or trio-based approaches are used. Of these misorientations, 81% correspond to bona fide large inversion polymorphisms in the human species, most of which are flanked by large segmental duplications. We also identify large-scale alignment discontinuities consistent with 11.9 Mbp of deletions and 161.4 Mbp of insertions per haploid genome. Although 99% of this variation corresponds to satellite DNA, we identify 230 regions of euchromatic DNA with frequent expansions and contractions, nearly half of which overlap with 197 protein-coding genes. 
Such variable and incompletely assembled regions are important targets for future algorithmic development and pangenome representation.}, } @article {pmid37154680, year = {2023}, author = {Castillo, AI and Almeida, RPP}, title = {The Multifaceted Role of Homologous Recombination in a Fastidious Bacterial Plant Pathogen.}, journal = {Applied and environmental microbiology}, volume = {89}, number = {5}, pages = {e0043923}, pmid = {37154680}, issn = {1098-5336}, support = {S10 OD018174/CD/ODCDC CDC HHS/United States ; }, mesh = {Phylogeny ; *Genetic Variation ; Genome, Bacterial ; *Xylella/genetics ; Homologous Recombination ; Plants/genetics ; Plant Diseases/microbiology ; }, abstract = {Homologous recombination plays a key function in the evolution of bacterial genomes. Within Xylella fastidiosa, an emerging plant pathogen with increasing host and geographic ranges, it has been suggested that homologous recombination facilitates host switching, speciation, and the development of virulence. We used 340 whole-genome sequences to study the relationship between inter- and intrasubspecific homologous recombination, random mutation, and natural selection across individual X. fastidiosa genes. Individual gene orthologs were identified and aligned, and a maximum likelihood (ML) gene tree was generated. Each gene alignment and tree pair were then used to calculate gene-wide and branch-specific r/m values (relative effect of recombination to mutation), gene-wide and branch-site nonsynonymous over synonymous substitution rates (dN/dS values; episodic selection), and branch length (as a proxy for mutation rate). The relationships between these variables were evaluated at the global level (i.e., for all genes among and within a subspecies), among specific functional classes (i.e., COGs), and between pangenome components (i.e., accessory versus core genes). Our analysis showed that r/m varied widely among genes as well as across X. fastidiosa subspecies. 
While r/m and dN/dS values were positively correlated in some instances (e.g., core genes in X. fastidiosa subsp. fastidiosa and both core and accessory genes in X. fastidiosa subsp. multiplex), low correlation coefficients suggested no clear biological significance. Overall, our results indicate that, in addition to its adaptive role in certain genes, homologous recombination acts as a homogenizing and a neutral force across phylogenetic clades, gene functional groups, and pangenome components. IMPORTANCE There is ample evidence that homologous recombination occurs frequently in the economically important plant pathogen Xylella fastidiosa. Homologous recombination has been known to occur among sympatric subspecies and is associated with host-switching events and virulence-linked genes. As a consequence, is it generally assumed that recombinant events in X. fastidiosa are adaptive. This mindset influences expectations of how homologous recombination acts as an evolutionary force as well as how management strategies for X. fastidiosa diseases are determined. Yet, homologous recombination plays roles beyond that of a source for diversification and adaptation. Homologous recombination can act as a DNA repair mechanism, as a means to facilitate nucleotide compositional change, as a homogenization mechanism within populations, or even as a neutral force. Here, we provide a first assessment of long-held beliefs regarding the general role of recombination in adaptation for X. fastidiosa. We evaluate gene-specific variations in homologous recombination rate across three X. fastidiosa subspecies and its relationship to other evolutionary forces (e.g., natural selection, mutation, etc.). These data were used to assess the role of homologous recombination in X. 
fastidiosa evolution.}, } @article {pmid37153161, year = {2023}, author = {Saroha, T and Patil, PP and Rana, R and Kumar, R and Kumar, S and Singhal, L and Gautam, V and Patil, PB}, title = {Genomic features, antimicrobial susceptibility, and epidemiological insights into Burkholderia cenocepacia clonal complex 31 isolates from bloodstream infections in India.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1151594}, pmid = {37153161}, issn = {2235-2988}, mesh = {Humans ; *Burkholderia cenocepacia/genetics ; Phylogeny ; *Burkholderia Infections/epidemiology ; *Burkholderia cepacia complex/genetics ; Genomics ; *Anti-Infective Agents ; *Sepsis ; Fibrosis ; }, abstract = {INTRODUCTION: Burkholderia cepacia complex (Bcc) clonal complex (CC) 31, the predominant lineage causing devastating outbreaks globally, has been a growing concern of infections in non-cystic fibrosis (NCF) patients in India. B. cenocepacia is very challenging to treat owing to its virulence determinants and antibiotic resistance. Improving the management of these infections requires a better knowledge of their resistance patterns and mechanisms.

METHODS: Whole-genome sequences of 35 CC31 isolates obtained from patient samples, were analyzed against available 210 CC31 genomes in the NCBI database to glean details of resistance, virulence, mobile elements, and phylogenetic markers to study genomic diversity and evolution of CC31 lineage in India.

RESULTS: Genomic analysis revealed that 35 isolates belonging to CC31 were categorized into 11 sequence types (ST), of which five STs were reported exclusively from India. Phylogenetic analysis classified 245 CC31 isolates into eight distinct clades (I-VIII) and unveiled that NCF isolates are evolving independently from the global cystic fibrosis (CF) isolates forming a distinct clade. The detection rate of seven classes of antibiotic-related genes in 35 isolates was 35 (100%) for tetracyclines, aminoglycosides, and fluoroquinolones; 26 (74.2%) for sulphonamides and phenicols; 7 (20%) for beta-lactamases; and 1 (2.8%) for trimethoprim resistance genes. Additionally, 3 (8.5%) NCF isolates were resistant to disinfecting agents and antiseptics. Antimicrobial susceptibility testing revealed that majority of NCF isolates were resistant to chloramphenicol (77%) and levofloxacin (34%). NCF isolates have a comparable number of virulence genes to CF isolates. A well-studied pathogenicity island of B. cenocepacia, GI11 is present in ST628 and ST709 isolates from the Indian Bcc population. In contrast, genomic island GI15 (highly similar to the island found in B. pseudomallei strain EY1) is exclusively reported in ST839 and ST824 isolates from two different locations in India. Horizontal acquisition of lytic phage ST79 of pathogenic B. pseudomallei is demonstrated in ST628 isolates Bcc1463, Bcc29163, and BccR4654 amongst CC31 lineage.

DISCUSSION: The study reveals a high diversity of CC31 lineages among B. cenocepacia isolates from India. The extensive information from this study will facilitate the development of rapid diagnostic and novel therapeutic approaches to manage B. cenocepacia infections.}, } @article {pmid37152722, year = {2023}, author = {Aziz, T and Naveed, M and Jabeen, K and Shabbir, MA and Sarwar, A and Zhennai, Y and Alharbi, M and Alshammari, A and Alasmari, AF}, title = {Integrated genome based evaluation of safety and probiotic characteristics of Lactiplantibacillus plantarum YW11 isolated from Tibetan kefir.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1157615}, pmid = {37152722}, issn = {1664-302X}, abstract = {The comparative genomic analysis of Lactiplantibacillus plantarum YW11 (L. plantarum YW11) isolated from Tibetan kefir involves comparison of the complete genome sequences of the isolated strain with other closely related L. plantarum strains. This type of analysis can be used to identify the genetic diversity among strains and to explore the genetic characteristics of the YW11 strain. The genome of L. plantarum YW11 was found to be composed of a circular single chromosome of 4,597,470 bp with a G + C content of 43.2%. A total of 4,278 open reading frames (ORFs) were identified in the genome and the coding density was found to be 87.8%. A comparative genomic analysis was conducted using two other L. plantarum strains, L. plantarum C11 and L. plantarum LMG21703. Genomic comparison revealed that L. plantarum YW11 shared 72.7 and 75.2% of gene content with L. plantarum C11 and L. plantarum LMG21703, respectively. Most of the genes shared between the three L. plantarum strains were involved in carbohydrate metabolism, energy production and conversion, amino acid metabolism, and transcription. 
In this analysis, 10 previously sequenced entire genomes of the species were compared using an in-silico technique to discover genomic divergence in genes linked with carbohydrate intake and their potential adaptations to distinct human intestinal environments. The subspecies pan-genome was open, which correlated with its extraordinary capacity to colonize several environments. Phylogenetic analysis revealed that the novel genomes were homogenously grouped among subspecies of Lactiplantibacillus. L. plantarum was resistant to cefoxitin, erythromycin, and metronidazole, inhibited pathogens including Listeria monocytogenes, Clostridium difficile, Vibrio cholera, and others, and had excellent aerotolerance, which is useful for industrial operations. The comparative genomic analysis of L. plantarum YW11 isolated from Tibetan kefir can provide insights into the genetic characteristics of the strain, which can be used to further understand its role in the production of kefir.}, } @article {pmid37147657, year = {2023}, author = {Mun, T and Vaddadi, NSK and Langmead, B}, title = {Pangenomic genotyping with the marker array.}, journal = {Algorithms for molecular biology : AMB}, volume = {18}, number = {1}, pages = {2}, pmid = {37147657}, issn = {1748-7188}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R35 GM139602/GM/NIGMS NIH HHS/United States ; R01HG011392/HG/NHGRI NIH HHS/United States ; R35GM139602/GM/NIGMS NIH HHS/United States ; }, abstract = {We present a new method and software tool called rowbowt that applies a pangenome index to the problem of inferring genotypes from short-read sequencing data. The method uses a novel indexing structure called the marker array. Using the marker array, we can genotype variants with respect from large panels like the 1000 Genomes Project while reducing the reference bias that results when aligning to a single linear reference. 
rowbowt can infer accurate genotypes in less time and memory compared to existing graph-based methods. The method is implemented in the open source software tool rowbowt available at https://github.com/alshai/rowbowt .}, } @article {pmid37144759, year = {2023}, author = {Basharat, Z and Meshal, A}, title = {Pan-genome mediated therapeutic target mining in Kingella kingae and inhibition assessment using traditional Chinese medicinal compounds: an informatics approach.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-14}, doi = {10.1080/07391102.2023.2208221}, pmid = {37144759}, issn = {1538-0254}, abstract = {Kingella kingae causes bacteremia, endocarditis, osteomyelitis, septic arthritis, meningitis, spondylodiscitis, and lower respiratory tract infections in pediatric patients. Usually it demonstrates disease after inflammation of mouth, lips or infections of the upper respiratory tract. To date, therapeutic targets in this bacterium remain unexplored. We have utilized a battery of bioinformatics tools to mine these targets in this study. Core genes were initially inferred from 55 genomes of K. kingae and 39 therapeutic targets were mined using an in-house pipeline. We selected aroG product (KDPG aldolase) involved in chorismate pathway, for inhibition analysis of this bacterium using lead-like metabolites from traditional Chinese medicinal plants. Pharmacophore generation was done using control ZINC36444158 (1,16-bis[(dihydroxyphosphinyl)oxy]hexadecane), followed by molecular docking of top hits from a library of 36,000 compounds. Top prioritized compounds were ZINC95914016, ZINC33833283 and ZINC95914219. ADME profiling and simulation of compound dosing (100 mg tablet) was done to infer compartmental pharmacokinetics in a population of 300 individuals in fasting state. PkCSM based toxicity analysis revealed the compounds ZINC95914016 and ZINC95914219 as safe and with almost similar bioavailability. 
However, ZINC95914016 takes less time to reach maximum concentration in the plasma and shows several optimal parameters compared to other leads. In light of obtained data, we recommend this compound for further testing and induction in experimental drug design pipeline. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37143156, year = {2023}, author = {Gong, Y and Li, Y and Liu, X and Ma, Y and Jiang, L}, title = {A review of the pangenome: how it affects our understanding of genomic variation, selection and breeding in domestic animals?}, journal = {Journal of animal science and biotechnology}, volume = {14}, number = {1}, pages = {73}, pmid = {37143156}, issn = {1674-9782}, support = {31961143021//Innovative Research Group Project of the National Natural Science Foundation of China/ ; CARS-39-01//Earmarked Fund for Modern Agro-industry Technology Research System/ ; ASTIP-IAS01//National Defense Science and Technology Innovation Fund of the Chinese Academy of Sciences/ ; }, abstract = {As large-scale genomic studies have progressed, it has been revealed that a single reference genome pattern cannot represent genetic diversity at the species level. While domestic animals tend to have complex routes of origin and migration, suggesting a possible omission of some population-specific sequences in the current reference genome. Conversely, the pangenome is a collection of all DNA sequences of a species that contains sequences shared by all individuals (core genome) and is also able to display sequence information unique to each individual (variable genome). The progress of pangenome research in humans, plants and domestic animals has proved that the missing genetic components and the identification of large structural variants (SVs) can be explored through pangenomic studies. Many individual specific sequences have been shown to be related to biological adaptability, phenotype and important economic traits. 
The maturity of technologies and methods such as third-generation sequencing, Telomere-to-telomere genomes, graphic genomes, and reference-free assembly will further promote the development of pangenome. In the future, pangenome combined with long-read data and multi-omics will help to resolve large SVs and their relationship with the main economic traits of interest in domesticated animals, providing better insights into animal domestication, evolution and breeding. In this review, we mainly discuss how pangenome analysis reveals genetic variations in domestic animals (sheep, cattle, pigs, chickens) and their impacts on phenotypes and how this can contribute to the understanding of species diversity. Additionally, we also go through potential issues and the future perspectives of pangenome research in livestock and poultry.}, } @article {pmid37138640, year = {2023}, author = {Sorouri, B and Rodriguez, CI and Gaut, BS and Allison, SD}, title = {Variation in Sphingomonas traits across habitats and phylogenetic clades.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1146165}, pmid = {37138640}, issn = {1664-302X}, abstract = {Whether microbes show habitat preferences is a fundamental question in microbial ecology. If different microbial lineages have distinct traits, those lineages may occur more frequently in habitats where their traits are advantageous. Sphingomonas is an ideal bacterial clade in which to investigate how habitat preference relates to traits because these bacteria inhabit diverse environments and hosts. Here we downloaded 440 publicly available Sphingomonas genomes, assigned them to habitats based on isolation source, and examined their phylogenetic relationships. We sought to address whether: (1) there is a relationship between Sphingomonas habitat and phylogeny, and (2) whether there is a phylogenetic correlation between key, genome-based traits and habitat preference. 
We hypothesized that Sphingomonas strains from similar habitats would cluster together in phylogenetic clades, and key traits that improve fitness in specific environments should correlate with habitat. Genome-based traits were categorized into the Y-A-S trait-based framework for high growth yield, resource acquisition, and stress tolerance. We selected 252 high quality genomes and constructed a phylogenetic tree with 12 well-defined clades based on an alignment of 404 core genes. Sphingomonas strains from the same habitat clustered together within the same clades, and strains within clades shared similar clusters of accessory genes. Additionally, key genome-based trait frequencies varied across habitats. We conclude that Sphingomonas gene content reflects habitat preference. This knowledge of how environment and host relate to phylogeny may also help with future functional predictions about Sphingomonas and facilitate applications in bioremediation.}, } @article {pmid37138622, year = {2023}, author = {Zhou, Y and Jiang, D and Yao, X and Luo, Y and Yang, Z and Ren, M and Zhang, G and Yu, Y and Lu, A and Wang, Y}, title = {Pan-genome wide association study of Glaesserella parasuis highlights genes associated with virulence and biofilm formation.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1160433}, pmid = {37138622}, issn = {1664-302X}, abstract = {Glaesserella parasuis is a gram-negative bacterium that causes fibrotic polyserositis and arthritis in pig, significantly affecting the pig industry. The pan-genome of G. parasuis is open. As the number of genes increases, the core and accessory genomes may show more pronounced differences. The genes associated with virulence and biofilm formation are also still unclear due to the diversity of G. parasuis. Therefore, we have applied a pan-genome-wide association study (Pan-GWAS) to 121 strains G. parasuis. 
Our analysis revealed that the core genome consists of 1,133 genes associated with the cytoskeleton, virulence, and basic biological processes. The accessory genome is highly variable and is a major cause of genetic diversity in G. parasuis. Furthermore, two biologically important traits (virulence, biofilm formation) of G. parasuis were studied via pan-GWAS to search for genes associated with the traits. A total of 142 genes were associated with strong virulence traits. By affecting metabolic pathways and capturing the host nutrients, these genes are involved in signal pathways and virulence factors, which are beneficial for bacterial survival and biofilm formation. This research lays the foundation for further studies on virulence and biofilm formation and provides potential new drug and vaccine targets against G. parasuis.}, } @article {pmid37138596, year = {2023}, author = {Zhao, Y and Wei, HM and Yuan, JL and Xu, L and Sun, JQ}, title = {A comprehensive genomic analysis provides insights on the high environmental adaptability of Acinetobacter strains.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1177951}, pmid = {37138596}, issn = {1664-302X}, abstract = {Acinetobacter is ubiquitous, and it has a high species diversity and a complex evolutionary pattern. To elucidate the mechanism of its high ability to adapt to various environments, 312 genomes of Acinetobacter strains were analyzed using the phylogenomic and comparative genomics methods. It was revealed that the Acinetobacter genus has an open pan-genome and strong genome plasticity. The pan-genome consists of 47,500 genes, with 818 shared by all the genomes of Acinetobacter, while 22,291 are unique genes. 
Although Acinetobacter strains do not have a complete glycolytic pathway to directly utilize glucose as carbon source, most of them harbored the n-alkane-degrading genes alkB/alkM (97.1% of tested strains) and almA (96.7% of tested strains), which were responsible for medium-and long-chain n-alkane terminal oxidation reaction, respectively. Most Acinetobacter strains also have catA (93.3% of tested strains) and benAB (92.0% of tested strains) genes that can degrade the aromatic compounds catechol and benzoic acid, respectively. These abilities enable the Acinetobacter strains to easily obtain carbon and energy sources from their environment for survival. The Acinetobacter strains can manage osmotic pressure by accumulating potassium and compatible solutes, including betaine, mannitol, trehalose, glutamic acid, and proline. They respond to oxidative stress by synthesizing superoxide dismutase, catalase, disulfide isomerase, and methionine sulfoxide reductase that repair the damage caused by reactive oxygen species. In addition, most Acinetobacter strains contain many efflux pump genes and resistance genes to manage antibiotic stress and can synthesize a variety of secondary metabolites, including arylpolyene, β-lactone and siderophores among others, to adapt to their environment. These genes enable Acinetobacter strains to survive extreme stresses. The genome of each Acinetobacter strain contained different numbers of prophages (0-12) and genomic islands (GIs) (6-70), and genes related to antibiotic resistance were found in the GIs. 
The phylogenetic analysis revealed that the alkM and almA genes have a similar evolutionary position with the core genome, indicating that they may have been acquired by vertical gene transfer from their ancestor, while catA, benA, benB and the antibiotic resistance genes could have been acquired by horizontal gene transfer from the other organisms.}, } @article {pmid37138544, year = {2023}, author = {Oddy, J and Chhetry, M and Awal, R and Addy, J and Wilkinson, M and Smith, D and King, R and Hall, C and Testa, R and Murray, E and Raffan, S and Curtis, TY and Wingen, L and Griffiths, S and Berry, S and Elmore, JS and Cryer, N and Moreira de Almeida, I and Halford, NG}, title = {Genetic control of grain amino acid composition in a UK soft wheat mapping population.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20335}, doi = {10.1002/tpg2.20335}, pmid = {37138544}, issn = {1940-3372}, support = {BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/T017007/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/T50838X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Wheat (Triticum aestivum L.) is a major source of nutrients for populations across the globe, but the amino acid composition of wheat grain does not provide optimal nutrition. The nutritional value of wheat grain is limited by low concentrations of lysine (the most limiting essential amino acid) and high concentrations of free asparagine (precursor to the processing contaminant acrylamide). There are currently few available solutions for asparagine reduction and lysine biofortification through breeding. In this study, we investigated the genetic architecture controlling grain free amino acid composition and its relationship to other traits in a Robigus × Claire doubled haploid population. 
Multivariate analysis of amino acids and other traits showed that the two groups are largely independent of one another, with the largest effect on amino acids being from the environment. Linkage analysis of the population allowed identification of quantitative trait loci (QTL) controlling free amino acids and other traits, and this was compared against genomic prediction methods. Following identification of a QTL controlling free lysine content, wheat pangenome resources facilitated analysis of candidate genes in this region of the genome. These findings can be used to select appropriate strategies for lysine biofortification and free asparagine reduction in wheat breeding programs.}, } @article {pmid37138543, year = {2023}, author = {Derbyshire, MC and Marsh, J and Tirnaz, S and Nguyen, HT and Batley, J and Bayer, PE and Edwards, D}, title = {Diversity of fatty acid biosynthesis genes across the soybean pangenome.}, journal = {The plant genome}, volume = {16}, number = {2}, pages = {e20334}, doi = {10.1002/tpg2.20334}, pmid = {37138543}, issn = {1940-3372}, mesh = {*Soybeans/genetics ; *Fatty Acid Desaturases/genetics ; Plant Proteins/genetics ; Plant Breeding ; Fatty Acids ; }, abstract = {Soybean (Glycine max) is a major crop that contributes more than half of global oilseed production. Much research has been directed towards improvement of the fatty acid profile of soybean seeds through marker assisted breeding. Recently published soybean pangenomes, based on thousands of soybean lines, provide an opportunity to identify new alleles that may be involved in fatty acid biosynthesis. In this study, we identify fatty acid biosynthesis genes in soybean pangenomes based on sequence identity with known genes and examine their sequence diversity across diverse soybean collections. 
We find three possible instances of a gene missing in wild soybean, including FAD8 and FAD2-2D, which may be involved in oleic and linoleic acid desaturation, respectively, although we recommend follow-up research to verify the absence of these genes. More than half of the 53 fatty acid biosynthesis genes identified contained missense variants, including one linked with a previously identified QTL for oil quality. These variants were present in multiple studies based on either short read mappings or alignment of reference grade genomes. Missense variants were found in previously characterized genes including FAD2-1A and FAD2-1B, both of which are involved in desaturation of oleic acid, as well as uncharacterized candidate fatty acid biosynthesis genes. We find that the frequency of missense alleles in fatty acid biosynthesis genes has been reduced significantly more than the global average frequency of missense mutations during domestication, and missense variation in some genes is near absent in modern cultivars. This could be due to the selection for fatty acid profiles in seed, though future work should be conducted towards understanding the phenotypic impacts of these variants.}, } @article {pmid37129508, year = {2023}, author = {Maki, JJ and Howard, M and Connelly, S and Pettengill, MA and Hardy, DJ and Cameron, A}, title = {Species Delineation and Comparative Genomics within the Campylobacter ureolyticus Complex.}, journal = {Journal of clinical microbiology}, volume = {61}, number = {5}, pages = {e0004623}, pmid = {37129508}, issn = {1098-660X}, mesh = {Humans ; *Campylobacter ; *Campylobacter Infections/microbiology ; Genomics ; Anti-Bacterial Agents ; *Gastroenteritis/microbiology ; DNA ; *Campylobacter jejuni/genetics ; }, abstract = {Campylobacter ureolyticus is an emerging pathogen increasingly appreciated as a common cause of gastroenteritis and extra-intestinal infections in humans. 
Outside the setting of gastroenteritis, little work has been done to describe the genomic content and relatedness of the species, especially regarding clinical isolates. We reviewed the epidemiology of clinical C. ureolyticus cultured by our institution over the past 10 years. Fifty-one unique C. ureolyticus isolates were identified between January 2010 and August 2022, mostly originating from abscesses and blood cultures. To clarify the taxonomic relationships between isolates and to attribute specific genes with different clinical manifestations, we sequenced 19 available isolates from a variety of clinical specimen types and conducted a pangenomic analysis with publicly available C. ureolyticus genomes. Digital DNA:DNA hybridization suggested that these C. ureolyticus comprised a species complex of 10 species clusters (SCs) and several subspecies clusters. Although some orthologous genes or gene functions were enriched in isolates found in different SCs and clinical specimens, no association was significant. Nearly a third of the isolates possessed antimicrobial resistance genes, including the ermA resistance gene, potentially conferring resistance to macrolides, the treatment of choice for severe human campylobacteriosis. This work effectively doubles the number of publicly available C. 
ureolyticus genomes, provides further clarification of taxonomic relationships within this bacterial complex, and identifies target SCs for future analysis.}, } @article {pmid37127330, year = {2023}, author = {Weller, CA and Andreev, I and Chambers, MJ and Park, M and {NISC Comparative Sequencing Program} and Bloom, JS and Sadhu, MJ}, title = {Highly complete long-read genomes reveal pangenomic variation underlying yeast phenotypic diversity.}, journal = {Genome research}, volume = {33}, number = {5}, pages = {729--740}, pmid = {37127330}, issn = {1549-5469}, support = {ZIA HG200401/ImNIH/Intramural NIH HHS/United States ; ZIB HG000196/ImNIH/Intramural NIH HHS/United States ; }, mesh = {*Saccharomyces cerevisiae/genetics ; Quantitative Trait Loci ; Chromosome Mapping ; Phenotype ; *Saccharomyces cerevisiae Proteins/genetics ; }, abstract = {Understanding the genetic causes of trait variation is a primary goal of genetic research. One way that individuals can vary genetically is through variable pangenomic genes: genes that are only present in some individuals in a population. The presence or absence of entire genes could have large effects on trait variation. However, variable pangenomic genes can be missed in standard genotyping workflows, owing to reliance on aligning short-read sequencing to reference genomes. A popular method for studying the genetic basis of trait variation is linkage mapping, which identifies quantitative trait loci (QTLs), regions of the genome that harbor causative genetic variants. Large-scale linkage mapping in the budding yeast Saccharomyces cerevisiae has found thousands of QTLs affecting myriad yeast phenotypes. To enable the resolution of QTLs caused by variable pangenomic genes, we used long-read sequencing to generate highly complete de novo genome assemblies of 16 diverse yeast isolates. 
With these assemblies, we resolved QTLs for growth on maltose, sucrose, raffinose, and oxidative stress to specific genes that are absent from the reference genome but present in the broader yeast population at appreciable frequency. Copies of genes also duplicate onto chromosomes where they are absent in the reference genome, and we found that these copies generate additional QTLs whose resolution requires pangenome characterization. Our findings show the need for highly complete genome assemblies to identify the genetic basis of trait variation.}, } @article {pmid37125195, year = {2023}, author = {Saxena, P and Rauniyar, S and Thakur, P and Singh, RN and Bomgni, A and Alaba, MO and Tripathi, AK and Gnimpieba, EZ and Lushbough, C and Sani, RK}, title = {Integration of text mining and biological network analysis: Identification of essential genes in sulfate-reducing bacteria.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1086021}, pmid = {37125195}, issn = {1664-302X}, support = {P20 GM103443/GM/NIGMS NIH HHS/United States ; P20 RR016479/RR/NCRR NIH HHS/United States ; }, abstract = {The growth and survival of an organism in a particular environment is highly depends on the certain indispensable genes, termed as essential genes. Sulfate-reducing bacteria (SRB) are obligate anaerobes which thrives on sulfate reduction for its energy requirements. The present study used Oleidesulfovibrio alaskensis G20 (OA G20) as a model SRB to categorize the essential genes based on their key metabolic pathways. Herein, we reported a feedback loop framework for gene of interest discovery, from bio-problem to gene set of interest, leveraging expert annotation with computational prediction. Defined bio-problem was applied to retrieve the genes of SRB from literature databases (PubMed, and PubMed Central) and annotated them to the genome of OA G20. 
Retrieved gene list was further used to enrich protein-protein interaction and was corroborated to the pangenome analysis, to categorize the enriched gene sets and the respective pathways under essential and non-essential. Interestingly, the sat gene (dde_2265) from the sulfur metabolism was the bridging gene between all the enriched pathways. Gene clusters involved in essential pathways were linked with the genes from seleno-compound metabolism, amino acid metabolism, secondary metabolite synthesis, and cofactor biosynthesis. Furthermore, pangenome analysis demonstrated the gene distribution, where 69.83% of the 116 enriched genes were mapped under "persistent," inferring the essentiality of these genes. Likewise, 21.55% of the enriched genes, which involves specially the formate dehydrogenases and metallic hydrogenases, appeared under "shell." Our methodology suggested that semi-automated text mining and network analysis may play a crucial role in deciphering the previously unexplored genes and key mechanisms which can help to generate a baseline prior to perform any experimental studies.}, } @article {pmid37122002, year = {2023}, author = {Porubsky, D and Harvey, WT and Rozanski, AN and Ebler, J and Höps, W and Ashraf, H and Hasenfeld, P and {Human Pangenome Reference Consortium (HPRC)} and {Human Genome Structural Variation Consortium (HGSVC)} and Paten, B and Sanders, AD and Marschall, T and Korbel, JO and Eichler, EE}, title = {Inversion polymorphism in a complete human genome assembly.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {100}, pmid = {37122002}, issn = {1474-760X}, support = {U24 HG007497/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Genome, Human ; *Polymorphism, Genetic ; Genomic Structural Variation ; Chromosome Inversion ; }, abstract = {The telomere-to-telomere (T2T) complete human reference has significantly improved our ability to characterize genome structural variation. 
To understand its impact on inversion polymorphisms, we remapped data from 41 genomes against the T2T reference genome and compared it to the GRCh38 reference. We find a ~ 21% increase in sensitivity improving mapping of 63 inversions on the T2T reference. We identify 26 misorientations within GRCh38 and show that the T2T reference is three times more likely to represent the correct orientation of the major human allele. Analysis of 10 additional samples reveals novel rare inversions at chromosomes 15q25.2, 16p11.2, 16q22.1-23.1, and 22q11.21.}, } @article {pmid37115804, year = {2023}, author = {Jacob, JJ and Pragasam, AK and Vasudevan, K and Velmurugan, A and Priya Teekaraman, M and Priya Thirumoorthy, T and Ray, P and Gupta, M and Kapil, A and Bai, SP and Nagaraj, S and Saigal, K and Chandola, TR and Thomas, M and Bavdekar, A and Ebenezer, SE and Shastri, J and De, A and Dutta, S and Alexander, AP and Koshy, RM and Jinka, DR and Singh, A and Srivastava, SK and Anandan, S and Dougan, G and John, J and Kang, G and Veeraraghavan, B and Mutreja, A}, title = {Genomic analysis unveils genome degradation events and gene flux in the emergence and persistence of S. Paratyphi A lineages.}, journal = {PLoS pathogens}, volume = {19}, number = {4}, pages = {e1010650}, pmid = {37115804}, issn = {1553-7374}, mesh = {Humans ; *Typhoid Fever/microbiology ; Salmonella typhi/genetics ; Phylogeny ; Salmonella paratyphi A/genetics ; Anti-Bacterial Agents ; Genomics ; }, abstract = {Paratyphoid fever caused by S. Paratyphi A is endemic in parts of South Asia and Southeast Asia. The proportion of enteric fever cases caused by S. Paratyphi A has substantially increased, yet only limited data is available on the population structure and genetic diversity of this serovar. We examined the phylogenetic distribution and evolutionary trajectory of S. Paratyphi A isolates collected as part of the Indian enteric fever surveillance study "Surveillance of Enteric Fever in India (SEFI)." 
In the study period (2017-2020), S. Paratyphi A comprised 17.6% (441/2503) of total enteric fever cases in India, with the isolates highly susceptible to all the major antibiotics used for treatment except fluoroquinolones. Phylogenetic analysis clustered the global S. Paratyphi A collection into seven lineages (A-G), and the present study isolates were distributed in lineages A, C and F. Our analysis highlights that the genome degradation events and gene acquisitions or losses are key molecular events in the evolution of new S. Paratyphi A lineages/sub-lineages. A total of 10 hypothetically disrupted coding sequences (HDCS) or pseudogenes-forming mutations possibly associated with the emergence of lineages were identified. The pan-genome analysis identified the insertion of P2/PSP3 phage and acquisition of IncX1 plasmid during the selection in 2.3.2/2.3.3 and 1.2.2 genotypes, respectively. We have identified six characteristic missense mutations associated with lipopolysaccharide (LPS) biosynthesis genes of S. Paratyphi A, however, these mutations confer only a low structural impact and possibly have minimal impact on vaccine effectiveness. Since S. Paratyphi A is human-restricted, high levels of genetic drift are not expected unless these bacteria transmit to naive hosts. However, public-health investigation and monitoring by means of genomic surveillance would be constantly needed to avoid S. Paratyphi A serovar becoming a public health threat similar to the S. 
Typhi of today.}, } @article {pmid37110377, year = {2023}, author = {Ariute, JC and Felice, AG and Soares, S and da Gama, MAS and de Souza, EB and Azevedo, V and Brenig, B and Aburjaile, F and Benko-Iseppon, AM}, title = {Characterization and Association of Rips Repertoire to Host Range of Novel Ralstonia solanacearum Strains by In Silico Approaches.}, journal = {Microorganisms}, volume = {11}, number = {4}, pages = {}, pmid = {37110377}, issn = {2076-2607}, abstract = {Ralstonia solanacearum species complex (RSSC) cause several phytobacteriosis in many economically important crops around the globe, especially in the tropics. In Brazil, phylotypes I and II cause bacterial wilt (BW) and are indistinguishable by classical microbiological and phytopathological methods, while Moko disease is caused only by phylotype II strains. Type III effectors of RSSC (Rips) are key molecular actors regarding pathogenesis and are associated with specificity to some hosts. In this study, we sequenced and characterized 14 newly RSSC isolates from Brazil's Northern and Northeastern regions, including BW and Moko ecotypes. Virulence and resistance sequences were annotated, and the Rips repertoire was predicted. Confirming previous studies, RSSC pangenome is open as α≅0.77. Genomic information regarding these isolates matches those for R. solanacearum in NCBI. All of them fit in phylotype II with a similarity above 96%, with five isolates in phylotype IIB and nine in phylotype IIA. Almost all R. solanacearum genomes in NCBI are actually from other species in RSSC. Rips repertoire of Moko IIB was more homogeneous, except for isolate B4, which presented ten non-shared Rips. Rips repertoire of phylotype IIA was more diverse in both Moko and BW, with 43 common shared Rips among all 14 isolates. New BW isolates shared more Rips with Moko IIA and Moko IIB than with other public BW genome isolates from Brazil. 
Rips not shared with other isolates might contribute to individual virulence, but commonly shared Rips are good avirulence candidates. The high number of Rips shared by new Moko and BW isolates suggests they are actually Moko isolates infecting solanaceous hosts. Finally, infection assays and Rips expression on different hosts are needed to better elucidate the association between Rips repertoire and host specificities.}, } @article {pmid37105244, year = {2023}, author = {Henaut-Jacobs, S and Passarelli-Araujo, H and Venancio, TM}, title = {Comparative genomics and phylogenomics of Campylobacter unveil potential novel species and provide insights into niche segregation.}, journal = {Molecular phylogenetics and evolution}, volume = {184}, number = {}, pages = {107786}, doi = {10.1016/j.ympev.2023.107786}, pmid = {37105244}, issn = {1095-9513}, mesh = {*Campylobacter/genetics ; Phylogeny ; Genome, Bacterial ; Genomics/methods ; Bacteria/genetics ; }, abstract = {Campylobacter is a bacterial genus associated with community outbreaks and gastrointestinal symptoms. Studies on Campylobacter generally focus on specific pathogenic species such as C. coli and C. jejuni. Currently, there are thousands of publicly available Campylobacter genomes, allowing a more complete assessment of the genus diversity. In this work, we report a network-based analysis of all available Campylobacter genomes to explore the genus structure and diversity, revealing potentially new species and elucidating genus features. We also hypothesize that the previously established Clade III of C. coli is in fact a novel species (referred here as Campylobacter spp12). Finally, we found a negative correlation between pangenome fluidity and saturation coefficient, with potential implications to the lifestyles of distinct Campylobacter species. 
Since pangenome analysis depends on the number of available genomes, this correlation could help estimate pangenome metrics of Campylobacter species with less sequenced genomes, helping understand their lifestyle and niche adaptation. Together, our results indicate that the Campylobacter genus should be re-evaluated, with particular attention to the interplay between genome structure and niche segregation.}, } @article {pmid37103716, year = {2023}, author = {Matussek, A and Mernelius, S and Chromek, M and Zhang, J and Frykman, A and Hansson, S and Georgieva, V and Xiong, Y and Bai, X}, title = {Genome-wide association study of hemolytic uremic syndrome causing Shiga toxin-producing Escherichia coli from Sweden, 1994-2018.}, journal = {European journal of clinical microbiology \& infectious diseases : official publication of the European Society of Clinical Microbiology}, volume = {42}, number = {6}, pages = {771--779}, pmid = {37103716}, issn = {1435-4373}, support = {SLS884041//Scandinavian Society for Antimicrobial Chemotherapy Foundation/ ; Dnr: 2022-00277//Ruth och Richard Julins Stiftelse/ ; }, mesh = {Humans ; *Shiga-Toxigenic Escherichia coli ; Genome-Wide Association Study ; *Escherichia coli Proteins/genetics ; Sweden/epidemiology ; Phylogeny ; *Escherichia coli Infections/complications/epidemiology/microbiology ; *Hemolytic-Uremic Syndrome/epidemiology/microbiology ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) infection can cause clinical manifestations ranging from diarrhea to potentially fatal hemolytic uremic syndrome (HUS). This study is aimed at identifying STEC genetic factors associated with the development of HUS in Sweden. A total of 238 STEC genomes from STEC-infected patients with and without HUS between 1994 and 2018 in Sweden were included in this study. 
Serotypes, Shiga toxin gene (stx) subtypes, and virulence genes were characterized in correlation to clinical symptoms (HUS and non-HUS), and pan-genome wide association study was performed. Sixty-five strains belonged to O157:H7, and 173 belonged to non-O157 serotypes. Our study revealed that strains of O157:H7 serotype especially clade 8 were most commonly found in patients with HUS in Sweden. stx2a and stx2a + stx2c subtypes were significantly associated with HUS. Other virulence factors associated with HUS mainly included intimin (eae) and its receptor (tir), adhesion factors, toxins, and secretion system proteins. Pangenome wide-association study identified numbers of accessory genes significantly overrepresented in HUS-STEC strains, including genes encoding outer membrane proteins, transcriptional regulators, phage-related proteins, and numerous genes related to hypothetical proteins. Whole-genome phylogeny and multiple correspondence analysis of pangenomes could not differentiate HUS-STEC from non-HUS-STEC strains. In O157:H7 cluster, strains from HUS patients clustered closely; however, no significant difference in virulence genes was found in O157 strains from patients with and without HUS. 
These results suggest that STEC strains from different phylogenetic backgrounds may independently acquire genes determining their pathogenicity and confirm that other non-bacterial factors and/or bacteria-host interaction may affect STEC pathogenesis.}, } @article {pmid37098951, year = {2023}, author = {Rodrigues, C and Lanza, VF and Peixe, L and Coque, TM and Novais, Â}, title = {Phylogenomics of Globally Spread Clonal Groups 14 and 15 of Klebsiella pneumoniae.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0339522}, pmid = {37098951}, issn = {2165-0497}, support = {POCI/01/0145/FEDER/007728//European Union/ ; PT2020 UID/MULTI/04378/2013//MEC | Fundação para a Ciência e a Tecnologia (FCT)/ ; SFRH/BD/84341/2012//MEC | Fundação para a Ciência e a Tecnologia (FCT)/ ; FEMS-RG-2014-0089//Federation of European Microbiological Societies (FEMS)/ ; 2021.02252.CEECIND/CP1662/CT0009//MEC | Fundação para a Ciência e a Tecnologia (FCT)/ ; MISTAR AC21/2 00041//European Commission (EC)/ ; //MEC | Instituto de Salud Carlos III (ISCIII)/ ; //Fundación Francisco Soria Melguizo/ ; CP22/00164//Instituto de Salud Carlos III (ISCIII)/ ; }, mesh = {Humans ; *Klebsiella pneumoniae ; Phylogeny ; Plasmids/genetics ; beta-Lactamases/genetics ; Anti-Bacterial Agents/pharmacology ; *Klebsiella Infections/epidemiology ; Microbial Sensitivity Tests ; Drug Resistance, Multiple, Bacterial/genetics ; }, abstract = {Klebsiella pneumoniae sequence type 14 (ST14) and ST15 caused outbreaks of CTX-M-15 and/or carbapenemase producers worldwide, but their phylogeny and global dynamics remain unclear. We clarified the evolution of K. pneumoniae clonal group 14 (CG14) and CG15 by analyzing the capsular locus (KL), resistome, virulome, and plasmidome of public genomes (n = 481) and de novo sequences (n = 9) representing main sublineages circulating in Portugal. CG14 and CG15 evolved independently within 6 main subclades defined according to the KL and the accessory genome. 
The CG14 (n = 65) clade was structured in two large monophyletic subclades, CG14-I (KL2, 86%) and CG14-II (KL16, 14%), whose emergences were dated to 1932 and 1911, respectively. Genes encoding extended-spectrum β-lactamase (ESBL), AmpC, and/or carbapenemases were mostly observed in CG14-I (71% versus 22%). CG15 clade (n = 170) was segregated into subclades CG15-IA (KL19/KL106, 9%), CG15-IB (variable KL types, 6%), CG15-IIA (KL24, 43%) and CG15-IIB (KL112, 37%). Most CG15 genomes carried specific GyrA and ParC mutations and emerged from a common ancestor in 1989. CTX-M-15 was especially prevalent in CG15 (68% CG15 versus 38% CG14) and in CG15-IIB (92%). Plasmidome analysis revealed 27 predominant plasmid groups (PG), including particularly pervasive and recombinant F-type (n = 10), Col (n = 10), and new plasmid types. While blaCTX-M-15 was acquired multiple times by a high diversity of F-type mosaic plasmids, other antibiotic resistance genes (ARGs) were dispersed by IncL (blaOXA-48) or IncC (blaCMY/TEM-24) plasmids. We first demonstrate an independent evolutionary trajectory for CG15 and CG14 and how the acquisition of specific KL, quinolone-resistance determining region (QRDR) mutations (CG15), and ARGs in highly recombinant plasmids could have shaped the expansion and diversification of particular subclades (CG14-I and CG15-IIA/IIB). IMPORTANCE Klebsiella pneumoniae represents a major threat in the burden of antibiotic resistance (ABR). Available studies to explain the origin, the diversity, and the evolution of certain ABR K. pneumoniae populations have mainly been focused on a few clonal groups (CGs) using phylogenetic analysis of the core genome, the accessory genome being overlooked. Here, we provide unique insights into the phylogenetic evolution of CG14 and CG15, two poorly characterized CGs which have contributed to the global dissemination of genes responsible for resistance to first-line antibiotics such as β-lactams. 
Our results point out an independent evolution of these two CGs and highlight the existence of different subclades structured by the capsular type and the accessory genome. Moreover, the contribution of a turbulent flux of plasmids (especially multireplicon F type and Col) and adaptive traits (antibiotic resistance and metal tolerance genes) to the pangenome reflect the exposure and adaptation of K. pneumoniae under different selective pressures.}, } @article {pmid37098652, year = {2023}, author = {Cui, X and Hu, M and Yao, S and Zhang, Y and Tang, M and Liu, L and Cheng, X and Tong, C and Liu, S}, title = {BnaOmics: A comprehensive platform combining pan-genome and multi-omics data from Brassica napus.}, journal = {Plant communications}, volume = {4}, number = {5}, pages = {100609}, pmid = {37098652}, issn = {2590-3462}, mesh = {*Brassica napus/genetics ; Multiomics ; Chromosome Mapping ; Genome, Plant/genetics ; }, } @article {pmid37098416, year = {2023}, author = {Gong, H and Huang, X and Zhu, W and Chen, J and Huang, Y and Zhao, Z and Weng, J and Che, Y and Wang, J and Wang, X}, title = {Pan-genome analysis of the Burkholderia gladioli PV. Cocovenenans reveal the extent of variation in the toxigenic gene cluster.}, journal = {Food microbiology}, volume = {113}, number = {}, pages = {104249}, doi = {10.1016/j.fm.2023.104249}, pmid = {37098416}, issn = {1095-9998}, mesh = {Humans ; *Burkholderia gladioli/genetics ; Bongkrekic Acid/analysis ; Multigene Family ; *Foodborne Diseases/microbiology ; }, abstract = {Burkholderia gladioli has been reported as the pathogen responsible for cases of foodborne illness in many countries. The poisonous bongkrekic acid (BA) produced by B. gladioli was linked to a gene cluster absent in non-pathogenic strains. 
The whole genome sequence of eight bacteria strains, which were screened from the collected 175 raw food and environmental samples, were assembled and analyzed to detect a significant association of 19 protein-coding genes with the pathogenic status. Except for the common BA synthesis-related gene, several other genes, including the toxin-antitoxin genes, were also absent in the non-pathogenic strains. The bacteria strains with the BA gene cluster were found to form a single cluster in the analysis of all B. gladioli genome assemblies for the variants in the gene cluster. Divergence of this cluster was detected in the analysis for both the flanking sequences and those of the whole genome level, which indicates its complex origin. Genome recombination was found to cause a precise sequence deletion in the gene cluster region, which was found to be predominant in the non-pathogenic strains indicating the possible effect of horizontal gene transfer. Our study provided new information and resources for understanding the evolution and divergence of the B. gladioli species.}, } @article {pmid37093956, year = {2023}, author = {Baumdicker, F and Kupczok, A}, title = {Tackling the Pangenome Dilemma Requires the Concerted Analysis of Multiple Population Genetic Processes.}, journal = {Genome biology and evolution}, volume = {15}, number = {5}, pages = {}, pmid = {37093956}, issn = {1759-6653}, mesh = {Humans ; *Prokaryotic Cells ; *Gene Transfer, Horizontal ; Computer Simulation ; Mutation ; }, abstract = {The pangenome is the set of all genes present in a prokaryotic population. Most pangenomes contain many accessory genes of low and intermediate frequencies. Different population genetics processes contribute to the shape of these pangenomes, namely selection and fitness-independent processes such as gene transfer, gene loss, and migration. However, their relative importance is unknown and highly debated. 
Here, we argue that the debate around prokaryotic pangenomes arose due to the imprecise application of population genetics models. Most importantly, two different processes of horizontal gene transfer act on prokaryotic populations, which are frequently confused, despite their fundamentally different behavior. Genes acquired from distantly related organisms (termed here acquiring gene transfer) are most comparable to mutation in nucleotide sequences. In contrast, gene gain within the population (termed here spreading gene transfer) has an effect on gene frequencies that is identical to the effect of positive selection on single genes. We thus show that selection and fitness-independent population genetic processes affecting pangenomes are indistinguishable at the level of single gene dynamics. Nevertheless, population genetics processes are fundamentally different when considering the joint distribution of all accessory genes across individuals of a population. We propose that, to understand to which degree the different processes shaped pangenome diversity, the development of comprehensive models and simulation tools is mandatory. Furthermore, we need to identify summary statistics and measurable features that can distinguish between the processes, where considering the joint distribution of accessory genes across individuals of a population will be particularly relevant.}, } @article {pmid37089548, year = {2023}, author = {Zhong, H and Zheng, N and Wang, J and Zhao, S}, title = {Isolation and pan-genome analysis of Enterobacter hormaechei Z129, a ureolytic bacterium, from the rumen of dairy cow.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1169973}, pmid = {37089548}, issn = {1664-302X}, abstract = {INTRODUCTION: Urea is an important non-protein nitrogen source for ruminants. 
In the rumen, ureolytic bacteria play critical roles in urea-nitrogen metabolism, however, a few ureolytic strains have been isolated and genomically sequenced. The purpose of this study was to isolate a novel ureolytic bacterial strain from cattle rumen and characterize its genome and function.

METHODS: The ureolytic bacterium was isolated using an anaerobic medium with urea and phenol red as a screening indicator from the rumen fluid of dairy cattle. The genome of isolates was sequenced, assembled, annotated, and comparatively analyzed. The pan-genome analysis was performed using IPGA and the biochemical activity was also analyzed by test kits.

RESULTS: A gram-positive ureolytic strain was isolated. Its genome had a length of 4.52 Mbp and predicted genes of 4223. The 16S rRNA gene and genome GTDB-Tk taxonomic annotation showed that it was a novel strain of Enterobacter hormaechei, and it was named E. hormaechei Z129. The pan-genome analysis showed that Z129 had the highest identity to E. hormaechei ATCC 49162 with a genome average nucleotide identity of 98.69% and possessed 238 unique genes. Strain Z129 was the first E. hormaechei strain isolated from the rumen as we know. The functional annotation of the Z129 genome showed genes related to urea metabolism, including urea transport (urtA-urtE), nickel ion transport (ureJ, tonB, nixA, exbB, exbD, and rcnA), urease activation (ureA-ureG) and ammonia assimilation (gdhA, glnA, glnB, glnE, glnL, glsA, gltB, and gltD) were present. Genes involved in carbohydrate metabolism were also present, including starch hydrolysis (amyE), cellulose hydrolysis (celB and bglX), xylose transport (xylF-xylH) and glycolysis (pgi, pgk, fbaA, eno, pfkA, gap, pyk, gpmL). Biochemical activity analysis showed that Z129 was positive for alkaline phosphatase, leucine arylamidase, acid phosphatase, naphthol-AS-BI-phosphohydrolase, α-glucosidase, β-glucosidase, and pyrrolidone arylaminase, and had the ability to use D-ribose, L-arabinose, and D-lactose. Urea-nitrogen hydrolysis rate of Z129 reached 55.37% at 48 h of incubation.

DISCUSSION: Therefore, the isolated novel ureolytic strain E. hormaechei Z129 had diverse nitrogen and carbon metabolisms, and is a preferred model to study the urea hydrolysis mechanism in the rumen.}, } @article {pmid37084119, year = {2023}, author = {Williams, AN and Croxen, MA and Demczuk, WHB and Martin, I and Tyrrell, GJ}, title = {Genomic characterization of emerging invasive Streptococcus agalactiae serotype VIII in Alberta, Canada.}, journal = {European journal of clinical microbiology \& infectious diseases : official publication of the European Society of Clinical Microbiology}, volume = {42}, number = {6}, pages = {747-757}, pmid = {37084119}, issn = {1435-4373}, support = {RCP-19-003-MIF//Ministry of Economic Development and Trade, Government of Alberta/ ; }, mesh = {Infant, Newborn ; Humans ; Female ; Pregnancy ; Aged ; Serogroup ; *Clindamycin/therapeutic use ; Streptococcus agalactiae ; *Streptococcal Infections/microbiology ; Alberta/epidemiology ; Phylogeny ; Multilocus Sequence Typing ; Drug Resistance, Bacterial ; Anti-Bacterial Agents/pharmacology/therapeutic use ; Erythromycin/therapeutic use ; Genomics ; Microbial Sensitivity Tests ; }, abstract = {Invasive Group B Streptococcus (GBS) can infect pregnant women, neonates, and older adults. Invasive GBS serotype VIII is infrequent in Alberta; however, cases have increased in recent years. Here, genomic analysis was used to characterize fourteen adult invasive serotype VIII isolates from 2009 to 2021. Trends in descriptive clinical data and antimicrobial susceptibility results were evaluated for invasive serotype VIII isolates from Alberta. Isolate genomes were sequenced and subjected to molecular sequence typing, virulence and antimicrobial resistance gene identification, phylogenetic analysis, and pangenome determination. Multilocus sequencing typing identified eight ST42 (Clonal Complex; CC19), four ST1 (CC1), and two ST2 (CC1) profiles. 
Isolates were susceptible to penicillin, erythromycin, chloramphenicol, and clindamycin, apart from one isolate that displayed erythromycin and inducible clindamycin resistance. All isolates carried genes for peptide antibiotic resistance, three isolates for tetracycline resistance, and one for macrolide, lincosamide, and streptogramin resistance. All genomes carried targets currently being considered for protein-based vaccines (e.g., pili and/or Alpha family proteins). Overall, invasive GBS serotype VIII is emerging in Alberta, primarily due to ST42. Characterization and continued surveillance of serotype VIII will be important for outbreak prevention, informing vaccine development, and contributing to our understanding of the global epidemiology of this rare serotype.}, } @article {pmid37082513, year = {2022}, author = {Gangurde, SS and Xavier, A and Naik, YD and Jha, UC and Rangari, SK and Kumar, R and Reddy, MSS and Channale, S and Elango, D and Mir, RR and Zwart, R and Laxuman, C and Sudini, HK and Pandey, MK and Punnuri, S and Mendu, V and Reddy, UK and Guo, B and Gangarao, NVPR and Sharma, VK and Wang, X and Zhao, C and Thudi, M}, title = {Two decades of association mapping: Insights on disease resistance in major crops.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1064059}, pmid = {37082513}, issn = {1664-462X}, abstract = {Climate change across the globe has an impact on the occurrence, prevalence, and severity of plant diseases. About 30% of yield losses in major crops are due to plant diseases; emerging diseases are likely to worsen the sustainable production in the coming years. Plant diseases have led to increased hunger and mass migration of human populations in the past, thus a serious threat to global food security. Equipping the modern varieties/hybrids with enhanced genetic resistance is the most economic, sustainable and environmentally friendly solution. 
Plant geneticists have done tremendous work in identifying stable resistance in primary genepools and many times other than primary genepools to breed resistant varieties in different major crops. Over the last two decades, the availability of crop and pathogen genomes due to advances in next generation sequencing technologies improved our understanding of trait genetics using different approaches. Genome-wide association studies have been effectively used to identify candidate genes and map loci associated with different diseases in crop plants. In this review, we highlight successful examples for the discovery of resistance genes to many important diseases. In addition, major developments in association studies, statistical models and bioinformatic tools that improve the power, resolution and the efficiency of identifying marker-trait associations. Overall this review provides comprehensive insights into the two decades of advances in GWAS studies and discusses the challenges and opportunities this research area provides for breeding resistant varieties.}, } @article {pmid37074150, year = {2023}, author = {Pugh, HL and Connor, C and Siasat, P and McNally, A and Blair, JMA}, title = {E. coli ST11 (O157:H7) does not encode a functional AcrF efflux pump.}, journal = {Microbiology (Reading, England)}, volume = {169}, number = {4}, pages = {}, pmid = {37074150}, issn = {1465-2080}, support = {BB/M01116X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; MR/N013913/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Humans ; *Escherichia coli/genetics/metabolism ; Membrane Transport Proteins/genetics/metabolism ; Anti-Bacterial Agents/pharmacology/metabolism ; *Escherichia coli Proteins/genetics/metabolism ; Multidrug Resistance-Associated Proteins/metabolism ; Membrane Proteins/metabolism ; }, abstract = {Escherichia coli is a facultative anaerobe found in a wide range of environments. 
Commonly described as the laboratory workhorse, E. coli is one of the best characterized bacterial species to date, however much of our understanding comes from studies involving the laboratory strain E. coli K-12. Resistance-nodulation-division efflux pumps are found in Gram-negative bacteria and can export a diverse range of substrates, including antibiotics. E. coli K-12 has six RND pumps; AcrB, AcrD, AcrF, CusA, MdtBC and MdtF, and it is frequently reported that all E. coli strains possess these six pumps. However, this is not true of E. coli ST11, a lineage of E. coli, which is primarily composed of the highly virulent important human pathogen, E. coli O157:H7. Here we show that acrF is absent from the pangenome of ST11 and that this lineage of E. coli has a highly conserved insertion within the acrF gene, which when translated encodes 13 amino acids and two stop codons. This insertion was found to be present in 97.59 % of 1787 ST11 genome assemblies. Non-function of AcrF in ST11 was confirmed in the laboratory as complementation with acrF from ST11 was unable to restore AcrF function in E. coli K-12 substr. MG1655 ΔacrB ΔacrF. 
This shows that the complement of RND efflux pumps present in laboratory bacterial strains may not reflect the situation in virulent strains of bacterial pathogens.}, } @article {pmid37072518, year = {2023}, author = {Eisenstein, M}, title = {Every base everywhere all at once: pangenomics comes of age.}, journal = {Nature}, volume = {616}, number = {7957}, pages = {618-620}, pmid = {37072518}, issn = {1476-4687}, mesh = {*Genomics/methods/standards/trends ; Species Specificity ; *Genome/genetics ; *Genetic Variation ; }, } @article {pmid37066137, year = {2023}, author = {Garrison, E and Guarracino, A and Heumos, S and Villani, F and Bao, Z and Tattini, L and Hagmann, J and Vorbrugg, S and Marco-Sola, S and Kubica, C and Ashbrook, DG and Thorell, K and Rusholme-Pilcher, RL and Liti, G and Rudbeck, E and Nahnsen, S and Yang, Z and Moses, MN and Nobrega, FL and Wu, Y and Chen, H and de Ligt, J and Sudmant, PH and Soranzo, N and Colonna, V and Williams, RW and Prins, P}, title = {Building pangenome graphs.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.04.05.535718}, pmid = {37066137}, abstract = {Pangenome graphs can represent all variation between multiple genomes, but existing methods for constructing them are biased due to reference-guided approaches. In response, we have developed PanGenome Graph Builder (PGGB), a reference-free pipeline for constructing unbiased pangenome graphs. 
PGGB uses all-to-all whole-genome alignments and learned graph embeddings to build and iteratively refine a model in which we can identify variation, measure conservation, detect recombination events, and infer phylogenetic relationships.}, } @article {pmid37065164, year = {2023}, author = {Wan, X and Takala, TM and Huynh, VA and Ahonen, SL and Paulin, L and Björkroth, J and Sironen, T and Kant, R and Saris, P}, title = {Comparative genomics of 40 Weissella paramesenteroides strains.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1128028}, pmid = {37065164}, issn = {1664-302X}, abstract = {Weissella strains are often detected in spontaneously fermented foods. Because of their abilities to produce lactic acid and functional exopolysaccharides as well as their probiotic traits, Weissella spp. improve not only the sensorial properties but also nutritional values of the fermented food products. However, some Weissella species have been associated with human and animal diseases. In the era of vast genomic sequencing, new genomic/genome data are becoming available to the public on daily pace. Detailed genomic analyses are due to provide a full understanding of individual Weissella species. In this study, the genomes of six Weissella paramesenteroides strains were de novo sequenced. The genomes of 42 W. paramesenteroides strains were compared to discover their metabolic and functional potentials in food fermentation. Comparative genomics and metabolic pathway reconstructions revealed that W. paramesenteroides is a compact group of heterofermentative bacteria with good capacity of producing secondary metabolites and vitamin Bs. Since the strains rarely harbored plasmid DNA, they did not commonly possess the genes associated with bacteriocin production. All 42 strains were shown to bear vanT gene from the glycopeptide resistance gene cluster vanG. 
Yet none of the strains carried virulence genes.}, } @article {pmid37059810, year = {2023}, author = {Olson, ND and Wagner, J and Dwarshuis, N and Miga, KH and Sedlazeck, FJ and Salit, M and Zook, JM}, title = {Variant calling and benchmarking in an era of complete human genome sequences.}, journal = {Nature reviews. Genetics}, volume = {24}, number = {7}, pages = {464-483}, pmid = {37059810}, issn = {1471-0064}, mesh = {Humans ; *Genome, Human ; *Benchmarking ; Genomics ; Sequence Analysis, DNA ; High-Throughput Nucleotide Sequencing ; }, abstract = {Genetic variant calling from DNA sequencing has enabled understanding of germline variation in hundreds of thousands of humans. Sequencing technologies and variant-calling methods have advanced rapidly, routinely providing reliable variant calls in most of the human genome. We describe how advances in long reads, deep learning, de novo assembly and pangenomes have expanded access to variant calls in increasingly challenging, repetitive genomic regions, including medically relevant regions, and how new benchmark sets and benchmarking methods illuminate their strengths and limitations. 
Finally, we explore the possible future of more complete characterization of human genome variation in light of the recent completion of a telomere-to-telomere human genome reference assembly and human pangenomes, and we consider the innovations needed to benchmark their newly accessible repetitive regions and complex variants.}, } @article {pmid37052486, year = {2023}, author = {Miranda, RP and Turrini, PCG and Bonadio, DT and Zerillo, MM and Berselli, AP and Creste, S and Van Sluys, MA}, title = {Genome Organization of Four Brazilian Xanthomonas albilineans Strains Does Not Correlate with Aggressiveness.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0280222}, pmid = {37052486}, issn = {2165-0497}, support = {310779/2017-0//Conselho Nacional de Desenvolvimento Científico e Tecnológico (CNPq)/ ; //Conselho Nacional de Desenvolvimento Científico e Tecnológico (CNPq)/ ; Financial code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior (CAPES)/ ; 2008/52074-0//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2016/17545-8//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2018/24646-0//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2018/23646-7//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2019/05424-0//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; }, mesh = {*Genome, Bacterial ; Brazil ; Base Sequence ; Genomics ; *Xanthomonas/genetics/metabolism ; }, abstract = {An integrative approach combining genomics, transcriptomics, and cell biology is presented to address leaf scald disease, a major problem for the sugarcane industry. To gain insight into the biology of the causal agent, the complete genome sequences of four Brazilian Xanthomonas albilineans strains with differing virulence capabilities are presented and compared to the GPEPC73 reference strain and FJ1. 
Based on the aggressiveness index, different strains were compared: Xa04 and Xa11 are highly aggressive, Xa26 is intermediate, and Xa21 is the least, while, based on genome structure, Xa04 shares most of its genomic features with Xa26, and Xa11 share most of its genomic features with Xa21. In addition to presenting more clustered regularly interspaced short palindromic repeats (CRISPR) clusters, four more novel prophage insertions are present than the previously sequenced GPEPC73 and FJ1 strains. Incorporating the aggressiveness index and in vitro cell biology into these genome features indicates that disease establishment is not a result of a single determinant factor, as in most other Xanthomonas species. The Brazilian strains lack the previously described plasmids but present more prophage regions. In pairs, the most virulent and the least virulent share unique prophages. In vitro transcriptomics shed light on the 54 most highly expressed genes among the 4 strains compared to ribosomal proteins (RPs), of these, 3 outer membrane proteins. Finally, comparative albicidin inhibition rings and in vitro growth curves of the four strains also do not correlate with pathogenicity. In conclusion, the results disclose that leaf scald disease is not associated with a single shared characteristic between the most or the least pathogenic strains. IMPORTANCE An integrative approach is presented which combines genomics, transcriptomics, and cell biology to address leaf scald disease. The results presented here disclose that the disease is not associated with a single shared characteristic between the most pathogenic strains or a unique genomic pattern. Sequence data from four Brazilian strains are presented that differ in pathogenicity index: Xa04 and Xa11 are highly virulent, Xa26 is intermediate, and Xa21 is the least pathogenic strain, while, based on genome structure, Xa04 shares with Xa26, and Xa11 shares with X21 most of the genome features. 
Other than presenting more CRISPR clusters and prophages than the previously sequenced strains, the integration of aggressiveness and cell biology points out that disease establishment is not a result of a single determinant factor as in other xanthomonads.}, } @article {pmid37047101, year = {2023}, author = {Tenea, GN}, title = {Metabiotics Signature through Genome Sequencing and In Vitro Inhibitory Assessment of a Novel Lactococcus lactis Strain UTNCys6-1 Isolated from Amazonian Camu-Camu Fruits.}, journal = {International journal of molecular sciences}, volume = {24}, number = {7}, pages = {}, pmid = {37047101}, issn = {1422-0067}, support = {1034/2022//Universidad Técnica del Norte/ ; }, mesh = {Fruit/chemistry ; *Lactococcus lactis/genetics/metabolism ; RNA, Ribosomal, 16S/genetics ; Base Sequence ; *Bacteriocins/metabolism ; Anti-Bacterial Agents/metabolism ; }, abstract = {Metabiotics are the structural components of probiotic bacteria, functional metabolites, and/or signaling molecules with numerous beneficial properties. A novel Lactococcus lactis strain, UTNCys6-1, was isolated from wild Amazonian camu-camu fruits (Myrciaria dubia), and various functional metabolites with antibacterial capacity were found. The genome size is 2,226,248 base pairs, and it contains 2248 genes, 2191 protein-coding genes (CDSs), 50 tRNAs, 6 rRNAs, 1 16S rRNA, 1 23S rRNA, and 1 tmRNA. The average GC content is 34.88%. In total, 2148 proteins have been mapped to the EggNOG database. The specific annotation consisted of four incomplete prophage regions, one CRISPR-Cas array, six genomic islands (GIs), four insertion sequences (ISs), and four regions of interest (AOI regions) spanning three classes of bacteriocins (enterolysin_A, nisin_Z, and sactipeptides). Based on pangenome analysis, there were 6932 gene clusters, of which 751 (core genes) were commonly observed within the 11 lactococcal strains. 
Among them, 3883 were sample-specific genes (cloud genes) and 2298 were shell genes, indicating high genetic diversity. A sucrose transporter of the SemiSWEET family (PTS system: phosphoenolpyruvate-dependent transport system) was detected in the genome of UTNCys6-1 but not the other 11 lactococcal strains. In addition, the metabolic profile, antimicrobial susceptibility, and inhibitory activity of both protein-peptide extract (PPE) and exopolysaccharides (EPSs) against several foodborne pathogens were assessed in vitro. Furthermore, UTNCys6-1 was predicted to be a non-human pathogen that was unable to tolerate all tested antibiotics except gentamicin; metabolized several substrates; and lacks virulence factors (VFs), genes related to the production of biogenic amines, and acquired antibiotic resistance genes (ARGs). Overall, this study highlighted the potential of this strain for producing bioactive metabolites (PPE and EPSs) for agri-food and pharmaceutical industry use.}, } @article {pmid37042769, year = {2023}, author = {Ma, X and Sun, T and Zhou, J and Zhi, M and Shen, S and Wang, Y and Gu, X and Li, Z and Gao, H and Wang, P and Feng, Q}, title = {Pangenomic Study of Fusobacterium nucleatum Reveals the Distribution of Pathogenic Genes and Functional Clusters at the Subspecies and Strain Levels.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0518422}, pmid = {37042769}, issn = {2165-0497}, mesh = {*Fusobacterium nucleatum/genetics ; Phylogeny ; *Genomics ; Base Sequence ; Virulence Factors/genetics ; }, abstract = {Fusobacterium nucleatum is a prevalent periodontal pathogen and is associated with many systemic diseases. Our knowledge of the genomic characteristics and pathogenic effectors of different F. nucleatum strains is limited. In this study, we completed the whole genome assembly of the 4 F. nucleatum strains and carried out a comprehensive pangenomic study of 30 strains with their complete genome sequences. 
Phylogenetic analysis revealed that the F. nucleatum strains are mainly divided into 4 subspecies, while 1 of the sequenced strains was classified into a new subspecies. Gene composition analysis revealed that a total of 517 "core/soft-core genes" with housekeeping functions widely distributed in almost all the strains. Each subspecies had a unique gene cluster shared by strains within the subspecies. Analysis of the virulence factors revealed that many virulence factors were widely distributed across all the strains, with some present in multiple copies. Some virulence genes showed no consistent occurrence rule at the subspecies level and were specifically distributed in certain strains. The genomic islands mainly revealed strain-specific characteristics instead of subspecies level consistency, while CRISPR types and secondary metabolite biosynthetic gene clusters were identically distributed in F. nucleatum strains from the same subspecies. The variation in amino acid sites in the adhesion protein FadA did not affect the monomer and dimer 3D structures, but it may affect the binding surface and the stability of binding to host receptors. This study provides a basis for the pathogenic study of F. nucleatum at the subspecies and strain levels. IMPORTANCE We used F. nucleatum as an example to analyze the genomic characteristics of oral pathogens at the species, subspecies, and strain levels and elucidate the similarities and differences in functional genes and virulence factors among different subspecies/strains of the same oral pathogen. We believe that the unique biological characteristics of each subspecies/strain can be attributed to the differences in functional gene clusters or the presence/absence of certain virulence genes. This study showed that F. 
nucleatum strains from the same subspecies had similar functional gene compositions, CRISPR types, and secondary metabolite biosynthetic gene clusters, while pathogenic genes, such as virulence genes, antibiotic resistance genes, and GIs, had more strain level specificity. The findings of this study suggest that, for microbial pathogenicity studies, we should carefully consider the subspecies/strains being used, as different strains may vary greatly.}, } @article {pmid37037626, year = {2023}, author = {Lu, TY and Smaruj, PN and Fudenberg, G and Mancuso, N and Chaisson, MJP}, title = {The motif composition of variable number tandem repeats impacts gene expression.}, journal = {Genome research}, volume = {33}, number = {4}, pages = {511-524}, pmid = {37037626}, issn = {1549-5469}, support = {R01 HG012133/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; U24 HG007497/HG/NHGRI NIH HHS/United States ; R01 HG011649/HG/NHGRI NIH HHS/United States ; R01 GM140287/GM/NIGMS NIH HHS/United States ; }, mesh = {Humans ; *Minisatellite Repeats/genetics ; Phenotype ; Haplotypes ; Gene Expression ; *Adenosine Triphosphatases/genetics ; Ubiquitin-Protein Ligases/genetics ; }, abstract = {Understanding the impact of DNA variation on human traits is a fundamental question in human genetics. Variable number tandem repeats (VNTRs) make up ∼3% of the human genome but are often excluded from association analysis owing to poor read mappability or divergent repeat content. Although methods exist to estimate VNTR length from short-read data, it is known that VNTRs vary in both length and repeat (motif) composition. Here, we use a repeat-pangenome graph (RPGG) constructed on 35 haplotype-resolved assemblies to detect variation in both VNTR length and repeat composition. 
We align population-scale data from the Genotype-Tissue Expression (GTEx) Consortium to examine how variations in sequence composition may be linked to expression, including cases independent of overall VNTR length. We find that 9422 out of 39,125 VNTRs are associated with nearby gene expression through motif variations, of which only 23.4% are accessible from length. Fine-mapping identifies 174 genes to be likely driven by variation in certain VNTR motifs and not overall length. We highlight two genes, CACNA1C and RNF213, that have expression associated with motif variation, showing the utility of RPGG analysis as a new approach for trait association in multiallelic and highly variable loci.}, } @article {pmid37029275, year = {2023}, author = {}, title = {Tomato super-pangenome highlights the potential use of wild relatives in tomato breeding.}, journal = {Nature genetics}, volume = {55}, number = {5}, pages = {744-745}, pmid = {37029275}, issn = {1546-1718}, mesh = {*Solanum lycopersicum/genetics ; Plant Breeding ; Chromosome Mapping ; Genotype ; }, } @article {pmid37025802, year = {2023}, author = {De Mesa, CA and Mendoza, RM and Penir, SMU and de la Peña, LD and Amar, EC and Saloma, CP}, title = {Genomic analysis of Vibrio harveyi strain PH1009, a potential multi-drug resistant pathogen due to acquisition of toxin genes.}, journal = {Heliyon}, volume = {9}, number = {4}, pages = {e14926}, pmid = {37025802}, issn = {2405-8440}, abstract = {It has increasingly been observed that viral and bacterial coinfection frequently occurs among cultured shrimp and this coinfection could exacerbate the disease phenotype. Here, we describe a newly discovered bacterial strain, Vibrio harveyi PH1009 collected from Masbate Island, Philippines that was found to be co-infecting with the White Spot Syndrome virus in a sample of black tiger prawn, Penaeus monodon. The genome of V. harveyi PH1009 was sequenced, assembled, and annotated. 
Average Nucleotide identity calculation with Vibrio harveyi strains confirmed its taxonomic identity. It is a potential multi-drug and multi-heavy metal resistant strain based on the multiple antibiotic and heavy metal resistance determinants annotated on its genome. Two prophage regions were identified in its genome. One contained genes for Zona occludens toxin (Zot) and Accessory cholera toxin (Ace), essential toxins of toxigenic V. cholerae strains apart from CTX toxins. Pan-genome analysis of V. harveyi strains, including PH1009, revealed an "open" pan-genome for V. harveyi and a core genome mainly composed of genes necessary for growth and metabolism. Phylogenetic tree based on the core genome alignment revealed that PH1009 was closest to strains QT520, CAIM 1754, and 823tez1. Published virulence factors present on the strain QT520 suggest similar pathogenicity with PH1009. However, PH1009 Zot was not found on related strains but was present in strains HENC-01 and CAIM 148. Most unique genes found in the PH1009 strain were identified as hypothetical proteins. Further annotation showed that several of these hypothetical proteins were phage transposases, integrases, and transcription regulators, implying the role of bacteriophages in the distinct genomic features of the PH1009 genome. 
The PH1009 genome will serve as a valuable genomic resource for comparative genomic studies and in understanding the disease mechanism of the Vibrio harveyi species.}, } @article {pmid37024581, year = {2023}, author = {Li, N and He, Q and Wang, J and Wang, B and Zhao, J and Huang, S and Yang, T and Tang, Y and Yang, S and Aisimutuola, P and Xu, R and Hu, J and Jia, C and Ma, K and Li, Z and Jiang, F and Gao, J and Lan, H and Zhou, Y and Zhang, X and Huang, S and Fei, Z and Wang, H and Li, H and Yu, Q}, title = {Super-pangenome analyses highlight genomic diversity and structural variation across wild and cultivated tomato species.}, journal = {Nature genetics}, volume = {55}, number = {5}, pages = {852-860}, pmid = {37024581}, issn = {1546-1718}, mesh = {*Solanum lycopersicum/genetics ; Genome-Wide Association Study ; Genome, Plant/genetics ; Plant Breeding ; *Solanum/genetics ; Genomics ; }, abstract = {Effective utilization of wild relatives is key to overcoming challenges in genetic improvement of cultivated tomato, which has a narrow genetic basis; however, current efforts to decipher high-quality genomes for tomato wild species are insufficient. Here, we report chromosome-scale tomato genomes from nine wild species and two cultivated accessions, representative of Solanum section Lycopersicon, the tomato clade. Together with two previously released genomes, we elucidate the phylogeny of Lycopersicon and construct a section-wide gene repertoire. We reveal the landscape of structural variants and provide entry to the genomic diversity among tomato wild relatives, enabling the discovery of a wild tomato gene with the potential to increase yields of modern cultivated tomatoes. Construction of a graph-based genome enables structural-variant-based genome-wide association studies, identifying numerous signals associated with tomato flavor-related traits and fruit metabolites. 
The tomato super-pangenome resources will expedite biological studies and breeding of this globally important crop.}, } @article {pmid37023146, year = {2023}, author = {Hochhauser, D and Millman, A and Sorek, R}, title = {The defense island repertoire of the Escherichia coli pan-genome.}, journal = {PLoS genetics}, volume = {19}, number = {4}, pages = {e1010694}, pmid = {37023146}, issn = {1553-7404}, mesh = {*Escherichia coli/genetics ; *Genome, Bacterial/genetics ; Bacteria/genetics ; }, abstract = {It has become clear in recent years that anti-phage defense systems cluster non-randomly within bacterial genomes in so-called "defense islands". Despite serving as a valuable tool for the discovery of novel defense systems, the nature and distribution of defense islands themselves remain poorly understood. In this study, we comprehensively mapped the defense system repertoire of >1,300 strains of Escherichia coli, the most widely studied organism for phage-bacteria interactions. We found that defense systems are usually carried on mobile genetic elements including prophages, integrative conjugative elements and transposons, which preferentially integrate at several dozens of dedicated hotspots in the E. coli genome. Each mobile genetic element type has a preferred integration position but can carry a diverse variety of defensive cargo. On average, an E. coli genome has 4.7 hotspots occupied by defense system-containing mobile elements, with some strains possessing up to eight defensively occupied hotspots. Defense systems frequently co-localize with other systems on the same mobile genetic element, in agreement with the observed defense island phenomenon. Our data show that the overwhelming majority of the E. 
coli pan-immune system is carried on mobile genetic elements, explaining why the immune repertoire varies substantially between different strains of the same species.}, } @article {pmid37019751, year = {2023}, author = {Dart, E and Ahlgren, NA}, title = {New tRNA-targeting transposons that hijack phage and vesicles.}, journal = {Trends in genetics : TIG}, volume = {39}, number = {6}, pages = {433--435}, doi = {10.1016/j.tig.2023.03.004}, pmid = {37019751}, issn = {0168-9525}, mesh = {*Bacteriophages/genetics ; Gene Transfer, Horizontal/genetics ; *Cyanobacteria/genetics ; RNA, Transfer/genetics ; Genomic Islands ; }, abstract = {Genomic islands are hotspots for horizontal gene transfer (HGT) in bacteria, but, for Prochlorococcus, an abundant marine cyanobacterium, how these islands form has puzzled scientists. With the discovery of tycheposons, a new family of transposons, Hackl et al. provide evidence for elegant new mechanisms of gene rearrangement and transfer among Prochlorococcus and bacteria more broadly.}, } @article {pmid37018035, year = {2023}, author = {Muzahid, NH and Hussain, MH and Huët, MAL and Dwiyanto, J and Su, TT and Reidpath, D and Mustapha, F and Ayub, Q and Tan, HS and Rahman, S}, title = {Molecular characterization and comparative genomic analysis of Acinetobacter baumannii isolated from the community and the hospital: an epidemiological study in Segamat, Malaysia.}, journal = {Microbial genomics}, volume = {9}, number = {4}, pages = {}, pmid = {37018035}, issn = {2057-5858}, mesh = {Humans ; *Acinetobacter baumannii/genetics ; Malaysia ; Phylogeny ; Prospective Studies ; Hospitals ; Genomics ; }, abstract = {Acinetobacter baumannii is a common cause of multidrug-resistant (MDR) nosocomial infections around the world. However, little is known about the persistence and dynamics of A. baumannii in a healthy community. This study investigated the role of the community as a prospective reservoir for A. 
baumannii and explored possible links between hospital and community isolates. A total of 12 independent A. baumannii strains were isolated from human faecal samples from the community in Segamat, Malaysia, in 2018 and 2019. Another 15 were obtained in 2020 from patients at the co-located tertiary public hospital. The antimicrobial resistance profile and biofilm formation ability were analysed, and the relatedness of community and hospital isolates was determined using whole-genome sequencing (WGS). Antibiotic profile analysis revealed that 12 out of 15 hospital isolates were MDR, but none of the community isolates were MDR. However, phylogenetic analysis based on single-nucleotide polymorphisms (SNPs) and a pangenome analysis of core genes showed clustering between four community and two hospital strains. Such clustering of strains from two different settings based on their genomes suggests that these strains could persist in both. WGS revealed 41 potential resistance genes on average in the hospital strains, but fewer (n=32) were detected in the community strains. In contrast, 68 virulence genes were commonly seen in strains from both sources. This study highlights the possible transmission threat to public health posed by virulent A. 
baumannii present in the gut of asymptomatic individuals in the community.}, } @article {pmid37016310, year = {2023}, author = {Commichaux, S and Rand, H and Javkar, K and Molloy, EK and Pettengill, JB and Pightling, A and Hoffmann, M and Pop, M and Jayeola, V and Foley, S and Luo, Y}, title = {Assessment of plasmids for relating the 2020 Salmonella enterica serovar Newport onion outbreak to farms implicated by the outbreak investigation.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {165}, pmid = {37016310}, issn = {1471-2164}, support = {5U01-FD001418//Joint Institute for Food Safety and Applied Nutrition, University of Maryland/ ; }, mesh = {*Salmonella enterica ; Serogroup ; Onions/genetics ; Farms ; Phylogeny ; Plasmids/genetics ; Disease Outbreaks ; }, abstract = {BACKGROUND: The Salmonella enterica serovar Newport red onion outbreak of 2020 was the largest foodborne outbreak of Salmonella in over a decade. The epidemiological investigation suggested two farms as the likely source of contamination. However, single nucleotide polymorphism (SNP) analysis of the whole genome sequencing data showed that none of the Salmonella isolates collected from the farm regions were linked to the clinical isolates-preventing the use of phylogenetics in source identification. Here, we explored an alternative method for analyzing the whole genome sequencing data driven by the hypothesis that if the outbreak strain had come from the farm regions, then the clinical isolates would disproportionately contain plasmids found in isolates from the farm regions due to horizontal transfer.

RESULTS: SNP analysis confirmed that the clinical isolates formed a single, nearly-clonal clade with evidence for ancestry in California going back a decade. The clinical clade had a large core genome (4,399 genes) and a large and sparsely distributed accessory genome (2,577 genes, at least 64% on plasmids). At least 20 plasmid types occurred in the clinical clade, more than were found in the literature for Salmonella Newport. A small number of plasmids, 14 from 13 clinical isolates and 17 from 8 farm isolates, were found to be highly similar (> 95% identical)-indicating they might be related by horizontal transfer. Phylogenetic analysis was unable to determine the geographic origin, isolation source, or time of transfer of the plasmids, likely due to their promiscuous and transient nature. However, our resampling analysis suggested that observing a similar number and combination of highly similar plasmids in random samples of environmental Salmonella enterica within the NCBI Pathogen Detection database was unlikely, supporting a connection between the outbreak strain and the farms implicated by the epidemiological investigation.

CONCLUSION: Horizontally transferred plasmids provided evidence for a connection between clinical isolates and the farms implicated as the source of the outbreak. Our case study suggests that such analyses might add a new dimension to source tracking investigations, but highlights the need for detailed and accurate metadata, more extensive environmental sampling, and a better understanding of plasmid molecular evolution.}, } @article {pmid37016094, year = {2023}, author = {Li, W and Wang, D and Hong, X and Shi, J and Hong, J and Su, S and Loaiciga, CR and Li, J and Liang, W and Shi, J and Zhang, D}, title = {Identification and validation of new MADS-box homologous genes in 3010 rice pan-genome.}, journal = {Plant cell reports}, volume = {42}, number = {6}, pages = {975--988}, pmid = {37016094}, issn = {1432-203X}, support = {B21HJ8104//Yazhou Bay Seed Laboratory Project/ ; B14016//111 Project/ ; }, mesh = {*Genome, Plant/genetics ; *Oryza/genetics/metabolism ; MADS Domain Proteins/genetics/metabolism ; Phylogeny ; Plant Breeding ; Gene Expression Regulation, Plant/genetics ; }, abstract = {Identification and validation of ten new MADS-box homologous genes in 3010 rice pan-genome for rice breeding. The functional genome is significant for rice breeding. MADS-box genes encode transcription factors that are indispensable for rice growth and development. The reported 15,362 novel genes in the rice pan-genome (RPAN) of Asian cultivated rice accessions provided a useful gene reservoir for the identification of more MADS-box candidates to overcome the limitation for the usage of only 75 MADS-box genes identified in Nipponbare for rice breeding. Here, we report the identification and validation of ten MADS-box homologous genes in RPAN. Origin and identity analysis indicated that they are originated from different wild rice accessions and structure of motif analysis revealed high variations in their amino acid sequences. 
Phylogenetic results with 277 MADS-box genes in 41 species showed that all these ten MADS-box homologous genes belong to type I (SRF-like, M-type). Gene expression analysis confirmed the existence of these ten MADS-box genes in IRIS_313-10,394, all of them were expressed in flower tissues, and six of them were highly expressed during seed development. Altogether, we identified and validated experimentally, for the first time, ten novel MADS-box genes in RPAN, which provides new genetic sources for rice improvement.}, } @article {pmid37012375, year = {2023}, author = {von Meijenfeldt, FAB and Hogeweg, P and Dutilh, BE}, title = {A social niche breadth score reveals niche range strategies of generalists and specialists.}, journal = {Nature ecology \& evolution}, volume = {7}, number = {5}, pages = {768--781}, pmid = {37012375}, issn = {2397-334X}, mesh = {*Ecosystem ; *Ecology ; Biological Evolution ; }, abstract = {Generalists can survive in many environments, whereas specialists are restricted to a single environment. Although a classical concept in ecology, niche breadth has remained challenging to quantify for microorganisms because it depends on an objective definition of the environment. Here, by defining the environment of a microorganism as the community it resides in, we integrated information from over 22,000 environmental sequencing samples to derive a quantitative measure of the niche, which we call social niche breadth. At the level of genera, we explored niche range strategies throughout the prokaryotic tree of life. We found that social generalists include opportunists that stochastically dominate local communities, whereas social specialists are stable but low in abundance. Social generalists have a more diverse and open pan-genome than social specialists, but we found no global correlation between social niche breadth and genome size. 
Instead, we observed two distinct evolutionary strategies, whereby specialists have relatively small genomes in habitats with low local diversity, but relatively large genomes in habitats with high local diversity. Together, our analysis shines data-driven light on microbial niche range strategies.}, } @article {pmid37010293, year = {2023}, author = {Maranga, M and Szczerbiak, P and Bezshapkin, V and Gligorijevic, V and Chandler, C and Bonneau, R and Xavier, RJ and Vatanen, T and Kosciolek, T}, title = {Comprehensive Functional Annotation of Metagenomes and Microbial Genomes Using a Deep Learning-Based Method.}, journal = {mSystems}, volume = {8}, number = {2}, pages = {e0117822}, pmid = {37010293}, issn = {2379-5077}, mesh = {Humans ; Metagenome/genetics ; *Deep Learning ; Molecular Sequence Annotation ; *Microbiota/genetics ; Genome, Microbial ; }, abstract = {Comprehensive protein function annotation is essential for understanding microbiome-related disease mechanisms in the host organisms. However, a large portion of human gut microbial proteins lack functional annotation. Here, we have developed a new metagenome analysis workflow integrating de novo genome reconstruction, taxonomic profiling, and deep learning-based functional annotations from DeepFRI. This is the first approach to apply deep learning-based functional annotations in metagenomics. We validate DeepFRI functional annotations by comparing them to orthology-based annotations from eggNOG on a set of 1,070 infant metagenomes from the DIABIMMUNE cohort. Using this workflow, we generated a sequence catalogue of 1.9 million nonredundant microbial genes. The functional annotations revealed 70% concordance between Gene Ontology annotations predicted by DeepFRI and eggNOG. DeepFRI improved the annotation coverage, with 99% of the gene catalogue obtaining Gene Ontology molecular function annotations, although they are less specific than those from eggNOG. 
Additionally, we constructed pangenomes in a reference-free manner using high-quality metagenome-assembled genomes (MAGs) and analyzed the associated annotations. eggNOG annotated more genes on well-studied organisms, such as Escherichia coli, while DeepFRI was less sensitive to taxa. Further, we show that DeepFRI provides additional annotations in comparison to the previous DIABIMMUNE studies. This workflow will contribute to novel understanding of the functional signature of the human gut microbiome in health and disease as well as guiding future metagenomics studies. IMPORTANCE The past decade has seen advancement in high-throughput sequencing technologies resulting in rapid accumulation of genomic data from microbial communities. While this growth in sequence data and gene discovery is impressive, the majority of microbial gene functions remain uncharacterized. The coverage of functional information coming from either experimental sources or inferences is low. To solve these challenges, we have developed a new workflow to computationally assemble microbial genomes and annotate the genes using a deep learning-based model DeepFRI. This improved microbial gene annotation coverage to 1.9 million metagenome-assembled genes, representing 99% of the assembled genes, which is a significant improvement compared to 12% Gene Ontology term annotation coverage by commonly used orthology-based approaches. Importantly, the workflow supports pangenome reconstruction in a reference-free manner, allowing us to analyze the functional potential of individual bacterial species. 
We therefore propose this alternative approach combining deep-learning functional predictions with the commonly used orthology-based annotations as one that could help us uncover novel functions observed in metagenomic microbiome studies.}, } @article {pmid37007277, year = {2023}, author = {Heng, E and Tan, LL and Tay, DWP and Lim, YH and Yang, LK and Seow, DCS and Leong, CY and Ng, V and Ng, SB and Kanagasundaram, Y and Wong, FT and Koduru, L}, title = {Cost-effective hybrid long-short read assembly delineates alternative GC-rich Streptomyces hosts for natural product discovery.}, journal = {Synthetic and systems biotechnology}, volume = {8}, number = {2}, pages = {253--261}, pmid = {37007277}, issn = {2405-805X}, abstract = {With the advent of rapid automated in silico identification of biosynthetic gene clusters (BGCs), genomics presents vast opportunities to accelerate natural product (NP) discovery. However, prolific NP producers, Streptomyces, are exceptionally GC-rich (>80%) and highly repetitive within BGCs. These pose challenges in sequencing and high-quality genome assembly which are currently circumvented via intensive sequencing. Here, we outline a more cost-effective workflow using multiplex Illumina and Oxford Nanopore sequencing with hybrid long-short read assembly algorithms to generate high quality genomes. Our protocol involves subjecting long read-derived assemblies to up to 4 rounds of polishing with short reads to yield accurate BGC predictions. We successfully sequenced and assembled 8 GC-rich Streptomyces genomes whose lengths range from 7.1 to 12.1 Mb with a median N50 of 8.2 Mb. Taxonomic analysis revealed previous misrepresentation among these strains and allowed us to propose a potentially new species, Streptomyces sydneybrenneri. 
Further comprehensive characterization of their biosynthetic, pan-genomic and antibiotic resistance features especially for molecules derived from type I polyketide synthase (PKS) BGCs reflected their potential as alternative NP hosts. Thus, the genome assemblies and insights presented here are envisioned to serve as gateway for the scientific community to expand their avenues in NP discovery.}, } @article {pmid37003962, year = {2023}, author = {Raza, Q and Rashid, MAR and Waqas, M and Ali, Z and Rana, IA and Khan, SH and Khan, IA and Atif, RM}, title = {Genomic diversity of aquaporins across genus Oryza provides a rich genetic resource for development of climate resilient rice cultivars.}, journal = {BMC plant biology}, volume = {23}, number = {1}, pages = {172}, pmid = {37003962}, issn = {1471-2229}, mesh = {*Oryza/metabolism ; Genomics ; Stress, Physiological/genetics ; Promoter Regions, Genetic ; *Aquaporins/genetics/metabolism ; Plant Proteins/metabolism ; Gene Expression Regulation, Plant ; Phylogeny ; }, abstract = {BACKGROUND: Plant aquaporins are critical genetic players performing multiple biological functions, especially climate resilience and water-use efficiency. Their genomic diversity across genus Oryza is yet to be explored.

RESULTS: This study identified 369 aquaporin-encoding genes from 11 cultivated and wild rice species and further categorized these into four major subfamilies, among which small basic intrinsic proteins are speculated to be ancestral to all land plant aquaporins. Evolutionarily conserved motifs in peptides of aquaporins participate in transmembrane transport of materials and their relatively complex gene structures provide an evolutionary playground for regulation of genome structure and transcription. Duplication and evolution analyses revealed higher genetic conservation among Oryza aquaporins and strong purifying selections are assisting in conserving the climate resilience associated functions. Promoter analysis highlighted enrichment of gene upstream regions with cis-acting regulatory elements involved in diverse biological processes, whereas miRNA target site prediction analysis unveiled substantial involvement of osa-miR2102-3p, osa-miR2927 and osa-miR5075 in post-transcriptional regulation of gene expression patterns. Moreover, expression patterns of japonica aquaporins were significantly perturbed in response to different treatment levels of six phytohormones and four abiotic stresses, suggesting their multifarious roles in plants survival under stressed environments. Furthermore, superior haplotypes of seven conserved orthologous aquaporins for higher thousand-grain weight are reported from a gold mine of 3,010 sequenced rice pangenomes.

CONCLUSIONS: This study unveils the complete genomic atlas of aquaporins across genus Oryza and provides a comprehensive genetic resource for genomics-assisted development of climate-resilient rice cultivars.}, } @article {pmid37000493, year = {2023}, author = {Pagnossin, D and Weir, W and Smith, A and Fuentes, M and Coelho, J and Oravcova, K}, title = {Streptococcus canis genomic epidemiology reveals the potential for zoonotic transfer.}, journal = {Microbial genomics}, volume = {9}, number = {3}, pages = {}, pmid = {37000493}, issn = {2057-5858}, mesh = {Animals ; Humans ; Dogs ; Cats ; Multilocus Sequence Typing ; Phylogeny ; Genome-Wide Association Study ; *Cat Diseases ; *Dog Diseases/epidemiology ; Genomics ; Anti-Bacterial Agents/pharmacology ; }, abstract = {Streptococcus canis, a multi-host pathogen commonly isolated from dogs and cats, has been occasionally reported in severe cases of human infection. To address the gap in knowledge on its virulence and host tropism, we investigated S. canis genomic epidemiology and report the results of this analysis for the first time. We analysed 59 S. canis whole genome sequences originating from a variety of host species, comprising 39 newly sequenced isolates from UK sources, along with all (n=20) publicly available genomes. Antimicrobial resistance (AMR) phenotype was determined for all 39 available isolates. Genomes were screened for determinants of resistance and virulence. We created a core SNP phylogeny and compared strain clustering to multi-locus sequence typing (MLST) and S. canis M-like protein (SCM) typing. We investigated the dataset for signals of host adaptation using phylogenetic analysis, accessory genome clustering and pan-genome-wide association study analysis. A total of 23 % (9/39) of isolates exhibited phenotypic resistance to lincosamides, macrolides and/or tetracyclines. 
This was complemented by the identification of AMR-encoding genes in all genomes: tetracycline (tetO 14 %, 8/59; and tetM 7 %, 4/59) and lincosamide/macrolide (ermB, 7 %, 4/59). AMR was more common in human (36 %, 4/11) compared to companion animal (18 %, 5/28) isolates. We identified 19 virulence gene homologues, 14 of which were present in all strains analysed. In an S. canis strain isolated from a dog with otitis externa we identified a homologue of S. pyogenes superantigen SMEZ. The MLST and SCM typing schemes were found to be incapable of accurately representing core SNP-based genomic diversity of the S. canis population. No evidence of host adaptation was detected, suggesting the potential for inter-species transmission, including zoonotic transfer.}, } @article {pmid36993855, year = {2023}, author = {Akparov, Z and Hajiyeva, S and Abbasov, M and Kaur, S and Hamwieh, A and Alsamman, AM and Hajiyev, E and Babayeva, S and Izzatullayeva, V and Mustafayeva, Z and Mehdiyeva, S and Mustafayev, O and Shahmuradov, I and Kosarev, P and Solovyev, V and Salamov, A and Jighly, A}, title = {Two major chromosome evolution events with unrivaled conserved gene content in pomegranate.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1039211}, pmid = {36993855}, issn = {1664-462X}, abstract = {Pomegranate has a unique evolutionary history given that different cultivars have eight or nine bivalent chromosomes with possible crossability between the two classes. Therefore, it is important to study chromosome evolution in pomegranate to understand the dynamics of its population. Here, we de novo assembled the Azerbaijani cultivar "Azerbaijan guloyshasi" (AG2017; 2n = 16) and re-sequenced six cultivars to track the evolution of pomegranate and to compare it with previously published de novo assembled and re-sequenced cultivars. 
High synteny was observed between AG2017, Bhagawa (2n = 16), Tunisia (2n = 16), and Dabenzi (2n = 18), but these four cultivars diverged from the cultivar Taishanhong (2n = 18) with several rearrangements indicating the presence of two major chromosome evolution events. Major presence/absence variations were not observed as >99% of the five genomes aligned across the cultivars, while >99% of the pan-genic content was represented by Tunisia and Taishanhong only. We also revisited the divergence between soft- and hard-seeded cultivars with less structured population genomic data, compared to previous studies, to refine the selected genomic regions and detect global migration routes for pomegranate. We reported a unique admixture between soft- and hard-seeded cultivars that can be exploited to improve the diversity, quality, and adaptability of local pomegranate varieties around the world. Our study adds body knowledge to understanding the evolution of the pomegranate genome and its implications for the population structure of global pomegranate diversity, as well as planning breeding programs aiming to develop improved cultivars.}, } @article {pmid36993842, year = {2023}, author = {Carballo, J and Bellido, AM and Selva, JP and Zappacosta, D and Gallo, CA and Albertini, E and Caccamo, M and Echenique, V}, title = {From tetraploid to diploid, a pangenomic approach to identify genes lost during synthetic diploidization of Eragrostis curvula.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1133986}, pmid = {36993842}, issn = {1664-462X}, abstract = {INTRODUCTION: In Eragrostis curvula, commonly known as weeping lovegrass, a synthetic diploidization event of the facultative apomictic tetraploid Tanganyika INTA cv. originated from the sexual diploid Victoria cv. Apomixis is an asexual reproduction by seeds in which the progeny is genetically identical to the maternal plant.

METHODS: To assess the genomic changes related to ploidy and to the reproductive mode occurring during diploidization, a mapping approach was followed to obtain the first E. curvula pangenome assembly. In this way, gDNA of Tanganyika INTA was extracted and sequenced in 2x250 Illumina pair-end reads and mapped against the Victoria genome assembly. The unmapped reads were used for variant calling, while the mapped reads were assembled using Masurca software.

RESULTS: The length of the assembly was 28,982,419 bp distributed in 18,032 contigs, and the variable genes annotated in these contigs rendered 3,952 gene models. Functional annotation of the genes showed that the reproductive pathway was differentially enriched. PCR amplification in gDNA and cDNA of Tanganyika INTA and Victoria was conducted to validate the presence/absence variation in five genes related to reproduction and ploidy. The polyploid nature of the Tanganyika INTA genome was also evaluated through the variant calling analysis showing the single nucleotide polymorphism (SNP) coverage and allele frequency distribution with a segmental allotetraploid pairing behavior.

DISCUSSION: The results presented here suggest that the genes were lost in Tanganyika INTA during the diploidization process that was conducted to suppress the apomictic pathway, affecting severely the fertility of Victoria cv.}, } @article {pmid36991151, year = {2023}, author = {Zhen, C and Chen, XK and Ge, XF and Liu, WZ}, title = {Streptomonospora mangrovi sp. nov., isolated from mangrove soil showing similar metabolic capabilities, but distinct secondary metabolites profiles.}, journal = {Archives of microbiology}, volume = {205}, number = {4}, pages = {148}, pmid = {36991151}, issn = {1432-072X}, support = {32202121//National Natural Science Foundation of China/ ; }, mesh = {Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Soil ; Fatty Acids/analysis ; DNA, Bacterial/genetics ; Soil Microbiology ; Bacterial Typing Techniques ; Diaminopimelic Acid/analysis ; Sequence Analysis, DNA ; *Actinomycetales/genetics ; }, abstract = {A novel actinomycete, designated strain S1-112[T], was isolated from a mangrove soil sample from Hainan, China, and characterized using a polyphasic approach. Strain S1-112[T] showed the highest similarity of the 16S rRNA gene to Streptomonospora nanhaiensis 12A09[T] (99.24%). Their close relationship was further supported by phylogenetic analyses, which placed these two strains within a stable clade. The highest values of digital DNA-DNA hybridization (dDDH, 41.4%) and average nucleotide identity (ANI, 90.55%) were detected between strain S1-112[T] and Streptomonospora halotolerans NEAU-Jh2-17[T]. Genotypic and phenotypic characteristics demonstrated that strain S1-112[T] could be distinguished from its closely related relatives. We also profiled the pan-genome and metabolic features of genomic assemblies of strains belonging to the genus Streptomonospora, indicating similar functional capacities and metabolic activities. However, all of these strains showed promising potential for producing diverse types of secondary metabolites. 
In conclusion, strain S1-112[T] represents a novel species of the genus Streptomonospora, for which the name Streptomonospora mangrovi sp. nov. was proposed. The type strain is S1-112[T] (= JCM 34292[T]).}, } @article {pmid36982787, year = {2023}, author = {Karetnikov, DI and Vasiliev, GV and Toshchakov, SV and Shmakov, NA and Genaev, MA and Nesterov, MA and Ibragimova, SM and Rybakov, DA and Gavrilenko, TA and Salina, EA and Patrushev, MV and Kochetov, AV and Afonnikov, DA}, title = {Analysis of Genome Structure and Its Variations in Potato Cultivars Grown in Russia.}, journal = {International journal of molecular sciences}, volume = {24}, number = {6}, pages = {}, pmid = {36982787}, issn = {1422-0067}, support = {075-15-2019-1662//The Ministry of Education and Science of the Russian Federation/ ; }, mesh = {*Solanum tuberosum/genetics ; DNA Copy Number Variations ; Genome, Plant ; Genomics ; Tetraploidy ; }, abstract = {Solanum tuberosum L. (common potato) is one of the most important crops produced almost all over the world. Genomic sequences of potato opens the way for studying the molecular variations related to diversification. We performed a reconstruction of genomic sequences for 15 tetraploid potato cultivars grown in Russia using short reads. Protein-coding genes were identified; conserved and variable parts of pan-genome and the repertoire of the NBS-LRR genes were characterized. For comparison, we used additional genomic sequences for twelve South American potato accessions, performed analysis of genetic diversity, and identified the copy number variations (CNVs) in two these groups of potato. Genomes of Russian potato cultivars were more homogeneous by CNV characteristics and have smaller maximum deletion size in comparison with South American ones. Genes with different CNV occurrences in two these groups of potato accessions were identified. 
We revealed genes of immune/abiotic stress response, transport and five genes related to tuberization and photoperiod control among them. Four genes related to tuberization and photoperiod were investigated in potatoes previously (phytochrome A among them). A novel gene, homologous to the poly(ADP-ribose) glycohydrolase (PARG) of Arabidopsis, was identified that may be involved in circadian rhythm control and contribute to the acclimatization processes of Russian potato cultivars.}, } @article {pmid36981047, year = {2023}, author = {Wartha, S and Bretschneider, N and Dangel, A and Hobmaier, B and Hörmansdorfer, S and Huber, I and Murr, L and Pavlovic, M and Sprenger, A and Wenning, M and Alter, T and Messelhäußer, U}, title = {Genetic Characterization of Listeria from Food of Non-Animal Origin Products and from Producing and Processing Companies in Bavaria, Germany.}, journal = {Foods (Basel, Switzerland)}, volume = {12}, number = {6}, pages = {}, pmid = {36981047}, issn = {2304-8158}, abstract = {Reported cases of listeriosis from food of non-animal origin (FNAO) are increasing. In order to assess the risk of exposure to Listeria monocytogenes from FNAO, the genetic characterization of the pathogen in FNAO products and in primary production and processing plants needs to be investigated. For this, 123 samples of fresh and frozen soft fruit and 407 samples of 39 plants in Bavaria, Germany that produce and process FNAO were investigated for Listeria contamination. As a result, 64 Listeria spp. isolates were detected using ISO 11290-1:2017. Environmental swabs and water and food samples were investigated. L. seeligeri (36/64, 56.25%) was the most frequently identified species, followed by L. monocytogenes (8/64, 12.50%), L. innocua (8/64, 12.50%), L. ivanovii (6/64, 9.38%), L. newyorkensis (5/64, 7.81%), and L. grayi (1/64, 1.56%). 
Those isolates were subsequently sequenced by whole-genome sequencing and subjected to pangenome analysis to retrieve data on the genotype, serotype, antimicrobial resistance (AMR), and virulence markers. Eight out of sixty-four Listeria spp. isolates were identified as L. monocytogenes. The serogroup analysis detected that 62.5% of the L. monocytogenes isolates belonged to serogroup IIa (1/2a and 3a) and 37.5% to serogroup IVb (4b, 4d, and 4e). Furthermore, the MLST (multilocus sequence typing) analysis of the eight detected L. monocytogenes isolates identified seven different sequence types (STs) and clonal complexes (CCs), i.e., ST1/CC1, ST2/CC2, ST6/CC6, ST7/CC7, ST21/CC21, ST504/CC475, and ST1413/CC739. The core genome MLST analysis also showed high allelic differences and suggests plant-specific isolates. Regarding the AMR, we detected phenotypic resistance against benzylpenicillin, fosfomycin, and moxifloxacin in all eight L. monocytogenes isolates. Moreover, virulence factors, such as prfA, hly, plcA, plcB, hpt, actA, inlA, inlB, and mpl, were identified in pathogenic and nonpathogenic Listeria species. The significance of L. monocytogenes in FNAO is growing and should receive increasing levels of attention.}, } @article {pmid36980919, year = {2023}, author = {Weltzer, ML and Wall, D}, title = {Social Diversification Driven by Mobile Genetic Elements.}, journal = {Genes}, volume = {14}, number = {3}, pages = {}, pmid = {36980919}, issn = {2073-4425}, support = {R35 GM140886/GM/NIGMS NIH HHS/United States ; }, mesh = {*Bacteria/genetics ; *Myxococcales/genetics ; Biological Evolution ; Genome ; Interspersed Repetitive Sequences/genetics ; }, abstract = {Social diversification in microbes is an evolutionary process where lineages bifurcate into distinct populations that cooperate with themselves but not with other groups. In bacteria, this is frequently driven by horizontal transfer of mobile genetic elements (MGEs). 
Here, the resulting acquisition of new genes changes the recipient's social traits and consequently how they interact with kin. These changes include discriminating behaviors mediated by newly acquired effectors. Since the producing cell is protected by cognate immunity factors, these selfish elements benefit from selective discrimination against recent ancestors, thus facilitating their proliferation and benefiting the host. Whether social diversification benefits the population at large is less obvious. The widespread use of next-generation sequencing has recently provided new insights into population dynamics in natural habitats and the roles MGEs play. MGEs belong to accessory genomes, which often constitute the majority of the pangenome of a taxon, and contain most of the kin-discriminating loci that fuel rapid social diversification. We further discuss mechanisms of diversification and its consequences to populations and conclude with a case study involving myxobacteria.}, } @article {pmid36979037, year = {2023}, author = {Sedeek, AM and Salah, I and Kamel, HL and Soltan, MA and Nour, E and Alshammari, A and Riaz Rajoka, MS and Elsayed, TR}, title = {Genome-Based Analysis of the Potential Bioactivity of the Terrestrial Streptomyces vinaceusdrappus Strain AC-40.}, journal = {Biology}, volume = {12}, number = {3}, pages = {}, pmid = {36979037}, issn = {2079-7737}, abstract = {Streptomyces are factories of antimicrobial secondary metabolites. We isolated a Streptomyces species associated with the Pelargonium graveolens rhizosphere. Its total metabolic extract exhibited potent antibacterial and antifungal properties against all the tested pathogenic microbes. Whole genome sequencing and genome analyses were performed to take a look at its main characteristics and to reconstruct the metabolic pathways that can be associated with biotechnologically useful traits. AntiSMASH was used to identify the secondary metabolite gene clusters. 
In addition, we searched for known genes associated with plant growth-promoting characteristics. Finally, a comparative and pan-genome analysis with three closely related genomes was conducted. It was identified as Streptomyces vinaceusdrappus strain AC-40. Genome mining indicated the presence of several secondary metabolite gene clusters. Some of them are identical or homologs to gene clusters of known metabolites with antimicrobial, antioxidant, and other bioactivities. It also showed the presence of several genes related to plant growth promotion traits. The comparative genome analysis indicated that at least five of these gene clusters are highly conserved through rochei group genomes. The genotypic and phenotypic characteristics of S. vinaceusdrappus strain AC-40 indicate that it is a promising source of beneficial secondary metabolites with pharmaceutical and biotechnological applications.}, } @article {pmid36975929, year = {2023}, author = {Lu, W and Zhang, T and Zhang, Q and Zhang, N and Jia, L and Ma, S and Xia, Q}, title = {FibH Gene Complete Sequences (FibHome) Revealed Silkworm Pedigree.}, journal = {Insects}, volume = {14}, number = {3}, pages = {}, pmid = {36975929}, issn = {2075-4450}, support = {32122084//National Natural Science Foundation of China/ ; cstc2020jcyj-bshX0092//Chongqing Natural Science Foundation/ ; cstc2021ycjh-bgzxm0005//Chongqing Natural Science Foundation/ ; SWU120012//PhD Start-up Foundation of Southwest University/ ; SWU-KT22042//Fundamental Research Funds for Central Universities/ ; }, abstract = {The highly repetitive and variable fibroin heavy chain (FibH) gene can be used as a silkworm identification; however, only a few complete FibH sequences are known. In this study, we extracted and examined 264 FibH gene complete sequences (FibHome) from a high-resolution silkworm pan-genome. The average FibH lengths of the wild silkworm, local, and improved strains were 19,698 bp, 16,427 bp, and 15,795 bp, respectively. 
All FibH sequences had a conserved 5' and 3' terminal non-repetitive (5' and 3' TNR, 99.74% and 99.99% identity, respectively) sequence and a variable repetitive core (RC). The RCs differed greatly, but they all shared the same motif. During domestication or breeding, the FibH gene mutated with hexanucleotide (GGTGCT) as the core unit. Numerous variations existed that were not unique to wild and domesticated silkworms. However, the transcriptional factor binding sites, such as fibroin modulator-binding protein, were highly conserved and had 100% identity in the FibH gene's intron and upstream sequences. The local and improved strains with the same FibH gene were divided into four families using this gene as a marker. Family I contained a maximum of 62 strains with the optional FibH (Opti-FibH, 15,960 bp) gene. This study provides new insights into FibH variations and silkworm breeding.}, } @article {pmid36969737, year = {2022}, author = {Baaijens, JA and Bonizzoni, P and Boucher, C and Della Vedova, G and Pirola, Y and Rizzi, R and Sirén, J}, title = {Computational graph pangenomics: a tutorial on data structures and their applications.}, journal = {Natural computing}, volume = {21}, number = {1}, pages = {81--108}, pmid = {36969737}, issn = {1567-7818}, support = {R01 AI141810/AI/NIAID NIH HHS/United States ; R01 HG011392/HG/NHGRI NIH HHS/United States ; }, abstract = {Computational pangenomics is an emerging research field that is changing the way computer scientists are facing challenges in biological sequence analysis. In past decades, contributions from combinatorics, stringology, graph theory and data structures were essential in the development of a plethora of software tools for the analysis of the human genome. These tools allowed computational biologists to approach ambitious projects at population scale, such as the 1000 Genomes Project. 
A major contribution of the 1000 Genomes Project is the characterization of a broad spectrum of genetic variations in the human genome, including the discovery of novel variations in the South Asian, African and European populations-thus enhancing the catalogue of variability within the reference genome. Currently, the need to take into account the high variability in population genomes as well as the specificity of an individual genome in a personalized approach to medicine is rapidly pushing the abandonment of the traditional paradigm of using a single reference genome. A graph-based representation of multiple genomes, or a graph pangenome, is replacing the linear reference genome. This means completely rethinking well-established procedures to analyze, store, and access information from genome representations. Properly addressing these challenges is crucial to face the computational tasks of ambitious healthcare projects aiming to characterize human diversity by sequencing 1M individuals (Stark et al. 2019). This tutorial aims to introduce readers to the most recent advances in the theory of data structures for the representation of graph pangenomes. We discuss efficient representations of haplotypes and the variability of genotypes in graph pangenomes, and highlight applications in solving computational problems in human and microbial (viral) pangenomes.}, } @article {pmid36968469, year = {2023}, author = {Rehman, MNU and Dawar, FU and Zeng, J and Fan, L and Feng, W and Wang, M and Yang, N and Guo, G and Zheng, J}, title = {Complete genome sequence analysis of Edwardsiella tarda SC002 from hatchlings of Siamese crocodile.}, journal = {Frontiers in veterinary science}, volume = {10}, number = {}, pages = {1140655}, pmid = {36968469}, issn = {2297-1769}, abstract = {Edwardsiella tarda is a Gram-negative, facultative anaerobic rod-shaped bacterium and the causative agent of the systemic disease "Edwardsiellosis". 
It is commonly prevalent in aquatic organisms with subsequent economic loss and hence has attracted increasing attention from researchers. In this study, we investigated the complete genome sequence of a highly virulent isolate Edwardsiella tarda SC002 isolated from hatchlings of the Siamese crocodile. The genome of SC002 consisted of one circular chromosome of length 3,662,469 bp with a 57.29% G+C content and four novel plasmids. A total of 3,734 protein-coding genes, 12 genomic islands (GIs), 7 prophages, 48 interspersed repeat sequences, 248 tandem repeat sequences, a CRISPR component with a total length of 175 bp, and 171 ncRNAs (tRNA = 106, sRNA = 37, and rRNA = 28) were predicted. In addition, the coding genes of assembled genome were successfully annotated against eight general databases (NR = 3,618/3,734, COG = 2,947/3,734, KEGG = 3,485/3,734, SWISS-PROT = 2,787/3,734, GO = 2,648/3,734, Pfam = 2,648/3,734, CAZy = 130/3,734, and TCDB = 637/3,734) and four pathogenicity-related databases (ARDB = 11/3,734, CARD = 142/3,734, PHI = 538/3,734, and VFDB = 315/3,734). Pan-genome and comparative genome analyses of the complete sequenced genomes confirmed their evolutionary relationships. The present study confirmed that E. tarda SC002 is a potential pathogen bearing a bulk amount of antibiotic resistance, virulence, and pathogenic genes and its open pan-genome may enhance its host range in the future.}, } @article {pmid36968185, year = {2023}, author = {Zhou, H and Yan, F and Hao, F and Ye, H and Yue, M and Woeste, K and Zhao, P and Zhang, S}, title = {Pan-genome and transcriptome analyses provide insights into genomic variation and differential gene expression profiles related to disease resistance and fatty acid biosynthesis in eastern black walnut (Juglans nigra).}, journal = {Horticulture research}, volume = {10}, number = {3}, pages = {uhad015}, pmid = {36968185}, issn = {2662-6810}, abstract = {Walnut (Juglans) species are used as nut crops worldwide. 
Eastern black walnut (EBW, Juglans nigra), a diploid, horticultural important woody species is native to much of eastern North America. Although it is highly valued for its wood and nut, there are few resources for understanding EBW genetics. Here, we present a high-quality genome assembly of J. nigra based on Illumina, Pacbio, and Hi-C technologies. The genome size was 540.8 Mb, with a scaffold N50 size of 35.1 Mb, and 99.0% of the assembly was anchored to 16 chromosomes. Using this genome as a reference, the resequencing of 74 accessions revealed the effective population size of J. nigra declined during the glacial maximum. A single whole-genome duplication event was identified in the J. nigra genome. Large syntenic blocks among J. nigra, Juglans regia, and Juglans microcarpa predominated, but inversions of more than 600 kb were identified. By comparing the EBW genome with those of J. regia and J. microcarpa, we detected InDel sizes of 34.9 Mb in J. regia and 18.3 Mb in J. microcarpa, respectively. Transcriptomic analysis of differentially expressed genes identified five presumed NBS-LRR (NUCLEOTIDE BINDING SITE-LEUCINE-RICH REPEAT) genes were upregulated during the development of walnut husks and shells compared to developing embryos. We also identified candidate genes with essential roles in seed oil synthesis, including FAD (FATTY ACID DESATURASE) and OLE (OLEOSIN). 
Our work advances the understanding of fatty acid bioaccumulation and disease resistance in nut crops, and also provides an essential resource for conducting genomics-enabled breeding in walnut.}, } @article {pmid36966465, year = {2023}, author = {Velt, A and Frommer, B and Blanc, S and Holtgräwe, D and Duchêne, É and Dumas, V and Grimplet, J and Hugueney, P and Kim, C and Lahaye, M and Matus, JT and Navarro-Payá, D and Orduña, L and Tello-Ruiz, MK and Vitulo, N and Ware, D and Rustenholz, C}, title = {An improved reference of the grapevine genome reasserts the origin of the PN40024 highly homozygous genotype.}, journal = {G3 (Bethesda, Md.)}, volume = {13}, number = {5}, pages = {}, pmid = {36966465}, issn = {2160-1836}, support = {P30 CA045508/CA/NCI NIH HHS/United States ; }, mesh = {*Genome, Plant ; Genotype ; Chromosome Mapping ; Base Sequence ; Molecular Sequence Annotation ; *Vitis/genetics ; }, abstract = {The genome sequence of the diploid and highly homozygous Vitis vinifera genotype PN40024 serves as the reference for many grapevine studies. Despite several improvements to the PN40024 genome assembly, its current version PN12X.v2 is quite fragmented and only represents the haploid state of the genome with mixed haplotypes. In fact, being nearly homozygous, this genome contains several heterozygous regions that are yet to be resolved. Taking the opportunity of improvements that long-read sequencing technologies offer to fully discriminate haplotype sequences, an improved version of the reference, called PN40024.v4, was generated. Through incorporating long genomic sequencing reads to the assembly, the continuity of the 12X.v2 scaffolds was highly increased with a total number decreasing from 2,059 to 640 and a reduction in N bases of 88%. Additionally, the full alternative haplotype sequence was built for the first time, the chromosome anchoring was improved and the number of unplaced scaffolds was reduced by half. 
To obtain a high-quality gene annotation that outperforms previous versions, a liftover approach was complemented with an optimized annotation workflow for Vitis. Integration of the gene reference catalogue and its manual curation have also assisted in improving the annotation, while defining the most reliable estimation of 35,230 genes to date. Finally, we demonstrated that PN40024 resulted from 9 selfings of cv. "Helfensteiner" (cross of cv. "Pinot noir" and "Schiava grossa") instead of a single "Pinot noir". These advances will help maintain the PN40024 genome as a gold-standard reference, also contributing toward the eventual elaboration of the grapevine pangenome.}, } @article {pmid36966359, year = {2023}, author = {Yu, Z and Chen, Y and Zhou, Y and Zhang, Y and Li, M and Ouyang, Y and Chebotarov, D and Mauleon, R and Zhao, H and Xie, W and McNally, KL and Wing, RA and Guo, W and Zhang, J}, title = {Rice Gene Index: A comprehensive pan-genome database for comparative and functional genomics of Asian rice.}, journal = {Molecular plant}, volume = {16}, number = {5}, pages = {798--801}, doi = {10.1016/j.molp.2023.03.012}, pmid = {36966359}, issn = {1752-9867}, mesh = {*Oryza/genetics ; Genomics ; Genome, Plant/genetics ; }, } @article {pmid36961900, year = {2023}, author = {Rubio, A and Sprang, M and Garzón, A and Moreno-Rodriguez, A and Pachón-Ibáñez, ME and Pachón, J and Andrade-Navarro, MA and Pérez-Pulido, AJ}, title = {Analysis of bacterial pangenomes reduces CRISPR dark matter and reveals strong association between membranome and CRISPR-Cas systems.}, journal = {Science advances}, volume = {9}, number = {12}, pages = {eadd8911}, pmid = {36961900}, issn = {2375-2548}, mesh = {*CRISPR-Cas Systems/genetics ; Bacteria/genetics ; Genome, Bacterial ; *Bacteriophages/genetics ; }, abstract = {CRISPR-Cas systems are prokaryotic acquired immunity mechanisms, which are found in 40% of bacterial genomes. 
They prevent viral infections through small DNA fragments called spacers. However, the vast majority of these spacers have not yet been associated with the virus they recognize, and it has been named CRISPR dark matter. By analyzing the spacers of tens of thousands of genomes from six bacterial species, we have been able to reduce the CRISPR dark matter from 80% to as low as 15% in some of the species. In addition, we have observed that, when a genome presents CRISPR-Cas systems, this is accompanied by particular sets of membrane proteins. Our results suggest that when bacteria present membrane proteins that make it compete better in its environment and these proteins are, in turn, receptors for specific phages, they would be forced to acquire CRISPR-Cas.}, } @article {pmid36961866, year = {2023}, author = {Matlock, W and Lipworth, S and Chau, KK and AbuOun, M and Barker, L and Kavanagh, J and Andersson, M and Oakley, S and Morgan, M and Crook, DW and Read, DS and Anjum, M and Shaw, LP and Stoesser, N and others}, title = {Enterobacterales plasmid sharing amongst human bloodstream infections, livestock, wastewater, and waterway niches in Oxfordshire, UK.}, journal = {eLife}, volume = {12}, number = {}, pages = {}, doi = {10.7554/eLife.85302}, pmid = {36961866}, issn = {2050-084X}, support = {MRF_MRF-145-0004-TPG-AVISO/MRF/MRF/United Kingdom ; MRF-145-0004-TPG-AVISO/MRF/MRF/United Kingdom ; }, abstract = {Plasmids enable the dissemination of antimicrobial resistance (AMR) in common Enterobacterales pathogens, representing a major public health challenge. However, the extent of plasmid sharing and evolution between Enterobacterales causing human infections and other niches remains unclear, including the emergence of resistance plasmids. Dense, unselected sampling is highly relevant to developing our understanding of plasmid epidemiology and designing appropriate interventions to limit the emergence and dissemination of plasmid-associated AMR. 
We established a geographically and temporally restricted collection of human bloodstream infection (BSI)-associated, livestock-associated (cattle, pig, poultry, and sheep faeces, farm soils) and wastewater treatment work (WwTW)-associated (influent, effluent, waterways upstream/downstream of effluent outlets) Enterobacterales. Isolates were collected between 2008-2020 from sites <60km apart in Oxfordshire, UK. Pangenome analysis of plasmid clusters revealed shared 'backbones', with phylogenies suggesting an intertwined ecology where well-conserved plasmid backbones carry diverse accessory functions, including AMR genes. Many plasmid 'backbones' were seen across species and niches, raising the possibility that plasmid movement between these followed by rapid accessory gene change could be relatively common. Overall, the signature of identical plasmid sharing is likely to be a highly transient one, implying that plasmid movement might be occurring at greater rates than previously estimated, raising a challenge for future genomic One Health studies.}, } @article {pmid36961505, year = {2023}, author = {Delgado-Blas, JF and Ovejero, CM and David, S and Serna, C and Pulido-Vadillo, M and Montero, N and Aanensen, DM and Abadia-Patiño, L and Gonzalez-Zorn, B}, title = {Global scenario of the RmtE pan-aminoglycoside-resistance mechanism: emergence of the rmtE4 gene in South America associated with a hospital-related IncL plasmid.}, journal = {Microbial genomics}, volume = {9}, number = {3}, pages = {}, pmid = {36961505}, issn = {2057-5858}, mesh = {Aminoglycosides/pharmacology ; Plasmids/genetics ; Hospitals ; Animals ; Venezuela ; *Klebsiella/isolation & purification ; Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/genetics ; Phylogeny ; }, abstract = {Antimicrobial resistance (AMR) mechanisms, especially those conferring resistance to critically important antibiotics, are a great concern for public health. 
16S rRNA methyltransferases (16S-RMTases) abolish the effectiveness of most clinically used aminoglycosides, but some of them are considered sporadic, such as RmtE. The main goals of this work were the genomic analysis of bacteria producing 16S-RMTases from a 'One Health' perspective in Venezuela, and the study of the epidemiological and evolutionary scenario of RmtE variants and their related mobile genetic elements (MGEs) worldwide. A total of 21 samples were collected in 2014 from different animal and environmental sources in the Cumaná region (Venezuela). Highly aminoglycoside-resistant Enterobacteriaceae isolates were selected, identified and screened for 16S-RMTase genes. Illumina and Nanopore whole-genome sequencing data were combined to obtain hybrid assemblies and analyse their sequence type, resistome, plasmidome and pan-genome. Genomic collections of rmtE variants and their associated MGEs were generated to perform epidemiological and phylogenetic analyses. A single 16S-RMTase, the novel RmtE4, was identified in five Klebsiella isolates from wastewater samples of Cumaná. This variant possessed three amino acid modifications with respect to RmtE1-3 (Asn152Asp, Val216Ile and Lys267Ile), representing the most genetic distant among all known and novel variants described in this work, and the second most prevalent. rmtE variants were globally spread, and their geographical distribution was determined by the associated MGEs and the carrying bacterial species. Thus, rmtE4 was found to be confined to Klebsiella isolates from South America, where it was closely related to ISVsa3 and an uncommon IncL plasmid related with hospital environments. This work uncovered the global scenario of RmtE and the existence of RmtE4, which could potentially emerge from South America. 
Surveillance and control measures should be developed based on these findings in order to prevent the dissemination of this AMR mechanism and preserve public health worldwide.}, } @article {pmid36958270, year = {2023}, author = {Botelho, J and Tüffers, L and Fuss, J and Buchholz, F and Utpatel, C and Klockgether, J and Niemann, S and Tümmler, B and Schulenburg, H}, title = {Phylogroup-specific variation shapes the clustering of antimicrobial resistance genes and defence systems across regions of genome plasticity in Pseudomonas aeruginosa.}, journal = {EBioMedicine}, volume = {90}, number = {}, pages = {104532}, pmid = {36958270}, issn = {2352-3964}, mesh = {Humans ; *Pseudomonas aeruginosa/genetics ; *Genome, Bacterial ; Anti-Bacterial Agents ; Drug Resistance, Bacterial/genetics ; Phylogeny ; Cluster Analysis ; }, abstract = {BACKGROUND: Pseudomonas aeruginosa is an opportunistic pathogen consisting of three phylogroups (hereafter named A, B, and C). Here, we assessed phylogroup-specific evolutionary dynamics across available and also new P. aeruginosa genomes.

METHODS: In this genomic analysis, we first generated new genome assemblies for 18 strains of the major P. aeruginosa clone type (mPact) panel, comprising a phylogenetically diverse collection of clinical and environmental isolates for this species. Thereafter, we combined these new genomes with 1991 publicly available P. aeruginosa genomes for a phylogenomic and comparative analysis. We specifically explored to what extent antimicrobial resistance (AMR) genes, defence systems, and virulence genes vary in their distribution across regions of genome plasticity (RGPs) and "masked" (RGP-free) genomes, and to what extent this variation differs among the phylogroups.

FINDINGS: We found that members of phylogroup B possess larger genomes, contribute a comparatively larger number of pangenome families, and show lower abundance of CRISPR-Cas systems. Furthermore, AMR and defence systems are pervasive in RGPs and integrative and conjugative/mobilizable elements (ICEs/IMEs) from phylogroups A and B, and the abundance of these cargo genes is often significantly correlated. Moreover, inter- and intra-phylogroup interactions occur at the accessory genome level, suggesting frequent recombination events. Finally, we provide here the mPact panel of diverse P. aeruginosa strains that may serve as a valuable reference for functional analyses.

INTERPRETATION: Altogether, our results highlight distinct pangenome characteristics of the P. aeruginosa phylogroups, which are possibly influenced by variation in the abundance of CRISPR-Cas systems and are shaped by the differential distribution of other defence systems and AMR genes.

FUNDING: German Science Foundation, Max-Planck Society, Leibniz ScienceCampus Evolutionary Medicine of the Lung, BMBF program Medical Infection Genomics, Kiel Life Science Postdoc Award.}, } @article {pmid36952017, year = {2023}, author = {Boden, SA and McIntosh, RA and Uauy, C and Krattinger, SG and Dubcovsky, J and Rogers, WJ and Xia, XC and Badaeva, ED and Bentley, AR and Brown-Guedira, G and Caccamo, M and Cattivelli, L and Chhuneja, P and Cockram, J and Contreras-Moreira, B and Dreisigacker, S and Edwards, D and González, FG and Guzmán, C and Ikeda, TM and Karsai, I and Nasuda, S and Pozniak, C and Prins, R and Sen, TZ and Silva, P and Simkova, H and Zhang, Y and others}, title = {Updated guidelines for gene nomenclature in wheat.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {136}, number = {4}, pages = {72}, pmid = {36952017}, issn = {1432-2242}, support = {FT210100810//Australian Research Council/ ; DP210103744//Australian Research Council/ ; DP210100296//Australian Research Council/ ; DP200100762//Australian Research Council/ ; BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/OS/NW/000016/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P010741/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 2022-68013-36439//National Institute of Food and Agriculture/ ; 2022-68013-36439//National Institute of Food and Agriculture/ ; 03/A215//UNCPBA/ ; CerealMed//Partnership for Research and Innovation in the Mediterranean Area/ ; INTA-PD-E3-I060//Instituto Nacional de Investigacion Agropecuaria, Uruguay/ ; RYC-2017-21891//Ministerio de Ciencia e Innovación/ ; GINOP-2.3.2-15-2016-00029//Innovation and Technology Ministry/ ; Diversity//Genome Canada/ ; Domestication//Genome Canada/ ; Discovery//Genome Canada/ ; Delivery//Genome Canada/ ; CZ.02.1.01/0.0/0.0/16_019/0000827//European Regional Development Fund/ ; 
PCI2019-103526//Programa Estatal de I+D+i Orientada a los Retos de la Sociedad/ ; PICT 2019-03256//Agencia Nacional de Promoción de la Investigacion/ ; 2030-21000-024-00D//Agricultural Research Service/ ; INIA_L1_CS_39//Instituto Nacional de Investigación Agropecuaria/ ; INIA_L1_CS_35//Instituto Nacional de Investigación Agropecuaria/ ; PR_FSA_2009_1_1369//Agencia Nacional de Investigación e Innovación/ ; FSA_1_2013_1_12980//Agencia Nacional de Investigación e Innovación/ ; FSA_1_2018_1_152918//Agencia Nacional de Investigación e Innovación/ ; }, mesh = {*Triticum/genetics ; *Plant Breeding ; Phenotype ; Genes, Plant ; Edible Grain/genetics ; }, abstract = {Here, we provide an updated set of guidelines for naming genes in wheat that has been endorsed by the wheat research community. The last decade has seen a proliferation in genomic resources for wheat, including reference- and pan-genome assemblies with gene annotations, which provide new opportunities to detect, characterise, and describe genes that influence traits of interest. The expansion of genetic information has supported growth of the wheat research community and catalysed strong interest in the genes that control agronomically important traits, such as yield, pathogen resistance, grain quality, and abiotic stress tolerance. To accommodate these developments, we present an updated set of guidelines for gene nomenclature in wheat. These guidelines can be used to describe loci identified based on morphological or phenotypic features or to name genes based on sequence information, such as similarity to genes characterised in other species or the biochemical properties of the encoded protein. The updated guidelines provide a flexible system that is not overly prescriptive but provides structure and a common framework for naming genes in wheat, which may be extended to related cereal species. 
We propose these guidelines be used henceforth by the wheat research community to facilitate integration of data from independent studies and allow broader and more efficient use of text and data mining approaches, which will ultimately help further accelerate wheat research and breeding.}, } @article {pmid36946261, year = {2023}, author = {Liang, Q and Muñoz-Amatriaín, M and Shu, S and Lo, S and Wu, X and Carlson, JW and Davidson, P and Goodstein, DM and Phillips, J and Janis, NM and Lee, EJ and Liang, C and Morrell, PL and Farmer, AD and Xu, P and Close, TJ and Lonardi, S}, title = {A view of the pan-genome of domesticated Cowpea (Vigna unguiculata [L.] Walp.).}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20319}, doi = {10.1002/tpg2.20319}, pmid = {36946261}, issn = {1940-3372}, support = {Cooperative Agreement AID-OAA-A-13-00070//United States Agency for International Development/ ; 32172568//National Natural Science Foundation of China/ ; Hatch Project CA-R-BPS-5306-H//U.S. Department of Agriculture/ ; IIS 1814359//National Science Foundation/ ; IOS 1543963//National Science Foundation/ ; Non-Assistance Cooperative Agreement 58-5030-7-069//Agricultural Research Service/ ; //National Ten-Thousand Talents Program of China/ ; Contract No. DE-AC02-05CH11231//U.S. Department of Energy/ ; 2021C02065-6-3//Major Science and Technology Project of Plant Breeding in Zhejiang Province/ ; }, abstract = {Cowpea, Vigna unguiculata L. Walp., is a diploid warm-season legume of critical importance as both food and fodder in sub-Saharan Africa. This species is also grown in Northern Africa, Europe, Latin America, North America, and East to Southeast Asia. To capture the genomic diversity of domesticates of this important legume, de novo genome assemblies were produced for representatives of six subpopulations of cultivated cowpea identified previously from genotyping of several hundred diverse accessions. 
In the most complete assembly (IT97K-499-35), 26,026 core and 4963 noncore genes were identified, with 35,436 pan genes when considering all seven accessions. GO terms associated with response to stress and defense response were highly enriched among the noncore genes, while core genes were enriched in terms related to transcription factor activity, and transport and metabolic processes. Over 5 million single nucleotide polymorphisms (SNPs) relative to each assembly and over 40 structural variants >1 Mb in size were identified by comparing genomes. Vu10 was the chromosome with the highest frequency of SNPs, and Vu04 had the most structural variants. Noncore genes harbor a larger proportion of potentially disruptive variants than core genes, including missense, stop gain, and frameshift mutations; this suggests that noncore genes substantially contribute to diversity within domesticated cowpea.}, } @article {pmid36944612, year = {2023}, author = {Zhou, Y and Yu, Z and Chebotarov, D and Chougule, K and Lu, Z and Rivera, LF and Kathiresan, N and Al-Bader, N and Mohammed, N and Alsantely, A and Mussurova, S and Santos, J and Thimma, M and Troukhan, M and Fornasiero, A and Green, CD and Copetti, D and Kudrna, D and Llaca, V and Lorieux, M and Zuccolo, A and Ware, D and McNally, K and Zhang, J and Wing, RA}, title = {Pan-genome inversion index reveals evolutionary insights into the subpopulation structure of Asian rice.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {1567}, pmid = {36944612}, issn = {2041-1723}, support = {S10 OD028621/OD/NIH HHS/United States ; }, mesh = {*Oryza/genetics ; Sequence Analysis, DNA ; Genome, Plant/genetics ; Biological Evolution ; Phylogeny ; }, abstract = {Understanding and exploiting genetic diversity is a key factor for the productive and stable production of rice. 
Here, we utilize 73 high-quality genomes that encompass the subpopulation structure of Asian rice (Oryza sativa), plus the genomes of two wild relatives (O. rufipogon and O. punctata), to build a pan-genome inversion index of 1769 non-redundant inversions that span an average of ~29% of the O. sativa cv. Nipponbare reference genome sequence. Using this index, we estimate an inversion rate of ~700 inversions per million years in Asian rice, which is 16 to 50 times higher than previously estimated for plants. Detailed analyses of these inversions show evidence of their effects on gene expression, recombination rate, and linkage disequilibrium. Our study uncovers the prevalence and scale of large inversions (≥100 bp) across the pan-genome of Asian rice and hints at their largely unexplored role in functional biology and crop performance.}, } @article {pmid36944262, year = {2023}, author = {Milner, DS and Galindo, LJ and Irwin, NAT and Richards, TA}, title = {Transporter Proteins as Ecological Assets and Features of Microbial Eukaryotic Pangenomes.}, journal = {Annual review of microbiology}, volume = {77}, number = {}, pages = {45-66}, doi = {10.1146/annurev-micro-032421-115538}, pmid = {36944262}, issn = {1545-3251}, mesh = {*Eukaryota/genetics ; *Eukaryotic Cells ; Membrane Transport Proteins ; Gene Duplication ; Phenotype ; }, abstract = {Here we review two connected themes in evolutionary microbiology: (a) the nature of gene repertoire variation within species groups (pangenomes) and (b) the concept of metabolite transporters as accessory proteins capable of providing niche-defining "bolt-on" phenotypes. We discuss the need for improved sampling and understanding of pangenome variation in eukaryotic microbes. We then review the factors that shape the repertoire of accessory genes within pangenomes. As part of this discussion, we outline how gene duplication is a key factor in both eukaryotic pangenome variation and transporter gene family evolution. 
We go on to outline how, through functional characterization of transporter-encoding genes, in combination with analyses of how transporter genes are gained and lost from accessory genomes, we can reveal much about the niche range, the ecology, and the evolution of virulence of microbes. We advocate for the coordinated systematic study of eukaryotic pangenomes through genome sequencing and the functional analysis of genes found within the accessory gene repertoire.}, } @article {pmid36943133, year = {2023}, author = {Kronen, M and Vázquez-Campos, X and Wilkins, MR and Lee, M and Manefield, MJ}, title = {Evidence for a Putative Isoprene Reductase in Acetobacterium wieringae.}, journal = {mSystems}, volume = {8}, number = {2}, pages = {e0011923}, pmid = {36943133}, issn = {2379-5077}, mesh = {*Oxidoreductases/genetics ; *Acetobacterium/genetics ; Butadienes/metabolism ; }, abstract = {Recent discoveries of isoprene-metabolizing microorganisms suggest they might play an important role in the global isoprene budget. Under anoxic conditions, isoprene can be used as an electron acceptor and is reduced to methylbutene. This study describes the proteogenomic profiling of an isoprene-reducing bacterial culture to identify organisms and genes responsible for the isoprene hydrogenation reaction. A metagenome-assembled genome (MAG) of the most abundant (89% relative abundance) lineage in the enrichment, Acetobacterium wieringae, was obtained. Comparative proteogenomics and reverse transcription-PCR (RT-PCR) identified a putative five-gene operon from the A. wieringae MAG upregulated during isoprene reduction. The operon encodes a putative oxidoreductase, three pleiotropic nickel chaperones (2 × HypA, HypB), and one 4Fe-4S ferredoxin. The oxidoreductase is proposed as the putative isoprene reductase with a binding site for NADH, flavin adenine dinucleotide (FAD), two pairs of canonical [4Fe-4S] clusters, and a putative iron-sulfur cluster site in a Cys6-bonding environment. 
Well-studied Acetobacterium strains, such as A. woodii DSM 1030, A. wieringae DSM 1911, or A. malicum DSM 4132, do not encode the isoprene-regulated operon but encode, like many other bacteria, a homolog of the putative isoprene reductase (~47 to 49% amino acid sequence identity). Uncharacterized homologs of the putative isoprene reductase are observed across the Firmicutes, Spirochaetes, Tenericutes, Actinobacteria, Chloroflexi, Bacteroidetes, and Proteobacteria, suggesting the ability of biohydrogenation of unfunctionalized conjugated doubled bonds in other unsaturated hydrocarbons. IMPORTANCE Isoprene was recently shown to act as an electron acceptor for a homoacetogenic bacterium. The focus of this study is the molecular basis for isoprene reduction. By comparing a genome from our isoprene-reducing enrichment culture, dominated by Acetobacterium wieringae, with genomes of other Acetobacterium lineages that do not reduce isoprene, we shortlisted candidate genes for isoprene reduction. Using comparative proteogenomics and reverse transcription-PCR we have identified a putative five-gene operon encoding an oxidoreductase referred to as putative isoprene reductase.}, } @article {pmid36941438, year = {2023}, author = {Zheng, X and Xu, S and Wang, Z and Tao, X and Liu, Y and Dai, L and Li, Y and Zhang, W}, title = {Sifting through the core-genome to identify putative cross-protective antigens against Riemerella anatipestifer.}, journal = {Applied microbiology and biotechnology}, volume = {107}, number = {9}, pages = {3085-3098}, pmid = {36941438}, issn = {1432-0614}, mesh = {Animals ; *Poultry Diseases/prevention & control ; Reproducibility of Results ; *Riemerella/genetics ; Vaccines, Subunit ; Ducks ; *Flavobacteriaceae Infections/prevention & control/veterinary ; }, abstract = {Infectious serositis of ducks, caused by Riemerella anatipestifer, is one of the main infectious diseases that harm commercial ducks. 
Whole-strain-based vaccines with no or few cross-protection were observed between different serotypes of R. anatipestifer, and so far, control of infection is hampered by a lack of effective vaccines, especially subunit vaccines with cross-protection. Since the concept of reverse vaccinology was introduced, it has been widely used to screen for protective antigens in important pathogens. In this study, pan-genome binding reverse vaccinology, an emerging approach to vaccine candidate screening, was used to screen for cross-protective antigens against R. anatipestifer. Thirty proteins were identified from the core-genome as potential cross-protective antigens. Three of these proteins were recombinantly expressed, and their immunoreactivity with five antisera (anti-serotypes 1, 2, 6, 10, and 11) was demonstrated by Western blotting. Our study established a method for high-throughput screening of cross-protective antigens against R. anatipestifer in silico, which will lay the foundation for the development of a cross-protective subunit vaccine controlling R. anatipestifer infection. KEY POINTS: • Pan-genome binding reverse vaccine approach was first established in R. anatipestifer to screen for subunit vaccine candidates. • Thirty potential cross-protective antigens against R. anatipestifer were identified by this method. 
• The reliability of the method was verified preliminarily by the results of Western blotting of three of these potential antigens.}, } @article {pmid36938359, year = {2023}, author = {Nguyen, HN and Sharp, GM and Stahl-Rommel, S and Velez Justiniano, YA and Castro, CL and Nelman-Gonzalez, M and O'Rourke, A and Lee, MD and Williamson, J and McCool, C and Crucian, B and Clark, KW and Jain, M and Castro-Wallace, SL}, title = {Microbial isolation and characterization from two flex lines from the urine processor assembly onboard the international space station.}, journal = {Biofilm}, volume = {5}, number = {}, pages = {100108}, pmid = {36938359}, issn = {2590-2075}, abstract = {Urine, humidity condensate, and other sources of non-potable water are processed onboard the International Space Station (ISS) by the Water Recovery System (WRS) yielding potable water. While some means of microbial control are in place, including a phosphoric acid/hexavalent chromium urine pretreatment solution, many areas within the WRS are not available for routine microbial monitoring. Due to refurbishment needs, two flex lines from the Urine Processor Assembly (UPA) within the WRS were removed and returned to Earth. The water from within these lines, as well as flush water, was microbially evaluated. Culture and culture-independent analysis revealed the presence of Burkholderia, Paraburkholderia, and Leifsonia. Fungal culture also identified Fusarium and Lecythophora. Hybrid de novo genome analysis of the five distinct Burkholderia isolates identified them as B. contaminans, while the two Paraburkholderia isolates were identified as P. fungorum. Chromate-resistance gene clusters were identified through pangenomic analysis that differentiated these genomes from previously studied isolates recovered from the point-of-use potable water dispenser and/or current NCBI references, indicating that unique populations exist within distinct niches in the WRS. 
Beyond genomic analysis, fixed samples directly from the lines were imaged by environmental scanning electron microscopy, which detailed networks of fungal-bacterial biofilms. This is the first evidence of biofilm formation within flex lines from the UPA onboard the ISS. For all bacteria isolated, biofilm potential was further characterized, with the B. contaminans isolates demonstrating the most considerable biofilm formation. Moreover, the genomes of the B. contaminans revealed secondary metabolite gene clusters associated with quorum sensing, biofilm formation, antifungal compounds, and hemolysins. The potential production of these gene cluster metabolites was phenotypically evaluated through biofilm, bacterial-fungal interaction, and hemolytic assays. Collectively, these data identify the UPA flex lines as a unique ecological niche and novel area of biofilm growth within the WRS. Further investigation of these organisms and their resistance profiles will enable engineering controls directed toward biofilm prevention in future space station water systems.}, } @article {pmid36936699, year = {2023}, author = {Ali Alghamdi, B and Al-Johani, I and Al-Shamrani, JM and Musamed Alshamrani, H and Al-Otaibi, BG and Almazmomi, K and Yusnoraini Yusof, N}, title = {Antimicrobial resistance in methicillin-resistant staphylococcus aureus.}, journal = {Saudi journal of biological sciences}, volume = {30}, number = {4}, pages = {103604}, pmid = {36936699}, issn = {1319-562X}, abstract = {In the medical community, antibiotics are revered as a miracle because they stop diseases brought on by pathogenic bacteria. Antibiotics have become the cornerstone of contemporary medical advancements ever since penicillin was discovered. Antibiotic resistance developed among germs quickly, placing a strain in the medical field. Methicillin-resistant Staphylococcus aureus (MRSA), Since 1961, has emerged as the major general antimicrobial resistant bacteria (AMR) worldwide. 
MRSA can easily transmit across the hospital system and has mostly gained resistance to medications called beta-lactamases. This enzyme destroys the cell wall of beta-lactam antibiotics resulting in resistance against that respective antibiotic. Daptomycin, linezolid and vancomycin were previously used to treat MRSA infections. However, due to mutations and Single nucleotide polymorphisms (SNPs) in Open reading frames (ORFs) and SCCmec machinery of respective antibody, MRSA developed resistance against those antibiotics. The MRSA strains (USA300, CC398, CC130 etc.), when their pan-genomes were analyzed were found the genes involved in invoking resistance against the antibiotics as well as the epidemiology of that respective strain. PENC (penicillin plus potassium clavulanate) is the new antibiotic showing potential in treatment of MRSA though it is itself resistant against penicillin alone. In this review, our main focus is on mechanism of development of AMR in MRSA, how different ORFs are involved in evoking resistance in MRSA and what is the core-genome of different antimicrobial resistant MRSA.}, } @article {pmid36935100, year = {2023}, author = {Khan, K and Jalal, K and Uddin, R}, title = {Pangenome profiling of novel drug target against vancomycin-resistant Enterococcus faecium.}, journal = {Journal of biomolecular structure \& dynamics}, volume = {}, number = {}, pages = {1-14}, doi = {10.1080/07391102.2023.2191134}, pmid = {36935100}, issn = {1538-0254}, abstract = {Enterococcus faecium is a frequent causative agent of nosocomial infection mainly acquired from outgoing hospital patients (Hospital Acquired Infection-HAIs). They are largely involved in the outbreaks of bacteremia, UTI, and endocarditis with a high transmissibility rate. The recent emergence of VRE strain (i.e. vancomycin resistant enterococcus) turned it into high priority pathogen for which new drug research is of dire need. 
Therefore, in current study, pangenome and resistome analyses were performed for available antibiotic-resistant genomes (n = 216) of E. faecium. It resulted in the prediction of around 5,059 genes as an accessory gene, 1,076 genes as core and 1,558 genes made up a unique genome fraction. Core genes common to all strains were further used for the identification of potent drug targets by applying subtractive genomics approach. Moreover, the COG functional analysis showed that these genomes are highly enriched in metabolic pathways such as in translational, ribosomal, proteins, carbohydrates and nucleotide transport metabolism. Through subtractive genomics it was observed that 431 proteins were non-homologous to the human proteome, 166 identified as essential for pathogen survival while 26 as potential and unique therapeutic targets. Finally, 3-dehydroquinate dehydrogenase was proposed as a potent drug target for further therapeutic candidate identification. Moreover, the molecular docking and dynamic simulation technique were applied to performed a virtual screening of natural product libraries (i.e., TCM and Ayurvedic compounds) along with 3-amino-4,5-dihydroxy-cyclohex-1-enecarboxylate (DHS) as a standard compound to validate the study. Consequently, Argeloside I, Apigenin-7-O-gentiobioside (from Ayurvedic library), ZINC85571062, and ZINC85570908 (TCM library) compounds were identified as potential inhibitors of 3-dehydroquinate dehydrogenase. The study proposed new compounds as novel therapeutics, however, further experimental validation is needed as a follow-up. Communicated by Ramaswamy H. 
Sarma.}, } @article {pmid36928221, year = {2023}, author = {Juscamayta-López, E and Valdivia, F and Soto, MP and Nureña, B and Horna, H}, title = {A pangenome approach-based loop-mediated isothermal amplification assay for the specific and early detection of Bordetella pertussis.}, journal = {Scientific reports}, volume = {13}, number = {1}, pages = {4356}, pmid = {36928221}, issn = {2045-2322}, support = {D43 TW007393/TW/FIC NIH HHS/United States ; }, mesh = {Humans ; *Bordetella pertussis/genetics ; *Whooping Cough/diagnosis ; Nucleic Acid Amplification Techniques ; Molecular Diagnostic Techniques ; Sensitivity and Specificity ; }, abstract = {Despite widespread vaccination, Bordetella pertussis continues to cause pertussis infections worldwide, leaving infants at the highest risk of severe illness and death, while people around them are likely the main sources of infection and rapidly spread the disease. Rapid and less complex molecular testing for the specific and timely diagnosis of pertussis remains a challenge that could help to prevent the disease from worsening and prevent its transmission. We aimed to develop and validate a colorimetric loop-mediated isothermal amplification (LAMP) assay using a new target uvrD_2 informed by the pangenome for the specific and early detection of B. pertussis. Compared to that of multitarget quantitative polymerase chain reaction (multitarget qPCR) using a large clinical DNA specimen (n = 600), the diagnostic sensitivity and specificity of the uvrD_2 LAMP assay were 100.0% and 98.6%, respectively, with a 99.7% degree of agreement between the two assays. The novel colorimetric uvrD_2 LAMP assay is highly sensitive and specific for detecting B. 
pertussis DNA in nasopharyngeal swabs and showed similar diagnostic accuracy to complex and high-cost multitarget qPCR, but it is faster, simpler, and inexpensive, which makes it very helpful for the reliable and timely diagnosis of pertussis in primary health care and resource-limited settings.}, } @article {pmid36925467, year = {2023}, author = {Deng, Y and Jiang, ZM and Han, XF and Su, J and Yu, LY and Liu, WH and Zhang, YQ}, title = {Pangenome analysis of the genus Herbiconiux and proposal of four new species associated with Chinese medicinal plants.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1119226}, pmid = {36925467}, issn = {1664-302X}, abstract = {Five Gram-stain-positive, aerobic, non-motile actinobacterial strains designated as CPCC 205763[T], CPCC 203386[T], CPCC 205716[T], CPCC 203406[T], and CPCC 203407 were obtained from different ecosystems associated with four kinds of Chinese traditional medicinal plants. The 16S rRNA gene sequences of these five strains showed closely related to members of the genus Herbiconiux of the family Microbacteriaceae, with the highest similarities of 97.4-99.7% to the four validly named species of Herbiconiux. In the phylogenetic trees based on 16S rRNA gene sequences and the core genome, these isolates clustered into the clade of the genus Herbiconiux within the lineage of the family Microbacteriaceae. The overall genome relatedness indexes (values of ANI and dDDH) and the phenotypic properties (morphological, physiological and chemotaxonomic characteristics) of these isolates, readily supported to affiliate them to the genus Herbiconiux, representing four novel species, with the isolates CPCC 203406[T] and CPCC 203407 being classified in the same species. For which the names Herbiconiux aconitum sp. nov. (type strain CPCC 205763[T] = I19A-01430[T] = CGMCC 1.60067[T]), Herbiconiux daphne sp. nov. 
(type strain CPCC 203386[T] = I10A-01569[T] = DSM 24546[T] = KCTC 19839[T]), Herbiconiux gentiana sp. nov. (type strain CPCC 205716[T] = I21A-01427[T] = CGMCC 1.60064[T]), and Herbiconiux oxytropis sp. nov. (type strain CPCC 203406[T] = I10A-02268[T] = DSM 24549[T] = KCTC 19840[T]) were proposed, respectively. In the genomes of these five strains, the putative encoding genes for amidase, endoglucanase, phosphatase, and superoxidative dismutase were retrieved, which were classified as biosynthetic genes/gene-clusters regarding plant growth-promotion (PGP) functions. The positive results from IAA-producing, cellulose-degrading and anti-oxidation experiments further approved their potential PGP bio-functions. Pangenome analysis of the genus Herbiconiux supported the polyphasic taxonomy results and confirmed their bio-function potential.}, } @article {pmid36919598, year = {2023}, author = {Lee, RRQ and Cher, WY and Wang, J and Chen, Y and Chae, E}, title = {Generating minimum set of gRNA to cover multiple targets in multiple genomes with MINORg.}, journal = {Nucleic acids research}, volume = {51}, number = {8}, pages = {e43}, pmid = {36919598}, issn = {1362-4962}, mesh = {Humans ; CRISPR-Cas Systems ; Gene Knockout Techniques ; Polymerase Chain Reaction ; *Software ; *RNA, Guide, CRISPR-Cas Systems ; }, abstract = {MINORg is an offline gRNA design tool that generates the smallest possible combination of gRNA capable of covering all desired targets in multiple non-reference genomes. As interest in pangenomic research grows, so does the workload required for large screens in multiple individuals. MINORg aims to lessen this workload by capitalising on sequence homology to favour multi-target gRNA while simultaneously screening multiple genetic backgrounds in order to generate reusable gRNA panels. 
We demonstrated the practical application of MINORg by knocking out 11 homologous genes tandemly arrayed in a multi-gene cluster in two Arabidopsis thaliana lineages using three gRNA output by MINORg. We also described a new PCR-free modular cloning system for multiplexing gRNA, and used it to knockout three tandemly arrayed genes in another multi-gene cluster with gRNA designed by MINORg. Source code is freely available at https://github.com/rlrq/MINORg.}, } @article {pmid36919166, year = {2023}, author = {Viana, MVC and Galdino, JH and Profeta, R and Oliveira, M and Tavares, L and de Castro Soares, S and Carneiro, P and Wattam, AR and Azevedo, V}, title = {Analysis of Corynebacterium silvaticum genomes from Portugal reveals a single cluster and a clade suggested to produce diphtheria toxin.}, journal = {PeerJ}, volume = {11}, number = {}, pages = {e14895}, pmid = {36919166}, issn = {2167-8359}, mesh = {Swine ; Animals ; *Diphtheria Toxin/genetics ; Portugal/epidemiology ; Phylogeny ; *Deer/metabolism ; Corynebacterium ; Sus scrofa/metabolism ; Zoonoses ; }, abstract = {BACKGROUND: Corynebacterium silvaticum is a pathogenic, gram-positive bacterial species that causes caseous lymphadenitis in wild boars, domestic pigs and roe deer in Western Europe. It can affect animal production and cause zoonosis. Genome analysis has suggested that one strain from Portugal and one from Austria could probably produce the diphtheria toxin (DT), which inhibits protein synthesis and can cause death.

METHODS: To further investigate the species genetic diversity and probable production of DT by Portuguese strains, eight isolates from this country were sequenced and compared to 38 public ones.

RESULTS: Strains from Portugal are monophyletic, nearly identical, form a unique cluster and have 27 out of 36 known Corynebacterium virulence or niche factors. All of them lack a frameshift in the tox gene and were suggested to produce DT. A phylogenetic analysis shows that the species has diverged into two clades. Clade 1 is composed of strains that were suggested to have the ability to produce DT, represented by the monophyletic strains from Portugal and strain 05-13 from Austria. Clade 2 is composed of strains unable to produce DT due to a frameshifted tox gene. The second clade is represented by strains from Austria, Germany and Switzerland. Ten genome clusters were detected, in which strains from Germany are the most diverse. Strains from Portugal belong to an exclusive cluster. The pangenome has 2,961 proteins and is nearly closed (α = 0.968). Exclusive genes shared by clusters 1 and 2, and Portuguese strains are probably not related to disease manifestation as they share the same host but could play a role in their extra-host environmental adaptation. These results show the potential of the species to cause zoonosis, possibly diphtheria. 
The identified clusters, exclusively shaded genes, and exclusive STs identified in Portugal could be applied in the identification and epidemiology of the species.}, } @article {pmid36916949, year = {2023}, author = {Svahn, AJ and Suster, CJE and Chang, SL and Rockett, RJ and Sim, EM and Cliff, OM and Wang, Q and Arnott, A and Ramsperger, M and Sorrell, TC and Sintchenko, V and Prokopenko, M}, title = {Pangenome Analysis of a Salmonella Enteritidis Population Links a Major Outbreak to a Gifsy-1-Like Prophage Containing Anti-Inflammatory Gene gogB.}, journal = {Microbiology spectrum}, volume = {11}, number = {2}, pages = {e0279122}, pmid = {36916949}, issn = {2165-0497}, abstract = {A major outbreak of the globally significant Salmonella Enteritidis foodborne pathogen was identified within a large clinical data set by a program of routine WGS of clinical presentations of salmonellosis in New South Wales, Australia. Pangenome analysis helped to quantify and isolate prophage content within the accessory partition of the pangenome. A prophage similar to Gifsy-1 (henceforth GF-1L) was found to occur in all isolates of the outbreak core SNP cluster, and in three other isolates. Further analysis revealed that the GF-1L prophage carried the gogB virulence factor. These observations suggest that GF-1L may be an important marker of virulence for S. Enteritidis population screening and, that anti-inflammatory, gogB-mediated virulence currently associated with Salmonella Typhimurium may also be displayed by S. Enteritidis. IMPORTANCE We examined 5 years of genomic and epidemiological data for the significant global foodborne pathogen, Salmonella enterica. Although Salmonella enterica subspecies enterica serovar Enteritidis (S. Enteritidis) is the leading cause of salmonellosis in the USA and Europe, prior to 2018 it was not endemic in the southern states of Australia. However, in 2018 a large outbreak led to the endemicity of S. 
Enteritidis in New South Wales, Australia, and a unique opportunity to study this phenomenon. Using pangenome analysis we uncovered that this clone contained a Gifsy-1-like prophage harboring the known virulence factor gogB. The prophage reported has not previously been described in S. Enteritidis isolates.}, } @article {pmid36914349, year = {2023}, author = {Wang, D and Fletcher, GC and Gagic, D and On, SLW and Palmer, JS and Flint, SH}, title = {Comparative genome identification of accessory genes associated with strong biofilm formation in Vibrio parahaemolyticus.}, journal = {Food research international (Ottawa, Ont.)}, volume = {166}, number = {}, pages = {112605}, doi = {10.1016/j.foodres.2023.112605}, pmid = {36914349}, issn = {1873-7145}, mesh = {*Vibrio parahaemolyticus/genetics ; Biofilms ; Genomics ; Operon ; Cellulose ; }, abstract = {Vibrio parahaemolyticus biofilms on the seafood processing plant surfaces are a potential source of seafood contamination and subsequent food poisoning. Strains differ in their ability to form biofilm, but little is known about the genetic characteristics responsible for biofilm development. In this study, pangenome and comparative genome analysis of V. parahaemolyticus strains reveals genetic attributes and gene repertoire that contribute to robust biofilm formation. The study identified 136 accessory genes that were exclusively present in strong biofilm forming strains and these were functionally assigned to the Gene Ontology (GO) pathways of cellulose biosynthesis, rhamnose metabolic and catabolic processes, UDP-glucose processes and O antigen biosynthesis (p < 0.05). Strategies of CRISPR-Cas defence and MSHA pilus-led attachment were implicated via Kyoto Encyclopedia of Genes and Genomes (KEGG) annotation. Higher levels of horizontal gene transfer (HGT) were inferred to confer more putatively novel properties on biofilm-forming V. parahaemolyticus. 
Furthermore, cellulose biosynthesis, a neglected potential virulence factor, was identified as being acquired from within the order Vibrionales. The cellulose synthase operons in V. parahaemolyticus were examined for their prevalence (22/138, 15.94 %) and were found to consist of the genes bcsG, bcsE, bcsQ, bcsA, bcsB, bcsZ, bcsC. This study provides insights into robust biofilm formation of V. parahaemolyticus at the genomic level and facilitates: identification of key attributes for robust biofilm formation, elucidation of biofilm formation mechanisms and development of potential targets for novel control strategies of persistent V. parahaemolyticus.}, } @article {pmid36912660, year = {2023}, author = {Ranković, T and Nikolić, I and Berić, T and Popović, T and Lozo, J and Medić, O and Stanković, S}, title = {Genome Analysis of Two Pseudomonas syringae pv. aptata Strains with Different Virulence Capacity Isolated from Sugar Beet: Features of Successful Pathogenicity in the Phyllosphere Microbiome.}, journal = {Microbiology spectrum}, volume = {11}, number = {2}, pages = {e0359822}, pmid = {36912660}, issn = {2165-0497}, abstract = {Members of the Pseudomonas syringae species complex are heterogeneous bacteria that are the most abundant bacterial plant pathogens in the plant phyllosphere, with strong abilities to exist on and infect different plant hosts and survive in/outside agroecosystems. In this study, the draft genome sequences of two pathogenic P. syringae pv. aptata strains with different in planta virulence capacities isolated from the phyllosphere of infected sugar beet were analyzed to evaluate putative features of survival strategies and to determine the pathogenic potential of the strains. The draft genomes of P. syringae pv. aptata strains P16 and P21 are 5,974,057 bp and 6,353,752 bp in size, have GC contents of 59.03% and 58.77%, respectively, and contain 3,439 and 3,536 protein-coding sequences, respectively. 
For both average nucleotide identity and pangenome analysis, P16 and P21 largely clustered with other pv. aptata strains from the same isolation source. We found differences in the repertoire of effectors of the type III secretion system among all 102 selected strains, suggesting that the type III secretion system is a critical factor in the different virulent phenotypes of P. syringae pv. aptata. During genome analysis of the highly virulent strain P21, we discovered genes for T3SS effectors (AvrRpm1, HopAW1, and HopAU1) that were not previously found in genomes of P. syringae pv. aptata. We also identified coding sequences for pantothenate kinase, VapC endonuclease, phospholipase, and pectate lyase in both genomes, which may represent novel effectors of the type III secretion system. IMPORTANCE Genome analysis has an enormous effect on understanding the life strategies of plant pathogens. Comparing similarities with pathogens involved in other epidemics could elucidate the pathogen life cycle when a new outbreak happens. This study represents the first in-depth genome analysis of Pseudomonas syringae pv. aptata, the causative agent of leaf spot disease of sugar beet. Despite the increasing number of disease reports in recent years worldwide, there is still a lack of information about the genomic features, epidemiology, and pathogenic life strategies of this particular pathogen. Our findings provide advances in disease etiology (especially T3SS effector repertoire) and elucidate the role of environmental adaptations required for prevalence in the pathobiome of the sugar beet. From the perspective of the very heterogeneous P. 
syringae species complex, this type of analysis has specific importance in reporting the characteristics of individual strains.}, } @article {pmid36910224, year = {2023}, author = {Coskun, ÖK and Gomez-Saez, GV and Beren, M and Ozcan, D and Hosgormez, H and Einsiedl, F and Orsi, WD}, title = {Carbon metabolism and biogeography of candidate phylum "Candidatus Bipolaricaulota" in geothermal environments of Biga Peninsula, Turkey.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1063139}, pmid = {36910224}, issn = {1664-302X}, abstract = {Terrestrial hydrothermal springs and aquifers are excellent sites to study microbial biogeography because of their high physicochemical heterogeneity across relatively limited geographic regions. In this study, we performed 16S rRNA gene sequencing and metagenomic analyses of the microbial diversity of 11 different geothermal aquifers and springs across the tectonically active Biga Peninsula (Turkey). Across geothermal settings ranging in temperature from 43 to 79°C, one of the most highly represented groups in both 16S rRNA gene and metagenomic datasets was affiliated with the uncultivated phylum "Candidatus Bipolaricaulota" (former "Ca. Acetothermia" and OP1 division). The highest relative abundance of "Ca. Bipolaricaulota" was observed in a 68°C geothermal brine sediment, where it dominated the microbial community, representing 91% of all detectable 16S rRNA genes. Correlation analysis of "Ca. Bipolaricaulota" operational taxonomic units (OTUs) with physicochemical parameters indicated that salinity was the strongest environmental factor measured associated with the distribution of this novel group in geothermal fluids. Correspondingly, analysis of 23 metagenome-assembled genomes (MAGs) revealed two distinct groups of "Ca. 
Bipolaricaulota" MAGs based on the differences in carbon metabolism: one group encoding the bacterial Wood-Ljungdahl pathway (WLP) for H2 dependent CO2 fixation is selected for at lower salinities, and a second heterotrophic clade that lacks the WLP that was selected for under hypersaline conditions in the geothermal brine sediment. In conclusion, our results highlight that the biogeography of "Ca. Bipolaricaulota" taxa is strongly correlated with salinity in hydrothermal ecosystems, which coincides with key differences in carbon acquisition strategies. The exceptionally high relative abundance of apparently heterotrophic representatives of this novel candidate Phylum in geothermal brine sediment observed here may help to guide future enrichment experiments to obtain representatives in pure culture.}, } @article {pmid36909378, year = {2023}, author = {Gupta, P and Li, S}, title = {Editorial: Methods in genome, pan-genome, pan-transcriptome, and gene regulatory network (GRN) construction and analysis.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1152708}, pmid = {36909378}, issn = {1664-462X}, } @article {pmid36906708, year = {2023}, author = {Tanabe, Y and Yamaguchi, H and Yoshida, M and Kai, A and Okazaki, Y}, title = {Characterization of a bloom-associated alphaproteobacterial lineage, 'Candidatus Phycosocius': insights into freshwater algal-bacterial interactions.}, journal = {ISME communications}, volume = {3}, number = {1}, pages = {20}, pmid = {36906708}, issn = {2730-6151}, abstract = {Marine bacterial lineages associated with algal blooms, such as the Roseobacter clade, have been well characterized in ecological and genomic contexts, yet such lineages have rarely been explored in freshwater blooms. 
This study performed phenotypic and genomic analyses of an alphaproteobacterial lineage 'Candidatus Phycosocius' (denoted the CaP clade), one of the few lineages ubiquitously associated with freshwater algal blooms, and described a novel species: 'Ca. Phycosocius spiralis.' Phylogenomic analyses indicated that the CaP clade is a deeply branching lineage in the Caulobacterales. Pangenome analyses revealed characteristic features of the CaP clade: aerobic anoxygenic photosynthesis and essential vitamin B auxotrophy. Genome size varies widely among members of the CaP clade (2.5-3.7 Mb), likely a result of independent genome reductions at each lineage. This includes a loss of tight adherence pilus genes (tad) in 'Ca. P. spiralis' that may reflect its adoption of a unique spiral cell shape and corkscrew-like burrowing activity at the algal surface. Notably, quorum sensing (QS) proteins showed incongruent phylogenies, suggesting that horizontal transfers of QS genes and QS-involved interactions with specific algal partners might drive CaP clade diversification. This study elucidates the ecophysiology and evolution of proteobacteria associated with freshwater algal blooms.}, } @article {pmid36901726, year = {2023}, author = {Sonnenberg, CB and Haugen, P}, title = {Bipartite Genomes in Enterobacterales: Independent Origins of Chromids, Elevated Openness and Donors of Horizontally Transferred Genes.}, journal = {International journal of molecular sciences}, volume = {24}, number = {5}, pages = {}, pmid = {36901726}, issn = {1422-0067}, mesh = {*Genome, Bacterial ; Plasmids ; Bacteria/genetics ; *Gammaproteobacteria ; Codon Usage ; Gene Transfer, Horizontal ; }, abstract = {Multipartite bacteria have one chromosome and one or more chromid. Chromids are believed to have properties that enhance genomic flexibility, making them a favored integration site for new genes. However, the mechanism by which chromosomes and chromids jointly contribute to this flexibility is not clear. 
To shed light on this, we analyzed the openness of chromosomes and chromids of the two bacteria, Vibrio and Pseudoalteromonas, both which belong to the Enterobacterales order of Gammaproteobacteria, and compared the genomic openness with that of monopartite genomes in the same order. We applied pangenome analysis, codon usage analysis and the HGTector software to detect horizontally transferred genes. Our findings suggest that the chromids of Vibrio and Pseudoalteromonas originated from two separate plasmid acquisition events. Bipartite genomes were found to be more open compared to monopartite. We found that the shell and cloud pangene categories drive the openness of bipartite genomes in Vibrio and Pseudoalteromonas. Based on this and our two recent studies, we propose a hypothesis that explains how chromids and the chromosome terminus region contribute to the genomic plasticity of bipartite genomes.}, } @article {pmid36900455, year = {2023}, author = {López-García, E and Benítez-Cabello, A and Ramiro-García, J and Ladero, V and Arroyo-López, FN}, title = {In Silico Evidence of the Multifunctional Features of Lactiplantibacillus pentosus LPG1, a Natural Fermenting Agent Isolated from Table Olive Biofilms.}, journal = {Foods (Basel, Switzerland)}, volume = {12}, number = {5}, pages = {}, pmid = {36900455}, issn = {2304-8158}, support = {RTI2018-100883-B-I00, MCIU/AEI/FEDER, UE//Ministerio de Ciencia, Innovación y Universidades (Spain)/ ; }, abstract = {In recent years, there has been a growing interest in obtaining probiotic bacteria from plant origins. This is the case of Lactiplantibacillus pentosus LPG1, a lactic acid bacterial strain isolated from table olive biofilms with proven multifunctional features. In this work, we have sequenced and closed the complete genome of L. pentosus LPG1 using both Illumina and PacBio technologies. 
Our intention is to carry out a comprehensive bioinformatics analysis and whole-genome annotation for a further complete evaluation of the safety and functionality of this microorganism. The chromosomic genome had a size of 3,619,252 bp, with a GC (Guanine-Cytosine) content of 46.34%. L. pentosus LPG1 also had two plasmids, designated as pl1LPG1 and pl2LPG1, with lengths of 72,578 and 8713 bp (base pair), respectively. Genome annotation revealed that the sequenced genome consisted of 3345 coding genes and 89 non-coding sequences (73 tRNA and 16 rRNA genes). Taxonomy was confirmed by Average Nucleotide Identity analysis, which grouped L. pentosus LPG1 with other sequenced L. pentosus genomes. Moreover, the pan-genome analysis showed that L. pentosus LPG1 was closely related to the L. pentosus strains IG8, IG9, IG11, and IG12, all of which were isolated from table olive biofilms. Resistome analysis reported the absence of antibiotic resistance genes, whilst PathogenFinder tool classified the strain as a non-human pathogen. Finally, in silico analysis of L. pentosus LPG1 showed that many of its previously reported technological and probiotic phenotypes corresponded with the presence of functional genes. In light of these results, we can conclude that L. pentosus LPG1 is a safe microorganism and a potential human probiotic with a plant origin and application as a starter culture for vegetable fermentations.}, } @article {pmid36899131, year = {2023}, author = {Kim, E and Jung, HI and Park, SH and Kim, HY and Kim, SK}, title = {Comprehensive genome analysis of Burkholderia contaminans SK875, a quorum-sensing strain isolated from the swine.}, journal = {AMB Express}, volume = {13}, number = {1}, pages = {30}, pmid = {36899131}, issn = {2191-0855}, support = {2021//Konkuk University/ ; }, abstract = {The Burkholderia cepacia complex (BCC) is a Gram-negative bacterial, including Burkholderia contaminans species. 
Although the plain Burkholderia is pervasive from taxonomic and genetic perspectives, a common characteristic is that they may use the quorum-sensing (QS) system. In our previous study, we generated the complete genome sequence of Burkholderia contaminans SK875 isolated from the respiratory tract. To our knowledge, this is the first study to report functional genomic features of B. contaminans SK875 for understanding the pathogenic characteristics. In addition, comparative genomic analysis for five B. contaminans genomes was performed to provide comprehensive information on the disease potential of B. contaminans species. Analysis of average nucleotide identity (ANI) showed that the genome has high similarity (> 96%) with other B. contaminans strains. Five B. contaminans genomes yielded a pangenome of 8832 coding genes, a core genome of 5452 genes, the accessory genome of 2128 genes, and a unique genome of 1252 genes. The 186 genes were specific to B. contaminans SK875, including toxin higB-2, oxygen-dependent choline dehydrogenase, and hypothetical proteins. Genotypic analysis of the antimicrobial resistance of B. contaminans SK875 verified resistance to tetracycline, fluoroquinolone, and aminoglycoside. Compared with the virulence factor database, we identified 79 promising virulence genes such as adhesion system, invasions, antiphagocytic, and secretion systems. Moreover, 45 genes of 57 QS-related genes that were identified in B. contaminans SK875 indicated high sequence homology with other B. contaminans strains. Our results will help to gain insight into virulence, antibiotic resistance, and quorum sensing for B. 
contaminans species.}, } @article {pmid36898633, year = {2023}, author = {Salaheen, S and Kim, SW and Springer, HR and Hovingh, EP and Van Kessel, JAS and Haley, BJ}, title = {Genomic diversity of antimicrobial-resistant and Shiga toxin gene-harboring non-O157 Escherichia coli from dairy calves.}, journal = {Journal of global antimicrobial resistance}, volume = {33}, number = {}, pages = {164--170}, doi = {10.1016/j.jgar.2023.02.022}, pmid = {36898633}, issn = {2213-7173}, mesh = {Animals ; Cattle ; Humans ; Shiga Toxin ; *Escherichia coli Infections/veterinary ; Phylogeny ; *Shiga-Toxigenic Escherichia coli/genetics ; Genomics ; }, abstract = {OBJECTIVES: Shiga toxin-producing Escherichia coli (STEC) are globally significant foodborne pathogens. Dairy calves are a known reservoir of both O157 and non-O157 STEC. The objective of this study was to comprehensively evaluate the genomic attributes, diversity, virulence factors, and antimicrobial resistance gene (ARG) profiles of the STEC from preweaned and postweaned dairy calves in commercial dairy herds.

METHODS: In total, 31 non-O157 STEC were identified as part of a larger study focused on the pangenome of >1000 E. coli isolates from the faeces of preweaned and postweaned dairy calves on commercial dairy farms. These 31 genomes were sequenced on an Illumina NextSeq500 platform.

RESULTS: Based on the phylogenetic analyses, the STEC isolates were determined to be polyphyletic, with at least three phylogroups: A (32%), B1 (58%), and G (3%). These phylogroups represented at least 16 sequence types and 11 serogroups, including two of the 'big six' serogroups, O103 and O111. Several Shiga toxin gene subtypes were identified in the genomes, including stx1a, stx2a, stx2c, stx2d, and stx2g. Using the ResFinder database, the majority of the isolates (>50%) were determined to be multidrug-resistant strains because they harboured genes conferring resistance to three or more classes of antimicrobials, including some of human health significance (e.g., β-lactams, macrolides, and fosfomycin). Additionally, non-O157 STEC strain persistence and transmission within a farm was observed.

CONCLUSION: Dairy calves are a reservoir of phylogenomically diverse multidrug-resistant non-O157 STEC. Information from this study may inform assessments of public health risk and guide preharvest prevention strategies focusing on STEC reservoirs.}, } @article {pmid36897406, year = {2023}, author = {Xu, Y and Kong, X and Guo, Y and Wang, R and Yao, X and Chen, X and Yan, T and Wu, D and Lu, Y and Dong, J and Zhu, Y and Chen, M and Cen, H and Jiang, L}, title = {Structural variations and environmental specificities of flowering time-related genes in Brassica napus.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {136}, number = {3}, pages = {42}, pmid = {36897406}, issn = {1432-2242}, support = {No. 32130076//Natural Science Foundation of China/ ; 31961143008//Natural Science Foundation of China/ ; 2021C02057//Key Science and Technology Project of Zhejiang Province/ ; }, mesh = {*Brassica napus/genetics ; Quantitative Trait Loci ; Genome-Wide Association Study ; Plant Breeding ; Genotype ; *Arabidopsis/genetics ; }, abstract = {We found that the flowering time order of accessions in a genetic population considerably varied across environments, and homolog copies of essential flowering time genes played different roles in different locations. Flowering time plays a critical role in determining the life cycle length, yield, and quality of a crop. However, the allelic polymorphism of flowering time-related genes (FTRGs) in Brassica napus, an important oil crop, remains unclear. Here, we provide high-resolution graphics of FTRGs in B. napus on a pangenome-wide scale based on single nucleotide polymorphism (SNP) and structural variation (SV) analyses. A total of 1337 FTRGs in B. napus were identified by aligning their coding sequences with Arabidopsis orthologs. Overall, 46.07% of FTRGs were core genes and 53.93% were variable genes. 
Moreover, 1.94%, 0.74%, and 4.49% FTRGs had significant presence-frequency differences (PFDs) between the spring and semi-winter, spring and winter, and winter and semi-winter ecotypes, respectively. SNPs and SVs across 1626 accessions of 39 FTRGs underlying numerous published qualitative trait loci were analyzed. Additionally, to identify FTRGs specific to an eco-condition, genome-wide association studies (GWASs) based on SNP, presence/absence variation (PAV), and SV were performed after growing and observing the flowering time order (FTO) of plants in a collection of 292 accessions at three locations in two successive years. It was discovered that the FTO of plants in a genetic population changed a lot across various environments, and homolog copies of some key FTRGs played different roles in different locations. This study revealed the molecular basis of the genotype-by-environment (G × E) effect on flowering and recommended a pool of candidate genes specific to locations for breeding selection.}, } @article {pmid36892794, year = {2023}, author = {Xu, Y and Cheng, T and Rao, Q and Zhang, S and Ma, YL}, title = {Comparative genomic analysis of Stenotrophomonas maltophilia unravels their genetic variations and versatility trait.}, journal = {Journal of applied genetics}, volume = {64}, number = {2}, pages = {351--360}, pmid = {36892794}, issn = {2190-3883}, mesh = {*Stenotrophomonas maltophilia/genetics ; Phylogeny ; Phenotype ; Genomics ; Genetic Variation ; }, abstract = {Stenotrophomonas maltophilia is a species with immensely broad phenotypic and genotypic diversity that could widely distribute in natural and clinical environments. However, little attention has been paid to reveal their genome plasticity to diverse environments. In the present study, a comparative genomic analysis of S. maltophilia isolated from clinical and natural sources was systematically explored its genetic diversity of 42 sequenced genomes. The results showed that S. 
maltophilia owned an open pan-genome and had strong adaptability to different environments. A total of 1612 core genes were existed with an average of 39.43% of each genome, and the shared core genes might be necessary to maintain the basic characteristics of those S. maltophilia strains. Based on the results of the phylogenetic tree, the ANI value, and the distribution of accessory genes, genes associated with the fundamental process of those strains from the same habitat were found to be mostly conserved in evolution. Isolates from the same habitat had a high degree of similarity in COG category, and the most significant KEGG pathways were mainly involved in carbohydrate and amino acid metabolism, indicating that genes related to essential processes were mostly conserved in evolution for the clinical and environmental settings. Meanwhile, the number of resistance and efflux pump gene was significantly higher in the clinical setting than that of in the environmental setting. Collectively, this study highlights the evolutionary relationships of S. maltophilia isolated from clinical and environmental sources, shedding new light on its genomic diversity.}, } @article {pmid36884376, year = {2023}, author = {Zhang, DD and Zhang, XJ and Wu, D and Li, BB and Liu, HC and Zhou, YG and Fang, BZ and Li, WJ and Cai, M}, title = {Aquiflexum gelatinilyticum sp. 
nov., isolated from river water.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {73}, number = {3}, pages = {}, doi = {10.1099/ijsem.0.005741}, pmid = {36884376}, issn = {1466-5034}, mesh = {*Fatty Acids/chemistry ; *Phospholipids/chemistry ; Rivers/microbiology ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Phylogeny ; Bacterial Typing Techniques ; DNA, Bacterial/genetics ; Base Composition ; Bacteroidetes ; Water/analysis ; }, abstract = {Two Gram-stain-negative, strictly aerobic, rod-shaped, non-motile and non-gliding bacteria, designated as XJ19-10[T] and XJ19-11, were isolated from river water in Xinjiang Uygur Autonomous Region, PR China. Cells of these strains were catalase-, oxidase- and gelatinase-positive and contained carotenoids but no flexirubins. Growth occurred at 10-30 °C, pH 7.0-9.0 and with 0-2.5% (w/v) NaCl. On the basis of the results of 16S rRNA gene sequence and genome analyses, the two isolates represented members of the genus Aquiflexum, and the closest relative was Aquiflexum aquatile Z0201[T] with 16S rRNA gene sequence pairwise similarities of 97.9-98.1%. Furthermore, the average nucleotide identities and digital DNA-DNA hybridization identities between the two isolates and other relatives were all less than 82.9 and 28.2 %, respectively, all below the species delineation thresholds. The results of pan-genomic analysis indicated that the type strain XJ19-10[T] shared 2813 core gene clusters with other three type strains of members of the genus Aquiflexum, as well as having 623 strain-specific clusters. The major polar lipids were phosphatidylethanolamine, phosphatidylcholine, an unidentified aminolipid and unidentified lipids. The predominant fatty acids (>10% of the total contents) were iso-C15 : 0, iso-C15 : 1G, iso-C17 : 0 3-OH and summed feature 9, and MK-7 was the respiratory quinone. 
On the basis of the results of phenotypic, physiological, chemotaxonomic and genotypic characterization, strains XJ19-10[T] and XJ19-11 are considered to represent a novel species, for which the name Aquiflexum gelatinilyticum sp. nov. is proposed. The type strain is XJ19-10[T] (=CGMCC 1.19385[T] =KCTC 92266[T]).}, } @article {pmid36882215, year = {2023}, author = {Lee, Y and Kim, JH and Yoon, JH and Lee, JS and Sukhoom, A and Kim, W}, title = {Description of Defluviimonas salinarum sp. nov. with the potential of benzene-degradation isolated from saltern in the Yellow Seacoast.}, journal = {FEMS microbiology letters}, volume = {370}, number = {}, pages = {}, doi = {10.1093/femsle/fnad018}, pmid = {36882215}, issn = {1574-6968}, mesh = {*Phospholipids/chemistry ; Seawater/microbiology ; Benzene ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; *Rhodobacteraceae ; Bacterial Typing Techniques ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; }, abstract = {Strain CAU 1641T was isolated from saltern collected in Ganghwa Island, Republic of Korea. The bacterium was an aerobic, Gram-negative, catalase-positive, oxidase-positive, motile, and rod-shaped bacterium. Cells of strain CAU 1641T could grow at 20-40°C and pH 6.0-9.0 with 1.0-3.0% (w/v) NaCl. Strain CAU 1641T shared high 16S rRNA gene sequence similarities with Defluviimonas aquaemixtae KCTC 42108T (98.0%), Defluviimonas denitrificans DSM 18921T (97.6%), and Defluviimonas aestuarii KACC 16442T (97.5%). Phylogenetic trees based on the 16S rRNA gene and the core-genome sequences indicated that strain CAU 1641T belonged to genus Defluviimonas. Strain CAU 1641T contained ubiquinone-10 (Q-10) as the sole respiratory quinone and summed feature 8 (C18:1ω6c and/or C18:1ω7c) as the predominant fatty acid (86.1%). The pan-genome analysis indicated that the genomes of the strain CAU 1641T and 15 reference strains contain a small core genome. 
The Average Nucleotide Identity and digital DNA-DNA hybridization values among strain CAU 1641T and reference strains of the genus Defluviimonas were in the range of 77.6%-78.8% and 21.1-22.1%, respectively. The genome of strain CAU 1641T has several genes of benzene degradation. The genomic G + C content was 66.6%. Based on polyphasic and genomic analyses, strain CAU 1641T represents a novel species of the genus Defluviimonas, for which the name Defluviimonas salinarum sp. nov., is proposed. The type strain is CAU 1641T (= KCTC 92081T = MCCC 1K07180T).}, } @article {pmid36876113, year = {2023}, author = {Anderson, BD and Bisanz, JE}, title = {Challenges and opportunities of strain diversity in gut microbiome research.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1117122}, pmid = {36876113}, issn = {1664-302X}, abstract = {Just because two things are related does not mean they are the same. In analyzing microbiome data, we are often limited to species-level analyses, and even with the ability to resolve strains, we lack comprehensive databases and understanding of the importance of strain-level variation outside of a limited number of model organisms. The bacterial genome is highly plastic with gene gain and loss occurring at rates comparable or higher than de novo mutations. As such, the conserved portion of the genome is often a fraction of the pangenome which gives rise to significant phenotypic variation, particularly in traits which are important in host microbe interactions. In this review, we discuss the mechanisms that give rise to strain variation and methods that can be used to study it. We identify that while strain diversity can act as a major barrier in interpreting and generalizing microbiome data, it can also be a powerful tool for mechanistic research. We then highlight recent examples demonstrating the importance of strain variation in colonization, virulence, and xenobiotic metabolism. 
Moving past taxonomy and the species concept will be crucial for future mechanistic research to understand microbiome structure and function.}, } @article {pmid36875624, year = {2023}, author = {Nawae, W and Naktang, C and Charoensri, S and U-Thoomporn, S and Narong, N and Chusri, O and Tangphatsornruang, S and Pootakham, W}, title = {Resequencing of durian genomes reveals large genetic variations among different cultivars.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1137077}, pmid = {36875624}, issn = {1664-462X}, abstract = {Durian (Durio zibethinus), which yields the fruit known as the "King of Fruits," is an important economic crop in Southeast Asia. Several durian cultivars have been developed in this region. In this study, we resequenced the genomes of three popular durian cultivars in Thailand, including Kradumthong (KD), Monthong (MT), and Puangmanee (PM) to investigate genetic diversities of cultivated durians. KD, MT, and PM genome assemblies were 832.7, 762.6, and 821.6 Mb, and their annotations covered 95.7, 92.4, and 92.7% of the embryophyta core proteins, respectively. We constructed the draft durian pangenome and analyzed comparative genomes with related species in Malvales. Long terminal repeat (LTR) sequences and protein families in durian genomes had slower evolution rates than that in cotton genomes. However, protein families with transcriptional regulation function and protein phosphorylation function involved in abiotic and biotic stress responses appeared to evolve faster in durians. The analyses of phylogenetic relationships, copy number variations (CNVs), and presence/absence variations (PAVs) suggested that the genome evolution of Thai durians was different from that of the Malaysian durian, Musang King (MK). 
Among the three newly sequenced genomes, the PAV and CNV profiles of disease resistance genes and the expressions of methylesterase inhibitor domain containing genes involved in flowering and fruit maturation in MT were different from those in KD and PM. These genome assemblies and their analyses provide valuable resources to gain a better understanding of the genetic diversity of cultivated durians, which may be useful for the future development of new durian cultivars.}, } @article {pmid36875612, year = {2023}, author = {Shirasawa, K and Moraga, R and Ghelfi, A and Hirakawa, H and Nagasaki, H and Ghamkhar, K and Barrett, BA and Griffiths, AG and Isobe, SN}, title = {An improved reference genome for Trifolium subterraneum L. provides insight into molecular diversity and intra-specific phylogeny.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1103857}, pmid = {36875612}, issn = {1664-462X}, abstract = {Subterranean clover (Trifolium subterraneum L., Ts) is a geocarpic, self-fertile annual forage legume with a compact diploid genome (n = x = 8, 544 Mb/1C). Its resilience and climate adaptivity have made it an economically important species in Mediterranean and temperate zones. Using the cultivar Daliak, we generated higher resolution sequence data, created a new genome assembly TSUd_3.0, and conducted molecular diversity analysis for copy number variant (CNV) and single-nucleotide polymorphism (SNP) among 36 cultivars. TSUd_3.0 substantively improves prior genome assemblies with new Hi-C and long-read sequence data, covering 531 Mb, containing 41,979 annotated genes and generating a 94.4% BUSCO score. Comparative genomic analysis among select members of the tribe Trifolieae indicated TSUd_3.0 corrects six assembly-error inversion/duplications and confirmed phylogenetic relationships. Its synteny with T. pratense, T. repens, Medicago truncatula and Lotus japonicus genomes was assessed, with the more distantly related T. repens and M. 
truncatula showing higher levels of co-linearity with Ts than between Ts and its close relative T. pratense. Resequencing of 36 cultivars discovered 7,789,537 SNPs subsequently used for genomic diversity assessment and sequence-based clustering. Heterozygosity estimates ranged from 1% to 21% within the 36 cultivars and may be influenced by admixture. Phylogenetic analysis supported subspecific genetic structure, although it indicates four or five groups, rather than the three recognized subspecies. Furthermore, there were incidences where cultivars characterized as belonging to a particular subspecies clustered with another subspecies when using genomic data. These outcomes suggest that further investigation of Ts sub-specific classification using molecular and morpho-physiological data is needed to clarify these relationships. This upgraded reference genome, complemented with comprehensive sequence diversity analysis of 36 cultivars, provides a platform for future gene functional analysis of key traits, and genome-based breeding strategies for climate adaptation and agronomic performance. 
Pangenome analysis, more in-depth intra-specific phylogenomic analysis using the Ts core collection, and functional genetic and genomic studies are needed to further augment knowledge of Trifolium genomes.}, } @article {pmid36871069, year = {2023}, author = {Nowinski, B and Feng, X and Preston, CM and Birch, JM and Luo, H and Whitman, WB and Moran, MA}, title = {Ecological divergence of syntopic marine bacterial species is shaped by gene content and expression.}, journal = {The ISME journal}, volume = {17}, number = {6}, pages = {813--822}, pmid = {36871069}, issn = {1751-7370}, mesh = {RNA, Ribosomal, 16S/genetics/analysis ; *Genes, Bacterial ; *Rhodobacteraceae/genetics ; Phytoplankton/genetics ; Genomics ; Phylogeny ; Genome, Bacterial ; Seawater/microbiology ; }, abstract = {Identifying mechanisms by which bacterial species evolve and maintain genomic diversity is particularly challenging for the uncultured lineages that dominate the surface ocean. A longitudinal analysis of bacterial genes, genomes, and transcripts during a coastal phytoplankton bloom revealed two co-occurring, highly related Rhodobacteraceae species from the deeply branching and uncultured NAC11-7 lineage. These have identical 16S rRNA gene amplicon sequences, yet their genome contents assembled from metagenomes and single cells indicate species-level divergence. Moreover, shifts in relative dominance of the species during dynamic bloom conditions over 7 weeks confirmed the syntopic species' divergent responses to the same microenvironment at the same time. Genes unique to each species and genes shared but divergent in per-cell inventories of mRNAs accounted for 5% of the species' pangenome content. These analyses uncover physiological and ecological features that differentiate the species, including capacities for organic carbon utilization, attributes of the cell surface, metal requirements, and vitamin biosynthesis. 
Such insights into the coexistence of highly related and ecologically similar bacterial species in their shared natural habitat are rare.}, } @article {pmid36864624, year = {2023}, author = {Deorowicz, S and Danek, A and Li, H}, title = {AGC: compact representation of assembled genomes with fast queries and updates.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {3}, pages = {}, pmid = {36864624}, issn = {1367-4811}, support = {U01 HG010971/HG/NHGRI NIH HHS/United States ; R01 HG010040/HG/NHGRI NIH HHS/United States ; U01 HG010961/HG/NHGRI NIH HHS/United States ; }, mesh = {Sequence Analysis, DNA ; *Genome ; *Software ; High-Throughput Nucleotide Sequencing ; }, abstract = {MOTIVATION: High-quality sequence assembly is the ultimate representation of complete genetic information of an individual. Several ongoing pangenome projects are producing collections of high-quality assemblies of various species. Each project has already generated assemblies of hundreds of gigabytes on disk, greatly impeding the distribution of and access to such rich datasets.

RESULTS: Here, we show how to reduce the size of the sequenced genomes by 2-3 orders of magnitude. Our tool compresses the genomes significantly better than the existing programs and is much faster. Moreover, its unique feature is the ability to access any contig (or its part) in a fraction of a second and easily append new samples to the compressed collections. Thanks to this, AGC could be useful not only for backup or transfer purposes but also for routine analysis of pangenome sequences in common pipelines. With the rapidly reduced cost and improved accuracy of sequencing technologies, we anticipate more comprehensive pangenome projects with much larger sample sizes. AGC is likely to become a foundation tool to store, distribute and access pangenome data.

The source code of AGC is available at https://github.com/refresh-bio/agc. The package can be installed via Bioconda at https://anaconda.org/bioconda/agc.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid36864101, year = {2023}, author = {Yan, H and Sun, M and Zhang, Z and Jin, Y and Zhang, A and Lin, C and Wu, B and He, M and Xu, B and Wang, J and Qin, P and Mendieta, JP and Nie, G and Wang, J and Jones, CS and Feng, G and Srivastava, RK and Zhang, X and Bombarely, A and Luo, D and Jin, L and Peng, Y and Wang, X and Ji, Y and Tian, S and Huang, L}, title = {Pangenomic analysis identifies structural variation associated with heat tolerance in pearl millet.}, journal = {Nature genetics}, volume = {55}, number = {3}, pages = {507--518}, pmid = {36864101}, issn = {1546-1718}, mesh = {*Pennisetum/genetics ; *Thermotolerance/genetics ; Adaptation, Physiological/genetics ; Genomics ; Gene Expression Profiling ; }, abstract = {Pearl millet is an important cereal crop worldwide and shows superior heat tolerance. Here, we developed a graph-based pan-genome by assembling ten chromosomal genomes with one existing assembly adapted to different climates worldwide and captured 424,085 genomic structural variations (SVs). Comparative genomics and transcriptomics analyses revealed the expansion of the RWP-RK transcription factor family and the involvement of endoplasmic reticulum (ER)-related genes in heat tolerance. The overexpression of one RWP-RK gene led to enhanced plant heat tolerance and transactivated ER-related genes quickly, supporting the important roles of RWP-RK transcription factors and ER system in heat tolerance. Furthermore, we found that some SVs affected the gene expression associated with heat tolerance and SVs surrounding ER-related genes shaped adaptation to heat tolerance during domestication in the population. 
Our study provides a comprehensive genomic resource revealing insights into heat tolerance and laying a foundation for generating more robust crops under the changing climate.}, } @article {pmid36854668, year = {2023}, author = {Liu, J and Dawe, RK}, title = {Large haplotypes highlight a complex age structure within the maize pan-genome.}, journal = {Genome research}, volume = {33}, number = {3}, pages = {359-370}, pmid = {36854668}, issn = {1549-5469}, mesh = {Haplotypes ; *Zea mays/genetics ; *Centromere/genetics ; Genome, Plant ; Genomics/methods ; }, abstract = {The genomes of maize and other eukaryotes contain stable haplotypes in regions of low recombination. These regions, including centromeres, long heterochromatic blocks, and rDNA arrays, have been difficult to analyze with respect to their diversity and origin. Greatly improved genome assemblies are now available that enable comparative genomics over these and other nongenic spaces. Using 26 complete maize genomes, we developed methods to align intergenic sequences while excluding genes and regulatory regions. The centromere haplotypes (cenhaps) extend for megabases on either side of the functional centromere regions and appear as evolutionary strata, with haplotype divergence/coalescence times dating as far back as 450 thousand years ago (kya). Application of the same methods to other low recombination regions (heterochromatic knobs and rDNA) and all intergenic spaces revealed that deep coalescence times are ubiquitous across the maize pan-genome. Divergence estimates vary over a broad timescale with peaks at ∼16 and 300 kya, reflecting a complex history of gene flow among diverging populations and changes in population size associated with domestication. 
Cenhaps and other long haplotypes provide vivid displays of this ancient diversity.}, } @article {pmid36853054, year = {2023}, author = {Du, Y and Zou, J and Yin, Z and Chen, T}, title = {Pan-Chromosome and Comparative Analysis of Agrobacterium fabrum Reveal Important Traits Concerning the Genetic Diversity, Evolutionary Dynamics, and Niche Adaptation of the Species.}, journal = {Microbiology spectrum}, volume = {11}, number = {2}, pages = {e0292422}, pmid = {36853054}, issn = {2165-0497}, abstract = {Agrobacterium fabrum has been critical for the development of plant genetic engineering and agricultural biotechnology due to its ability to transform eukaryotic cells. However, the gene composition, evolutionary dynamics, and niche adaptation of this species are still unknown. Therefore, we established a comparative genomic analysis based on a pan-chromosome data set to evaluate the genetic diversity of A. fabrum. Here, 25 A. fabrum genomes were selected for analysis by core genome phylogeny combined with the average nucleotide identity (ANI), amino acid identity (AAI), and in silico DNA-DNA hybridization (DDH) values. An open pan-genome of A. fabrum exhibits genetic diversity with variable accessory genes as evidenced by a consensus pan-genome of 12 representative genomes. The genomic plasticity of A. fabrum is apparent in its putative sequences for mobile genetic elements (MGEs), limited horizontal gene transfer barriers, and potentially horizontally transferred genes. The evolutionary constraints and functional enrichment in the pan-chromosome were measured by the Clusters of Orthologous Groups (COG) categories using eggNOG-mapper software, and the nonsynonymous/synonymous rate ratio (dN/dS) was determined using HYPHY software. Comparative analysis revealed significant differences in the functional enrichment and the degree of purifying selection between the core genome and non-core genome. 
We demonstrate that the core gene families undergo stronger purifying selection but have a significant bias to contain one or more positively selected sites. Furthermore, although they shared similar genetic diversity, we observed significant differences between chromosome 1 (Chr I) and the chromid in their functional features and evolutionary constraints. We demonstrate that putative genetic elements responsible for plant infection, ecological adaptation, and speciation represent the core genome, highlighting their importance in the adaptation of A. fabrum to plant-related niches. Our pan-chromosome analysis of A. fabrum provides comprehensive insights into the genetic properties, evolutionary patterns, and niche adaptation of the species. IMPORTANCE Agrobacterium spp. live in diverse plant-associated niches such as soil, the rhizosphere, and vegetation, which are challenged by multiple stressors such as diverse energy sources, plant defenses, and microbial competition. They have evolved the ability to utilize diverse resources, escape plant defenses, and defeat competitors. However, the underlying genetic diversity and evolutionary dynamics of Agrobacterium spp. remain unexplored. We examined the phylogeny and pan-genome of A. fabrum to define intraspecies evolutionary relationships. Our results indicate an open pan-genome and numerous MGEs and horizontally transferred genes among A. fabrum genomes, reflecting the flexibility of the chromosomes and the potential for genetic exchange. 
Furthermore, we observed significant differences in the functional features and evolutionary constraints between the core and accessory genomes and between Chr I and the chromid, respectively.}, } @article {pmid36852268, year = {2023}, author = {Jiang, YF and Wang, S and Wang, CL and Xu, RH and Wang, WW and Jiang, Y and Wang, MS and Jiang, L and Dai, LH and Wang, JR and Chu, XH and Zeng, YQ and Fang, LZ and Wu, DD and Zhang, Q and Ding, XD}, title = {Pangenome obtained by long-read sequencing of 11 genomes reveal hidden functional structural variants in pigs.}, journal = {iScience}, volume = {26}, number = {3}, pages = {106119}, pmid = {36852268}, issn = {2589-0042}, abstract = {Long-read sequencing (LRS) facilitates both the genome assembly and the discovery of structural variants (SVs). Here, we built a graph-based pig pangenome by incorporating 11 LRS genomes with an average of 94.01% BUSCO completeness score, revealing 206-Mb novel sequences. We discovered 183,352 nonredundant SVs (63% novel), representing 12.12% of the reference genome. By genotyping SVs in an additional 196 short-read sequencing samples, we identified thousands of population stratified SVs. Particularly, we detected 7,568 Tibetan specific SVs, some of which demonstrate significant population differentiation between Tibetan and low-altitude pigs, which might be associated with the high-altitude hypoxia adaptation in Tibetan pigs. Further integrating functional genomic data, the most promising candidate genes within the SVs that might contribute to the high-altitude hypoxia adaptation were discovered. 
Overall, our study generates a benchmark pangenome resource for illustrating the important roles of SVs in adaptive evolution, domestication, and genetic improvement of agronomic traits in pigs.}, } @article {pmid36851839, year = {2023}, author = {Dallinger, HG and Löschenberger, F and Azrak, N and Ametz, C and Michel, S and Bürstmayr, H}, title = {Genome-wide association mapping for pre-harvest sprouting in European winter wheat detects novel resistance QTL, pleiotropic effects, and structural variation in multiple genomes.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20301}, doi = {10.1002/tpg2.20301}, pmid = {36851839}, issn = {1940-3372}, support = {BeyondEuropeprojectCAWINT : 855737//Österreichische Forschungsförderungsgesellschaft/ ; }, abstract = {Pre-harvest sprouting (PHS), germination of seeds before harvest, is a major problem in global wheat (Triticum aestivum L.) production, and leads to reduced bread-making quality in affected grain. Breeding for PHS resistance can prevent losses under adverse conditions. Selecting resistant lines in years lacking pre-harvest rain requires challenging plants in the field or in the laboratory, or using genetic markers. Despite the availability of a wheat reference and pan-genome, linking markers, genes, allelic, and structural variation, a complete understanding of the mechanisms underlying various sources of PHS resistance is still lacking. Therefore, we challenged a population of European wheat varieties and breeding lines with PHS conditions and phenotyped them for PHS traits, grain quality, phenological and agronomic traits to conduct genome-wide association mapping. Furthermore, we compared these marker-trait associations to previously reported PHS loci and evaluated their usefulness for breeding. We found markers associated with PHS on all chromosomes, with strong evidence for novel quantitative trait locus/loci (QTL) on chromosomes 1A and 5B. 
The QTL on chromosome 1A lacks pleiotropic effects, whereas for the QTL on 5B we detected pleiotropic effects on phenology and grain quality. Multiple peaks on chromosome 4A co-located with the major resistance locus Phs-A1, for which two causal genes, TaPM19 and TaMKK3, have been proposed. Mapping markers and genes to the pan-genome and chromosomal alignments provide evidence for structural variation around this major PHS-resistance locus. Although PHS is controlled by many loci distributed across the wheat genome, Phs-A1 on chromosome 4A seems to be the most effective and widely deployed source of resistance in European wheat varieties.}, } @article {pmid36851180, year = {2023}, author = {Chandrasekar, SS and Kingstad-Bakke, BA and Wu, CW and Phanse, Y and Osorio, JE and Talaat, AM}, title = {A DNA Prime and MVA Boost Strategy Provides a Robust Immunity against Infectious Bronchitis Virus in Chickens.}, journal = {Vaccines}, volume = {11}, number = {2}, pages = {}, pmid = {36851180}, issn = {2076-393X}, support = {2020-67021-31256//United States Department of Agriculture/ ; //Wisconsin Alumni Research Foundation/ ; }, abstract = {Infectious bronchitis (IB) is an acute respiratory disease of chickens caused by the avian coronavirus Infectious Bronchitis Virus (IBV). Modified Live Virus (MLV) vaccines used commercially can revert to virulence in the field, recombine with circulating serotypes, and cause tissue damage in vaccinated birds. Previously, we showed that a mucosal adjuvant system, QuilA-loaded Chitosan (QAC) nanoparticles encapsulating plasmid vaccine encoding for IBV nucleocapsid (N), is protective against IBV. Herein, we report a heterologous vaccination strategy against IBV, where QAC-encapsulated plasmid immunization is followed by Modified Vaccinia Ankara (MVA) immunization, both expressing the same IBV-N antigen. This strategy led to the initiation of robust T-cell responses. 
Birds immunized with the heterologous vaccine strategy had reduced clinical severity and >two-fold reduction in viral burden in lachrymal fluid and tracheal swabs post-challenge compared to priming and boosting with the MVA-vectored vaccine alone. The outcomes of this study indicate that the heterologous vaccine platform is more immunogenic and protective than a homologous MVA prime/boost vaccination strategy.}, } @article {pmid36848567, year = {2023}, author = {Sierra-Patev, S and Min, B and Naranjo-Ortiz, M and Looney, B and Konkel, Z and Slot, JC and Sakamoto, Y and Steenwyk, JL and Rokas, A and Carro, J and Camarero, S and Ferreira, P and Molpeceres, G and Ruiz-Dueñas, FJ and Serrano, A and Henrissat, B and Drula, E and Hughes, KW and Mata, JL and Ishikawa, NK and Vargas-Isla, R and Ushijima, S and Smith, CA and Donoghue, J and Ahrendt, S and Andreopoulos, W and He, G and LaButti, K and Lipzen, A and Ng, V and Riley, R and Sandor, L and Barry, K and Martínez, AT and Xiao, Y and Gibbons, JG and Terashima, K and Grigoriev, IV and Hibbett, D}, title = {A global phylogenomic analysis of the shiitake genus Lentinula.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {10}, pages = {e2214076120}, pmid = {36848567}, issn = {1091-6490}, support = {/HHMI/Howard Hughes Medical Institute/United States ; R56 AI146096/AI/NIAID NIH HHS/United States ; }, mesh = {*Lentinula ; Phylogeny ; Asia, Eastern ; Thailand ; }, abstract = {Lentinula is a broadly distributed group of fungi that contains the cultivated shiitake mushroom, L. edodes. We sequenced 24 genomes representing eight described species and several unnamed lineages of Lentinula from 15 countries on four continents. Lentinula comprises four major clades that arose in the Oligocene, three in the Americas and one in Asia-Australasia. To expand sampling of shiitake mushrooms, we assembled 60 genomes of L. 
edodes from China that were previously published as raw Illumina reads and added them to our dataset. Lentinula edodes sensu lato (s. lat.) contains three lineages that may warrant recognition as species, one including a single isolate from Nepal that is the sister group to the rest of L. edodes s. lat., a second with 20 cultivars and 12 wild isolates from China, Japan, Korea, and the Russian Far East, and a third with 28 wild isolates from China, Thailand, and Vietnam. Two additional lineages in China have arisen by hybridization among the second and third groups. Genes encoding cysteine sulfoxide lyase (lecsl) and γ-glutamyl transpeptidase (leggt), which are implicated in biosynthesis of the organosulfur flavor compound lenthionine, have diversified in Lentinula. Paralogs of both genes that are unique to Lentinula (lecsl 3 and leggt 5b) are coordinately up-regulated in fruiting bodies of L. edodes. The pangenome of L. edodes s. lat. contains 20,308 groups of orthologous genes, but only 6,438 orthogroups (32%) are shared among all strains, whereas 3,444 orthogroups (17%) are found only in wild populations, which should be targeted for conservation.}, } @article {pmid36844929, year = {2023}, author = {Nielsen, FD and Møller-Jensen, J and Jørgensen, MG}, title = {Adding context to the pneumococcal core genes using bioinformatic analysis of the intergenic pangenome of Streptococcus pneumoniae.}, journal = {Frontiers in bioinformatics}, volume = {3}, number = {}, pages = {1074212}, pmid = {36844929}, issn = {2673-7647}, abstract = {Introduction: Whole genome sequencing offers great opportunities for linking genotypes to phenotypes aiding in our understanding of human disease and bacterial pathogenicity. However, these analyses often overlook non-coding intergenic regions (IGRs). By disregarding the IGRs, crucial information is lost, as genes have little biological function without expression. 
Methods/Results: In this study, we present the first complete pangenome of the important human pathogen Streptococcus pneumoniae (pneumococcus), spanning both the genes and IGRs. We show that the pneumococcus species retains a small core genome of IGRs that are present across all isolates. Gene expression is highly dependent on these core IGRs, and often several copies of these core IGRs are found across each genome. Core genes and core IGRs show a clear linkage as 81% of core genes are associated with core IGRs. Additionally, we identify a single IGR within the core genome that is always occupied by one of two highly distinct sequences, scattered across the phylogenetic tree. Discussion: Their distribution indicates that this IGR is transferred between isolates through horizontal regulatory transfer independent of the flanking genes and that each type likely serves different regulatory roles depending on their genetic context.}, } @article {pmid36838392, year = {2023}, author = {Sugrue, I and Hill, D and O'Connor, PM and Day, L and Stanton, C and Hill, C and Ross, RP}, title = {Nisin E Is a Novel Nisin Variant Produced by Multiple Streptococcus equinus Strains.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838392}, issn = {2076-2607}, support = {SFI/12/RC/2273//Science Foundation Ireland/Ireland ; }, abstract = {Nisin A, the prototypical lantibiotic, is an antimicrobial peptide currently utilised as a food preservative, with potential for therapeutic applications. Here, we describe nisin E, a novel nisin variant produced by two Streptococcus equinus strains, APC4007 and APC4008, isolated from sheep milk. Shotgun whole genome sequencing and analysis revealed biosynthetic gene clusters similar to nisin U, with a unique rearrangement of the core peptide encoding gene within the cluster. 
The 3100.8 Da peptide, as determined by MALDI-TOF mass spectrometry, is 75% identical to nisin A, with 10 differences, including 2 deletions: Ser29 and Ile30, and 8 substitutions: Ile4Lys, Gly18Thr, Asn20Pro, Met21Ile, His27Gly, Val32Phe, Ser33Gly, and Lys34Asn. Nisin E producing strains inhibited species of Lactobacillus, Bacillus, and Clostridioides and were immune to nisin U. Sequence alignment identified putative promoter sequences across the nisin producer genera, allowing for the prediction of genes in Streptococcus to be potentially regulated by nisin. S. equinus pangenome BLAST analyses detected 6 nisin E operons across 44 publicly available genomes. An additional 20 genomes contained a subset of nisin E transport/immunity and regulatory genes (nseFEGRK), without adjacent peptide production genes. These genes suggest that nisin E response mechanisms, distinct from the canonical nisin immunity and resistance operons, are widespread across the S. equinus species. The discovery of this new nisin variant and its immunity determinants in S. equinus suggests a central role for nisin in the competitive nature of the species.}, } @article {pmid36838372, year = {2023}, author = {Jiang, S and Fan, Q and Zhang, Z and Deng, Y and Wang, L and Dai, Q and Wang, J and Lin, M and Zhou, J and Long, Z and He, G and Zhou, Z}, title = {Biodegradation of Oil by a Newly Isolated Strain Acinetobacter junii WCO-9 and Its Comparative Pan-Genome Analysis.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838372}, issn = {2076-2607}, support = {2018YFA0901000, 2018YFA0901003//National Key R&D Program of China/ ; NJ202201//Open Fund of Key Laboratory in Luzhou/ ; }, abstract = {Waste oil pollution and the treatment of oily waste present a challenge, and the exploitation of microbial resources is a safe and efficient method to resolve these problems. 
Lipase-producing microorganisms can directly degrade waste oil and promote the degradation of oily waste and, therefore, have very significant research and application value. The isolation of efficient oil-degrading strains is of great practical significance in research into microbial remediation in oil-contaminated environments and for the enrichment of the microbial lipase resource library. In this study, Acinetobacter junii WCO-9, an efficient oil-degrading bacterium, was isolated from an oil-contaminated soil using olive oil as the sole carbon source, and its enzyme activity of ρ-nitrophenyl decanoate (ρ-NPD) decomposition was 3000 U/L. The WCO-9 strain could degrade a variety of edible oils, and its degradation capability was significantly better than that of the control strain, A. junii ATCC 17908. Comparative pan-genome and lipid degradation pathway analyses indicated that A. junii isolated from the same environment shared a similar set of core genes and that the species accumulated more specific genes that facilitated resistance to environmental stresses under different environmental conditions. WCO-9 has accumulated a complete set of oil metabolism genes under a long-term oil-contamination environment, and the compact arrangement of abundant lipase and lipase chaperones has further strengthened the ability of the strain to survive in such environments. This is the main reason why WCO-9 is able to degrade oil significantly more effectively than ATCC 17908. In addition, WCO-9 possesses a specific lipase that is not found in homologous strains. In summary, A. 
junii WCO-9, with a complete triglyceride degradation pathway and the specific lipase gene, has great potential in environmental remediation and lipase for industry.}, } @article {pmid36838305, year = {2023}, author = {Gonçalves-Oliveira, J and Gutierrez, R and Schlesener, CL and Jaffe, DA and Aguilar-Setién, A and Boulouis, HJ and Nachum-Biala, Y and Huang, BC and Weimer, BC and Chomel, BB and Harrus, S}, title = {Genomic Characterization of Three Novel Bartonella Strains in a Rodent and Two Bat Species from Mexico.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838305}, issn = {2076-2607}, abstract = {Rodents and bats are the most diverse mammal group that host Bartonella species. In the Americas, they were described as harboring Bartonella species; however, they were mostly characterized to the genotypic level. We describe here Bartonella isolates obtained from blood samples of one rodent (Peromyscus yucatanicus from San José Pibtuch, Yucatan) and two bat species (Desmodus rotundus from Progreso, and Pteronotus parnellii from Chamela-Cuitzmala) from Mexico. We sequenced and described the genomic features of three Bartonella strains and performed phylogenomic and pangenome analyses to decipher their phylogenetic relationships. The mouse-associated genome was closely related to Bartonella vinsonii. The two bat-associated genomes clustered into a single distinct clade in between lineages 3 and 4, suggesting to be an ancestor of the rodent-associated Bartonella clade (lineage 4). These three genomes showed <95% OrthoANI values compared to any other Bartonella genome, and therefore should be considered as novel species. In addition, our analyses suggest that the B. vinsonii complex should be revised, and all B. vinsonii subspecies need to be renamed and considered as full species. 
The phylogenomic clustering of the bat-associated Bartonella strains and their virulence factor profile (lack of the Vbh/TraG conjugation system remains of the T4SS) suggest that it should be considered as a new lineage clade (L5) within the Bartonella genus.}, } @article {pmid36838222, year = {2023}, author = {Mughal, SR and Niazi, SA and Do, T and Gilbert, SC and Didelot, X and Radford, DR and Beighton, D}, title = {Genomic Diversity among Actinomyces naeslundii Strains and Closely Related Species.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838222}, issn = {2076-2607}, abstract = {The aim of this study was to investigate and clarify the ambiguous taxonomy of Actinomyces naeslundii and its closely related species using state-of-the-art high-throughput sequencing techniques, and, furthermore, to determine whether sub-clusters identified within Actinomyces oris and Actinomyces naeslundii in a previous study by multi locus sequence typing (MLST) using concatenation of seven housekeeping genes should either be classified as subspecies or distinct species. The strains in this study were broadly classified under Actinomyces naeslundii group as A. naeslundii genospecies I and genospecies II. Based on MLST data analysis, these were further classified as A. oris and A. naeslundii. The whole genome sequencing of selected strains of A. oris (n = 17) and A. naeslundii (n = 19) was carried out using Illumina Genome Analyzer IIxe and Roche 454 allowing paired-end and single-reads sequencing, respectively. The sequences obtained were aligned using CLC Genomic workbench version 5.1 and annotated using RAST (Rapid Annotation using Subsystem Technology) release version 59 accessible online. Additionally, genomes of seven publicly available strains of Actinomyces (k20, MG1, c505, OT175, OT171, OT170, and A. johnsonii) were also included. 
Comparative genomic analysis (CGA) using Mauve, Progressive Mauve, gene-by-gene, Core, and Pan Genome, and finally Digital DNA-DNA homology (DDH) analysis was carried out. DDH values were obtained using in silico genome-genome comparison. Evolutionary analysis using ClonalFrame was also undertaken. The mutation and recombination events were compared using chi-square test among A. oris and A. naeslundii isolates (analysis methods are not included in the study). CGA results were consistent with previous traditional classification using MLST. It was found that strains of Actinomyces k20, MG1, c505, and OT175 clustered in A. oris group of isolates, while OT171, OT170, and A. johnsonii appeared as separate branches. Similar clustering to MLST was observed for other isolates. The mutation and recombination events were significantly higher in A. oris than A. naeslundii, highlighting the diversity of A. oris strains in the oral cavity. These findings suggest that A. oris forms six distinct groups, whereas A. naeslundii forms three. The correct designation of isolates will help in the identification of clinical Actinomyces isolates found in dental plaque. Easily accessible online genomic sequence data will also accelerate the investigation of the biochemical characterisation and pathogenesis of this important group of micro-organisms.}, } @article {pmid36836896, year = {2023}, author = {Jalal, K and Khan, K and Hayat, A and Alnasser, SM and Meshal, A and Basharat, Z}, title = {Pan-Genomics of Escherichia albertii for Antibiotic Resistance Profiling in Different Genome Fractions and Natural Product Mediated Intervention: In Silico Approach.}, journal = {Life (Basel, Switzerland)}, volume = {13}, number = {2}, pages = {}, pmid = {36836896}, issn = {2075-1729}, abstract = {Escherichia albertii is an emerging, enteric pathogen of significance. It was first isolated in 2003 from a pediatric diarrheal sample from Bangladesh. 
In this study, a comprehensive in silico strategy was followed to first list out antibiotic-resistant genes from core, accessory and unique genome fractions of 95 available genomes of E. albertii. Then, 56 drug targets were identified from the core essential genome. Finally, ZipA, an essential cell division protein that stabilizes the FtsZ protofilaments by cross-linking them and serves as a cytoplasmic membrane anchor for the Z ring, was selected for further downstream processing. It was computationally modeled using a threading approach, followed by virtual screening of two phytochemical libraries, Ayurvedic (n = 2103 compounds) and Traditional Chinese Medicine (n = 36,043 compounds). ADMET profiling, followed by PBPK modeling in the central body compartment, in a population of 250 non-diseased, 250 cirrhotic and 250 renally impaired people was attempted. ZINC85624912 from Chinese medicinal library showed the highest bioavailability and plasma retention. This is the first attempt to simulate the fate of natural products in the body through PBPK. Dynamics simulation of 20 ns for the top three compounds from both libraries was also performed to validate the stability of the compounds. The obtained information from the current study could aid wet-lab scientists to work on the scaffold of screened drug-like compounds from natural resources and could be useful in our quest for therapy against antibiotic-resistant E. 
albertii.}, } @article {pmid36835570, year = {2023}, author = {Balabanova, L and Nedashkovskaya, O and Otstavnykh, N and Isaeva, M and Kolpakova, O and Pentehina, I and Seitkalieva, A and Noskova, Y and Stepochkina, V and Son, O and Tekutyeva, L}, title = {Computational Insight into Intraspecies Distinctions in Pseudoalteromonas distincta: Carotenoid-like Synthesis Traits and Genomic Heterogeneity.}, journal = {International journal of molecular sciences}, volume = {24}, number = {4}, pages = {}, pmid = {36835570}, issn = {1422-0067}, support = {075-11-2021-065//the Ministry of Science and Higher Education of the Russian Federation within the framework of Decree of the Government of the Russian Federation № 218/ ; }, mesh = {*Pseudoalteromonas/genetics ; Genomics ; Carotenoids/metabolism ; Glycosylation ; Phenotype ; Phylogeny ; }, abstract = {Advances in the computational annotation of genomes and the predictive potential of current metabolic models, based on more than thousands of experimental phenotypes, allow them to be applied to identify the diversity of metabolic pathways at the level of ecophysiology differentiation within taxa and to predict phenotypes, secondary metabolites, host-associated interactions, survivability, and biochemical productivity under proposed environmental conditions. The significantly distinctive phenotypes of members of the marine bacterial species Pseudoalteromonas distincta and an inability to use common molecular markers make their identification within the genus Pseudoalteromonas and prediction of their biotechnology potential impossible without genome-scale analysis and metabolic reconstruction. A new strain, KMM 6257, of a carotenoid-like phenotype, isolated from a deep-habituating starfish, emended the description of P. distincta, particularly in the temperature growth range from 4 to 37 °C. The taxonomic status of all available closely related species was elucidated by phylogenomics. P. 
distincta possesses putative methylerythritol phosphate pathway II and 4,4'-diapolycopenedioate biosynthesis, related to C30 carotenoids, and their functional analogues, aryl polyene biosynthetic gene clusters (BGC). However, the yellow-orange pigmentation phenotypes in some strains coincide with the presence of a hybrid BGC encoding for aryl polyene esterified with resorcinol. The alginate degradation and glycosylated immunosuppressant production, similar to brasilicardin, streptorubin, and nucleocidines, are the common predicted features. Starch, agar, carrageenan, xylose, lignin-derived compound degradation, polysaccharide, folate, and cobalamin biosynthesis are all strain-specific.}, } @article {pmid36834516, year = {2023}, author = {Li, H and Tahir Ul Qamar, M and Yang, L and Liang, J and You, J and Wang, L}, title = {Current Progress, Applications and Challenges of Multi-Omics Approaches in Sesame Genetic Improvement.}, journal = {International journal of molecular sciences}, volume = {24}, number = {4}, pages = {}, pmid = {36834516}, issn = {1422-0067}, support = {CAAS-ASTIP-2016-OCRI//the Agricultural Science and Technology Innovation Project of the Chinese Academy of Agricultural Sciences/ ; 2020BBA045//the Key Research Projects of Hubei province/ ; 2021-620-000-001-035//the Science and Technology Innovation Project of Hubei province/ ; Y2022XK11//the Fundamental Research Funds for Central Non-profit Scientific Institution/ ; KF2022002//the Open Project of Key Laboratory of Biology and Genetic Improvement of Oil Crops, Ministry of Agriculture and Rural Affairs, P.R. 
China/ ; CARS-14//China Agriculture Research System/ ; 2022020801020299//the Knowledge Innovation Program of Wuhan-Shuguang Project/ ; 1610172022010//the Central Public-interest Scientific Institution Basal Research Fund/ ; }, mesh = {*Sesamum/genetics ; Multiomics ; Plant Breeding ; Genomics/methods ; Proteomics/methods ; }, abstract = {Sesame is one of the important traditional oil crops in the world, and has high economic and nutritional value. Recently, due to the novel high throughput sequencing techniques and bioinformatical methods, the study of the genomics, methylomics, transcriptomics, proteomics and metabonomics of sesame has developed rapidly. Thus far, the genomes of five sesame accessions have been released, including white and black seed sesame. The genome studies reveal the function and structure of the sesame genome, and facilitate the exploitation of molecular markers, the construction of genetic maps and the study of pan-genomes. Methylomics focus on the study of the molecular level changes under different environmental conditions. Transcriptomics provide a powerful tool to study abiotic/biotic stress, organ development, and noncoding RNAs, and proteomics and metabonomics also provide some support in studying abiotic stress and important traits. In addition, the opportunities and challenges of multi-omics in sesame genetics breeding were also described. 
This review summarizes the current research status of sesame from the perspectives of multi-omics and hopes to provide help for further in-depth research on sesame.}, } @article {pmid36833201, year = {2023}, author = {Liu, S and Jiao, J and Tian, CF}, title = {Adaptive Evolution of Rhizobial Symbiosis beyond Horizontal Gene Transfer: From Genome Innovation to Regulation Reconstruction.}, journal = {Genes}, volume = {14}, number = {2}, pages = {}, pmid = {36833201}, issn = {2073-4425}, mesh = {*Rhizobium/genetics ; Symbiosis/genetics ; Gene Transfer, Horizontal ; Ecosystem ; Nitrogen Fixation/genetics ; *Fabaceae/microbiology ; }, abstract = {There are ubiquitous variations in symbiotic performance of different rhizobial strains associated with the same legume host in agricultural practices. This is due to polymorphisms of symbiosis genes and/or largely unexplored variations in integration efficiency of symbiotic function. Here, we reviewed cumulative evidence on integration mechanisms of symbiosis genes. Experimental evolution, in concert with reverse genetic studies based on pangenomics, suggests that gain of the same circuit of key symbiosis genes through horizontal gene transfer is necessary but sometimes insufficient for bacteria to establish an effective symbiosis with legumes. An intact genomic background of the recipient may not support the proper expression or functioning of newly acquired key symbiosis genes. Further adaptive evolution, through genome innovation and reconstruction of regulation networks, may confer the recipient of nascent nodulation and nitrogen fixation ability. Other accessory genes, either co-transferred with key symbiosis genes or stochastically transferred, may provide the recipient with additional adaptability in ever-fluctuating host and soil niches. 
Successful integrations of these accessory genes with the rewired core network, regarding both symbiotic and edaphic fitness, can optimize symbiotic efficiency in various natural and agricultural ecosystems. This progress also sheds light on the development of elite rhizobial inoculants using synthetic biology procedures.}, } @article {pmid36831244, year = {2023}, author = {Apicella, C and Ruano, CSM and Thilaganathan, B and Khalil, A and Giorgione, V and Gascoin, G and Marcellin, L and Gaspar, C and Jacques, S and Murdoch, CE and Miralles, F and Méhats, C and Vaiman, D}, title = {Pan-Genomic Regulation of Gene Expression in Normal and Pathological Human Placentas.}, journal = {Cells}, volume = {12}, number = {4}, pages = {}, pmid = {36831244}, issn = {2073-4409}, mesh = {Humans ; Pregnancy ; Female ; *Placenta/metabolism ; *Trophoblasts/metabolism ; Transcriptome ; Gene Expression Regulation ; Genomics ; }, abstract = {In this study, we attempted to find genetic variants affecting gene expression (eQTL = expression Quantitative Trait Loci) in the human placenta in normal and pathological situations. The analysis of gene expression in placental diseases (Pre-eclampsia and Intra-Uterine Growth Restriction) is hindered by the fact that diseased placental tissue samples are generally taken at earlier gestations compared to control samples. The difference in gestational age is considered a major confounding factor in the transcriptome regulation of the placenta. To alleviate this significant problem, we propose here a novel approach to pinpoint disease-specific cis-eQTLs. By statistical correction for gestational age at sampling as well as other confounding/surrogate variables systematically searched and identified, we found 43 e-genes for which proximal SNPs influence expression level. 
Then, we performed the analysis again, removing the disease status from the covariates, and we identified 54 e-genes, 16 of which are identified de novo and, thus, possibly related to placental disease. We found a highly significant overlap with previous studies for the list of 43 e-genes, validating our methodology and findings. Among the 16 disease-specific e-genes, several are intrinsic to trophoblast biology and, therefore, constitute novel targets of interest to better characterize placental pathology and its varied clinical consequences. The approach that we used may also be applied to the study of other human diseases where confounding factors have hampered a better understanding of the pathology.}, } @article {pmid36830307, year = {2023}, author = {Liu, H and Liu, X and He, J and Zhang, L and Zhao, F and Zhou, Z and Hua, X and Yu, Y}, title = {Emergence and Evolution of OXA-23-Producing ST46Pas-ST462Oxf-KL28-OCL1 Carbapenem-Resistant Acinetobacter baumannii Mediated by a Novel ISAba1-Based Tn7534 Transposon.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {2}, pages = {}, pmid = {36830307}, issn = {2079-6382}, support = {2018YFE0102100//National Key Research and Development Program of China grant/ ; 81861138054//National Natural Science Foundation of China/ ; 82072313//National Natural Science Foundation of China/ ; }, abstract = {Carbapenem-resistant Acinetobacter baumannii (CRAB) isolates of global clone 1 (GC1) and global clone 2 (GC2) have been widely reported. Nevertheless, non-GC1 and non-GC2 CRAB strains have been studied less. In particular, no reports concerning sequence type 46 (ST46Pas) CRAB strains have been described thus far. In this work, the genomic features and possible evolution mechanism of ST46Pas OXA-23-producing CRAB isolates from clinical specimens are reported for the first time. 
Antimicrobial susceptibility testing of three ST46Pas strains revealed identical resistance profiles (resistance to imipenem, meropenem, ciprofloxacin and the combination of cefoperazone/sulbactam at a 2:1 ratio). They were found to belong to ST46Pas and ST462Oxf with capsular polysaccharide 28 (KL28) and lipooligosaccharide 1 (OCL1), respectively. Whole-genome sequencing (WGS) revealed that all contained one copy of chromosomal blaOXA-23, which was located in a novel ISAba1-based Tn7534 composite transposon. In particular, another copy of the Tn7534 composite transposon was identified in an Hgz_103-type plasmid with 9 bp target site duplications (TSDs, ACAACATGC) in the A. baumannii ZHOU strain. As the strains originated from two neighboring intensive care units (ICUs), ST46Pas OXA-23-producing CRAB strains may have evolved via transposition events or a pdif module. Based on the GenBank database, ST46Pas strains were collected from various sources; however, most were collected in Hangzhou (China) from 2014 to 2021. Pan-genome analysis revealed 3276 core genes, 0 soft-core genes, 768 shell genes and 443 cloud genes shared among all ST46Pas strains. 
In conclusion, the emergence of ST46Pas CRAB strains might present a new threat to healthcare settings; therefore, effective surveillance is required to prevent further dissemination.}, } @article {pmid36828537, year = {2023}, author = {Parker, K and Wood, H and Russell, JA and Yarmosh, D and Shteyman, A and Bagnoli, J and Knight, B and Aspinwall, JR and Jacobs, J and Werking, K and Winegar, R}, title = {Development and Optimization of an Unbiased, Metagenomics-Based Pathogen Detection Workflow for Infectious Disease and Biosurveillance Applications.}, journal = {Tropical medicine and infectious disease}, volume = {8}, number = {2}, pages = {}, pmid = {36828537}, issn = {2414-6366}, support = {HDTRA1-15-C-0013//the Defense Threat Reduction Agency-Joint Science and Technology Office for Chemical and Biological Defense/ ; }, abstract = {Rapid, specific, and sensitive identification of microbial pathogens is critical to infectious disease diagnosis and surveillance. Classical culture-based methods can be applied to a broad range of pathogens but have long turnaround times. Molecular methods, such as PCR, are time-effective but are not comprehensive and may not detect novel strains. Metagenomic shotgun next-generation sequencing (NGS) promises specific identification and characterization of any pathogen (viruses, bacteria, fungi, and protozoa) in a less biased way. Despite its great potential, NGS has yet to be widely adopted by clinical microbiology laboratories due in part to the absence of standardized workflows. Here, we describe a sample-to-answer workflow called PanGIA (Pan-Genomics for Infectious Agents) that includes simplified, standardized wet-lab procedures and data analysis with an easy-to-use bioinformatics tool. PanGIA is an end-to-end, multi-use workflow that can be used for pathogen detection and related applications, such as biosurveillance and biothreat detection. 
We performed a comprehensive survey and assessment of current, commercially available wet-lab technologies and open-source bioinformatics tools for each workflow component. The workflow includes total nucleic acid extraction from clinical human whole blood and environmental microbial forensic swabs as sample inputs, host nucleic acid depletion, dual DNA and RNA library preparation, shotgun sequencing on an Illumina MiSeq, and sequencing data analysis. The PanGIA workflow can be completed within 24 h and is currently compatible with bacteria and viruses. Here, we present data from the development and application of the clinical and environmental workflows, enabling the specific detection of pathogens associated with bloodstream infections and environmental biosurveillance, without the need for targeted assay development.}, } @article {pmid36824763, year = {2023}, author = {Joubert, PM and Krasileva, KV}, title = {Distinct genomic contexts predict gene presence-absence variation in different pathotypes of a fungal plant pathogen.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {36824763}, support = {DP2 AT011967/AT/NCCIH NIH HHS/United States ; }, abstract = {BACKGROUND: Fungi use the accessory segments of their pan-genomes to adapt to their environments. While gene presence-absence variation (PAV) contributes to shaping these accessory gene reservoirs, whether these events happen in specific genomic contexts remains unclear. Additionally, since pan-genome studies often group together all members of the same species, it is uncertain whether genomic or epigenomic features shaping pan-genome evolution are consistent across populations within the same species. Fungal plant pathogens are useful models for answering these questions because members of the same species often infect distinct hosts, and they frequently rely on gene PAV to adapt to these hosts.

RESULTS: We analyzed gene PAV in the rice and wheat blast fungus, Magnaporthe oryzae, and found that PAV of disease-causing effectors, antibiotic production, and non-self-recognition genes may drive the adaptation of the fungus to its environment. We then analyzed genomic and epigenomic features and data from available datasets for patterns that might help explain these PAV events. We observed that proximity to transposable elements (TEs), gene GC content, gene length, expression level in the host, and histone H3K27me3 marks were different between PAV genes and conserved genes, among other features. We used these features to construct a random forest classifier that was able to predict whether a gene is likely to experience PAV with high precision (86.06%) and recall (92.88%) in rice-infecting M. oryzae. Finally, we found that PAV in wheat- and rice-infecting pathotypes of M. oryzae differed in their number and their genomic context.

CONCLUSIONS: Our results suggest that genomic and epigenomic features of gene PAV can be used to better understand and even predict fungal pan-genome evolution. We also show that substantial intra-species variation can exist in these features.}, } @article {pmid36824272, year = {2023}, author = {Gao, Y and Xu, J and Li, Z and Zhang, Y and Riera, N and Xiong, Z and Ouyang, Z and Liu, X and Lu, Z and Seymour, D and Zhong, B and Wang, N}, title = {Citrus genomic resources unravel putative genetic determinants of Huanglongbing pathogenicity.}, journal = {iScience}, volume = {26}, number = {2}, pages = {106024}, pmid = {36824272}, issn = {2589-0042}, abstract = {Citrus HLB caused by Candidatus Liberibacter asiaticus is a pathogen-triggered immune disease. Here, we identified putative genetic determinants of HLB pathogenicity by integrating citrus genomic resources to characterize the pan-genome of accessions that differ in their response to HLB. Genome-wide association mapping and analysis of allele-specific expression between susceptible, tolerant, and resistant accessions further refined candidates underlying the response to HLB. We first developed a phased diploid assembly of Citrus sinensis 'Newhall' genome and produced resequencing data for 91 citrus accessions that differ in their response to HLB. These data were combined with previous resequencing data from 356 accessions for genome-wide association mapping of the HLB response. Genes determinants for HLB pathogenicity were associated with host immune response, ROS production, and antioxidants. 
Overall, this study has provided a significant resource of citrus genomic data and identified candidate genes to be further explored to understand the genetic determinants of HLB pathogenicity.}, } @article {pmid36823453, year = {2023}, author = {Webb, EA and Held, NA and Zhao, Y and Graham, ED and Conover, AE and Semones, J and Lee, MD and Feng, Y and Fu, FX and Saito, MA and Hutchins, DA}, title = {Importance of mobile genetic element immunity in numerically abundant Trichodesmium clades.}, journal = {ISME communications}, volume = {3}, number = {1}, pages = {15}, pmid = {36823453}, issn = {2730-6151}, support = {1657757//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1851222//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1851222//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1850719//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1850719//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1850719//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 2125191//NSF | Directorate for Biological Sciences (BIO)/ ; }, abstract = {The colony-forming cyanobacteria Trichodesmium spp. are considered one of the most important nitrogen-fixing genera in the warm, low nutrient ocean. Despite this central biogeochemical role, many questions about their evolution, physiology, and trophic interactions remain unanswered. To address these questions, we describe Trichodesmium pangenomic potential via significantly improved genomic assemblies from two isolates and 15 new >50% complete Trichodesmium metagenome-assembled genomes from hand-picked, Trichodesmium colonies spanning the Atlantic Ocean. Phylogenomics identified ~four N2 fixing clades of Trichodesmium across the transect, with T. thiebautii dominating the colony-specific reads. Pangenomic analyses showed that all T. 
thiebautii MAGs are enriched in COG defense mechanisms and encode a vertically inherited Type III-B Clustered Regularly Interspaced Short Palindromic Repeats and associated protein-based immunity system (CRISPR-Cas). Surprisingly, this CRISPR-Cas system was absent in all T. erythraeum genomes, vertically inherited by T. thiebautii, and correlated with increased signatures of horizontal gene transfer. Additionally, the system was expressed in metaproteomic and transcriptomic datasets and CRISPR spacer sequences with 100% identical hits to field-assembled, putative phage genome fragments were identified. While the currently CO2-limited T. erythraeum is expected to be a 'winner' of anthropogenic climate change, their genomic dearth of known phage resistance mechanisms, compared to T. thiebautii, could put this outcome in question. Thus, the clear demarcation of T. thiebautii maintaining CRISPR-Cas systems, while T. erythraeum does not, identifies Trichodesmium as an ecologically important CRISPR-Cas model system, and highlights the need for more research on phage-Trichodesmium interactions.}, } @article {pmid36819029, year = {2023}, author = {Liu, B and Ren, YS and Su, CY and Abe, Y and Zhu, DH}, title = {Pangenomic analysis of Wolbachia provides insight into the evolution of host adaptation and cytoplasmic incompatibility factor genes.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1084839}, pmid = {36819029}, issn = {1664-302X}, abstract = {INTRODUCTION: The genus Wolbachia provides a typical example of intracellular bacteria that infect the germline of arthropods and filarial nematodes worldwide. Their importance as biological regulators of invertebrates, so it is particularly important to study the evolution, divergence and host adaptation of these bacteria at the genome-wide level.

METHODS: Here, we used publicly available Wolbachia genomes to reconstruct their evolutionary history and explore their adaptation under host selection.

RESULTS: Our findings indicate that segmental and single-gene duplications, such as DNA methylase, bZIP transcription factor, heat shock protein 90, in single monophyletic Wolbachia lineages (including supergroups A and B) may be responsible for improving the ability to adapt to a broad host range in arthropod-infecting strains. In contrast to A strains, high genetic diversity and rapidly evolving gene families occur in B strains, which may promote the ability of supergroup B strains to adapt to new hosts and their large-scale spreading. In addition, we hypothesize that there might have been two independent horizontal transfer events of cif genes in two sublineages of supergroup A strains. Interestingly, during the independent evolution of supergroup A and B strains, the rapid evolution of cif genes in supergroup B strains resulted in the loss of their functional domain, reflected in a possible decrease in the proportion of induced cytoplasmic incompatibility (CI) strains.

DISCUSSION: This present study highlights for reconstructing of evolutionary history, addressing host adaptation-related evolution and exploring the origin and divergence of CI genes in each Wolbachia supergroup. Our results thus not only provide a basis for further exploring the evolutionary history of Wolbachia adaptation under host selection but also reveal a new research direction for studying the molecular regulation of Wolbachia-induced cytoplasmic incompatibility.}, } @article {pmid36817109, year = {2022}, author = {Dereeper, A and Allouch, N and Guerlais, V and Garnier, M and Ma, L and De Jonckheere, JF and Joseph, SJ and Ali, IKM and Talarmin, A and Marcelino, I}, title = {Naegleria genus pangenome reveals new structural and functional insights into the versatility of these free-living amoebae.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1056418}, pmid = {36817109}, issn = {1664-302X}, abstract = {INTRODUCTION: Free-living amoebae of the Naegleria genus belong to the major protist clade Heterolobosea and are ubiquitously distributed in soil and freshwater habitats. Of the 47 Naegleria species described, N. fowleri is the only one being pathogenic to humans, causing a rare but fulminant primary amoebic meningoencephalitis. Some Naegleria genome sequences are publicly available, but the genetic basis for Naegleria diversity and ability to thrive in diverse environments (including human brain) remains unclear.

METHODS: Herein, we constructed a high-quality Naegleria genus pangenome to obtain a comprehensive catalog of genes encoded by these amoebae. For this, we first sequenced, assembled, and annotated six new Naegleria genomes.

RESULTS AND DISCUSSION: Genome architecture analyses revealed that Naegleria may use genome plasticity features such as ploidy/aneuploidy to modulate their behavior in different environments. When comparing 14 near-to-complete genome sequences, our results estimated the theoretical Naegleria pangenome as a closed genome, with 13,943 genes, including 3,563 core and 10,380 accessory genes. The functional annotations revealed that a large fraction of Naegleria genes show significant sequence similarity with those already described in other kingdoms, namely Animalia and Plantae. Comparative analyses highlighted a remarkable genomic heterogeneity, even for closely related strains and demonstrate that Naegleria harbors extensive genome variability, reflected in different metabolic repertoires. If Naegleria core genome was enriched in conserved genes essential for metabolic, regulatory and survival processes, the accessory genome revealed the presence of genes involved in stress response, macromolecule modifications, cell signaling and immune response. Commonly reported N. fowleri virulence-associated genes were present in both core and accessory genomes, suggesting that N. fowleri's ability to infect human brain could be related to its unique species-specific genes (mostly of unknown function) and/or to differential gene expression. 
The construction of Naegleria first pangenome allowed us to move away from a single reference genome (that does not necessarily represent each species as a whole) and to identify essential and dispensable genes in Naegleria evolution, diversity and biology, paving the way for further genomic and post-genomic studies.}, } @article {pmid36815495, year = {2023}, author = {Favaro, L and Campanaro, S and Fugaban, JII and Treu, L and Jung, ES and d'Ovidio, L and de Oliveira, DP and Liong, MT and Ivanova, IV and Todorov, SD}, title = {Genomic, metabolomic, and functional characterisation of beneficial properties of Pediococcus pentosaceus ST58, isolated from human oral cavity.}, journal = {Beneficial microbes}, volume = {14}, number = {1}, pages = {57--72}, doi = {10.3920/BM2022.0067}, pmid = {36815495}, issn = {1876-2891}, mesh = {Humans ; Pediococcus pentosaceus/genetics/metabolism ; Random Amplified Polymorphic DNA Technique ; RNA, Ribosomal, 16S/genetics ; Pediococcus/genetics/metabolism ; *Probiotics ; *Bacteriocins/genetics/pharmacology ; Anti-Bacterial Agents/pharmacology ; *Listeria monocytogenes ; Genomics ; }, abstract = {Bacteriocins produced by lactic acid bacteria are proteinaceous antibacterial metabolites that normally exhibit bactericidal or bacteriostatic activity against genetically closely related bacteria. In this work, the bacteriocinogenic potential of Pediococcus pentosaceus strain ST58, isolated from oral cavity of a healthy volunteer was evaluated. To better understand the biological role of this strain, its technological and safety traits were deeply investigated through a combined approach considering physiological, metabolomic and genomic properties. Three out of 14 colonies generating inhibition zones were confirmed to be bacteriocin producers and, according to repPCR and RAPD-PCR, differentiation assays, and 16S rRNA sequencing it was confirmed to be replicates of the same strain, identified as P. pentosaceus, named ST58. 
Based on multiple isolation of the same strain (P. pentosaceus ST58) over the 26 weeks in screening process for the potential bacteriocinogenic strains from the oral cavity of the same volunteer, strain ST58 can be considered a persistent component of oral cavity microbiota. Genomic analysis of P. pentosaceus ST58 revealed the presence of operons encoding for bacteriocins pediocin PA-1 and penocin A. The produced bacteriocin(s) inhibited the growth of Listeria monocytogenes, Enterococcus spp. and some Lactobacillus spp. used to determine the activity spectrum. The highest levels of production (6400 AU/ml) were recorded against L. monocytogenes strains after 24 h of incubation and the antimicrobial activity was inhibited after treatment of the cell-free supernatants with proteolytic enzymes. Noteworthy, P. pentosaceus ST58 also presented antifungal activity and key metabolites potentially involved in these properties were identified. Overall, this strain can be of great biotechnological interest towards the development of effective bio-preservation cultures as well as potential health promoting microbes.}, } @article {pmid36814455, year = {2023}, author = {Tranchant-Dubreuil, C and Chenal, C and Blaison, M and Albar, L and Klein, V and Mariac, C and Wing, RA and Vigouroux, Y and Sabot, F}, title = {FrangiPANe, a tool for creating a panreference using left behind reads.}, journal = {NAR genomics and bioinformatics}, volume = {5}, number = {1}, pages = {lqad013}, pmid = {36814455}, issn = {2631-9268}, abstract = {We present here FrangiPANe, a pipeline developed to build panreference using short reads through a map-then-assemble strategy. Applying it to 248 African rice genomes using an improved CG14 reference genome, we identified an average of 8 Mb of new sequences and 5290 new contigs per individual. In total, 1.4 G of new sequences, consisting of 1 306 676 contigs, were assembled. 
We validated 97.7% of the contigs of the TOG5681 cultivar individual assembly from short reads on a newly long reads genome assembly of the same TOG5681 cultivar. FrangiPANe also allowed the anchoring of 31.5% of the new contigs within the CG14 reference genome, with a 92.5% accuracy at 2 kb span. We annotated in addition 3252 new genes absent from the reference. FrangiPANe was developed as a modular and interactive application to simplify the construction of a panreference using the map-then-assemble approach. It is available as a Docker image containing (i) a Jupyter notebook centralizing codes, documentation and interactive visualization of results, (ii) python scripts and (iii) all the software and libraries requested for each step of the analysis. We foreseen our approach will help leverage large-scale illumina dataset for pangenome studies in GWAS or detection of selection.}, } @article {pmid36807539, year = {2022}, author = {Wang, ZF and Rouard, M and Droc, G and Heslop-Harrison, PJS and Ge, XJ}, title = {Genome assembly of Musa beccarii shows extensive chromosomal rearrangements and genome expansion during evolution of Musaceae genomes.}, journal = {GigaScience}, volume = {12}, number = {}, pages = {}, pmid = {36807539}, issn = {2047-217X}, mesh = {*Musa/genetics ; *Musaceae/genetics ; Genome, Plant ; Chromosomes ; DNA, Ribosomal ; Phylogeny ; }, abstract = {BACKGROUND: Musa beccarii (Musaceae) is a banana species native to Borneo, sometimes grown as an ornamental plant. The basic chromosome number of Musa species is x = 7, 10, or 11; however, M. beccarii has a basic chromosome number of x = 9 (2n = 2x = 18), which is the same basic chromosome number of species in the sister genera Ensete and Musella. Musa beccarii is in the section Callimusa, which is sister to the section Musa. We generated a high-quality chromosome-scale genome assembly of M. beccarii to better understand the evolution and diversity of genomes within the family Musaceae.

FINDINGS: The M. beccarii genome was assembled by long-read and Hi-C sequencing, and genes were annotated using both long Iso-seq and short RNA-seq reads. The size of M. beccarii was the largest among all known Musaceae assemblies (∼570 Mbp) due to the expansion of transposable elements and increased 45S ribosomal DNA sites. By synteny analysis, we detected extensive genome-wide chromosome fusions and fissions between M. beccarii and the other Musa and Ensete species, far beyond those expected from differences in chromosome number. Within Musaceae, M. beccarii showed a reduced number of terpenoid synthase genes, which are related to chemical defense, and enrichment in lipid metabolism genes linked to the physical defense of the cell wall. Furthermore, type III polyketide synthase was the most abundant biosynthetic gene cluster (BGC) in M. beccarii. BGCs were not conserved in Musaceae genomes.

CONCLUSIONS: The genome assembly of M. beccarii is the first chromosome-scale genome assembly in the Callimusa section in Musa, which provides an important genetic resource that aids our understanding of the evolution of Musaceae genomes and enhances our knowledge of the pangenome.}, } @article {pmid36797493, year = {2023}, author = {Rautiainen, M and Nurk, S and Walenz, BP and Logsdon, GA and Porubsky, D and Rhie, A and Eichler, EE and Phillippy, AM and Koren, S}, title = {Telomere-to-telomere assembly of diploid chromosomes with Verkko.}, journal = {Nature biotechnology}, volume = {}, number = {}, pages = {}, pmid = {36797493}, issn = {1546-1696}, support = {F32 GM134558/GM/NIGMS NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; Z99 HG999999/ImNIH/Intramural NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; }, abstract = {The Telomere-to-Telomere consortium recently assembled the first truly complete sequence of a human genome. To resolve the most complex repeats, this project relied on manual integration of ultra-long Oxford Nanopore sequencing reads with a high-resolution assembly graph built from long, accurate PacBio high-fidelity reads. We have improved and automated this strategy in Verkko, an iterative, graph-based pipeline for assembling complete, diploid genomes. Verkko begins with a multiplex de Bruijn graph built from long, accurate reads and progressively simplifies this graph by integrating ultra-long reads and haplotype-specific markers. The result is a phased, diploid assembly of both haplotypes, with many chromosomes automatically assembled from telomere to telomere. Running Verkko on the HG002 human genome resulted in 20 of 46 diploid chromosomes assembled without gaps at 99.9997% accuracy. 
The complete assembly of diploid genomes is a critical step towards the construction of comprehensive pangenome databases and chromosome-scale comparative genomics.}, } @article {pmid36795789, year = {2023}, author = {Mohamed, F and Ruiz Rodriguez, LG and Zorzoli, A and Dorfmueller, HC and Raya, RR and Mozzi, F}, title = {Genomic diversity in Fructobacillus spp. isolated from fructose-rich niches.}, journal = {PloS one}, volume = {18}, number = {2}, pages = {e0281839}, pmid = {36795789}, issn = {1932-6203}, support = {109357/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; 105606/Z/14/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Fructose/metabolism ; Phylogeny ; *Leuconostocaceae/genetics/metabolism ; *Lactobacillales/genetics ; Genomics ; }, abstract = {The Fructobacillus genus is a group of obligately fructophilic lactic acid bacteria (FLAB) that requires the use of fructose or another electron acceptor for their growth. In this work, we performed a comparative genomic analysis within the genus Fructobacillus by using 24 available genomes to evaluate genomic and metabolic differences among these organisms. In the genome of these strains, which varies between 1.15- and 1.75-Mbp, nineteen intact prophage regions, and seven complete CRISPR-Cas type II systems were found. Phylogenetic analyses located the studied genomes in two different clades. A pangenome analysis and a functional classification of their genes revealed that genomes of the first clade presented fewer genes involved in the synthesis of amino acids and other nitrogen compounds. 
Moreover, the presence of genes strictly related to the use of fructose and electron acceptors was variable within the genus, although these variations were not always related to the phylogeny.}, } @article {pmid36794816, year = {2023}, author = {Derrien, M and Mikulic, N and Uyoga, MA and Chenoll, E and Climent, E and Howard-Varona, A and Nyilima, S and Stoffel, NU and Karanja, S and Kottler, R and Stahl, B and Zimmermann, MB and Bourdet-Sicard, R}, title = {Gut microbiome function and composition in infants from rural Kenya and association with human milk oligosaccharides.}, journal = {Gut microbes}, volume = {15}, number = {1}, pages = {2178793}, pmid = {36794816}, issn = {1949-0984}, mesh = {Humans ; Infant ; *Milk, Human/chemistry ; *Gastrointestinal Microbiome/genetics ; Kenya/epidemiology ; Oligosaccharides ; Bifidobacterium/genetics ; }, abstract = {The gut microbiota evolves rapidly after birth, responding dynamically to environmental factors and playing a key role in short- and long-term health. Lifestyle and rurality have been shown to contribute to differences in the gut microbiome, including Bifidobacterium levels, between infants. We studied the composition, function and variability of the gut microbiomes of 6- to 11-month-old Kenyan infants (n = 105). Shotgun metagenomics showed Bifidobacterium longum to be the dominant species. A pangenomic analysis of B. longum in gut metagenomes revealed a high prevalence of B. longum subsp. infantis (B. infantis) in Kenyan infants (80%), and possible co-existence of this subspecies with B. longum subsp. longum. Stratification of the gut microbiome into community (GMC) types revealed differences in composition and functional features. GMC types with a higher prevalence of B. infantis and abundance of B. breve also had a lower pH and a lower abundance of genes encoding pathogenic features. 
An analysis of human milk oligosaccharides (HMOs) classified the human milk (HM) samples into four groups defined on the basis of secretor and Lewis polymorphisms revealed a higher prevalence of HM group III (Se+, Le-) (22%) than in most previously studied populations, with an enrichment in 2'-fucosyllactose. Our results show that the gut microbiome of partially breastfed Kenyan infants over the age of six months is enriched in bacteria from the Bifidobacterium community, including B. infantis, and that the high prevalence of a specific HM group may indicate a specific HMO-gut microbiome association. This study sheds light on gut microbiome variation in an understudied population with limited exposure to modern microbiome-altering factors.}, } @article {pmid36792708, year = {2023}, author = {Fudge, JB}, title = {Capturing haplotype variation in populations using pangenome references.}, journal = {Nature biotechnology}, volume = {41}, number = {2}, pages = {194}, doi = {10.1038/s41587-023-01691-1}, pmid = {36792708}, issn = {1546-1696}, mesh = {Haplotypes/genetics ; *Genomics ; *Genetic Variation/genetics ; }, } @article {pmid36792019, year = {2023}, author = {Lekired, A and Cherif-Silini, H and Silini, A and Ben Yahia, H and Ouzari, HI}, title = {Comparative genomics reveals the acquisition of mobile genetic elements by the plant growth-promoting Pantoea eucrina OB49 in polluted environments.}, journal = {Genomics}, volume = {115}, number = {2}, pages = {110579}, doi = {10.1016/j.ygeno.2023.110579}, pmid = {36792019}, issn = {1089-8646}, mesh = {*Metals, Heavy ; *Pantoea/genetics ; Biodegradation, Environmental ; Interspersed Repetitive Sequences ; Genomics ; }, abstract = {Heavy metal-tolerant plant growth-promoting bacteria (PGPB) have gained popularity in bioremediation in recent years. A genome-assisted study of a heavy metal-tolerant PGPB Pantoea eucrina OB49 isolated from the rhizosphere of wheat grown on a heavy metal-contaminated site is presented. 
Comparative pan-genome analysis indicated that OB49 acquired heavy metal resistance genes through horizontal gene transfer. On contigs S10 and S12, OB49 has two arsRBCH operons that give arsenic resistance. On the S12 contig, an arsRBCH operon was discovered in conjunction with the merRTPCADE operon, which provides mercury resistance. P. eucrina OB49 may be involved in an ecological alternative for heavy metal remediation and growth promotion of wheat grown in metal-polluted soils. Our results suggested the detection of mobile genetic elements that harbour the ars operon and the fluoride resistance genes adjacent to the mer operon.}, } @article {pmid36781662, year = {2023}, author = {Thomas, WJW and Zhang, Y and Amas, JC and Cantila, AY and Zandberg, JD and Harvie, SL and Batley, J}, title = {Innovative Advances in Plant Genotyping.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2638}, number = {}, pages = {451-465}, pmid = {36781662}, issn = {1940-6029}, mesh = {Genotype ; *Genotyping Techniques ; *Genome, Plant ; Sequence Analysis, DNA ; Crops, Agricultural/genetics ; Polymorphism, Single Nucleotide ; }, abstract = {Over the past decade, advances in plant genotyping have been critical in enabling the identification of genetic diversity, in understanding evolution, and in dissecting important traits in both crops and native plants. The widespread popularity of single-nucleotide polymorphisms (SNPs) has prompted significant improvements to SNP-based genotyping, including SNP arrays, genotyping by sequencing, and whole-genome resequencing. More recent approaches, including genotyping structural variants, utilizing pangenomes to capture species-wide genetic diversity and exploiting machine learning to analyze genotypic data sets, are pushing the boundaries of what plant genotyping can offer. 
In this chapter, we highlight these innovations and discuss how they will accelerate and advance future genotyping efforts.}, } @article {pmid36778393, year = {2023}, author = {Bonnie, JK and Ahmed, O and Langmead, B}, title = {DandD: efficient measurement of sequence growth and similarity.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {36778393}, abstract = {Genome assembly databases are growing rapidly. The sequence content in each new assembly can be largely redundant with previous ones, but this is neither conceptually nor algorithmically easy to measure. We propose new methods and a new tool called DandD that addresses the question of how much new sequence is gained when a sequence collection grows. DandD can describe how much human structural variation is being discovered in each new human genome assembly and when discoveries will level off in the future. DandD uses a measure called δ ("delta"), developed initially for data compression. Computing δ directly requires counting k-mers, but DandD can rapidly estimate it using genomic sketches. We also propose δ as an alternative to k-mer-specific cardinalities when computing the Jaccard coefficient, avoiding the pitfalls of a poor choice of k. We demonstrate the utility of DandD's functions for estimating δ, characterizing the rate of pangenome growth, and computing allpairs similarities using k-independent Jaccard. DandD is open source software available at: https://github.com/jessicabonnie/dandd .}, } @article {pmid36777875, year = {2022}, author = {Grimplet, J}, title = {Genomic and Bioinformatic Resources for Perennial Fruit Species.}, journal = {Current genomics}, volume = {23}, number = {4}, pages = {217-233}, pmid = {36777875}, issn = {1389-2029}, abstract = {In the post-genomic era, data management and development of bioinformatic tools are critical for the adequate exploitation of genomics data. 
In this review, we address the actual situation for the subset of crops represented by the perennial fruit species. The agronomical singularity of these species compared to plant and crop model species provides significant challenges on the implementation of good practices generally not addressed in other species. Studies are usually performed over several years in non-controlled environments, usage of rootstock is common, and breeders heavily rely on vegetative propagation. A reference genome is now available for all the major species as well as many members of the economically important genera for breeding purposes. Development of pangenome for these species is beginning to gain momentum which will require a substantial effort in term of bioinformatic tool development. The available tools for genome annotation and functional analysis will also be presented.}, } @article {pmid36764870, year = {2023}, author = {Dwivedi, SL and Heslop-Harrison, P and Spillane, C and McKeown, PC and Edwards, D and Goldman, I and Ortiz, R}, title = {Evolutionary dynamics and adaptive benefits of deleterious mutations in crop gene pools.}, journal = {Trends in plant science}, volume = {28}, number = {6}, pages = {685-697}, doi = {10.1016/j.tplants.2023.01.006}, pmid = {36764870}, issn = {1878-4372}, mesh = {*Gene Pool ; Mutation/genetics ; *Biological Evolution ; Genomics ; Phenotype ; Genome, Plant/genetics ; Plant Breeding ; }, abstract = {Mutations with deleterious consequences in nature may be conditionally deleterious in crop plants. That is, while some genetic variants may reduce fitness under wild conditions and be subject to purifying selection, they can be under positive selection in domesticates. Such deleterious alleles can be plant breeding targets, particularly for complex traits. The difficulty of distinguishing favorable from unfavorable variants reduces the power of selection, while favorable trait variation and heterosis may be attributable to deleterious alleles. 
Here, we review the roles of deleterious mutations in crop breeding and discuss how they can be used as a new avenue for crop improvement with emerging genomic tools, including HapMaps and pangenome analysis, aiding the identification, removal, or exploitation of deleterious mutations.}, } @article {pmid36760124, year = {2023}, author = {Jin, S and Han, Z and Hu, Y and Si, Z and Dai, F and He, L and Cheng, Y and Li, Y and Zhao, T and Fang, L and Zhang, T}, title = {Structural variation (SV)-based pan-genome and GWAS reveal the impacts of SVs on the speciation and diversification of allotetraploid cottons.}, journal = {Molecular plant}, volume = {16}, number = {4}, pages = {678-693}, doi = {10.1016/j.molp.2023.02.004}, pmid = {36760124}, issn = {1752-9867}, mesh = {*Gossypium/genetics ; *Genome-Wide Association Study ; Genome, Plant/genetics ; Phenotype ; Tetraploidy ; }, abstract = {Structural variations (SVs) have long been described as being involved in the origin, adaption, and domestication of species. However, the underlying genetic and genomic mechanisms are poorly understood. Here, we report a high-quality genome assembly of Gossypium barbadense acc. Tanguis, a landrace that is closely related to formation of extra-long-staple (ELS) cultivated cotton. An SV-based pan-genome (Pan-SV) was then constructed using a total of 182 593 non-redundant SVs, including 2236 inversions, 97 398 insertions, and 82 959 deletions from 11 assembled genomes of allopolyploid cotton. The utility of this Pan-SV was then demonstrated through population structure analysis and genome-wide association studies (GWASs). Using segregation mapping populations produced through crossing ELS cotton and the landrace along with an SV-based GWAS, certain SVs responsible for speciation, domestication, and improvement in tetraploid cottons were identified. 
Importantly, some of the SVs presently identified as associated with the yield and fiber quality improvement had not been identified in previous SNP-based GWAS. In particular, a 9-bp insertion or deletion was found to associate with elimination of the interspecific reproductive isolation between Gossypium hirsutum and G. barbadense. Collectively, this study provides new insights into genome-wide, gene-scale SVs linked to important agronomic traits in a major crop species and highlights the importance of SVs during the speciation, domestication, and improvement of cultivated crop species.}, } @article {pmid36753700, year = {2023}, author = {Tanwar, AS and Shruptha, P and Paul, B and Murali, TS and Brand, A and Satyamoorthy, K}, title = {How Can Omics Inform Diabetic Foot Ulcer Clinical Management? A Whole Genome Comparison of Four Clinical Strains of Staphylococcus aureus.}, journal = {Omics : a journal of integrative biology}, volume = {27}, number = {2}, pages = {51-61}, doi = {10.1089/omi.2022.0184}, pmid = {36753700}, issn = {1557-8100}, mesh = {Humans ; Staphylococcus aureus/genetics ; *Diabetic Foot/drug therapy ; Anti-Bacterial Agents/therapeutic use ; Virulence Factors/genetics ; *Staphylococcal Infections ; *Diabetes Mellitus ; }, abstract = {Foot ulcers and associated infections significantly contribute to morbidity and mortality in diabetes. While diverse pathogens are found in the diabetes-related infected ulcers, Staphylococcus aureus remains one of the most virulent and widely prevalent pathogens. The high prevalence of S. aureus in chronic wound infections, especially in clinical settings, is attributed to its ability to evolve and acquire resistance against common antibiotics and to elicit an array of virulence factors. In this study, whole genome comparison of four strains of S. 
aureus (MUF168, MUF256, MUM270, and MUM475) isolated from diabetic foot ulcer (DFU) infections showing varying resistance patterns was carried out to study the genomic similarity, antibiotic resistance profiling, associated virulence factors, and sequence variations in drug targets. The comparative genome analysis showed strains MUM475 and MUM270 to be highly resistant, MUF256 with moderate levels of resistance, and MUF168 to be the least resistant. Strain MUF256 and MUM475 harbored more virulence factors compared with other two strains. Deleterious sequence variants were observed suggesting potential role in altering drug targets and drug efficacy. This comparative whole genome study offers new molecular insights that may potentially inform evidence-based diagnosis and treatment of DFUs in the clinic.}, } @article {pmid36753463, year = {2023}, author = {Hulin, MT and Hill, L and Jones, JDG and Ma, W}, title = {Pangenomic analysis reveals plant NAD[+] manipulation as an important virulence activity of bacterial pathogen effectors.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {7}, pages = {e2217114120}, pmid = {36753463}, issn = {1091-6490}, support = {BBS/E/J/000PR9797/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Virulence ; *NAD/metabolism ; *Cyclic ADP-Ribose/metabolism ; Bacteria/metabolism ; Plants/metabolism ; Pseudomonas syringae/metabolism ; NAD+ Nucleosidase/genetics/metabolism ; Bacterial Proteins/genetics/metabolism ; Plant Diseases/microbiology ; }, abstract = {Nicotinamide adenine dinucleotide (NAD[+]) has emerged as a key component in prokaryotic and eukaryotic immune systems. The recent discovery that Toll/interleukin-1 receptor (TIR) proteins function as NAD[+] hydrolases (NADase) links NAD[+]-derived small molecules with immune signaling. We investigated pathogen manipulation of host NAD[+] metabolism as a virulence strategy. 
Using the pangenome of the model bacterial pathogen Pseudomonas syringae, we conducted a structure-based similarity search from 35,000 orthogroups for type III effectors (T3Es) with potential NADase activity. Thirteen T3Es, including five newly identified candidates, were identified that possess domain(s) characteristic of seven NAD[+]-hydrolyzing enzyme families. Most Pseudomonas syringae strains that depend on the type III secretion system to cause disease, encode at least one NAD[+]-manipulating T3E, and many have several. We experimentally confirmed the type III-dependent secretion of a novel T3E, named HopBY, which shows structural similarity to both TIR and adenosine diphosphate ribose (ADPR) cyclase. Homologs of HopBY were predicted to be type VI effectors in diverse bacterial species, indicating potential recruitment of this activity by microbial proteins secreted during various interspecies interactions. HopBY efficiently hydrolyzes NAD[+] and specifically produces 2'cADPR, which can also be produced by TIR immune receptors of plants and by other bacteria. Intriguingly, this effector promoted bacterial virulence, indicating that 2'cADPR may not be the signaling molecule that directly initiates immunity. This study highlights a host-pathogen battleground centered around NAD[+] metabolism and provides insight into the NAD[+]-derived molecules involved in plant immunity.}, } @article {pmid36749783, year = {2023}, author = {Jirakkakul, J and Khoiri, AN and Duangfoo, T and Dulsawat, S and Sutheeworapong, S and Petsong, K and Wattanachaisaereekul, S and Paenkaew, P and Tachaleat, A and Cheevadhanarak, S and Prommeenate, P}, title = {Insights into the genome of Methylobacterium sp. 
NMS14P, a novel bacterium for growth promotion of maize, chili, and sugarcane.}, journal = {PloS one}, volume = {18}, number = {2}, pages = {e0281505}, pmid = {36749783}, issn = {1932-6203}, mesh = {Zea mays/genetics ; *Saccharum/genetics ; *Methylobacterium/genetics ; RNA, Ribosomal, 16S/genetics ; Edible Grain/genetics ; Phylogeny ; }, abstract = {A novel methylotrophic bacterium designated as NMS14P was isolated from the root of an organic coffee plant (Coffea arabica) in Thailand. The 16S rRNA sequence analysis revealed that this new isolate belongs to the genus Methylobacterium, and its novelty was clarified by genomic and comparative genomic analyses, in which NMS14P exhibited low levels of relatedness with other Methylobacterium-type strains. NMS14P genome consists of a 6,268,579 bp chromosome, accompanied by a 542,519 bp megaplasmid and a 66,590 bp plasmid, namely pNMS14P1 and pNMS14P2, respectively. Several genes conferring plant growth promotion are aggregated on both chromosome and plasmids, including phosphate solubilization, indole-3-acetic acid (IAA) biosynthesis, cytokinins (CKs) production, 1-aminocyclopropane-1-carboxylate (ACC) deaminase activity, sulfur-oxidizing activity, trehalose synthesis, and urea metabolism. Furthermore, pangenome analysis showed that NMS14P possessed the highest number of strain-specific genes accounting for 1408 genes, particularly those that are essential for colonization and survival in a wide array of host environments, such as ABC transporter, chemotaxis, quorum sensing, biofilm formation, and biosynthesis of secondary metabolites. In vivo tests have supported that NMS14P significantly promoted the growth and development of maize, chili, and sugarcane. 
Collectively, NMS14P is proposed as a novel plant growth-promoting Methylobacterium that could potentially be applied to a broad range of host plants as Methylobacterium-based biofertilizers to reduce and ultimately substitute the use of synthetic agrochemicals for sustainable agriculture.}, } @article {pmid36748949, year = {2023}, author = {Reddy, TS and Zomer, R and Mantri, N}, title = {Nanoformulations as a strategy to overcome the delivery limitations of cannabinoids.}, journal = {Phytotherapy research : PTR}, volume = {37}, number = {4}, pages = {1526-1538}, doi = {10.1002/ptr.7742}, pmid = {36748949}, issn = {1099-1573}, support = {//MGC Pharmaceuticals Limited/ ; }, mesh = {Humans ; *Cannabinoids ; *Cannabidiol/therapeutic use ; Dronabinol/pharmacokinetics ; Pain/drug therapy ; Lipids ; }, abstract = {Medical cannabis has received significant interest in recent years due to its promising benefits in the management of pain, anxiety, depression and neurological and movement disorders. Specifically, the major phytocannabinoids derived from the cannabis plant such as (-) trans-Δ[9] -tetrahydrocannabinol (THC) and cannabidiol (CBD), have been shown to be responsible for the pharmacological and therapeutic properties. Recently, these phytocannabinoids have also attracted special attention in cancer treatment due to their well-known palliative benefits in chemotherapy-induced nausea, vomiting, pain and loss of appetite along with their anticancer activities. Despite the enormous pharmacological benefits, the low aqueous solubility, high instability (susceptibility to extensive first pass metabolism) and poor systemic bioavailability restrict their utilization at clinical perspective. Therefore, drug delivery strategies based on nanotechnology are emerging to improve pharmacokinetic profile and bioavailability of cannabinoids as well as enhance their targeted delivery. 
Here, we critically review the nano-formulation systems engineered for overcoming the delivery limitations of native phytocannabinoids including polymeric and lipid-based nanoparticles (lipid nano capsules (LNCs), nanostructured lipid carriers (NLCs), nanoemulsions (NE) and self-emulsifying drug delivery systems (SEDDS)), ethosomes and cyclodextrins as well as their therapeutic applications.}, } @article {pmid36748707, year = {2022}, author = {Worden, PJ and Bogema, DR and Micallef, ML and Go, J and Deutscher, AT and Labbate, M and Green, TJ and King, WL and Liu, M and Seymour, JR and Jenkins, C}, title = {Phylogenomic diversity of Vibrio species and other Gammaproteobacteria isolated from Pacific oysters (Crassostrea gigas) during a summer mortality outbreak.}, journal = {Microbial genomics}, volume = {8}, number = {12}, pages = {}, pmid = {36748707}, issn = {2057-5858}, mesh = {Animals ; *Crassostrea ; Phylogeny ; *Gammaproteobacteria ; Australia/epidemiology ; *Vibrio ; Disease Outbreaks ; }, abstract = {The Pacific oyster (PO), Crassostrea gigas, is an important commercial marine species but periodically experiences large stock losses due to disease events known as summer mortality. Summer mortality has been linked to environmental perturbations and numerous viral and bacterial agents, indicating this disease is multifactorial in nature. In 2013 and 2014, several summer mortality events occurred within the Port Stephens estuary (NSW, Australia). Extensive culture and molecular-based investigations were undertaken and several potentially pathogenic Vibrio species were identified. To improve species identification and genomically characterise isolates obtained from this outbreak, whole-genome sequencing (WGS) and subsequent genomic analyses were performed on 48 bacterial isolates, as well as a further nine isolates from other summer mortality studies using the same batch of juveniles. 
Average nucleotide identity (ANI) identified most isolates to the species level and included members of the Photobacterium, Pseudoalteromonas, Shewanella and Vibrio genera, with Vibrio species making up more than two-thirds of all species identified. Construction of a phylogenomic tree, ANI analysis, and pan-genome analysis of the 57 isolates represents the most comprehensive culture-based phylogenomic survey of Vibrios during a PO summer mortality event in Australian waters and revealed large genomic diversity in many of the identified species. Our analysis revealed limited and inconsistent associations between isolate species and their geographical origins, or host health status. Together with ANI and pan-genome results, these inconsistencies suggest that to determine the role that microbes may have in Pacific oyster summer mortality events, isolate identification must be at the taxonomic level of strain. Our WGS data (specifically, the accessory genomes) differentiated bacterial strains, and coupled with associated metadata, highlight the possibility of predicting a strain's environmental niche and level of pathogenicity.}, } @article {pmid36748586, year = {2023}, author = {Rai, A and Suresh, G and Ria, B and L, V and Pk, S and Ipsita, S and Sasikala, C and Venkata Ramana, C}, title = {Phylogenomic analysis of the genus Alcanivorax: proposal for division of this genus into the emended genus Alcanivorax and two novel genera Alloalcanivorax gen. nov. and Isoalcanivorax gen. 
nov.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {73}, number = {1}, pages = {}, doi = {10.1099/ijsem.0.005672}, pmid = {36748586}, issn = {1466-5034}, mesh = {Sequence Analysis, DNA ; *Alcanivoraceae/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Fatty Acids/chemistry ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; Base Composition ; }, abstract = {The members of the genus Alcanivorax are key players in the removal of petroleum hydrocarbons from polluted marine environments. More than half of the species were described in the last decade using 16S rRNA gene phylogeny and genomic-based metrics. However, the 16S rRNA gene identity (<94 %) between some members of the genus Alcanivorax suggested their imprecise taxonomic status. In this study, we examined the taxonomic positions of Alcanivorax species using 16S rRNA phylogeny and further validated them using phylogenomic-related indexes such as digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), average amino acid identity (AAI), percentage of conserved proteins (POCP) and comparative genomic studies. ANI and dDDH values confirmed that all the Alcanivorax species were well described at the species level. The phylotaxogenomic analysis showed that Alcanivorax species formed three clades. The inter-clade values of AAI and POCP were less than 70 %. The pan-genome evaluation depicted that the members shared 1223 core genes and its number increased drastically when analysed clade-wise. Therefore, these results necessitate the transfer of clade II and clade III members into Isoalcanivorax gen. nov. and Alloalcanivorax gen. 
nov., respectively, along with the emended description of the genus Alcanivorax sensu stricto.}, } @article {pmid36748580, year = {2022}, author = {Wietz, M and López-Pérez, M and Sher, D and Biller, SJ and Rodriguez-Valera, F}, title = {Microbe Profile: Alteromonas macleodii - a widespread, fast-responding, 'interactive' marine bacterium.}, journal = {Microbiology (Reading, England)}, volume = {168}, number = {11}, pages = {}, doi = {10.1099/mic.0.001236}, pmid = {36748580}, issn = {1465-2080}, mesh = {*Genome, Bacterial/genetics ; *Alteromonas/genetics/metabolism ; Phenotype ; Adaptation, Physiological ; Phylogeny ; Seawater/microbiology ; }, abstract = {Alteromonas macleodii is a marine heterotrophic bacterium with widespread distribution - from temperate to tropical oceans, and from surface to deep waters. Strains of A. macleodii exhibit considerable genomic and metabolic variability, and can grow rapidly on diverse organic compounds. A. macleodii is a model organism for the study of population genomics, physiological adaptations and microbial interactions, with individual genomes encoding diverse phenotypic traits influenced by recombination and horizontal gene transfer.}, } @article {pmid36748558, year = {2022}, author = {Cummins, EA and Hall, RJ and Connor, C and McInerney, JO and McNally, A}, title = {Distinct evolutionary trajectories in the Escherichia coli pangenome occur within sequence types.}, journal = {Microbial genomics}, volume = {8}, number = {11}, pages = {}, pmid = {36748558}, issn = {2057-5858}, mesh = {*Escherichia coli/genetics ; *Biological Evolution ; Genomics ; }, abstract = {The Escherichia coli species contains a diverse set of sequence types and there remain important questions regarding differences in genetic content within this population that need to be addressed. Pangenomes are useful vehicles for studying gene content within sequence types. Here, we analyse 21 E. 
coli sequence type pangenomes using comparative pangenomics to identify variance in both pangenome structure and content. We present functional breakdowns of sequence type core genomes and identify sequence types that are enriched in metabolism, transcription and cell membrane biogenesis genes. We also uncover metabolism genes that have variable core classification, depending on which allele is present. Our comparative pangenomics approach allows for detailed exploration of sequence type pangenomes within the context of the species. We show that ongoing gene gain and loss in the E. coli pangenome is sequence type-specific, which may be a consequence of distinct sequence type-specific evolutionary drivers.}, } @article {pmid36748494, year = {2022}, author = {Li, BB and Zhang, XJ and Wu, D and Zhang, DD and Fang, BZ and Liu, HC and Zhou, YG and Cai, M and Li, WJ and Nie, GX}, title = {Devosia ureilytica sp. nov., isolated from Kuche River in China.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {72}, number = {12}, pages = {}, doi = {10.1099/ijsem.0.005663}, pmid = {36748494}, issn = {1466-5034}, mesh = {*Fatty Acids/chemistry ; *Phospholipids/chemistry ; Phylogeny ; Rivers ; RNA, Ribosomal, 16S/genetics ; Ubiquinone/chemistry ; Sequence Analysis, DNA ; Base Composition ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; China ; }, abstract = {Two novel strains, designated XJ19-45[T] and XJ19-1, were isolated from water of Kuche River in Xinjiang Uygur Autonomous Region, China. Their cells were Gram-stain-negative, aerobic and motile rods. The phylogenetic analyses based on 16S rRNA genes and genomes showed that the two isolates belonged to the genus Devosia and the closest relative was Devosia subaequoris HST3-14[T]. 
The 16S rRNA genes sequences pairwise similarities, average nucleotide identities, digital DNA-DNA hybridizations and average amino acid identities between type strain XJ19-45[T] and other relatives were all less than 98.3, 80.3, 23.6 and 85.7 %, respectively, all below the species delineation thresholds. Pan-genomic analysis indicated that the novel isolate XJ19-45[T] shared 1594 core gene clusters with the 11 closely related type strains in Devosia, and the number of strain-specific clusters was 390. The major cellular fatty acids (>10 %) of the two isolates were summed feature 8, C18 : 1 ω7c 11-methyl and C16 : 0. Diphosphatidylglycerol, phosphatidylglycerol and glycolipids were the major polar lipids, and Q10 was the detected respiratory quinone. Based on the results of phenotypic, physiological, chemotaxonomic and genotypic characterizations, we propose that the isolates represent a novel species, for which the name Devosia ureilytica sp. nov. is proposed. The type strain is XJ19-45[T] (=CGMCC 1.19388[T]=KCTC 92263[T]).}, } @article {pmid36747706, year = {2023}, author = {Hoover, RL and Keffer, JL and Polson, SW and Chan, CS}, title = {Gallionellaceae pangenomic analysis reveals insight into phylogeny, metabolic flexibility, and iron oxidation mechanisms.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {36747706}, abstract = {UNLABELLED: The iron-oxidizing Gallionellaceae drive a wide variety of biogeochemical cycles through their metabolisms and biominerals. To better understand the environmental impacts of Gallionellaceae, we need to improve our knowledge of their diversity and metabolisms, especially any novel iron oxidation mechanisms. Here, we used a pangenomic analysis of 103 genomes to resolve Gallionellaceae phylogeny and explore the range of genomic potential. 
Using a concatenated ribosomal protein tree and key gene patterns, we determined Gallionellaceae has four genera, divided into two groups—iron-oxidizing bacteria (FeOB) Gallionella, Sideroxydans, and Ferriphaselus with known iron oxidases (Cyc2, MtoA) and nitrite-oxidizing bacteria (NOB) Candidatus Nitrotoga with nitrite oxidase (Nxr). The FeOB and NOB have similar electron transport chains, including genes for reverse electron transport and carbon fixation. Auxiliary energy metabolisms including S oxidation, denitrification, and organotrophy were scattered throughout the Gallionellaceae FeOB. Within FeOB, we found genes that may represent adaptations for iron oxidation, including a variety of extracellular electron uptake (EEU) mechanisms. FeOB genomes encoded more predicted c-type cytochromes overall, notably more multiheme c-type cytochromes (MHCs) with >10 CXXCH motifs. These include homologs of several predicted outer membrane porin-MHC complexes, including MtoAB and Uet. MHCs are known to efficiently conduct electrons across longer distances and function across a wide range of redox potentials that overlap with mineral redox potentials, which can help expand the range of usable iron substrates. Overall, the results of pangenome analyses suggest that the Gallionellaceae genera Gallionella, Sideroxydans, and Ferriphaselus are primarily iron oxidizers, capable of oxidizing dissolved Fe[2+] as well as a range of solid iron or other mineral substrates.

IMPORTANCE: Neutrophilic iron-oxidizing bacteria (FeOB) produce copious iron (oxyhydr)oxides that can profoundly influence biogeochemical cycles, notably the fate of carbon and many metals. To fully understand environmental microbial iron oxidation, we need a thorough accounting of iron oxidation mechanisms. In this study we show the Gallionellaceae FeOB have both known iron oxidases as well as uncharacterized multiheme cytochromes (MHCs). MHCs are predicted to transfer electrons from extracellular substrates and likely confer metabolic capabilities that help Gallionellaceae occupy a range of different iron- and mineral-rich niches. Gallionellaceae appear to specialize in iron oxidation, so it makes sense that they would have multiple mechanisms to oxidize various forms of iron, given the many iron minerals on Earth, as well as the physiological and kinetic challenges faced by FeOB. The multiple iron/mineral oxidation mechanisms may help drive the widespread ecological success of Gallionellaceae.}, } @article {pmid36747219, year = {2023}, author = {Chen, H and King, R and Smith, D and Bayon, C and Ashfield, T and Torriani, S and Kanyuka, K and Hammond-Kosack, K and Bieri, S and Rudd, J}, title = {Combined pangenomics and transcriptomics reveals core and redundant virulence processes in a rapidly evolving fungal plant pathogen.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {24}, pmid = {36747219}, issn = {1741-7007}, support = {BB/J/00426X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/C000I0250/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Transcriptome ; Virulence/genetics ; *Gene Expression Profiling ; Genome, Fungal ; Genes, Fungal ; Plant Diseases/microbiology ; }, abstract = {BACKGROUND: Studying genomic variation in rapidly evolving pathogens potentially enables identification of genes supporting their "core biology", being present, functional and expressed by all 
strains or "flexible biology", varying between strains. Genes supporting flexible biology may be considered to be "accessory", whilst the "core" gene set is likely to be important for common features of a pathogen species biology, including virulence on all host genotypes. The wheat-pathogenic fungus Zymoseptoria tritici represents one of the most rapidly evolving threats to global food security and was the focus of this study.

RESULTS: We constructed a pangenome of 18 European field isolates, with 12 also subjected to RNAseq transcription profiling during infection. Combining this data, we predicted a "core" gene set comprising 9807 sequences which were (1) present in all isolates, (2) lacking inactivating polymorphisms and (3) expressed by all isolates. A large accessory genome, consisting of 45% of the total genes, was also defined. We classified genetic and genomic polymorphism at both chromosomal and individual gene scales. Proteins required for essential functions including virulence had lower-than average sequence variability amongst core genes. Both core and accessory genomes encoded many small, secreted candidate effector proteins that likely interact with plant immunity. Viral vector-mediated transient in planta overexpression of 88 candidates failed to identify any which induced leaf necrosis characteristic of disease. However, functional complementation of a non-pathogenic deletion mutant lacking five core genes demonstrated that full virulence was restored by re-introduction of the single gene exhibiting least sequence polymorphism and highest expression.

CONCLUSIONS: These data support the combined use of pangenomics and transcriptomics for defining genes which represent core, and potentially exploitable, weaknesses in rapidly evolving pathogens.}, } @article {pmid36747211, year = {2023}, author = {Jia, Y and Xu, M and Hu, H and Chapman, B and Watt, C and Buerte, B and Han, N and Zhu, M and Bian, H and Li, C and Zeng, Z}, title = {Comparative gene retention analysis in barley, wild emmer, and bread wheat pangenome lines reveals factors affecting gene retention following gene duplication.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {25}, pmid = {36747211}, issn = {1741-7007}, support = {9176507//Grains Research and Development Corporation/ ; 113731971932//Innovative Research Group Project of the National Natural Science Foundation of China/ ; }, mesh = {*Gene Duplication ; Triticum/genetics ; *Hordeum/genetics ; Bread ; Multigene Family ; Evolution, Molecular ; Phylogeny ; }, abstract = {BACKGROUND: Gene duplication is a prevalent phenomenon and a major driving force underlying genome evolution. The process leading to the fixation of gene duplicates following duplication is critical to understand how genome evolves but remains fragmentally understood. Most previous studies on gene retention are based on gene duplicate analyses in single reference genome. No population-based comparative gene retention analysis has been performed to date.

RESULTS: Taking advantage of recently published genomic data in Triticeae, we dissected a divergent homogentisate phytyltransferase (HPT2) lineage caught in the middle stage of gene fixation following duplication. The presence/absence of HPT2 in barley (diploid), wild emmer (tetraploid), and bread wheat (hexaploid) pangenome lines appears to be associated with gene dosage constraint and environmental adaption. Based on these observations, we adopted a phylogeny-based orthology inference approach and performed comparative gene retention analyses across barley, wild emmer, and bread wheat. This led to the identification of 326 HPT2-pattern-like genes at whole genome scale, representing a pool of gene duplicates in the middle stage of gene fixation. Majority of these HPT2-pattern-like genes were identified as small-scale duplicates, such as dispersed, tandem, and proximal duplications. Natural selection analyses showed that HPT2-pattern-like genes have experienced relaxed selection pressure, which is generally accompanied with partial positive selection and transcriptional divergence. Functional enrichment analyses showed that HPT2-pattern-like genes are over-represented with molecular-binding and defense response functions, supporting the potential role of environmental adaption during gene retention. We also observed that gene duplicates from larger gene family are more likely to be lost, implying a gene dosage constraint effect. Further comparative gene retention analysis in barley and bread wheat pangenome lines revealed combined effects of species-specific selection and gene dosage constraint.

CONCLUSIONS: Comparative gene retention analyses at the population level support gene dosage constraint, environmental adaption, and species-specific selection as three factors that may affect gene retention following gene duplication. Our findings shed light on the evolutionary process leading to the retention of newly formed gene duplicates and will greatly improve our understanding on genome evolution via duplication.}, } @article {pmid36746216, year = {2023}, author = {Jeong, BR and Jang, J and Jin, E}, title = {Genome engineering via gene editing technologies in microalgae.}, journal = {Bioresource technology}, volume = {373}, number = {}, pages = {128701}, doi = {10.1016/j.biortech.2023.128701}, pmid = {36746216}, issn = {1873-2976}, mesh = {*Gene Editing ; *Microalgae/genetics/metabolism ; Genetic Engineering ; Biotechnology ; Metabolic Networks and Pathways ; Metabolic Engineering ; }, abstract = {CRISPR-Cas has revolutionized genetic modification with its comparative simplicity and accuracy, and it can be used even at the genomic level. Microalgae are excellent feedstocks for biofuels and nutraceuticals because they contain high levels of fatty acids, carotenoids, and other metabolites; however, genome engineering for microalgae is not yet as developed as for other model organisms. Microalgal engineering at the genetic and metabolic levels is relatively well established, and a few genomic resources are available. Their genomic information was used for a "safe harbor" site for stable transgene expression in microalgae. This review proposes further genome engineering schemes including the construction of sgRNA libraries, pan-genomic and epigenomic resources, and mini-genomes, which can together be developed into synthetic biology for carbon-based engineering in microalgae. 
Acetyl-CoA is at the center of carbon metabolic pathways and is further reviewed for the production of molecules including terpenoids in microalgae.}, } @article {pmid36741902, year = {2022}, author = {Srinivas, K and Ghatak, S and Pyngrope, DA and Angappan, M and Milton, AAP and Das, S and Lyngdoh, V and Lamare, JP and Prasad, MCB and Sen, A}, title = {Avian strains of emerging pathogen Escherichia fergusonii are phylogenetically diverse and harbor the greatest AMR dissemination potential among different sources: Comparative genomic evidence.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1080677}, pmid = {36741902}, issn = {1664-302X}, abstract = {INTRODUCTION: Escherichia fergusonii is regarded as an emerging pathogen with zoonotic potential. In the current study, we undertook source-wise comparative genomic analyses (resistome, virulome, mobilome and pangenome) to understand the antimicrobial resistance, virulence, mobile genetic elements and phylogenetic diversity of E. fergusonii.

METHODS: Six E. fergusonii strains (5 multidrug resistant strains and 1 biofilm former) were isolated from poultry (duck faeces and retail chicken samples). Following confirmation by phenotypic and molecular methods, the isolates were further characterized and their genomes were sequenced. Comparative resisto-virulo-mobilome analyses and pangenomics were performed for E. fergusonii genomes, while including 125 other E. fergusonii genomes available from NCBI database.

RESULTS AND DISCUSSION: Avian and porcine strains of E. fergusonii were found to carry significantly higher number of antimicrobial resistance genes (p < 0.05) and mobile genetic elements (plasmids, transposons and integrons) (p < 0.05), while the pathogenic potential of bovine strains was significantly higher compared to other strains (p < 0.05). Pan-genome development trends indicated open pan-genome for all strains (0 < γ < 1). Genomic diversity of avian strains was found to be greater than that from other sources. Phylogenetic analysis revealed close clustering among isolates of similar isolation source and geographical location. Indian isolates of E. fergusonii clustered closely with those from Chinese and a singleton Australian isolate. Overall, being the first pangenomic study on E. fergusonii, our analysis provided important cues on genomic features of the emerging pathogen E. fergusonii while highlighting the potential role of avian strains in dissemination of AMR.}, } @article {pmid36739346, year = {2023}, author = {Lanclos, VC and Rasmussen, AN and Kojima, CY and Cheng, C and Henson, MW and Faircloth, BC and Francis, CA and Thrash, JC}, title = {Ecophysiology and genomics of the brackish water adapted SAR11 subclade IIIa.}, journal = {The ISME journal}, volume = {17}, number = {4}, pages = {620-629}, pmid = {36739346}, issn = {1751-7370}, mesh = {Phylogeny ; *Saline Waters ; Oceans and Seas ; Genomics ; Biological Evolution ; *Alphaproteobacteria/genetics ; Seawater ; }, abstract = {The Order Pelagibacterales (SAR11) is the most abundant group of heterotrophic bacterioplankton in global oceans and comprises multiple subclades with unique spatiotemporal distributions. Subclade IIIa is the primary SAR11 group in brackish waters and shares a common ancestor with the dominant freshwater IIIb (LD12) subclade. Despite its dominance in brackish environments, subclade IIIa lacks systematic genomic or ecological studies. 
Here, we combine closed genomes from new IIIa isolates, new IIIa MAGS from San Francisco Bay (SFB), and 460 highly complete publicly available SAR11 genomes for the most comprehensive pangenomic study of subclade IIIa to date. Subclade IIIa represents a taxonomic family containing three genera (denoted as subgroups IIIa.1, IIIa.2, and IIIa.3) that had distinct ecological distributions related to salinity. The expansion of taxon selection within subclade IIIa also established previously noted metabolic differentiation in subclade IIIa compared to other SAR11 subclades such as glycine/serine prototrophy, mosaic glyoxylate shunt presence, and polyhydroxyalkanoate synthesis potential. Our analysis further shows metabolic flexibility among subgroups within IIIa. Additionally, we find that subclade IIIa.3 bridges the marine and freshwater clades based on its potential for compatible solute transport, iron utilization, and bicarbonate management potential. Pure culture experimentation validated differential salinity ranges in IIIa.1 and IIIa.3 and provided detailed IIIa cell size and volume data. 
This study is an important step forward for understanding the genomic, ecological, and physiological differentiation of subclade IIIa and the overall evolutionary history of SAR11.}, } @article {pmid36728698, year = {2023}, author = {Saikia, J and Kotoky, R and Debnath, R and Kumar, N and Gogoi, P and Yadav, A and Saikia, R}, title = {De novo genomic analysis of Enterobacter asburiae EBRJ12, a plant growth-promoting rhizobacteria isolated from the rhizosphere of Phaseolus vulgaris L.}, journal = {Journal of applied microbiology}, volume = {134}, number = {2}, pages = {}, doi = {10.1093/jambio/lxac090}, pmid = {36728698}, issn = {1365-2672}, support = {MLP-1016//CSIR/ ; }, mesh = {*Phaseolus ; Rhizosphere ; Siderophores/genetics/metabolism ; Plant Development ; Bacteria ; Plants/microbiology ; Plant Roots/microbiology ; Soil Microbiology ; }, abstract = {AIM: Environmental stresses such as water deficit induced stress are one of the major limiting factors in crop production. However, some plant growth-promoting rhizobacteria (PGPR) can promote plant growth in such adverse condition. Therefore, the objective was to isolate rhizospheric bacteria from Phaseolus vulgaris L. growing in a drought-affected soil and to analyze its plant growth promoting (PGP) efficacy to black gram (Vigna mungo L.) and Bhut jolokia (Capsicum chinense Jacq.). Whole-genome sequencing of the potential bacteria was targeted to analyze the genetic potential of the isolate as a plant growth-promoting agent.

METHODS AND RESULTS: The isolate Enterobacter asburiae EBRJ12 was selected based on its PGP efficacy, which significantly improved plant growth and development. The genomic analysis revealed the presence of one circular chromosome of size 4.8 Mb containing 16 genes for osmotic stress regulation including osmotically inducible protein osmY, outer membrane protein A precursor ompA, aquaporin Z, and an operon for osmoprotectant ABC transporter yehZYXW. Moreover, the genome has a complete genetic cluster for biosynthesis of siderophore Enterobactin and siderophore Aerobactin. The PGP effects were verified with black gram and Bhut jolokia in pot experiments. The isolate significantly increased the shoot length by 35.0% and root length by 58.0% of black gram, while 41.0% and 57.0% of elevation in shoot and root length were observed in Bhut jolokia compared to non-inoculated plants.

CONCLUSIONS: The EBRJ12 has PGP features that could improve the growth in host plants, and the genomic characterization revealed the presence of genetic potential for plant growth promotion.}, } @article {pmid36726175, year = {2023}, author = {Petersen, C and Sørensen, T and Nielsen, MR and Sondergaard, TE and Sørensen, JL and Fitzpatrick, DA and Frisvad, JC and Nielsen, KL}, title = {Comparative genomic study of the Penicillium genus elucidates a diverse pangenome and 15 lateral gene transfer events.}, journal = {IMA fungus}, volume = {14}, number = {1}, pages = {3}, pmid = {36726175}, issn = {2210-6340}, support = {NNF18OC0034952//Novo Nordisk Fonden/ ; }, abstract = {The Penicillia are known to produce a wide range of natural products—some with devastating outcome for the agricultural industry and others with unexploited potential in different applications. However, a large-scale overview of the biosynthetic potential of different species has been lacking. In this study, we sequenced 93 Penicillium isolates and, together with eleven published genomes that hold similar assembly characteristics, we established a species phylogeny as well as defining a Penicillium pangenome. A total of 5612 genes were shared between ≥ 98 isolates corresponding to approximately half of the average number of genes a Penicillium genome holds. We further identified 15 lateral gene transfer events that have occurred in this collection of Penicillium isolates, which might have played an important role, such as niche adaption, in the evolution of these fungi. 
The comprehensive characterization of the genomic diversity in the Penicillium genus supersedes single-reference genomes, which do not necessarily capture the entire genetic variation.}, } @article {pmid36718535, year = {2023}, author = {Lu, Y and Luo, J and An, E and Lu, B and Wei, Y and Chen, X and Lu, K and Liang, S and Hu, H and Han, M and He, S and Shen, J and Guo, D and Bu, N and Yang, L and Xu, W and Lu, C and Xiang, Z and Tong, X and Dai, F}, title = {Deciphering the Genetic Basis of Silkworm Cocoon Colors Provides New Insights into Biological Coloration and Phenotypic Diversification.}, journal = {Molecular biology and evolution}, volume = {40}, number = {2}, pages = {}, pmid = {36718535}, issn = {1537-1719}, mesh = {Animals ; *Bombyx/genetics/metabolism ; Silk/genetics/metabolism ; Base Sequence ; Flavonoids/metabolism ; }, abstract = {The genetic basis of phenotypic variation is a long-standing concern of evolutionary biology. Coloration has proven to be a visual, easily quantifiable, and highly tractable system for genetic analysis and is an ever-evolving focus of biological research. Compared with the homogenized brown-yellow cocoons of wild silkworms, the cocoons of domestic silkworms are spectacularly diverse in color, such as white, green, and yellow-red; this provides an outstanding model for exploring the phenotypic diversification and biological coloration. Herein, the molecular mechanism underlying silkworm green cocoon formation was investigated, which was not fully understood. We demonstrated that five of the seven members of a sugar transporter gene cluster were specifically duplicated in the Bombycidae and evolved new spatial expression patterns predominantly expressed in silk glands, accompanying complementary temporal expression; they synergistically facilitate the uptake of flavonoids, thus determining the green cocoon. 
Subsequently, polymorphic cocoon coloring landscape involving multiple loci and the evolution of cocoon color from wild to domestic silkworms were analyzed based on the pan-genome sequencing data. It was found that cocoon coloration involved epistatic interaction between loci; all the identified cocoon color-related loci existed in wild silkworms; the genetic segregation, recombination, and variation of these loci shaped the multicolored cocoons of domestic silkworms. This study revealed a new mechanism for flavonoids-based biological coloration that highlights the crucial role of gene duplication followed by functional diversification in acquiring new genetic functions; furthermore, the results in this work provide insight into phenotypic innovation during domestication.}, } @article {pmid36707768, year = {2023}, author = {Sun, Y and Xiao, W and Wang, QN and Wang, J and Kong, XD and Ma, WH and Liu, SX and Ren, P and Xu, LN and Zhang, YJ}, title = {Multiple variation patterns of terpene synthases in 26 maize genomes.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {46}, pmid = {36707768}, issn = {1471-2164}, mesh = {*Zea mays/genetics/metabolism ; Terpenes/metabolism ; *Alkyl and Aryl Transferases/genetics ; Plants/metabolism ; }, abstract = {Terpenoids are important compounds associated with the pest and herbivore resistance mechanisms of plants; consequently, it is essential to identify and explore terpene synthase (TPS) genes in maize. In the present study, we identified 31 TPS genes based on a pan-genome of 26 high-quality maize genomes containing 20 core genes (present in all 26 lines), seven dispensable genes (present in 2 to 23 lines), three near-core genes (present in 24 to 25 lines), and one private gene (present in only 1 line). Evaluation of ka/ks values of TPS in 26 varieties revealed that TPS25 was subjected to positive selection in some varieties. 
Six ZmTPS had ka/ks values less than 1, indicating that they were subjected to purifying selection. In 26 genomes, significant differences were observed in ZmTPS25 expression between genes affected by structural variation (SV) and those not affected by SV. In some varieties, SV altered the conserved structural domains resulting in a considerable number of atypical genes. The analysis of RNA-seq data of maize Ostrinia furnacalis feeding revealed 10 differentially expressed ZmTPS, 9 of which were core genes. However, many atypical genes for these responsive genes were identified in several genomes. These findings provide a novel resource for functional studies of ZmTPS.}, } @article {pmid36706753, year = {2023}, author = {Younginger, BS and Mayba, O and Reeder, J and Nagarkar, DR and Modrusan, Z and Albert, ML and Byrd, AL}, title = {Enrichment of oral-derived bacteria in inflamed colorectal tumors and distinct associations of Fusobacterium in the mesenchymal subtype.}, journal = {Cell reports. Medicine}, volume = {4}, number = {2}, pages = {100920}, pmid = {36706753}, issn = {2666-3791}, mesh = {Humans ; *Colorectal Neoplasms/genetics ; Fusobacterium/genetics ; Microsatellite Instability ; Transcriptome ; }, abstract = {While the association between colorectal cancer (CRC) features and Fusobacterium has been extensively studied, less is known of other intratumoral bacteria. Here, we leverage whole transcriptomes from 807 CRC samples to dually characterize tumor gene expression and 74 intratumoral bacteria. Seventeen of these species, including 4 Fusobacterium spp., are classified as orally derived and are enriched among right-sided, microsatellite instability-high (MSI-H), and BRAF-mutant tumors. Across consensus molecular subtypes (CMSs), integration of Fusobacterium animalis (Fa) presence and tumor expression reveals that Fa has the most significant associations in mesenchymal CMS4 tumors despite a lower prevalence than in immune CMS1. 
Within CMS4, the prevalence of Fa is uniquely associated with collagen- and immune-related pathways. Additional Fa pangenome analysis reveals that stress response genes and the adhesion FadA are commonly expressed intratumorally. Overall, this study identifies oral-derived bacteria as enriched in inflamed tumors, and the associations of bacteria and tumor expression are context and species specific.}, } @article {pmid36703158, year = {2023}, author = {Wang, J and Yang, W and Zhang, S and Hu, H and Yuan, Y and Dong, J and Chen, L and Ma, Y and Yang, T and Zhou, L and Chen, J and Liu, B and Li, C and Edwards, D and Zhao, J}, title = {A pangenome analysis pipeline provides insights into functional gene identification in rice.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {19}, pmid = {36703158}, issn = {1474-760X}, mesh = {*Oryza/genetics ; Genome-Wide Association Study ; Genomics/methods ; Genome ; Computational Biology ; }, abstract = {BACKGROUND: A pangenome aims to capture the complete genetic diversity within a species and reduce bias in genetic analysis inherent in using a single reference genome. However, the current linear format of most plant pangenomes limits the presentation of position information for novel sequences. Graph pangenomes have been developed to overcome this limitation. However, bioinformatics analysis tools for graph format genomes are lacking.

RESULTS: To overcome this problem, we develop a novel strategy for pangenome construction and a downstream pangenome analysis pipeline (PSVCP) that captures genetic variants' position information while maintaining a linearized layout. Using PSVCP, we construct a high-quality rice pangenome using 12 representative rice genomes and analyze an international rice panel with 413 diverse accessions using the pangenome as the reference. We show that PSVCP successfully identifies causal structural variations for rice grain weight and plant height. Our results provide insights into rice population structure and genomic diversity. We characterize a new locus (qPH8-1) associated with plant height on chromosome 8 undetected by the SNP-based genome-wide association study (GWAS).

CONCLUSIONS: Our results demonstrate that the pangenome constructed by our pipeline combined with a presence and absence variation-based GWAS can provide additional power for genomic and genetic analysis. The pangenome constructed in this study and the associated genome sequence and genetic variants data provide valuable genomic resources for rice genomics research and improvement in future.}, } @article {pmid36699832, year = {2022}, author = {Lee, G and Choi, H and Liu, H and Han, YH and Paul, NC and Han, GH and Kim, H and Kim, PI and Seo, SI and Song, J and Sang, H}, title = {Biocontrol of the causal brown patch pathogen Rhizoctonia solani by Bacillus velezensis GH1-13 and development of a bacterial strain specific detection method.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1091030}, pmid = {36699832}, issn = {1664-462X}, abstract = {Brown patch caused by the basidiomycete fungus Rhizoctonia solani is an economically important disease of cool-season turfgrasses. In order to manage the disease, different types of fungicides have been applied, but the negative impact of fungicides on the environment continues to rise. In this study, the beneficial bacteria Bacillus velezensis GH1-13 was characterized as a potential biocontrol agent to manage brown patch disease. The strain GH1-13 strongly inhibited the mycelial growth of turf pathogens including different anastomosis groups of R. solani causing brown patch and large patch. R. solani AG2-2(IIIB) hyphae were morphologically changed, and fungal cell death resulted from exposure to the strain GH1-13. In addition, the compatibility of fungicides with the bacterial strain, and the combined application of fungicide azoxystrobin and the strain in brown patch control on creeping bentgrass indicated that the strain could serve as a biocontrol agent. 
To develop strain-specific detection method, two unique genes from chromosome and plasmid of GH1-13 were found using pan-genome analysis of 364 Bacillus strains. The unique gene from chromosome was successfully detected using both SYBR Green and TaqMan qPCR methods in bacterial DNA or soil DNA samples. This study suggests that application of GH1-13 offers an environmentally friendly approach via reducing fungicide application rates. Furthermore, the developed pipeline of strain-specific detection method could be a useful tool for detecting and studying the dynamics of specific biocontrol agents.}, } @article {pmid36699320, year = {2022}, author = {Hanafy, M and Hansen, C and Phanse, Y and Wu, CW and Nelson, K and Aschenbroich, SA and Talaat, AM}, title = {Characterization of early immune responses elicited by live and inactivated vaccines against Johne's disease in goats.}, journal = {Frontiers in veterinary science}, volume = {9}, number = {}, pages = {1046704}, pmid = {36699320}, issn = {2297-1769}, abstract = {Mycobacterium avium subspecies paratuberculosis (M. paratuberculosis) is the causative agent of Johne's disease, a chronic debilitating condition affecting ruminants causing significant economic losses to the dairy industry. Available inactivated vaccines are not effective in controlling the disease and vaccinated animals can continue to infect newly born calves. Recently, we have shown that a live-attenuated vaccine candidate (pgsN) is protective in goats and calves following challenge with virulent strains of M. paratuberculosis. To decipher the dynamics of the immune responses elicited by both live-attenuated and inactivated vaccines, we analyzed key immunological parameters of goats immunized through different routes when a marker-less pgsN vaccine was used. 
Within a few weeks, the inactivated vaccine triggered the formation of granulomas both at the site of inoculation and in regional lymph nodes, that increased in size over time and persisted until the end of the experiment. In contrast, granulomas induced by the pgsN vaccine were small and subsided during the study. Interestingly, in this vaccine group, histology demonstrated an initial abundance of intra-histiocytic mycobacterial bacilli at the site of inoculation, with recruitment of very minimal T lymphocytes to poorly organized granulomas. Over time, granulomas became more organized, with recruitment of greater numbers of T and B lymphocytes, which coincided with a lack of mycobacteria. For the inactivated vaccine group, mycobacterial bacilli were identified extracellularly within the center of caseating granulomas, with relatively equal proportions of B- and T-lymphocytes maintained across both early and late times. Despite the differences in granuloma-specific lymphocyte recruitment, markers for cell-mediated immunity (e.g., IFN-γ release) were robust in both injected pgsN and inactivated vaccine groups. In contrast, the intranasal live-attenuated vaccine did not elicit any reaction at site of inoculation, nor cell-mediated immune responses. Finally, 80% of animals in the inactivated vaccine group significantly reacted to purified protein derivatives from M. bovis, while reactivity was detected in only 20% of animals receiving pgsN vaccine, suggesting a higher level of cross reactivity for bovine tuberculosis when inactivated vaccine is used. 
Overall, these results depict the cellular recruitment strategies driving immune responses elicited by both live-attenuated and inactivated vaccines that target Johne's disease.}, } @article {pmid36698972, year = {2023}, author = {Yang, MR and Wu, YW}, title = {A Cross-Validated Feature Selection (CVFS) approach for extracting the most parsimonious feature sets and discovering potential antimicrobial resistance (AMR) biomarkers.}, journal = {Computational and structural biotechnology journal}, volume = {21}, number = {}, pages = {769-779}, pmid = {36698972}, issn = {2001-0370}, abstract = {Understanding genes and their underlying mechanisms is critical in deciphering how antimicrobial-resistant (AMR) bacteria withstand detrimental effects of antibiotic drugs. At the same time the genes related to AMR phenotypes may also serve as biomarkers for predicting whether a microbial strain is resistant to certain antibiotic drugs. We developed a Cross-Validated Feature Selection (CVFS) approach for robustly selecting the most parsimonious gene sets for predicting AMR activities from bacterial pan-genomes. The core idea behind the CVFS approach is interrogating features among non-overlapping sub-parts of the datasets to ensure the representativeness of the features. By randomly splitting the dataset into disjoint sub-parts, conducting feature selection within each sub-part, and intersecting the features shared by all sub-parts, the CVFS approach is able to achieve the goal of extracting the most representative features for yielding satisfactory AMR activity prediction accuracy. By testing this idea on bacterial pan-genome datasets, we showed that this approach was able to extract the most succinct feature sets that predicted AMR activities very well, indicating the potential of these genes as AMR biomarkers. 
The functional analysis demonstrated that the CVFS approach was able to extract both known AMR genes and novel ones, suggesting the capabilities of the algorithm in selecting relevant features and highlighting the potential of the novel genes in expanding the antimicrobial resistance gene databases.}, } @article {pmid36698060, year = {2023}, author = {Sivakumar, R and Pranav, PS and Annamanedi, M and Chandrapriya, S and Isloor, S and Rajendhran, J and Hegde, NR}, title = {Genome sequencing and comparative genomic analysis of bovine mastitis-associated Staphylococcus aureus strains from India.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {44}, pmid = {36698060}, issn = {1471-2164}, mesh = {Animals ; Cattle ; Female ; Humans ; Anti-Bacterial Agents ; Genomics ; *Mastitis, Bovine/epidemiology/microbiology ; Multilocus Sequence Typing ; Phylogeny ; *Staphylococcal Infections/microbiology/veterinary ; *Staphylococcus aureus/genetics ; *Genome, Bacterial ; India ; }, abstract = {BACKGROUND: Bovine mastitis accounts for significant economic losses to the dairy industry worldwide. Staphylococcus aureus is the most common causative agent of bovine mastitis. Investigating the prevalence of virulence factors and antimicrobial resistance would provide insight into the molecular epidemiology of mastitis-associated S. aureus strains. The present study is focused on the whole genome sequencing and comparative genomic analysis of 41 mastitis-associated S. aureus strains isolated from India.

RESULTS: The results elucidate explicit knowledge of 15 diverse sequence types (STs) and five clonal complexes (CCs). The clonal complexes CC8 and CC97 were found to be the predominant genotypes comprising 21 and 10 isolates, respectively. The mean genome size was 2.7 Mbp with a 32.7% average GC content. The pan-genome of the Indian strains of mastitis-associated S. aureus is almost closed. The genome-wide SNP-based phylogenetic analysis differentiated 41 strains into six major clades. Sixteen different spa types were identified, and eight isolates were untypeable. The cgMLST analysis of all S. aureus genome sequences reported from India revealed that S. aureus strain MUF256, isolated from wound fluids of a diabetic patient, was the common ancestor. Further, we observed that all the Indian mastitis-associated S. aureus isolates belonging to the CC97 are mastitis-associated. We identified 17 different antimicrobial resistance (AMR) genes among these isolates, and all the isolates used in this study were susceptible to methicillin. We also identified 108 virulence-associated genes and discuss their associations with different genotypes.

CONCLUSION: This is the first study presenting a comprehensive whole genome analysis of bovine mastitis-associated S. aureus isolates from India. Comparative genomic analysis revealed the genome diversity, major genotypes, antimicrobial resistome, and virulome of clinical and subclinical mastitis-associated S. aureus strains.}, } @article {pmid36695592, year = {2023}, author = {Giacomini, JJ and Torres-Morales, J and Dewhirst, FE and Borisy, GG and Mark Welch, JL}, title = {Site Specialization of Human Oral Veillonella Species.}, journal = {Microbiology spectrum}, volume = {11}, number = {1}, pages = {e0404222}, pmid = {36695592}, issn = {2165-0497}, support = {R01 DE016937/DE/NIDCR NIH HHS/United States ; R01 DE022586/DE/NIDCR NIH HHS/United States ; R01 DE030136/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; *Veillonella/genetics ; Mouth/microbiology ; Tongue/microbiology ; Palatine Tonsil ; *Microbiota ; }, abstract = {Veillonella species are abundant members of the human oral microbiome with multiple interspecies commensal relationships. Examining the distribution patterns of Veillonella species across the oral cavity is fundamental to understanding their oral ecology. In this study, we used a combination of pangenomic analysis and oral metagenomic information to clarify Veillonella taxonomy and to test the site specialist hypothesis for the Veillonella genus, which contends that most oral bacterial species are adapted to live at specific oral sites. Using isolate genome sequences combined with shotgun metagenomic sequence data, we showed that Veillonella species have clear, differential site specificity: Veillonella parvula showed strong preference for supra- and subgingival plaque, while closely related V. dispar, as well as more distantly related V. atypica, preferred the tongue dorsum, tonsils, throat, and hard palate. In addition, the provisionally named Veillonella sp. Human Microbial Taxon 780 showed strong site specificity for keratinized gingiva. 
Using comparative genomic analysis, we identified genes associated with thiamine biosynthesis and the reductive pentose phosphate cycle that may enable Veillonella species to occupy their respective habitats. IMPORTANCE Understanding the microbial ecology of the mouth is fundamental for understanding human physiology. In this study, metapangenomics demonstrated that different Veillonella species have clear ecological preferences in the oral cavity of healthy humans, validating the site specialist hypothesis. Furthermore, the gene pool of different Veillonella species was found to be reflective of their ecology, illuminating the potential role of vitamins and carbohydrates in determining Veillonella distribution patterns and interspecies interactions.}, } @article {pmid36693839, year = {2023}, author = {Gao, Y and Guitton-Sert, L and Dessapt, J and Coulombe, Y and Rodrigue, A and Milano, L and Blondeau, A and Larsen, NB and Duxin, JP and Hussein, S and Fradet-Turcotte, A and Masson, JY}, title = {A CRISPR-Cas9 screen identifies EXO1 as a formaldehyde resistance gene.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {381}, pmid = {36693839}, issn = {2041-1723}, mesh = {Humans ; *CRISPR-Cas Systems ; DNA ; DNA Damage/drug effects/genetics ; DNA Repair/drug effects/genetics ; DNA Repair Enzymes/genetics/metabolism ; DNA Replication/drug effects/genetics ; *Exodeoxyribonucleases/genetics/metabolism ; *Fanconi Anemia/chemically induced/genetics ; *Formaldehyde/toxicity ; Genomic Instability/drug effects/genetics ; *Drug Tolerance/genetics ; }, abstract = {Fanconi Anemia (FA) is a rare, genome instability-associated disease characterized by a deficiency in repairing DNA crosslinks, which are known to perturb several cellular processes, including DNA transcription, replication, and repair. Formaldehyde, a by-product of metabolism, is thought to drive FA by generating DNA interstrand crosslinks (ICLs) and DNA-protein crosslinks (DPCs). 
However, the impact of formaldehyde on global cellular pathways has not been investigated thoroughly. Herein, using a pangenomic CRISPR-Cas9 screen, we identify EXO1 as a critical regulator of formaldehyde-induced DNA lesions. We show that EXO1 knockout cell lines exhibit formaldehyde sensitivity leading to the accumulation of replicative stress, DNA double-strand breaks, and quadriradial chromosomes, a typical feature of FA. After formaldehyde exposure, EXO1 is recruited to chromatin, protects DNA replication forks from degradation, and functions in parallel with the FA pathway to promote cell survival. In vitro, EXO1-mediated exonuclease activity is proficient in removing DPCs. Collectively, we show that EXO1 limits replication stress and DNA damage to counteract formaldehyde-induced genome instability.}, } @article {pmid36691844, year = {2023}, author = {Hu, J and Chen, L and Li, G and Pan, Y and Lu, Y and Chen, J and Xiong, W and Zeng, Z}, title = {Prevalence and genetic characteristics of fosB-positive Staphylococcus aureus in duck farms in Guangdong, China in 2020.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {78}, number = {3}, pages = {802--809}, doi = {10.1093/jac/dkad014}, pmid = {36691844}, issn = {1460-2091}, mesh = {Animals ; Humans ; Staphylococcus aureus/genetics ; Anti-Bacterial Agents ; Ducks ; Farms ; Prevalence ; Microbial Sensitivity Tests ; *Staphylococcal Infections/microbiology ; China/epidemiology ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Proto-Oncogene Proteins c-fos ; }, abstract = {OBJECTIVES: To investigate the epidemiology of fosB-positive Staphylococcus aureus in waterfowl farms in the Pearl River tributaries in Guangdong Province, China in 2020.

METHODS: A total of 63 S. aureus were recovered from 315 samples collected from six duck farms and one goose farm. PFGE, WGS and analysis were performed on 19 fosB-positive S. aureus.

RESULTS: The fosfomycin resistance rate of the strains was as high as 52.4% (33/63), and 30.1% (19/63) of the strains carried fosB. Resistance gene prediction results showed that duck farm environment-derived strains contained the oxazolidinone drug resistance gene optrA. All fosB-positive S. aureus were MRSA and most of them were MDR, mainly ST9-t899 and ST164-t899. PFGE showed that fosB-positive S. aureus from humans and ducks could be clustered into the same clade. In addition, core-genome SNP analysis showed that clonal transmission of S. aureus occurred between humans and water. Pan-genome analysis showed that S. aureus had an open pangenome. The fosB gene was located on 2610-2615 bp plasmids, which all contained a broad host-range plasmid replication protein family 13. Small plasmids carrying the fosB gene could be found in different multilocus STs of S. aureus.

CONCLUSIONS: This study indicated that duck farms in Guangdong, China could be an important reservoir of fosB-positive S. aureus. The spread of drug-resistant bacteria in waterfowl farms requires further monitoring.}, } @article {pmid36688776, year = {2023}, author = {Basak, C and Chakraborty, R}, title = {A novel strain of Shigella isolated from the gut of Lepidocephalichthys guntea has in its genome a complete gene package for Type II secretion system, and elaborate repertoire of genes responsible for multiple antibiotic-resistance and metal resistance via specific efflux channels.}, journal = {Letters in applied microbiology}, volume = {76}, number = {1}, pages = {}, doi = {10.1093/lambio/ovac049}, pmid = {36688776}, issn = {1472-765X}, support = {//Bangladesh Council of Scientific and Industrial Research/ ; }, mesh = {Animals ; *Anti-Bacterial Agents/pharmacology ; *Drug Resistance, Multiple, Bacterial/genetics ; Escherichia coli ; Genome, Bacterial ; Membrane Transport Proteins/genetics ; Multilocus Sequence Typing ; *Operon ; *Shigella/classification ; }, abstract = {The bacterial strain GCP5 was isolated from the gut of a bottom-dwelling fish Lepidocephalichthys guntea, that lives in the Magurmari River near North Bengal University in Siliguri, India. GCP5 was phylogenetically assigned to the Shigella genus using whole genome-based trees, k-mer analysis, the multilocus species tree (MLST), and single nucleotide polymorphism (SNP)-based trees, and the genetic makeup of the isolate was determined following assembly of the genome sequences and genome annotation with several bioinformatics tools. The presence of a complete package of general-secretory-pathway (gsp) genes, grouped in an operon identical to a well-characterized type II secretion system (T2SS), was confirmed by genome mining of Shigella sp. GCP5. The operon's gsp genes shared the most homology with Escherichia coli gsp genes. 
A few more high-pathogenicity islands (HPIs) in the GCP5 genome were validated using the pan-genomes analysis pipeline (PGAP) and island viewer. Several antibiotic-resistance genes were found in this genome, as well as the existence of key antibiotic efflux pump families, allowing for the creation of a gene network of several antibiotic efflux transporters. In addition, the genome contained genes specific for nickel transport, the nikABCD system, and the RND family transporter cusCFBA, which confers resistance to copper and silver by effluxing out Cu+ and Ag+ ions.}, } @article {pmid36687647, year = {2022}, author = {Zhang, M and Yu, Y and Wang, Q and Chen, R and Wang, Y and Bai, Y and Song, Z and Lu, X and Hao, Y}, title = {Conjugation of plasmid harboring bla NDM-1 in a clinical Providencia rettgeri strain through the formation of a fusion plasmid.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1071385}, pmid = {36687647}, issn = {1664-302X}, abstract = {Providencia rettgeri has recently gained increased importance owing to the New Delhi metallo-β-lactamase (NDM) and other β-lactamases produced by its clinical isolates. These enzymes reduce the efficiency of antimicrobial therapy. Herein, we reported the findings of whole-genome sequence analysis and a comprehensive pan-genome analysis performed on a multidrug-resistant P. rettgeri 18004577 clinical strain recovered from the urine of a hospitalized patient in Shandong, China, in 2018. Providencia rettgeri 18004577 was found to have a genome assembly size of 4.6 Mb with a G + C content of 41%; a circular plasmid p18004577_NDM of 273.3 Kb, harboring an accessory multidrug-resistant region; and a circular, stable IncT plasmid p18004577_Rts of 146.2 Kb. Additionally, various resistance genes were identified in its genome, including bla NDM-1, bla OXA-10, bla PER-4, aph(3')-VI, ant(2'')-Ia, ant(3')-Ia, sul1, catB8, catA1, mph(E), and tet. 
Conjugation experiments and whole-genome sequencing revealed that the bla NDM-1 gene could be transferred to the transconjugant via the formation of pJ18004577_NDM, a novel hybrid plasmid. Based on the genetic comparison, the main possible formation process for pJ18004577_NDM was the insertion of the [ΔISKox2-IS26-ΔISKox2]-aph(3')-VI-bla NDM-1 translocatable unit module from p18004577_NDM into plasmid p18004577_Rts in the Russian doll insertion structure (ΔISKox2-IS26-ΔISKox2), which played a role similar to that of IS26 using the "copy-in" route in the mobilization of [aph(3')-VI]-bla NDM-1. The array, multiplicity, and diversity of the resistance and virulence genes in this strain necessitate stringent infection control, antibiotic stewardship, and periodic resistance surveillance/monitoring policies to preempt further horizontal and vertical spread of the resistance genes. Roary analysis based on 30 P. rettgeri strains pan genome identified 415 core, 756 soft core, 5,744 shell, and 12,967 cloud genes, highlighting the "close" nature of P. rettgeri pan-genome. After a comprehensive pan-genome analysis, representative biological information was revealed that included phylogenetic distances, presence or absence of genes across the P. rettgeri bacteria clade, and functional distribution of proteins. Moreover, pan-genome analysis has been shown to be an effective approach to better understand P. 
rettgeri bacteria because it helps develop various tailored therapeutic strategies based on their biological similarities and differences.}, } @article {pmid36687645, year = {2022}, author = {Hurtado-Páez, U and Álvarez Zuluaga, N and Arango Isaza, RE and Contreras-Moreira, B and Rouzaud, F and Robledo, J}, title = {Pan-genome association study of Mycobacterium tuberculosis lineage-4 revealed specific genes related to the high and low prevalence of the disease in patients from the North-Eastern area of Medellín, Colombia.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1076797}, pmid = {36687645}, issn = {1664-302X}, abstract = {Mycobacterium tuberculosis (Mtb) lineage 4 is responsible for the highest burden of tuberculosis (TB) worldwide. This lineage has been the most prevalent lineage in Colombia, especially in the North-Eastern (NE) area of Medellin, where it has been shown to have a high prevalence of LAM9 SIT42 and Haarlem1 SIT62 sublineages. There is evidence that regardless of environmental factors and host genetics, differences among sublineages of Mtb strains play an important role in the course of infection and disease. Nevertheless, the genetic basis of the success of a sublineage in a specific geographic area remains uncertain. We used a pan-genome-wide association study (pan-GWAS) of 47 Mtb strains isolated from NE Medellin between 2005 and 2008 to identify the genes responsible for the phenotypic differences among high and low prevalence sublineages. Our results allowed the identification of 12 variants in 11 genes, of which 4 genes showed the strongest association to low prevalence (mmpL12, PPE29, Rv1419, and Rv1762c). The first three have been described as necessary for invasion and intracellular survival. Polymorphisms identified in low prevalence isolates may suggest related to a fitness cost of Mtb, which might reflect a decrease in their capacity to be transmitted or to cause an active infection. 
These results contribute to understanding the success of some sublineages of lineage-4 in a specific geographical area.}, } @article {pmid36687572, year = {2022}, author = {Robinson, LA and Collins, ACZ and Murphy, RA and Davies, JC and Allsopp, LP}, title = {Diversity and prevalence of type VI secretion system effectors in clinical Pseudomonas aeruginosa isolates.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1042505}, pmid = {36687572}, issn = {1664-302X}, abstract = {Pseudomonas aeruginosa is an opportunistic pathogen and a major driver of morbidity and mortality in people with Cystic Fibrosis (CF). The Type VI secretion system (T6SS) is a molecular nanomachine that translocates effectors across the bacterial membrane into target cells or the extracellular environment enabling intermicrobial interaction. P. aeruginosa encodes three T6SS clusters, the H1-, H2- and H3-T6SS, and numerous orphan islands. Genetic diversity of T6SS-associated effectors in P. aeruginosa has been noted in reference strains but has yet to be explored in clinical isolates. Here, we perform a comprehensive bioinformatic analysis of the pangenome and T6SS effector genes in 52 high-quality clinical P. aeruginosa genomes isolated from CF patients and housed in the Personalised Approach to P. aeruginosa strain repository. We confirm that the clinical CF isolate pangenome is open and principally made up of accessory and unique genes that may provide strain-specific advantages. We observed genetic variability in some effector/immunity encoding genes and show that several well-characterised vgrG and PAAR islands are absent from numerous isolates. Our analysis shows clear evidence of disruption to T6SS genomic loci through transposon, prophage, and mobile genetic element insertions. We identified an orphan vgrG island in P. 
aeruginosa strain PAK and five clinical isolates using in silico analysis which we denote vgrG7, predicting a gene within this cluster to encode a Tle2 lipase family effector. Close comparison of T6SS loci in clinical isolates compared to reference P. aeruginosa strain PAO1 revealed the presence of genes encoding eight new T6SS effectors with the following putative functions: cytidine deaminase, lipase, metallopeptidase, NADase, and pyocin. Finally, the prevalence of characterised and putative T6SS effectors were assessed in 532 publicly available P. aeruginosa genomes, which suggests the existence of accessory effectors. Our in silico study of the P. aeruginosa T6SS exposes a level of genetic diversity at T6SS genomic loci not seen to date within P. aeruginosa, particularly in CF isolates. As understanding the effector repertoire is key to identifying the targets of T6SSs and its efficacy, this comprehensive analysis provides a path for future experimental characterisation of these mediators of intermicrobial competition and host manipulation.}, } @article {pmid36685843, year = {2022}, author = {Stuart, KC and Sherwin, WB and Edwards, RJ and Rollins, LA}, title = {Evolutionary genomics: Insights from the invasive European starlings.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {1010456}, pmid = {36685843}, issn = {1664-8021}, abstract = {Two fundamental questions for evolutionary studies are the speed at which evolution occurs, and the way that this evolution may present itself within an organism's genome. Evolutionary studies on invasive populations are poised to tackle some of these pressing questions, including understanding the mechanisms behind rapid adaptation, and how it facilitates population persistence within a novel environment. 
Investigation of these questions are assisted through recent developments in experimental, sequencing, and analytical protocols; in particular, the growing accessibility of next generation sequencing has enabled a broader range of taxa to be characterised. In this perspective, we discuss recent genetic findings within the invasive European starlings in Australia, and outline some critical next steps within this research system. Further, we use discoveries within this study system to guide discussion of pressing future research directions more generally within the fields of population and evolutionary genetics, including the use of historic specimens, phenotypic data, non-SNP genetic variants (e.g., structural variants), and pan-genomes. In particular, we emphasise the need for exploratory genomics studies across a range of invasive taxa so we can begin understanding broad mechanisms that underpin rapid adaptation in these systems. Understanding how genetic diversity arises and is maintained in a population, and how this contributes to adaptability, requires a deep understanding of how evolution functions at the molecular level, and is of fundamental importance for the future studies and preservation of biodiversity across the globe.}, } @article {pmid36685320, year = {2023}, author = {Liew, KJ and Zakaria, MR and Hong, CWL and Tan, MCY and Chong, CS}, title = {Draft genome sequence of Joostella atrarenae M1-2[T] with cellulolytic and hemicellulolytic ability.}, journal = {3 Biotech}, volume = {13}, number = {2}, pages = {50}, pmid = {36685320}, issn = {2190-572X}, abstract = {The halophilic genus Joostella is one of the least-studied genera in the family of Flavobacteriaceae. So far, only two species were taxonomically identified with limited genomic analysis in the aspect of application has been reported. Joostella atrarenae M1-2[T] was previously isolated from a seashore sample and it is the second discovered species of the genus Joostella. 
In this project, the genome of J. atrarenae M1-2[T] was sequenced using NovaSeq 6000. The final assembled genome is comprised of 71 contigs, a total of 3,983,942 bp, a GC ratio of 33.2%, and encoded for 3,416 genes. The 16S rRNA gene sequence of J. atrarenae M1-2[T] shows 97.3% similarity against J. marina DSM 19592[T]. Genome-genome comparison between the two strains by ANI, dDDH, AAI, and POCP shows values of 80.8%, 23.3%, 83.4%, and 74.1% respectively. Pan-genome analysis shows that strain M1-2[T] and J. marina DSM 19592[T] shared a total of 248 core genes. Taken together, strain M1-2[T] and J. marina DSM 19592[T] belong to the same genus but are two different species. CAZymes analysis revealed that strain M1-2[T] harbors 109 GHs, 40 GTs, 5 PLs, 9 CEs, and 6 AAs. Among these CAZymes, while 5 genes are related to cellulose degradation, 12 and 24 genes are found to encode for xylanolytic enzymes and other hemicellulases that involve majorly in the side chain removal of the lignocellulose structure, respectively. Furthermore, both the intracellular and extracellular crude extracts of strain M1-2[T] exhibited enzymatic activities against CMC, xylan, pNPG, and pNPX substrates, which corresponding to endoglucanase, xylanase, β-glucosidase, and β-xylosidase, respectively. Collectively, description of genome coupled with the enzyme assay results demonstrated that J. 
atrarenae M1-2[T] has a role in lignocellulosic biomass degradation, and the strain could be useful for lignocellulosic biorefining.}, } @article {pmid36684744, year = {2022}, author = {Voelker, WG and Krishnan, K and Chougule, K and Alexander, LC and Lu, Z and Olson, A and Ware, D and Songsomboon, K and Ponce, C and Brenton, ZW and Boatwright, JL and Cooper, EA}, title = {Ten new high-quality genome assemblies for diverse bioenergy sorghum genotypes.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1040909}, pmid = {36684744}, issn = {1664-462X}, abstract = {INTRODUCTION: Sorghum (Sorghum bicolor (L.) Moench) is an agriculturally and economically important staple crop that has immense potential as a bioenergy feedstock due to its relatively high productivity on marginal lands. To capitalize on and further improve sorghum as a potential source of sustainable biofuel, it is essential to understand the genomic mechanisms underlying complex traits related to yield, composition, and environmental adaptations.

METHODS: Expanding on a recently developed mapping population, we generated de novo genome assemblies for 10 parental genotypes from this population and identified a comprehensive set of over 24 thousand large structural variants (SVs) and over 10.5 million single nucleotide polymorphisms (SNPs).

RESULTS: We show that SVs and nonsynonymous SNPs are enriched in different gene categories, emphasizing the need for long read sequencing in crop species to identify novel variation. Furthermore, we highlight SVs and SNPs occurring in genes and pathways with known associations to critical bioenergy-related phenotypes and characterize the landscape of genetic differences between sweet and cellulosic genotypes.

DISCUSSION: These resources can be integrated into both ongoing and future mapping and trait discovery for sorghum and its myriad uses including food, feed, bioenergy, and increasingly as a carbon dioxide removal mechanism.}, } @article {pmid36683686, year = {2022}, author = {Bai, Z and Zhang, N and Jin, Y and Chen, L and Mao, Y and Sun, L and Fang, F and Liu, Y and Han, M and Li, G}, title = {Comprehensive analysis of 84 Faecalibacterium prausnitzii strains uncovers their genetic diversity, functional characteristics, and potential risks.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {919701}, pmid = {36683686}, issn = {2235-2988}, mesh = {Humans ; *Faecalibacterium prausnitzii/genetics/metabolism ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Probiotics ; Genetic Variation ; }, abstract = {Faecalibacterium prausnitzii is a beneficial human gut microbe and a candidate for next-generation probiotics. With probiotics now being used in clinical treatments, concerns about their safety and side effects need to be considered. Therefore, it is essential to obtain a comprehensive understanding of the genetic diversity, functional characteristics, and potential risks of different F. prausnitzii strains. In this study, we collected the genetic information of 84 F. prausnitzii strains to conduct a pan-genome analysis with multiple perspectives. Based on single-copy genes and the sequences of 16S rRNA and the compositions of the pan-genome, different phylogenetic analyses of F. prausnitzii strains were performed, which showed the genetic diversity among them. Among the proteins of the pan-genome, we found that the accessory clusters made a greater contribution to the primary genetic functions of F. prausnitzii strains than the core and specific clusters. The functional annotations of F. 
prausnitzii showed that only a very small number of proteins were related to human diseases and there were no secondary metabolic gene clusters encoding harmful products. At the same time, complete fatty acid metabolism was detected in F. prausnitzii. In addition, we detected harmful elements, including antibiotic resistance genes, virulence factors, and pathogenic genes, and proposed the probiotic potential risk index (PPRI) and probiotic potential risk score (PPRS) to classify these 84 strains into low-, medium-, and high-risk groups. Finally, 15 strains were identified as low-risk strains and prioritized for clinical application. Undoubtedly, our results provide a comprehensive understanding and insight into F. prausnitzii, and PPRI and PPRS can be applied to evaluate the potential risks of probiotics in general and to guide the application of probiotics in clinical application.}, } @article {pmid36678781, year = {2022}, author = {Khan, MA and Amin, A and Farid, A and Ullah, A and Waris, A and Shinwari, K and Hussain, Y and Alsharif, KF and Alzahrani, KJ and Khan, H}, title = {Recent Advances in Genomics-Based Approaches for the Development of Intracellular Bacterial Pathogen Vaccines.}, journal = {Pharmaceutics}, volume = {15}, number = {1}, pages = {}, pmid = {36678781}, issn = {1999-4923}, abstract = {Infectious diseases continue to be a leading cause of morbidity and mortality worldwide. The majority of infectious diseases are caused by intracellular pathogenic bacteria (IPB). Historically, conventional vaccination drives have helped control the pathogenesis of intracellular bacteria and the emergence of antimicrobial resistance, saving millions of lives. However, in light of various limitations, many diseases that involve IPB still do not have adequate vaccines. 
In response to increasing demand for novel vaccine development strategies, a new area of vaccine research emerged following the advent of genomics technology, which changed the paradigm of vaccine development by utilizing the complete genomic data of microorganisms against them. It became possible to identify genes related to disease virulence, genetic patterns linked to disease virulence, as well as the genetic components that supported immunity and favorable vaccine responses. Complete genomic databases, and advancements in transcriptomics, metabolomics, structural genomics, proteomics, immunomics, pan-genomics, synthetic genomics, and population biology have allowed researchers to identify potential vaccine candidates and predict their effects in patients. New vaccines have been created against diseases for which previously there were no vaccines available, and existing vaccines have been improved. This review highlights the key issues and explores the evolution of vaccines. The increasing volume of IPB genomic data, and their application in novel genome-based techniques for vaccine development, were also examined, along with their characteristics, and the opportunities and obstacles involved. Critically, the application of genomics technology has helped researchers rapidly select and evaluate candidate antigens. 
Novel vaccines capable of addressing the limitations associated with conventional vaccines have been developed and pressing healthcare issues are being addressed.}, } @article {pmid36677470, year = {2023}, author = {Charles, C and Conde, C and Vorimore, F and Cochard, T and Michelet, L and Boschiroli, ML and Biet, F}, title = {Features of Mycobacterium bovis Complete Genomes Belonging to 5 Different Lineages.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677470}, issn = {2076-2607}, support = {773830//One Health European Joint Programme (OHEJP)/ ; }, abstract = {Mammalian tuberculosis (TB) is a zoonotic disease mainly due to Mycobacterium bovis (M. bovis). A current challenge for its eradication is understanding its transmission within multi-host systems. Improvements in long-read sequencing technologies have made it possible to obtain complete bacterial genomes that provide a comprehensive view of species-specific genomic features. In the context of TB, new genomic references based on complete genomes genetically close to field strains are also essential to perform precise field molecular epidemiological studies. A total of 10 M. bovis strains representing each genetic lineage identified in France and in other countries were selected for performing complete assembly of their genomes. Pangenome analysis revealed a "closed" pangenome composed of 3900 core genes and only 96 accessory genes. Whole genomes-based alignment using progressive Mauve showed remarkable conservation of the genomic synteny except that the genomes have a variable number of copies of IS6110. Characteristic genomic traits of each lineage were identified through the discovery of specific indels. Altogether, these results provide new genetic features that improve the description of M. bovis lineages. The availability of new complete representative genomes of M. 
bovis will be useful to epidemiological studies and better understand the transmission of this clonal-evolving pathogen.}, } @article {pmid36677411, year = {2023}, author = {Thakur, P and Alaba, MO and Rauniyar, S and Singh, RN and Saxena, P and Bomgni, A and Gnimpieba, EZ and Lushbough, C and Goh, KM and Sani, RK}, title = {Text-Mining to Identify Gene Sets Involved in Biocorrosion by Sulfate-Reducing Bacteria: A Semi-Automated Workflow.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677411}, issn = {2076-2607}, support = {P20 GM103443/GM/NIGMS NIH HHS/United States ; }, abstract = {A significant amount of literature is available on biocorrosion, which makes manual extraction of crucial information such as genes and proteins a laborious task. Despite the fast growth of biology related corrosion studies, there is a limited number of gene collections relating to the corrosion process (biocorrosion). Text mining offers a potential solution by automatically extracting the essential information from unstructured text. We present a text mining workflow that extracts biocorrosion associated genes/proteins in sulfate-reducing bacteria (SRB) from literature databases (e.g., PubMed and PMC). This semi-automatic workflow is built with the Named Entity Recognition (NER) method and Convolutional Neural Network (CNN) model. With PubMed and PMCID as inputs, the workflow identified 227 genes belonging to several Desulfovibrio species. To validate their functions, Gene Ontology (GO) enrichment and biological network analysis was performed using UniprotKB and STRING-DB, respectively. The GO analysis showed that metal ion binding, sulfur binding, and electron transport were among the principal molecular functions. 
Furthermore, the biological network analysis generated three interlinked clusters containing genes involved in metal ion binding, cellular respiration, and electron transfer, which suggests the involvement of the extracted gene set in biocorrosion. Finally, the dataset was validated through manual curation, yielding a similar set of genes as our workflow; among these, hysB and hydA, and sat and dsrB were identified as the metal ion binding and sulfur metabolism genes, respectively. The identified genes were mapped with the pangenome of 63 SRB genomes that yielded the distribution of these genes across 63 SRB based on the amino acid sequence similarity and were further categorized as core and accessory gene families. SRB's role in biocorrosion involves the transfer of electrons from the metal surface via a hydrogen medium to the sulfate reduction pathway. Therefore, genes encoding hydrogenases and cytochromes might be participating in removing hydrogen from the metals through electron transfer. Moreover, the production of corrosive sulfide from the sulfur metabolism indirectly contributes to the localized pitting of the metals. 
After the corroboration of text mining results with SRB biocorrosion mechanisms, we suggest that the text mining framework could be utilized for genes/proteins extraction and significantly reduce the manual curation time.}, } @article {pmid36677403, year = {2022}, author = {Romero-Calle, DX and Pedrosa-Silva, F and Tomé, LMR and Sousa, TJ and de Oliveira Santos, LTS and de Carvalho Azevedo, VA and Brenig, B and Benevides, RG and Venancio, TM and Billington, C and Góes-Neto, A}, title = {Hybrid Genomic Analysis of Salmonella enterica Serovar Enteritidis SE3 Isolated from Polluted Soil in Brazil.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677403}, issn = {2076-2607}, support = {001//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; }, abstract = {In Brazil, Salmonella enterica serovar Enteritidis is a significant health threat. Salmonella enterica serovar Enteritidis SE3 was isolated from soil at the Subaé River in Santo Amaro, Brazil, a region contaminated with heavy metals and organic waste. Illumina HiSeq and Oxford Nanopore Technologies MinION sequencing were used for de novo hybrid assembly of the Salmonella SE3 genome. This approach yielded 10 contigs with 99.98% identity with S. enterica serovar Enteritidis OLF-SE2-98984-6. Twelve Salmonella pathogenic islands, multiple virulence genes, multiple antimicrobial gene resistance genes, seven phage defense systems, seven prophages and a heavy metal resistance gene were encoded in the genome. Pangenome analysis of the S. enterica clade, including Salmonella SE3, revealed an open pangenome, with a core genome of 2137 genes. Our study showed the effectiveness of a hybrid sequence assembly approach for environmental Salmonella genome analysis using HiSeq and MinION data. 
This approach enabled the identification of key resistance and virulence genes, and these data are important to inform the control of Salmonella and heavy metal pollution in the Santo Amaro region of Brazil.}, } @article {pmid36677357, year = {2022}, author = {Myintzaw, P and Pennone, V and McAuliffe, O and Begley, M and Callanan, M}, title = {Variability in Cold Tolerance of Food and Clinical Listeria monocytogenes Isolates.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677357}, issn = {2076-2607}, support = {15F604 and 2019R495.//Department of Food Agriculture and the Marine, Ireland/ ; }, abstract = {The aim of this study was to investigate the level of strain variability amongst food and clinical Listeria monocytogenes isolates growing at low temperatures (4 and 7 °C) in both laboratory media and real food matrices. Isolates (n = 150) grown in laboratory media demonstrated a large variation in growth profiles measured using optical density. Overall, it was noted that clinical isolates exhibited a significantly higher growth rate (p ≤ 0.05) at 7 °C than the other isolates. Analysis of variance (ANOVA) tests of isolates grouped using Multi Locus Sequence Typing (MLST) revealed that clonal complex 18 (CC18) isolates were significantly (p ≤ 0.05) faster growing at 4 °C than other CC-type isolates while CC101, CC18, CC8, CC37 and CC14 were faster growing than other CC types at 7 °C. Euclidean distance and Ward method-based hierarchical clustering of mean growth rates classified 33.33% of isolates as faster growing. Fast and slow growing representative isolates were selected from the cluster analysis and growth rates were determined using plate count data in laboratory media and model food matrices. In agreement with the optical density experiments, CC18 isolates were faster and CC121 isolates were slower than other CC types in laboratory media, UHT milk and fish pie. 
The same trend was observed in chocolate milk but the differences were not statistically significant. Moreover, pan-genome analysis (Scoary) of isolate genome sequences only identified six genes of unknown function associated with increased cold tolerance while failing to identify any known cold tolerance genes. Overall, an association that was consistent in laboratory media and real food matrices was demonstrated between isolate CC type and increased cold tolerance.}, } @article {pmid36675897, year = {2023}, author = {Bigey, F and Pasteur, E and Połomska, X and Thomas, S and Crutz-Le Coq, AM and Devillers, H and Neuvéglise, C}, title = {Insights into the Genomic and Phenotypic Landscape of the Oleaginous Yeast Yarrowia lipolytica.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {9}, number = {1}, pages = {}, pmid = {36675897}, issn = {2309-608X}, support = {AIP-Bioressources 2011//National Research Institute for Agriculture, Food and Environment/ ; convention 2012 93 0805//DGAC/ ; Investment for the Future ("Investissements d'Avenir"), grant number ANR-001//SAS PIVERT/ ; }, abstract = {Although Yarrowia lipolytica is a model yeast for the study of lipid metabolism, its diversity is poorly known, as studies generally consider only a few standard laboratory strains. To extend our knowledge of this biotechnological workhorse, we investigated the genomic and phenotypic diversity of 56 natural isolates. Y. lipolytica is classified into five clades with no correlation between clade membership and geographic or ecological origin. A low genetic diversity (π = 0.0017) and a pan-genome (6528 genes) barely different from the core genome (6315 genes) suggest Y. lipolytica is a recently evolving species. Large segmental duplications were detected, totaling 892 genes. With three new LTR-retrotransposons of the Gypsy family (Tyl4, Tyl9, and Tyl10), the transposable element content of genomes appeared diversified but still low (from 0.36% to 3.62%). 
We quantified 34 traits with substantial phenotypic diversity, but genome-wide association studies failed to evidence any associations. Instead, we investigated known genes and found four mutational events leading to XPR2 protease inactivation. Regarding lipid metabolism, most high-impact mutations were found in family-belonging genes, such as ALK or LIP, and therefore had a low phenotypic impact, suggesting that the huge diversity of lipid synthesis and accumulation is multifactorial or due to complex regulations.}, } @article {pmid36671332, year = {2023}, author = {Fono-Tamo, EUK and Kamika, I and Dewar, JB and Lekota, KE}, title = {Comparative Genomics Revealed a Potential Threat of Aeromonas rivipollensis G87 Strain and Its Antibiotic Resistance.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {1}, pages = {}, pmid = {36671332}, issn = {2079-6382}, support = {TTK200306508304//National Research Foundation/ ; }, abstract = {Aeromonas rivipollensis is an emerging pathogen linked to a broad range of infections in humans. Due to the inability to accurately differentiate Aeromonas species using conventional techniques, in-depth comparative genomics analysis is imperative to identify them. This study characterized 4 A. rivipollensis strains that were isolated from river water in Johannesburg, South Africa, by whole-genome sequencing (WGS). WGS was carried out, and taxonomic classification was employed to profile virulence and antibiotic resistance (AR). The AR profiles of the A. rivipollensis genomes consisted of betalactams and cephalosporin-resistance genes, while the tetracycline-resistance gene (tetE) was only determined to be in the G87 strain. A mobile genetic element (MGE), transposons TnC, was determined to be in this strain that mediates tetracycline resistance MFS efflux tetE. 
A pangenomic investigation revealed the G87 strain's unique characteristic, which included immunoglobulin A-binding proteins, extracellular polysialic acid, and exogenous sialic acid as virulence factors. The identified polysialic acid and sialic acid genes can be associated with antiphagocytic and antibactericidal properties, respectively. MGEs such as transposases introduce virulence and AR genes in the A. rivipollensis G87 genome. This study showed that A. rivipollensis is generally resistant to a class of beta-lactams and cephalosporins. MGEs pose a challenge in some of the Aeromonas species strains and are subjected to antibiotics resistance and the acquisition of virulence genes in the ecosystem.}, } @article {pmid36671226, year = {2022}, author = {Thakur, Z and Vaid, RK and Anand, T and Tripathi, BN}, title = {Comparative Genome Analysis of 19 Trueperella pyogenes Strains Originating from Different Animal Species Reveal a Genetically Diverse Open Pan-Genome.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {1}, pages = {}, pmid = {36671226}, issn = {2079-6382}, support = {IXX11884//National Research Centre on Equines/ ; }, abstract = {Trueperella pyogenes is a Gram-positive opportunistic pathogen that causes severe cases of mastitis, metritis, and pneumonia in a wide range of animals, resulting in significant economic losses. Although little is known about the virulence factors involved in the disease pathogenesis, a comprehensive comparative genome analysis of T. pyogenes genomes has not been performed till date. Hence, present investigation was carried out to characterize and compare 19 T. pyogenes genomes originating in different geographical origins including the draft genome of the first Indian origin strain T. pyogenes Bu5. Additionally, candidate virulence determinants that could be crucial for their pathogenesis were also detected and analyzed by using various bioinformatics tools. 
The pan-genome calculations revealed an open pan-genome of T. pyogenes. In addition, an inventory of virulence related genes, 190 genomic islands, 31 prophage sequences, and 40 antibiotic resistance genes that could play a significant role in organism's pathogenicity were detected. The core-genome based phylogeny of T. pyogenes demonstrates a polyphyletic, host-associated group with a high degree of genomic diversity. The identified core-genome can be further used for screening of drug and vaccine targets. The investigation has provided unique insights into pan-genome, virulome, mobiliome, and resistome of T. pyogenes genomes and laid the foundation for future investigations.}, } @article {pmid36669850, year = {2023}, author = {Tonkin-Hill, G and Gladstone, RA and Pöntinen, AK and Arredondo-Alonso, S and Bentley, SD and Corander, J}, title = {Robust analysis of prokaryotic pangenome gene gain and loss rates with Panstripe.}, journal = {Genome research}, volume = {33}, number = {1}, pages = {129--140}, pmid = {36669850}, issn = {1549-5469}, support = {204016/Z/16/Z//Wellcome Trust/United Kingdom ; 206194//Wellcome Trust/United Kingdom ; }, mesh = {Humans ; Phylogeny ; *Evolution, Molecular ; *Prokaryotic Cells ; Genome, Bacterial ; Gene Transfer, Horizontal ; }, abstract = {Horizontal gene transfer (HGT) plays a critical role in the evolution and diversification of many microbial species. The resulting dynamics of gene gain and loss can have important implications for the development of antibiotic resistance and the design of vaccine and drug interventions. Methods for the analysis of gene presence/absence patterns typically do not account for errors introduced in the automated annotation and clustering of gene sequences. 
In particular, methods adapted from ecological studies, including the pangenome gene accumulation curve, can be misleading as they may reflect the underlying diversity in the temporal sampling of genomes rather than a difference in the dynamics of HGT. Here, we introduce Panstripe, a method based on generalized linear regression that is robust to population structure, sampling bias, and errors in the predicted presence/absence of genes. We show using simulations that Panstripe can effectively identify differences in the rate and number of genes involved in HGT events, and illustrate its capability by analyzing several diverse bacterial genome data sets representing major human pathogens.}, } @article {pmid36662619, year = {2023}, author = {Secomandi, S and Gallo, GR and Sozzoni, M and Iannucci, A and Galati, E and Abueg, L and Balacco, J and Caprioli, M and Chow, W and Ciofi, C and Collins, J and Fedrigo, O and Ferretti, L and Fungtammasan, A and Haase, B and Howe, K and Kwak, W and Lombardo, G and Masterson, P and Messina, G and Møller, AP and Mountcastle, J and Mousseau, TA and Ferrer Obiol, J and Olivieri, A and Rhie, A and Rubolini, D and Saclier, M and Stanyon, R and Stucki, D and Thibaud-Nissen, F and Torrance, J and Torroni, A and Weber, K and Ambrosini, R and Bonisoli-Alquati, A and Jarvis, ED and Gianfranceschi, L and Formenti, G}, title = {A chromosome-level reference genome and pangenome for barn swallow population genomics.}, journal = {Cell reports}, volume = {42}, number = {1}, pages = {111992}, pmid = {36662619}, issn = {2211-1247}, support = {/HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Animals ; *Swallows/genetics ; Metagenomics ; Genome/genetics ; Genomics ; Chromosomes ; }, abstract = {Insights into the evolution of non-model organisms are limited by the lack of reference genomes of high accuracy, completeness, and contiguity. 
Here, we present a chromosome-level, karyotype-validated reference genome and pangenome for the barn swallow (Hirundo rustica). We complement these resources with a reference-free multialignment of the reference genome with other bird genomes and with the most comprehensive catalog of genetic markers for the barn swallow. We identify potentially conserved and accelerated genes using the multialignment and estimate genome-wide linkage disequilibrium using the catalog. We use the pangenome to infer core and accessory genes and to detect variants using it as a reference. Overall, these resources will foster population genomics studies in the barn swallow, enable detection of candidate genes in comparative genomics studies, and help reduce bias toward a single reference genome.}, } @article {pmid36646895, year = {2023}, author = {Sibbesen, JA and Eizenga, JM and Novak, AM and Sirén, J and Chang, X and Garrison, E and Paten, B}, title = {Haplotype-aware pantranscriptome analyses using spliced pangenome graphs.}, journal = {Nature methods}, volume = {20}, number = {2}, pages = {239--247}, pmid = {36646895}, issn = {1548-7105}, support = {U01HG010961//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; R01HG010485//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U41HG010972//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U24HG011853//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; OT2 OD026682/OD/NIH HHS/United States ; }, mesh = {Haplotypes ; *Computational Biology ; *Gene Expression Profiling ; Metagenomics ; Transcriptome ; }, abstract = {Pangenomics is emerging as a powerful computational paradigm in bioinformatics. 
This field uses population-level genome reference structures, typically consisting of a sequence graph, to mitigate reference bias and facilitate analyses that were challenging with previous reference-based methods. In this work, we extend these methods into transcriptomics to analyze sequencing data using the pantranscriptome: a population-level transcriptomic reference. Our toolchain, which consists of additions to the VG toolkit and a standalone tool, RPVG, can construct spliced pangenome graphs, map RNA sequencing data to these graphs, and perform haplotype-aware expression quantification of transcripts in a pantranscriptome. We show that this workflow improves accuracy over state-of-the-art RNA sequencing mapping methods, and that it can efficiently quantify haplotype-specific transcript expression without needing to characterize the haplotypes of a sample beforehand.}, } @article {pmid36646262, year = {2023}, author = {Mishra, A and Kesarwani, S and Jaiswal, TP and Bhattacharjee, S and Chakraborty, S and Mishra, AK and Singh, SS}, title = {Decoding whole genome of Anoxybacillus rupiensis TPH1 isolated from tatapani hot spring, India and giving insight into bioremediation ability of TPH1 via heavy metals and azo dyes.}, journal = {Research in microbiology}, volume = {174}, number = {4}, pages = {104027}, doi = {10.1016/j.resmic.2023.104027}, pmid = {36646262}, issn = {1769-7123}, mesh = {*Anoxybacillus/genetics ; *Hot Springs ; Biodegradation, Environmental ; Azo Compounds/metabolism ; Molecular Docking Simulation ; *Metals, Heavy/metabolism ; Phylogeny ; }, abstract = {A moderately thermophilic, gram-positive genomospecies Anoxybacillus rupiensis TPH1 was isolated from Tatapani hot spring, Chhattisgarh, India. Genome of 3.70 Mb with 42.3% GC subsumed 4131 CDSs, 65 tRNA, 5 rRNA, 35 AMR and 19 drug target genes. Further, comparative genomics of 19 Anoxybacillus spp. 
exhibited an open pan genome of 13102 genes along with core (10.62%), unique (43.5%) and accessory (45.9%) genes. Moreover, phylogenomic tree displayed clustering of Anoxybacillus spp. into two distinct clades where clade A species harbored larger genomes, more unique genes, CDS and hypothetical proteins than clade B species. Further, distribution of azoreductases showed FMN-binding NADPH azoreductase (AzoRed1) presence in clade A species only and FMN-binding NADH azoreductase (AzoRed2) harboring by species of both clades. Heavy metal resistance genes distribution showed omnipresence of znuA, copZ and arsC in both clades, dispersed presence of cbiM, czcD, merA and feoB over both clades and harboring of nikA and acr3 by few species of clade A only. Additionally, molecular docking of AzoRed1, AzoRed2, ZnuA, CopZ, Acr3, CbiM, CzcD, MerA and NikA with their respective ligands indicated high affinity and stable binding. Conclusively, present study provided insight into gene repertoire of genus Anoxybacillus and a basis for the potential application of this thermophile in bioremediation of azo dyes and heavy metals.}, } @article {pmid36644533, year = {2022}, author = {Pang, M and Tu, T and Wang, Y and Zhang, P and Ren, M and Yao, X and Luo, Y and Yang, Z}, title = {Design of a multi-epitope vaccine against Haemophilus parasuis based on pan-genome and immunoinformatics approaches.}, journal = {Frontiers in veterinary science}, volume = {9}, number = {}, pages = {1053198}, pmid = {36644533}, issn = {2297-1769}, abstract = {BACKGROUND: Glässer's disease, caused by Haemophilus parasuis (HPS), is responsible for economic losses in the pig industry worldwide. However, the existing commercial vaccines offer poor protection and there are significant barriers to the development of effective vaccines.

METHODS: In the current study, we aimed to identify potential vaccine candidates and design a multi-epitope vaccine against HPS by performing pan-genomic analysis of 121 strains and using a reverse vaccinology approach.

RESULTS: The designed vaccine constructs consist of predicted epitopes of B and T cells derived from the outer membrane proteins of the HPS core genome. The vaccine was found to be highly immunogenic, non-toxic, and non-allergenic as well as have stable physicochemical properties. It has a high binding affinity to Toll-like receptor 2. In addition, in silico immune simulation results showed that the vaccine elicited an effective immune response. Moreover, the mouse polyclonal antibody obtained by immunizing the vaccine protein can be combined with different serotypes and non-typable Haemophilus parasuis in vitro.

CONCLUSION: The overall results of the study suggest that the designed multi-epitope vaccine is a promising candidate for pan-prophylaxis against different strains of HPS.}, } @article {pmid36638170, year = {2023}, author = {Cai, H and McLimans, CJ and Beyer, JE and Krumholz, LR and Hambright, KD}, title = {Microcystis pangenome reveals cryptic diversity within and across morphospecies.}, journal = {Science advances}, volume = {9}, number = {2}, pages = {eadd3783}, pmid = {36638170}, issn = {2375-2548}, mesh = {Humans ; *Microcystis/genetics ; Phylogeny ; Base Sequence ; Ecology ; }, abstract = {Microcystis, a common harmful algal bloom (HAB) taxon, threatens water supplies and human health, yet species delimitation is contentious in this taxon, leading to challenges in research and management of this threat. Historical and common morphology-based classifications recognize multiple morphospecies, most with variable and diverse ecologies, while DNA sequence-based classifications indicate a single species with multiple ecotypes. To better delimit Microcystis species, we conducted a pangenome analysis of 122 genomes. Core- and non-core gene phylogenetic analyses placed 113 genomes into 23 monophyletic clusters containing at least two genomes. Overall, genome-related indices revealed that Microcystis contains at least 16 putative genospecies. Fifteen genospecies included at least one Microcystis aeruginosa morphospecies, and 10 genospecies included two or more morphospecies. 
This classification system will enable consistent taxonomic identification of Microcystis and thereby aid in resolving some of the complexities and controversies that have long characterized eco-evolutionary research and management of this important HAB taxon.}, } @article {pmid36630500, year = {2023}, author = {Konno, N and Iwasaki, W}, title = {Machine learning enables prediction of metabolic system evolution in bacteria.}, journal = {Science advances}, volume = {9}, number = {2}, pages = {eadc9130}, pmid = {36630500}, issn = {2375-2548}, mesh = {Phylogeny ; *Bacteria/genetics ; *Evolution, Molecular ; Genomics ; Genome, Bacterial ; }, abstract = {Evolution prediction is a long-standing goal in evolutionary biology, with potential impacts on strategic pathogen control, genome engineering, and synthetic biology. While laboratory evolution studies have shown the predictability of short-term and sequence-level evolution, that of long-term and system-level evolution has not been systematically examined. Here, we show that the gene content evolution of metabolic systems is generally predictable by applying ancestral gene content reconstruction and machine learning techniques to ~3000 bacterial genomes. Our framework, Evodictor, successfully predicted gene gain and loss evolution at the branches of the reference phylogenetic tree, suggesting that evolutionary pressures and constraints on metabolic systems are universally shared. Investigation of pathway architectures and meta-analysis of metagenomic datasets confirmed that these evolutionary patterns have physiological and ecological bases as functional dependencies among metabolic reactions and bacterial habitat changes. 
Last, pan-genomic analysis of intraspecies gene content variations proved that even "ongoing" evolution in extant bacterial species is predictable in our framework.}, } @article {pmid36627554, year = {2023}, author = {Forgacova, N and Holesova, Z and Hekel, R and Sedlackova, T and Pos, Z and Krivosikova, L and Janega, P and Kuracinova, KM and Babal, P and Radvak, P and Radvanszky, J and Gazdarica, J and Budis, J and Szemes, T}, title = {Evaluation and limitations of different approaches among COVID-19 fatal cases using whole-exome sequencing data.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {12}, pmid = {36627554}, issn = {1471-2164}, support = {PP-COVID-20-051//Pangenomics for personalized clinical management of infected persons based on identified viral genome and human exoma (Code ITMS:313011ATL7), co-financed by the European Regional Development Fund; co financed by the Slovak Research and Development Agency grant PP-COVID-20-051./ ; PP-COVID-20-051//Pangenomics for personalized clinical management of infected persons based on identified viral genome and human exoma (Code ITMS:313011ATL7), co-financed by the European Regional Development Fund; co financed by the Slovak Research and Development Agency grant PP-COVID-20-051./ ; PP-COVID-20-051//Pangenomics for personalized clinical management of infected persons based on identified viral genome and human exoma (Code ITMS:313011ATL7), co-financed by the European Regional Development Fund; co financed by the Slovak Research and Development Agency grant PP-COVID-20-051./ ; }, mesh = {Humans ; *COVID-19/genetics ; SARS-CoV-2 ; Exome Sequencing ; Alleles ; DNA ; }, abstract = {BACKGROUND: COVID-19 caused by the SARS-CoV-2 infection may result in various disease symptoms and severity, ranging from asymptomatic, through mildly symptomatic, up to very severe and even fatal cases. 
Although environmental, clinical, and social factors play important roles in both susceptibility to the SARS-CoV-2 infection and progress of COVID-19 disease, it is becoming evident that both pathogen and host genetic factors are important too. In this study, we report findings from whole-exome sequencing (WES) of 27 individuals who died due to COVID-19, especially focusing on frequencies of DNA variants in genes previously associated with the SARS-CoV-2 infection and the severity of COVID-19.

RESULTS: We selected the risk DNA variants/alleles or target genes using four different approaches: 1) aggregated GWAS results from the GWAS Catalog; 2) selected publications from PubMed; 3) the aggregated results of the Host Genetics Initiative database; and 4) a commercial DNA variant annotation/interpretation tool providing its own knowledgebase. We divided these variants/genes into those reported to influence the susceptibility to the SARS-CoV-2 infection and those influencing the severity of COVID-19. Based on the above, we compared the frequencies of alleles found in the fatal COVID-19 cases to the frequencies identified in two population control datasets (non-Finnish European population from the gnomAD database and genomic frequencies specific for the Slovak population from our own database). When compared to both control population datasets, our analyses indicated a trend of higher frequencies of severe COVID-19 associated risk alleles among fatal COVID-19 cases. This trend reached statistical significance specifically when using the HGI-derived variant list. We also analysed other approaches to WES data evaluation, demonstrating its utility as well as limitations.

CONCLUSIONS: Although our results proved the likely involvement of host genetic factors pointed out by previous studies looking into severity of COVID-19 disease, careful considerations of the molecular-testing strategies and the evaluated genomic positions may have a strong impact on the utility of genomic testing.}, } @article {pmid36627170, year = {2023}, author = {Nii, T and Maeda, Y and Motooka, D and Naito, M and Matsumoto, Y and Ogawa, T and Oguro-Igashira, E and Kishikawa, T and Yamashita, M and Koizumi, S and Kurakawa, T and Okumura, R and Kayama, H and Murakami, M and Sakaguchi, T and Das, B and Nakamura, S and Okada, Y and Kumanogoh, A and Takeda, K}, title = {Genomic repertoires linked with pathogenic potency of arthritogenic Prevotella copri isolated from the gut of patients with rheumatoid arthritis.}, journal = {Annals of the rheumatic diseases}, volume = {82}, number = {5}, pages = {621--629}, pmid = {36627170}, issn = {1468-2060}, mesh = {Animals ; Mice ; *Gastrointestinal Microbiome/genetics ; *Arthritis, Rheumatoid/genetics ; Prevotella/genetics ; Genomics ; Disease Models, Animal ; }, abstract = {OBJECTIVES: Prevotella copri is considered to be a contributing factor in rheumatoid arthritis (RA). However, in some non-Westernised countries, healthy individuals also harbour an abundance of P. copri in the intestine. This study investigated the pathogenicity of RA patient-derived P. copri (P. copri RA) compared with healthy control-derived P. copri (P. copri HC).

METHODS: We obtained 13 P. copri strains from the faeces of patients with RA and healthy controls. Following whole genome sequencing, the sequences of P. copri RA and P. copri HC were compared. To analyse the arthritis-inducing ability of P. copri, we examined two arthritis models (1) a collagen-induced arthritis model harbouring P. copri under specific-pathogen-free conditions and (2) an SKG mouse arthritis model under P. copri-monocolonised conditions. Finally, to evaluate the ability of P. copri to activate innate immune cells, we performed in vitro stimulation of bone marrow-derived dendritic cells (BMDCs) by P. copri RA and P. copri HC.

RESULTS: Comparative genomic analysis revealed no apparent differences in the core gene contents between P. copri RA and P. copri HC, but pangenome analysis revealed the high genome plasticity of P. copri. We identified a P. copri RA-specific genomic region as a conjugative transposon. In both arthritis models, P. copri RA-induced more severe arthritis than P. copri HC. In vitro BMDC stimulation experiments revealed the upregulation of IL-17 and Th17-related cytokines (IL-6, IL-23) by P. copri RA.

CONCLUSION: Our findings reveal the genetic diversity of P. copri, and the genomic signatures associated with strong arthritis-inducing ability of P. copri RA. Our study contributes towards elucidation of the complex pathogenesis of RA.}, } @article {pmid36623869, year = {2022}, author = {Ruggieri, AA and Livraghi, L and Lewis, JJ and Evans, E and Cicconardi, F and Hebberecht, L and Ortiz-Ruiz, Y and Montgomery, SH and Ghezzi, A and Rodriguez-Martinez, JA and Jiggins, CD and McMillan, WO and Counterman, BA and Papa, R and Van Belleghem, SM}, title = {Erratum: A butterfly pan-genome reveals that a large amount of structural variation underlies the evolution of chromatin accessibility.}, journal = {Genome research}, volume = {32}, number = {11--12}, pages = {2145}, doi = {10.1101/gr.277534.122}, pmid = {36623869}, issn = {1549-5469}, } @article {pmid36622155, year = {2023}, author = {Saak, CC and Pierce, EC and Dinh, CB and Portik, D and Hall, R and Ashby, M and Dutton, RJ}, title = {Longitudinal, Multi-Platform Metagenomics Yields a High-Quality Genomic Catalog and Guides an In Vitro Model for Cheese Communities.}, journal = {mSystems}, volume = {8}, number = {1}, pages = {e0070122}, pmid = {36622155}, issn = {2379-5077}, support = {DP2 AT010401/AT/NCCIH NIH HHS/United States ; }, mesh = {Humans ; *Cheese/microbiology ; Metagenomics ; Bacteria ; Metagenome/genetics ; *Microbiota/genetics ; }, abstract = {Microbiomes are intricately intertwined with human health, geochemical cycles, and food production. While many microbiomes of interest are highly complex and experimentally intractable, cheese rind microbiomes have proven to be powerful model systems for the study of microbial interactions. To provide a more comprehensive view of the genomic potential and temporal dynamics of cheese rind communities, we combined longitudinal, multi-platform metagenomics of three ripening washed-rind cheeses with whole-genome sequencing of community isolates. 
Sequencing-based approaches revealed a highly reproducible microbial succession in each cheese and the coexistence of closely related Psychrobacter species and enabled the prediction of plasmid and phage diversity and their host associations. In combination with culture-based approaches, we established a genomic catalog and a paired 16-member in vitro washed-rind cheese system. The combination of multi-platform metagenomic time-series data and an in vitro model provides a rich resource for further investigation of cheese rind microbiomes both computationally and experimentally. IMPORTANCE Metagenome sequencing can provide great insights into microbiome composition and function and help researchers develop testable hypotheses. Model microbiomes, such as those composed of cheese rind bacteria and fungi, allow the testing of these hypotheses in a controlled manner. Here, we first generated an extensive longitudinal metagenomic data set. This data set reveals successional dynamics, yields a phyla-spanning bacterial genomic catalog, associates mobile genetic elements with their hosts, and provides insights into functional enrichment of Psychrobacter in the cheese environment. Next, we show that members of the washed-rind cheese microbiome lend themselves to in vitro community reconstruction. 
This paired metagenomic data and in vitro system can thus be used as a platform for generating and testing hypotheses related to the dynamics within, and the functions associated with, cheese rind microbiomes.}, } @article {pmid36621865, year = {2023}, author = {Zhang, Z and Li, K and Zhang, H and Wang, Q and Zhao, L and Liu, J and Chen, H}, title = {A single silk- and multiple pollen-expressed PMEs at the Ga1 locus modulate maize unilateral cross-incompatibility.}, journal = {Journal of integrative plant biology}, volume = {65}, number = {5}, pages = {1344--1355}, doi = {10.1111/jipb.13445}, pmid = {36621865}, issn = {1744-7909}, mesh = {Germ Cells, Plant ; Plant Breeding ; Pollen/genetics ; *Zea mays/genetics/metabolism ; }, abstract = {The Gametophyte factor1 (Ga1) locus in maize confers unilateral cross-incompatibility (UCI), and it is controlled by both pollen and silk-specific determinants. Although the Ga1 locus has been reported for more than a century and is widely utilized in maize breeding programs, only the pollen-specific ZmGa1P has been shown to function as a male determinant; thus, the genomic structure of the Ga1 locus and all the determinants that control UCI at this locus have not yet been fully characterized. Here, we used map-based cloning to confirm the determinants of UCI at the Ga1 locus and maize pan-genome sequence data to characterize the genomic structure of the Ga1 locus. The Ga1 locus comprises one silk-expressed pectin methylesterase gene (PME) (ZmGa1F) and eight pollen-expressed PMEs (ZmGa1P and ZmGa1PL1-7). Knockout of ZmGa1F in Ga1/Ga1 lines leads to the complete loss of the female barrier function. The expression of individual ZmGa1PL genes in a ga1/ga1 background endows ga1 pollen with the ability to overcome the female barrier of the Ga1 locus. 
These findings, combined with genomic data and genetic analyses, indicate that the Ga1 locus is modulated by a single female determinant and multiple male determinants, which are tightly linked. The results of this study provide valuable insights into the genomic structure of the Ga2 and Tcb1 loci and will aid applications of these loci in maize breeding programs.}, } @article {pmid36619820, year = {2023}, author = {Khushboo, and Singhvi, N and Gupta, V and Dhaka, N and Dubey, KK}, title = {Draft genome sequence of Streptomyces sp. KD18, isolated from industrial soil.}, journal = {3 Biotech}, volume = {13}, number = {1}, pages = {34}, pmid = {36619820}, issn = {2190-572X}, abstract = {UNLABELLED: The present study scrutinizes the presence of Streptomyces strains in the soil sample collected from industrial area of Bahadurgarh (Haryana) India. The morphological approach manifested the isolated strain belong to Streptomyces species and named as Streptomyces sp. KD18. Sequencing of Streptomyces sp. KD18 genome was performed by Illumina Nextseq500 platform. 65 contigs were generated via SPAdes v3.11.1 and harboured genome size of 7.2 Mb. AntiSMASH server revealed the presence of 25 biosynthetic gene clusters in KD18 genome where BGC of lipstatin was of more interest from industrial and pharmaceutical purpose. The draft genome sequence represented via ANI values claimed that the KD18 strain belongs to Streptomyces toxytricini and finally named as S. toxytricini KD18. The LC-MS analysis of the extracted metabolite confirmed the production of lipstatin. The genome sequence data have been deposited to NCBI under the accession number of GCA_014748315.1.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-022-03453-3.}, } @article {pmid36618639, year = {2022}, author = {Parakkunnel, R and Naik K, B and Vanishree, G and C, S and Purru, S and Bhaskar K, U and Bhat, KV and Kumar, S}, title = {Gene fusions, micro-exons and splice variants define stress signaling by AP2/ERF and WRKY transcription factors in the sesame pan-genome.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1076229}, pmid = {36618639}, issn = {1664-462X}, abstract = {Evolutionary dynamics of AP2/ERF and WRKY genes, the major components of defense response were studied extensively in the sesame pan-genome. Massive variation was observed for gene copy numbers, genome location, domain structure, exon-intron structure and protein parameters. In the pan-genome, 63% of AP2/ERF members were devoid of introns whereas >99% of WRKY genes contained multiple introns. AP2 subfamily was found to be micro-exon rich with the adjoining intronic sequences sharing sequence similarity to many stress-responsive and fatty acid metabolism genes. WRKY family included extensive multi-domain gene fusions where the additional domains significantly enhanced gene and exonic sizes as well as gene copy numbers. The fusion genes were found to have roles in acquired immunity, stress response, cell and membrane integrity as well as ROS signaling. The individual genomes shared extensive synteny and collinearity although ecological adaptation was evident among the Chinese and Indian accessions. Significant positive selection effects were noticed for both micro-exon and multi-domain genes. Splice variants with changes in acceptor, donor and branch sites were common and 6-7 splice variants were detected per gene. The study ascertained vital roles of lipid metabolism and chlorophyll biosynthesis in the defense response and stress signaling pathways. 
60% of the studied genes localized in the nucleus while 20% preferred chloroplast. Unique cis-element distribution was noticed in the upstream promoter region with MYB and STRE in WRKY genes while MYC was present in the AP2/ERF genes. Intron-less genes exhibited great diversity in the promoter sequences wherein the predominance of dosage effect indicated variable gene expression levels. Mimicking the NBS-LRR genes, a chloroplast localized WRKY gene, Swetha_24868, with additional domains of chorismate mutase, cAMP and voltage-dependent potassium channel was found to act as a master regulator of defense signaling, triggering immunity and reducing ROS levels.}, } @article {pmid36614303, year = {2023}, author = {Schanknecht, E and Bachari, A and Nassar, N and Piva, T and Mantri, N}, title = {Phytochemical Constituents and Derivatives of Cannabis sativa; Bridging the Gap in Melanoma Treatment.}, journal = {International journal of molecular sciences}, volume = {24}, number = {1}, pages = {}, pmid = {36614303}, issn = {1422-0067}, support = {Not applicable//RMIT University/ ; }, mesh = {Humans ; *Cannabis/chemistry ; *Cannabinoids/pharmacology/therapeutic use/chemistry ; Terpenes/pharmacology ; *Melanoma/drug therapy ; Phytochemicals/pharmacology/therapeutic use ; }, abstract = {Melanoma is deadly, physically impairing, and has ongoing treatment deficiencies. Current treatment regimens include surgery, targeted kinase inhibitors, immunotherapy, and combined approaches. Each of these treatments face pitfalls, with diminutive five-year survival in patients with advanced metastatic invasion of lymph and secondary organ tissues. Polyphenolic compounds, including cannabinoids, terpenoids, and flavonoids; both natural and synthetic, have emerging evidence of nutraceutical, cosmetic and pharmacological potential, including specific anti-cancer, anti-inflammatory, and palliative utility. 
Cannabis sativa is a wellspring of medicinal compounds whose direct and adjunctive application may offer considerable relief for melanoma sufferers worldwide. This review aims to address the diverse applications of C. sativa's biocompounds in the scope of melanoma and suggest it as a strong candidate for ongoing pharmacological evaluation.}, } @article {pmid36608657, year = {2023}, author = {Hackl, T and Laurenceau, R and Ankenbrand, MJ and Bliem, C and Cariani, Z and Thomas, E and Dooley, KD and Arellano, AA and Hogle, SL and Berube, P and Leventhal, GE and Luo, E and Eppley, JM and Zayed, AA and Beaulaurier, J and Stepanauskas, R and Sullivan, MB and DeLong, EF and Biller, SJ and Chisholm, SW}, title = {Novel integrative elements and genomic plasticity in ocean ecosystems.}, journal = {Cell}, volume = {186}, number = {1}, pages = {47-62.e16}, doi = {10.1016/j.cell.2022.12.006}, pmid = {36608657}, issn = {1097-4172}, mesh = {*Ecosystem ; *Genome, Bacterial/genetics ; Phylogeny ; Oceans and Seas ; Genomics ; }, abstract = {Horizontal gene transfer accelerates microbial evolution. The marine picocyanobacterium Prochlorococcus exhibits high genomic plasticity, yet the underlying mechanisms are elusive. Here, we report a novel family of DNA transposons-"tycheposons"-some of which are viral satellites while others carry cargo, such as nutrient-acquisition genes, which shape the genetic variability in this globally abundant genus. Tycheposons share distinctive mobile-lifecycle-linked hallmark genes, including a deep-branching site-specific tyrosine recombinase. Their excision and integration at tRNA genes appear to drive the remodeling of genomic islands-key reservoirs for flexible genes in bacteria. In a selection experiment, tycheposons harboring a nitrate assimilation cassette were dynamically gained and lost, thereby promoting chromosomal rearrangements and host adaptation. 
Vesicles and phage particles harvested from seawater are enriched in tycheposons, providing a means for their dispersal in the wild. Similar elements are found in microbes co-occurring with Prochlorococcus, suggesting a common mechanism for microbial diversification in the vast oligotrophic oceans.}, } @article {pmid36607068, year = {2023}, author = {Wong, ED and Miyasato, SR and Aleksander, S and Karra, K and Nash, RS and Skrzypek, MS and Weng, S and Engel, SR and Cherry, JM}, title = {Saccharomyces genome database update: server architecture, pan-genome nomenclature, and external resources.}, journal = {Genetics}, volume = {224}, number = {1}, pages = {}, pmid = {36607068}, issn = {1943-2631}, support = {U41 HG002273/HG/NHGRI NIH HHS/United States ; U24 HG001315/HG/NHGRI NIH HHS/United States ; U24 HG010859/HG/NHGRI NIH HHS/United States ; U24 HG012212/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Saccharomyces/genetics ; Saccharomyces cerevisiae/genetics ; Genome, Fungal ; Databases, Genetic ; Software ; }, abstract = {As one of the first model organism knowledgebases, Saccharomyces Genome Database (SGD) has been supporting the scientific research community since 1993. As technologies and research evolve, so does SGD: from updates in software architecture, to curation of novel data types, to incorporation of data from, and collaboration with, other knowledgebases. We are continuing to make steps toward providing the community with an S. cerevisiae pan-genome. Here, we describe software upgrades, a new nomenclature system for genes not found in the reference strain, and additions to gene pages. 
With these improvements, we aim to remain a leading resource for students, researchers, and the broader scientific community.}, } @article {pmid36605514, year = {2022}, author = {Dong, C and Wei, L and Wang, J and Lai, Q and Huang, Z and Shao, Z}, title = {Genome-based taxonomic rearrangement of Oceanobacter-related bacteria including the description of Thalassolituus hydrocarbonoclasticus sp. nov. and Thalassolituus pacificus sp. nov. and emended description of the genus Thalassolituus.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1051202}, pmid = {36605514}, issn = {1664-302X}, abstract = {Oceanobacter-related bacteria (ORB) are a group of oligotrophic marine bacteria that play an underappreciated role in carbon cycling. They have been frequently described as one of the dominant bacterial groups with a wide distribution in coastal and deep seawater of global oceans. To clarify their taxonomic affiliation in relation to alkane utilization, phylogenomic and comparative genomics analyses were performed based on currently available genomes from GenBank and four newly isolated strains, in addition to phenotypic and chemotaxonomic characteristics. Consistently, phylogenomic analysis robustly separated them into two groups, which are accordingly hydrocarbon-degrading (HD, Thalassolituus and Oleibacter) and non-HD (NHD, Oceanobacter). In addition, the two groups can also be readily distinguished by several polyphasic taxonomic characteristics. Furthermore, both AAI and POCP genomic indices within the HD group support the conclusion that the members of the genus Oleibacter should be transferred into the genus Thalassolituus. Moreover, HD and NHD bacteria differed significantly in terms of genome size, G + C content and genes involved in alkane utilization. 
All HD bacteria contain the key gene alkB encoding an alkane monooxygenase, which can be used as a marker gene to distinguish the members of closely related genera Oceanobacter and Thalassolituus. Pangenome analysis revealed that the larger accessory genome may endow Thalassolituus with the flexibility to cope with the dynamics of marine environments and thrive therein, although they possess smaller pan, core- and unique-genomes than Oceanobacter. Within the HD group, twelve species were clearly distinguished from each other by both dDDH and ANI genomic indices, including two novel species represented by the newly isolated strains alknpb1M-1 [T] and 59MF3M-4 [T] , for which the names Thalassolituus hydrocarbonoclasticus sp. nov. and Thalassolituus pacificus sp. nov. are proposed. Collectively, these findings build a phylogenetic framework for the ORB and contribute to understanding of their role in marine carbon cycling.}, } @article {pmid36605106, year = {2022}, author = {Ali, A and Khatoon, A and Mirza, T and Ahmad, F}, title = {Intensification in Genetic Information and Acquisition of Resistant Genes in Genome of Acinetobacter baumannii: A Pan-Genomic Analysis.}, journal = {BioMed research international}, volume = {2022}, number = {}, pages = {3186343}, pmid = {36605106}, issn = {2314-6141}, mesh = {Humans ; *Acinetobacter baumannii/genetics ; Genomics ; Genome, Bacterial/genetics ; Anti-Bacterial Agents/pharmacology ; Computational Biology ; Drug Resistance, Multiple, Bacterial/genetics ; Microbial Sensitivity Tests ; }, abstract = {Acinetobacter baumannii (A. baumannii) attributes 26% of the mortality rate in hospitalized patients, and the percentage can rise to 46 in patients admitted to ICU as it is a major cause of ventilator-associated pneumonia. It has been nominated as the critical priority organism by WHO for which new therapeutic drugs are urgently required. 
To understand the genomic identification of different strains, antimicrobial resistance patterns, and epidemiological typing of organisms, whole-genome sequencing (WGS) analysis provides insight to explore new epitopes to develop new drugs against the organism. Therefore, the study is aimed at investigating the whole genome sequence of A. baumannii strains to report the new intensifications in its genomic profile. The genome sequences were retrieved from the NCBI database system. Pan-genome BPGA (Bacterial Pan-genome Analysis Tool) was used to analyze the core, pan, and species-specific genome analysis. The pan and core genome curves were extrapolated using the empirical power law equation $f(x) = a \cdot x^{b}$ and the exponential equation $f_{1}(x) = c \cdot e^{d \cdot x}$. To identify the resistant genes with resistant mutations against antibiotics, ResFinder and Galaxy Community hub bioinformatics tools were used. According to pan-genome analysis, there were 2227 core genes present in each species of the A. baumannii genome. Furthermore, the number of accessory genes ranged from 1182 to 1460, and the unique genes in the genome were 931. There were 325 exclusively absent genes in the genome of Acinetobacter baumannii. The pan-genome analysis showed that there is a 5-fold increase in the genome of A. baumannii in 5 years, and the genome is still open. 
There is the addition of multiple unique genes; among them, genes participating in the function of information and processing are increased.}, } @article {pmid36598708, year = {2023}, author = {Karthik, K and Anbazhagan, S and Chitra, MA and Sridhar, R}, title = {Comparative phylogenomics of Trueperella pyogenes reveals host-based distinction of strains.}, journal = {Antonie van Leeuwenhoek}, volume = {116}, number = {4}, pages = {343-351}, pmid = {36598708}, issn = {1572-9699}, mesh = {Cattle ; Animals ; Swine ; Phylogeny ; Multilocus Sequence Typing ; *Genomics ; }, abstract = {Trueperella pyogenes, an opportunistic pathogen causes various ailments in different animals. Different strains from different animals have distinct characters phenotypically and genotypically. Hence understanding the strains in a particular geographical location helps in framing the preventive measures. Comparative genomics of all the available T. pyogenes genome in the NCBI was conducted to understand the relatedness among strains. Whole genome phylogeny showed host associated clustering of strains recovered from swine lungs. Core genome phylogeny also showed host associated clustering mimicking whole genome phylogeny results. MLST analysis showed that there was higher diversity among cattle strains. Multidimensional scaling revealed five swine clusters, two cattle and buffalo clusters. Pangenome analysis also showed that T. pyogenes had an open genome with 57.09% accessory genome. Host specific genes were identified by pangenome analysis, and (R)-citramalate synthase was specific for swine strains of Asian origin. Host specific genes identified by pangenome analysis can be exploited for developing a molecular assay to specifically identify the strains. The study shows that MLST having higher discriminatory power can be used as an epidemiological tool for strain discrimination of T. 
pyogenes.}, } @article {pmid36598279, year = {2023}, author = {Xu, C and Rao, J and Xie, Y and Lu, J and Li, Z and Dong, C and Wang, L and Jiang, J and Chen, C and Chen, S}, title = {The DNA Phosphorothioation Restriction-Modification System Influences the Antimicrobial Resistance of Pathogenic Bacteria.}, journal = {Microbiology spectrum}, volume = {11}, number = {1}, pages = {e0350922}, pmid = {36598279}, issn = {2165-0497}, mesh = {*Anti-Bacterial Agents/pharmacology ; *Drug Resistance, Bacterial/genetics ; Bacteria/genetics ; DNA Restriction-Modification Enzymes/genetics ; DNA ; Gene Transfer, Horizontal ; }, abstract = {Bacterial defense barriers, such as DNA methylation-associated restriction-modification (R-M) and the CRISPR-Cas system, play an important role in bacterial antimicrobial resistance (AMR). Recently, a novel R-M system based on DNA phosphorothioate (PT) modification has been shown to be widespread in the kingdom of Bacteria as well as Archaea. However, the potential role of the PT R-M system in bacterial AMR remains unclear. In this study, we explored the role of PT R-Ms in AMR with a series of common clinical pathogenic bacteria. By analyzing the distribution of AMR genes related to mobile genetic elements (MGEs), it was shown that the presence of PT R-M effectively reduced the distribution of horizontal gene transfer (HGT)-derived AMR genes in the genome, even in the bacteria that did not tend to acquire AMR genes by HGT. In addition, unique gene variation analysis based on pangenome analysis and MGE prediction revealed that the presence of PT R-M could suppress HGT frequency. Thus, this is the first report showing that the PT R-M system has the potential to repress HGT-derived AMR gene acquisition by reducing the HGT frequency. IMPORTANCE In this study, we demonstrated the effect of DNA PT modification-based R-M systems on horizontal gene transfer of AMR genes in pathogenic bacteria. 
We show that there is no apparent association between the genetic background of the strains harboring PT R-Ms and the number of AMR genes or the kinds of gene families. The strains equipped with PT R-M harbor fewer plasmid-derived, prophage-derived, or integrating mobile genetic element (iMGE)-related AMR genes and have a lower HGT frequency, but the degree of inhibition varies among different bacteria. In addition, compared with Salmonella enterica and Escherichia coli, Klebsiella pneumoniae prefers to acquire MGE-derived AMR genes, and there is no coevolution between PT R-M clusters and bacterial core genes.}, } @article {pmid36589110, year = {2022}, author = {Liang, L and Zhang, J and Xiao, J and Li, X and Xie, Y and Tan, H and Song, X and Zhu, L and Xue, X and Xu, L and Zhou, P and Ran, J and Sun, B and Huang, Z and Tang, Y and Lin, L and Sun, G and Lai, Y and Li, H}, title = {Genome and pan-genome assembly of asparagus bean (Vigna unguiculata ssp. sesquipedialis) reveal the genetic basis of cold adaptation.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1059804}, pmid = {36589110}, issn = {1664-462X}, abstract = {Asparagus bean (Vigna unguiculata ssp. sesquipedialis) is an important cowpea subspecies. We assembled the genomes of Ningjiang 3 (NJ, 550.31 Mb) and Dubai bean (DB, 564.12 Mb) for comparative genomics analysis. The whole-genome duplication events of DB and NJ occurred at 64.55 and 64.81 Mya, respectively, while the divergence between soybean and Vigna occurred in the Paleogene period. NJ genes underwent positive selection and amplification in response to temperature and abiotic stress. In species-specific gene families, NJ is mainly enriched in response to abiotic stress, while DB is primarily enriched in respiration and photosynthesis. 
We established the pan-genomes of four accessions (NJ, DB, IT97K-499-35 and Xiabao II) and identified 20,336 (70.5%) core genes present in all the accessions, 6,507 (22.56%) variable genes in two individuals, and 2,004 (6.95%) unique genes. The final pan genome is 616.35 Mb, and the core genome is 399.78 Mb. The variable genes are manifested mainly in stress response functions, ABC transporters, seed storage, and dormancy control. In the pan-genome sequence variation analysis, genes affected by presence/absence variants were enriched in biological processes associated with defense responses, immune system processes, signal transduction, and agronomic traits. The results of the present study provide genetic data that could facilitate efficient asparagus bean genetic improvement, especially in producing cold-adapted asparagus bean.}, } @article {pmid36586056, year = {2023}, author = {Tanwar, UK and Stolarska, E and Rudy, E and Paluch-Lubawa, E and Grabsztunowicz, M and Arasimowicz-Jelonek, M and Sobieszczuk-Nowicka, E}, title = {Metal tolerance gene family in barley: an in silico comprehensive analysis.}, journal = {Journal of applied genetics}, volume = {64}, number = {2}, pages = {197-215}, pmid = {36586056}, issn = {2190-3883}, mesh = {*Hordeum/genetics ; Phylogeny ; Amino Acid Sequence ; Plant Proteins/genetics ; Stress, Physiological/genetics ; }, abstract = {Metal-tolerance proteins (MTPs) are divalent cation transporters that play critical roles in metal tolerance and ion homeostasis in plants. However, a comprehensive study of MTPs is still lacking in crop plants. The current study aimed to comprehensively identify and characterize the MTP gene family in barley (Hordeum vulgare, Hv), an important crop. In total, 12 HvMTPs were identified in the barley genome in this study. 
They were divided into three phylogenetic groups (Zn-cation diffusion facilitator proteins [CDFs], Fe/Zn-CDFs, and Mn-CDFs) and further subdivided into seven groups (G1, G5, G6, G7, G8, G9, and G12). The majority of MTPs were hydrophobic proteins found in the vacuolar membrane. Gene duplication analysis of HvMTPs revealed one pair of segmental-like duplications in the barley genome. Evolutionary analysis suggested that barley MTPs underwent purifying natural selection. Additionally, the HvMTPs were analyzed in the pan-genome sequences of barley (20 accessions), which suggests that HvMTPs are highly conserved in barley evolution. Cis-acting regulatory elements, microRNA target sites, and protein-protein interaction analysis indicated the role of HvMTPs in a variety of biological processes. Expression profiling suggests that HvMTPs play an active role in maintaining barley nutrient homeostasis throughout its life cycle, and their expression levels were not significantly altered by abiotic stresses like cold, drought, or heat. The expression of barley HvMTP genes in the presence of heavy metals such as Zn[2+], Cu[2+], As[3+], and Cd[2+] revealed that these MTPs were induced by at least one metal ion, implying their involvement in metal tolerance or transportation. 
The identification and comprehensive investigation of MTP gene family members will provide important gene resources for the genetic improvement of crops for metal tolerance, bioremediation, or biofortification of staple crops.}, } @article {pmid36585993, year = {2023}, author = {Bordel, S and Martín-González, D and Muñoz, R and Santos-Beneit, F}, title = {Genome sequence analysis and characterization of Bacillus altitudinis B12, a polylactic acid- and keratin-degrading bacterium.}, journal = {Molecular genetics and genomics : MGG}, volume = {298}, number = {2}, pages = {389-398}, pmid = {36585993}, issn = {1617-4623}, support = {067/229111//FEDER (TCUE 2021-2023)/ ; }, mesh = {Animals ; *Keratins/genetics/metabolism ; *Bacteria ; Polyesters/metabolism ; Sequence Analysis ; }, abstract = {Keratin-rich wastes, mainly in the form of feathers, are recalcitrant residues generated in high amounts as by-products in chicken farms and food industry. Polylactic acid (PLA) is the second most common biodegradable polymer found in commercial plastics, which is not easily degraded by microbial activity. This work reports the 3.8-Mb genome of Bacillus altitudinis B12, a highly efficient PLA- and keratin-degrading bacterium, with potential for environmental friendly biotechnological applications in the feed, fertilizer, detergent, leather, and pharmaceutical industries. The whole genome sequence of B. altitudinis B12 revealed that this strain (which had been previously misclassified as Bacillus pumilus B12) is closely related to the B. altitudinis strains ER5, W3, and GR-8. A total of 4056 coding sequences were annotated using the RAST server, of which 2484 are core genes of the pan genome of B. altitudinis and 171 are unique to this strain. According to the sequence analysis, B. altitudinis B12 has a predicted secretome of 353 proteins, among which a keratinase and a PLA depolymerase were identified by sequence analysis. 
The presence of these two enzymes could explain the characterized PLA and keratin biodegradation capability of the strain.}, } @article {pmid36579850, year = {2023}, author = {Javkar, K and Rand, H and Strain, E and Pop, M}, title = {PRAWNS: compact pan-genomic features for whole-genome population genomics.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {1}, pages = {}, pmid = {36579850}, issn = {1367-4811}, support = {R01 AI100947/AI/NIAID NIH HHS/United States ; //Center for Food Safety and Applied Nutrition/ ; }, mesh = {*Metagenomics ; *Software ; Genomics ; Genome ; Bacteria ; }, abstract = {MOTIVATION: Scientists seeking to understand the genomic basis of bacterial phenotypes, such as antibiotic resistance, today have access to an unprecedented number of complete and nearly complete genomes. Making sense of these data requires computational tools able to perform multiple-genome comparisons efficiently, yet currently available tools cannot scale beyond several tens of genomes.

RESULTS: We describe PRAWNS, an efficient and scalable tool for multiple-genome analysis. PRAWNS defines a concise set of genomic features (metablocks), as well as pairwise relationships between them, which can be used as a basis for large-scale genotype-phenotype association studies. We demonstrate the effectiveness of PRAWNS by identifying genomic regions associated with antibiotic resistance in Acinetobacter baumannii.

AVAILABILITY AND IMPLEMENTATION: PRAWNS is implemented in C++ and Python3, licensed under the GPLv3 license, and freely downloadable from GitHub (https://github.com/KiranJavkar/PRAWNS.git).

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid36577205, year = {2023}, author = {Kadiri, M and Sevugapperumal, N and Nallusamy, S and Ragunathan, J and Ganesan, MV and Alfarraj, S and Ansari, MJ and Sayyed, RZ and Lim, HR and Show, PL}, title = {Pan-genome analysis and molecular docking unveil the biocontrol potential of Bacillus velezensis VB7 against Phytophthora infestans.}, journal = {Microbiological research}, volume = {268}, number = {}, pages = {127277}, doi = {10.1016/j.micres.2022.127277}, pmid = {36577205}, issn = {1618-0623}, mesh = {*Phytophthora infestans ; Molecular Docking Simulation ; *Solanum tuberosum ; Base Sequence ; Plant Diseases/prevention & control ; }, abstract = {Management of late blight of potato incited by Phytophthora infestans remains a major challenge. Coevolution of pathogen with resistant strains and the rise of fungicide resistance have made it more challenging to prevent the spread of P. infestans. Here, the anti-oomycete potential of Bacillus velezensis VB7 against P. infestans through pan-genome analysis and molecular docking were explored. The Biocontrol potential of VB7 against P. infestans was assessed using a confrontational assay. The biomolecules from the inhibition zone were identified and subjected to in silico analysis against P. infestans target proteins. Nucleotide sequences for 54 B. velezensis strains from different geographical locations were used for pan-genome analysis. The confrontational assay revealed the anti-oomycetes potential of VB7 against P. infestans. Molecular docking confirmed that the penicillamine disulfide had the maximum binding energy with eight effector proteins of P. infestans. Besides, scanning electron microscopic observations of P. infestans interaction with VB7 revealed structural changes in hypha and sporangia. Pan-genome analysis between 54 strains of B. 
velezensis confirmed that the core genome had 2226 genes, and it has an open pan-genome. The present study confirmed the anti-oomycete potential of B. velezensis VB7 against P. infestans and paved the way to explore the genetic potential of VB7.}, } @article {pmid36575347, year = {2023}, author = {Srivastava, S and Bombaywala, S and Jakhesara, SJ and Patil, NV and Joshi, CG and Purohit, HJ and Dafale, NA}, title = {Potential of camel rumen derived Bacillus subtilis and Bacillus velezensis strains for application in plant biomass hydrolysis.}, journal = {Molecular genetics and genomics : MGG}, volume = {298}, number = {2}, pages = {361-374}, pmid = {36575347}, issn = {1617-4623}, mesh = {Animals ; Bacillus subtilis/genetics ; Camelus ; Hydrolysis ; Rumen ; Biomass ; *Cellulase/metabolism ; *Bacillus/genetics ; }, abstract = {Rumen inhabiting Bacillus species possesses a high genetic potential for plant biomass hydrolysis and conversion to value-added products. In view of the same, five camel rumen-derived Bacillus strains, namely B. subtilis CRN 1, B. velezensis CRN 2, B. subtilis CRN 7, B. subtilis CRN 11, and B. velezensis CRN 23 were initially assayed for diverse hydrolytic activities, followed by genome mining to unravel the potential applications. CRN 1 and CRN 7 showed the highest endoglucanase activity with 0.4 U/ml, while CRN 23 showed high β-xylosidase activity of 0.36 U/ml. The comprehensive genomic insights of strains resolve taxonomic identity, clusters of an orthologous gene, pan-genome dynamics, and metabolic features. Annotation of Carbohydrate active enzymes (CAZymes) reveals the presence of diverse glycoside hydrolases (GH) GH1, GH5, GH43, and GH30, which are solely responsible for the effective breakdown of complex bonds in plant polysaccharides. Further, protein modeling and ligand docking of annotated endoglucanases showed an affinity for cellotrioside, cellobioside, and β-glucoside. 
The finding indicates the flexibility of Bacillus-derived endoglucanase activity on diverse cellulosic substrates. The presence of the butyrate synthesis gene in the CRN 1 strain depicts its key role in the production of important short-chain fatty acids essential for healthy rumen development. Similarly, antimicrobial peptides such as bacilysin and non-ribosomal peptides (NRPS) synthesized by the Bacillus strains were also annotated in the genome. The findings clearly define the role of Bacillus sp. inside the camel rumen and its potential application in various plant biomass utilizing industry and animal health research sectors.}, } @article {pmid36567375, year = {2023}, author = {Filipić, B and Malešević, M and Vasiljević, Z and Novović, K and Kojić, M and Jovčić, B}, title = {Comparative genomics of trimethoprim-sulfamethoxazole-resistant Achromobacter xylosoxidans clinical isolates from Serbia reveals shortened variant of class 1 integron integrase gene.}, journal = {Folia microbiologica}, volume = {68}, number = {3}, pages = {431-440}, pmid = {36567375}, issn = {1874-9356}, support = {451-03-68/2022-14/200161//Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja/ ; 451-03-68/2022-14/200042//Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja/ ; 451-03-68/2022-14/200178//Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja/ ; }, mesh = {Humans ; Child ; Trimethoprim, Sulfamethoxazole Drug Combination ; *Achromobacter denitrificans/genetics ; Anti-Bacterial Agents/therapeutic use ; Integrases/therapeutic use ; Integrons/genetics ; Serbia ; *Achromobacter ; *Cystic Fibrosis ; Genomics ; *Gram-Negative Bacterial Infections ; Microbial Sensitivity Tests ; }, abstract = {Trimethoprim-sulfamethoxazole (SXT) is the preferable treatment option of the infections caused by Achromobacter spp. Our study aimed to analyze the SXT resistance of 98 Achromobacter spp. isolates from pediatric patients, among which 33 isolates were SXT-resistant. 
The presence of intI1 was screened by PCR and genome sequence analyses. The intI1 gene was detected in 10 of SXT-resistant isolates that had shorter intI1 PCR fragments named intI1S. Structural changes in intI1S were confirmed by genome sequencing and analyses which revealed 86 amino acids deletion in IntI1S protein compared to canonical IntI1 protein. All IntI1S isolates were of non-CF origin. Pan-genome analysis of intI1S bearing A. xylosoxidans isolates comprised 9052 genes, with the core genome consisting of 5455 protein-coding genes. Results in this study indicate that IntI1S isolates were derived from clinical settings and that cystic fibrosis (CF) patients were potential reservoirs for healthcare-associated infections that occurred in non-CF patients.}, } @article {pmid36566389, year = {2023}, author = {Shirasawa, K and Hosokawa, M and Yasui, Y and Toyoda, A and Isobe, S}, title = {Chromosome-scale genome assembly of a Japanese chili pepper landrace, Capsicum annuum 'Takanotsume'.}, journal = {DNA research : an international journal for rapid publication of reports on genes and genomes}, volume = {30}, number = {1}, pages = {}, pmid = {36566389}, issn = {1756-1663}, support = {16H02535//KAKENHI/ ; //Kazusa DNA Research Institute Foundation/ ; }, mesh = {*Capsicum/genetics ; Chromosome Mapping ; Chromosomes ; Plant Breeding ; }, abstract = {Here, we report the genome sequence of a popular Japanese chili pepper landrace, Capsicum annuum 'Takanotsume'. We used long-read sequencing and optical mapping, together with the genetic mapping technique, to obtain the chromosome-scale genome assembly of 'Takanotsume'. The assembly consists of 12 pseudomolecules, which corresponds to the basic chromosome number of C. annuum, and is 3,058.5 Mb in size, spanning 97.0% of the estimated genome size. A total of 34,324 high-confidence genes were predicted in the genome, and 83.4% of the genome assembly was occupied by repetitive sequences. 
Comparative genomics of linked-read sequencing-derived de novo genome assemblies of two Capsicum chinense lines and whole-genome resequencing analysis of Capsicum species revealed not only nucleotide sequence variations but also genome structure variations (i.e. chromosomal rearrangements and transposon-insertion polymorphisms) between 'Takanotsume' and its relatives. Overall, the genome sequence data generated in this study will accelerate the pan-genomics and breeding of Capsicum, and facilitate the dissection of genetic mechanisms underlying the agronomically important traits of 'Takanotsume'.}, } @article {pmid36558824, year = {2022}, author = {Xia, F and Cheng, J and Jiang, M and Wang, Z and Wen, Z and Wang, M and Ren, J and Zhuge, X}, title = {Genomics Analysis to Identify Multiple Genetic Determinants That Drive the Global Transmission of the Pandemic ST95 Lineage of Extraintestinal Pathogenic Escherichia coli (ExPEC).}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {12}, pages = {}, pmid = {36558824}, issn = {2076-0817}, support = {BE2022329//Jiangsu Province Key Research and Development Program (Modern Agriculture) Project/ ; 32172855//National Natural Science Foundation of China/ ; }, abstract = {Extraintestinal pathogenic Escherichia coli (ExPEC) is a pathogen that causes host extraintestinal diseases. The ST95 E. coli lineage is one of the dominant ExPEC lineages in humans and poultry. In this study, we took advantage of extensive E. coli genomes available through public open-access databases to construct a detailed understanding of the phylogeny and evolution of ST95. We used a high variability of accessory genomes to highlight the diversity and dynamic traits of ST95. Isolates from diverse hosts and geographic sources were randomly located on the phylogenetic tree, which suggested that there is no host specificity for ST95. The time-scaled phylogeny showed that ST95 is an ancient and long-lasting lineage. 
The virulence genes, resistance genes, and pathogenicity islands (PAIs) were characterized in ST95 pan-genomes to provide novel insights into the pathogenicity and multidrug resistance (MDR) genotypes. We found that a pool of large plasmids drives virulence and MDR. Based on the unique genes in the ST95 pan-genome, we designed a novel multiplex PCR reaction to rapidly detect ST95. Overall, our study addressed a gap in the current understanding of ST95 ExPEC genomes, with significant implications for recognizing the success and spread of ST95.}, } @article {pmid36558765, year = {2022}, author = {Lu, Q and Zhu, X and Long, Q and Yi, X and Yang, A and Long, X and Cao, D}, title = {Comparative Genomics Reveal the Utilization Ability of Variable Carbohydrates as Key Genetic Features of Listeria Pathogens in Their Pathogenic Lifestyles.}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {12}, pages = {}, pmid = {36558765}, issn = {2076-0817}, support = {baike202236, baike202235//Baise Science and Technology Plan Project/ ; Nos. 81860489//National Natural Science Foundation of China/ ; Nos. AD19245174//the Science-Technology Program of Guangxi/ ; }, abstract = {BACKGROUND: L. monocytogenes and L. ivanovii, the only two pathogens of Listeria, can survive in various environments, having different pathogenic characteristics. However, the genetic basis of their excellent adaptability and differences in pathogenicity has still not been completely elucidated.

METHODS: We performed a comparative genomic analysis based on 275 L. monocytogenes, 10 L. ivanovii, and 22 non-pathogenic Listeria strains.

RESULTS: Core/pan-genome analysis revealed that 975 gene families were conserved in all the studied strains. Additionally, 204, 242, and 756 gene families existed uniquely in L. monocytogenes, L. ivanovii, and both, respectively. Functional annotation partially verified that these unique gene families were closely related to their adaptability and pathogenicity. Moreover, the protein-protein interaction (PPI) network analysis of these unique gene sets showed that plenty of carbohydrate transport systems and energy metabolism enzymes were clustered in the networks. Interestingly, ethanolamine-metabolic-process-related proteins were significantly enriched in the PPI network of the unique genes of the Listeria pathogens, which can be understood as a determining factor of their pathogenicity.

CONCLUSIONS: The utilization capacity of multiple carbon sources of Listeria pathogens, especially ethanolamine, is the key genetic basis for their ability to adapt to various environments and pathogenic lifestyles.}, } @article {pmid36557654, year = {2022}, author = {Vázquez-Sánchez, DA and Grillo, S and Carrera-Salinas, A and González-Díaz, A and Cuervo, G and Grau, I and Camoez, M and Martí, S and Berbel, D and Tubau, F and Ardanuy, C and Pujol, M and Càmara, J and Domínguez, MÁ}, title = {Molecular Epidemiology, Antimicrobial Susceptibility, and Clinical Features of Methicillin-Resistant Staphylococcus aureus Bloodstream Infections over 30 Years in Barcelona, Spain (1990-2019).}, journal = {Microorganisms}, volume = {10}, number = {12}, pages = {}, pmid = {36557654}, issn = {2076-2607}, support = {PI16/01382//Instituto de Salud Carlos III/ ; CIBERES-CB06/06/0037//Centro de Investigación Biomédica en Red de Enfermedades Respiratorias/ ; CIBERINFEC-CB21/13/00009//Centro de Investigación Biomédica en Red de Enfermedades Infecciosas/ ; FPU16/02202//Ministerio de Educación Cultura y Deporte/ ; CP19/00096//Instituto de Salud Carlos III/ ; }, abstract = {Methicillin-resistant Staphylococcus aureus bloodstream infections (MRSA-BSI) are a significant cause of mortality. We analysed the evolution of the molecular and clinical epidemiology of MRSA-BSI (n = 784) in adult patients (Barcelona, 1990−2019). Isolates were tested for antimicrobial susceptibility and genotyped (PFGE), and a selection was sequenced (WGS) to characterise the pangenome and mechanisms underlying antimicrobial resistance. Increases in patient age (60 to 71 years), comorbidities (Charlson’s index > 2, 10% to 94%), community-onset healthcare-associated acquisition (9% to 60%), and 30-day mortality (28% to 36%) were observed during the 1990−1995 and 2014−2019 periods. The proportion of catheter-related BSIs fell from 57% to 20%. 
Current MRSA-BSIs are caused by CC5-IV and an upward trend of CC8-IV and CC22-IV clones. CC5 and CC8 had the lowest core genome proportions. Antimicrobial resistance rates fell, and only ciprofloxacin, tobramycin, and erythromycin remained high (>50%) due to GyrA/GrlA changes, the presence of aminoglycoside-modifying enzymes (AAC(6′)-Ie-APH(2″)-Ia and ANT(4′)-Ia), and mph(C)/msr(A) or erm (C) genes. Two CC22-IV strains showed daptomycin resistance (MprF substitutions). MRSA-BSI has become healthcare-associated, affecting elderly patients with comorbidities and causing high mortality rates. Clonal replacement with CC5-IV and CC8-IV clones resulted in lower antimicrobial resistance rates. The increased frequency of the successful CC22-IV, associated with daptomycin resistance, should be monitored.}, } @article {pmid36553557, year = {2022}, author = {Wang, L and Zhou, F and Zhou, J and Harvey, PR and Yu, H and Zhang, G and Zhang, X}, title = {Genomic Analysis of Pseudomonas asiatica JP233: An Efficient Phosphate-Solubilizing Bacterium.}, journal = {Genes}, volume = {13}, number = {12}, pages = {}, pmid = {36553557}, issn = {2073-4425}, mesh = {*Phosphates/metabolism ; Phylogeny ; *Pseudomonas ; Genomics ; }, abstract = {The bacterium Pseudomonas sp. strain JP233 has been reported to efficiently solubilize sparingly soluble inorganic phosphate, promote plant growth and significantly reduce phosphorus (P) leaching loss from soil. The production of 2-keto gluconic acid (2KGA) by strain JP233 was identified as the main active metabolite responsible for phosphate solubilization. However, the genetic basis of phosphate solubilization and plant-growth promotion remained unclear. As a result, the genome of JP233 was sequenced and analyzed in this study. The JP233 genome consists of a circular chromosome with a size of 5,617,746 bp and a GC content of 62.86%. No plasmids were detected in the genome. There were 5097 protein-coding sequences (CDSs) predicted in the genome. 
Phylogenetic analyses based on genomes of related Pseudomonas spp. identified strain JP233 as Pseudomonas asiatica. Comparative pangenomic analysis among 9 P. asiatica strains identified 4080 core gene clusters and 111 singleton genes present only in JP233. Genes associated with 2KGA production detected in strain JP233 included those encoding glucose dehydrogenase, pyrroloquinoline quinone and gluconate dehydrogenase. Genes associated with mechanisms of plant-growth promotion and nutrient acquisition detected in JP233 included those involved in IAA biosynthesis, ethylene catabolism and siderophore production. Numerous genes associated with other properties beneficial to plant growth were also detected in JP233, including those involved in production of acetoin, 2,3-butanediol, trehalose, and resistance to heavy metals. This study provides the genetic basis to elucidate the plant-growth promoting and bio-remediation properties of strain JP233 and its potential applications in agriculture and industry.}, } @article {pmid36551744, year = {2022}, author = {Alturki, NA and Mashraqi, MM and Jalal, K and Khan, K and Basharat, Z and Alzamami, A}, title = {Therapeutic Target Identification and Inhibitor Screening against Riboflavin Synthase of Colorectal Cancer Associated Fusobacterium nucleatum.}, journal = {Cancers}, volume = {14}, number = {24}, pages = {}, pmid = {36551744}, issn = {2072-6694}, support = {NA//Shaqra University/ ; }, abstract = {Colorectal cancer (CRC) ranks third among all cancers in terms of prevalence. There is growing evidence that gut microbiota has a role in the development of colorectal cancer. Fusobacterium nucleatum is overrepresented in the gastrointestinal tract and tumor microenvironment of patients with CRC. This suggests the role of F. nucleatum as a potential risk factor in the development of CRC. Hence, we aimed to explore whole genomes of F. 
nucleatum strains related to CRC to predict potential therapeutic markers through a pan-genome integrated subtractive genomics approach. In the current study, we identified 538 proteins as essential for F. nucleatum survival, 209 non-homologous to a human host, and 12 as drug targets. Eventually, riboflavin synthase (RiS) was selected as a therapeutic target for further processing. Three different inhibitor libraries of lead-like natural products, i.e., cyanobactins (n = 237), streptomycins (n = 607), and marine bacterial secondary metabolites (n = 1226) were screened against it. After the structure-based study, three compounds, i.e., CMNPD3609 (−7.63) > Malyngamide V (−7.03) > ZINC06804365 (−7.01) were prioritized as potential inhibitors of F. nucleatum. Additionally, the stability and flexibility of these compounds bound to RiS were determined via a molecular dynamics simulation of 50 ns. Results revealed the stability of these compounds within the binding pocket, after 5 ns. ADMET profiling showed compounds as drug-like, non-permeable to the blood brain barrier, non-toxic, and HIA permeable. 
Pan-genomics mediated drug target identification and the virtual screening of inhibitors is the preliminary step towards inhibition of this pathogenic oncobacterium and we suggest mouse model experiments to validate our findings.}, } @article {pmid36550124, year = {2022}, author = {Vaughn, JN and Branham, SE and Abernathy, B and Hulse-Kemp, AM and Rivers, AR and Levi, A and Wechter, WP}, title = {Graph-based pangenomics maximizes genotyping density and reveals structural impacts on fungal resistance in melon.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {7897}, pmid = {36550124}, issn = {2041-1723}, mesh = {Genotype ; *Cucurbitaceae/genetics ; DNA Copy Number Variations ; Plant Breeding ; Quantitative Trait Loci/genetics ; *Cucumis melo/genetics/microbiology ; }, abstract = {The genomic sequences segregating in experimental populations are often highly divergent from the community reference and from one another. Such divergence is problematic under various short-read-based genotyping strategies. In addition, large structural differences are often invisible despite being strong candidates for causal variation. These issues are exacerbated in specialty crop breeding programs with fewer, lower-quality sequence resources. Here, we examine the benefits of complete genomic information, based on long-read assemblies, in a biparental mapping experiment segregating at numerous disease resistance loci in the non-model crop, melon (Cucumis melo). We find that a graph-based approach, which uses both parental genomes, results in 19% more variants callable across the population and raw allele calls with a 2 to 3-fold error-rate reduction, even relative to single reference approaches using a parent genome. We show that structural variation has played a substantial role in shaping two Fusarium wilt resistance loci with known causal genes. 
We also report on the genetics of powdery mildew resistance, where copy number variation and local recombination suppression are directly interpretable via parental genome alignments. Benefits observed, even in this low-resolution biparental experiment, will inevitably be amplified in more complex populations.}, } @article {pmid36547858, year = {2023}, author = {Sreya, P and Suresh, G and Rai, A and Ria, B and Vighnesh, L and Agre, VC and Jagadeeshwari, U and Sasikala, C and Ramana, CV}, title = {Revisiting the taxonomy of the genus Rhodopirellula with the proposal for reclassification of the genus to Rhodopirellula sensu stricto, Aporhodopirellula gen. nov., Allorhodopirellula gen. nov. and Neorhodopirellula gen. nov.}, journal = {Antonie van Leeuwenhoek}, volume = {116}, number = {3}, pages = {243-264}, pmid = {36547858}, issn = {1572-9699}, mesh = {Sequence Analysis, DNA ; RNA, Ribosomal, 16S/genetics ; Phylogeny ; *Bacteria/genetics ; *DNA ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; Fatty Acids/chemistry ; }, abstract = {The current genus Rhodopirellula consists of marine bacteria which belong to the family Pirellulaceae of the phylum Planctomycetota. Members of the genus Rhodopirellula are aerobic, mesophiles and chemoheterotrophs. The here conducted analysis built on 16S rRNA gene sequence and multi-locus sequence analysis based phylogenomic trees suggested that the genus is subdivided into four clades. Existing Rhodopirellula species were studied extensively based on phenotypic, genomic and chemotaxonomic parameters. The heterogeneity was further confirmed by overall genome-related indices (OGRI) including digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), average amino acid identity (AAI), and percentage of conserved proteins (POCP). AAI and POCP values between the clades of the genus Rhodopirellula were 62.2-69.6% and 49.5-62.5%, respectively. 
Comparative genomic approaches like pan-genome analysis and conserved signature indels (CSIs) also support the division of the clades. The genomic incoherence of the members of the genus is further supported by variations in phenotypic characteristics. Thus, with the here applied integrated comparative genomic and polyphasic approaches, we propose the reclassification of the genus Rhodopirellula to three new genera: Aporhodopirellula gen. nov., Allorhodopirellula gen. nov., and Neorhodopirellula gen. nov.}, } @article {pmid36547571, year = {2022}, author = {Bao, J and Wang, Z and Chen, M and Chen, S and Chen, X and Xie, J and Tang, W and Zheng, H and Wang, Z}, title = {Pan-Genomics Reveals a New Variation Pattern of Secreted Proteins in Pyricularia oryzae.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {12}, pages = {}, pmid = {36547571}, issn = {2309-608X}, support = {U1805232//National Natural Science Foundation of China/ ; 32270078//National Natural Science Foundation of China/ ; 32172365//National Natural Science Foundation of China/ ; 32001976//National Natural Science Foundation of China/ ; }, abstract = {(1) Background: Pyricularia oryzae, the causal agent of rice blast disease, is one of the major rice pathogens. The complex population structure of P. oryzae facilitates the rapid virulence variations, which make the blast disease a serious challenge for global food security. There is a large body of existing genomics research on P. oryzae, however the population structure at the pan-genome level is not clear, and the mechanism of genetic divergence and virulence variations of different sub-populations is also unknown. (2) Methods: Based on the genome data published in the NCBI, we constructed a pan-genome database of P. oryzae, which consisted of 156 strains (117 isolated from rice and 39 isolated from other hosts). 
(3) Results: The pan-genome contained a total of 24,100 genes (12,005 novel genes absent in the reference genome 70-15), including 16,911 (~70%) core genes (population frequency ≥95%) and 1378 (~5%) strain-specific genes (population frequency ≤5%). Gene presence-absence variation (PAV) based clustering analysis of the population structure of P. oryzae revealed four subgroups (three from rice and one from other hosts). Interestingly, the cloned avirulence genes and conventional secreted proteins (SPs, with signal peptides) were enriched in the high-frequency regions and significantly associated with transposable elements (TEs), while the unconventional SPs (without signal peptides) were enriched in the low-frequency regions and not associated significantly with TEs. This pan-genome will expand the breadth and depth of the rice blast fungus reference genome, and also serve as a new blueprint for scientists to further study the pathogenic mechanism and virulence variation of the rice blast fungus.}, } @article {pmid36544084, year = {2022}, author = {Morey-León, G and Andrade-Molina, D and Fernández-Cadena, JC and Berná, L}, title = {Comparative genomics of drug-resistant strains of Mycobacterium tuberculosis in Ecuador.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {844}, pmid = {36544084}, issn = {1471-2164}, support = {FCI-016-2017//University of Guayaquil/ ; SNI//Agencia Nacional de Investigación e Innovación/ ; }, mesh = {Male ; Humans ; Female ; *Mycobacterium tuberculosis ; Antitubercular Agents/pharmacology/therapeutic use ; *Tuberculosis, Multidrug-Resistant/epidemiology/microbiology ; Ecuador/epidemiology ; Phylogeny ; Mutation ; Microbial Sensitivity Tests ; *Tuberculosis/epidemiology/drug therapy ; Genomics ; Fluoroquinolones ; Drug Resistance, Multiple, Bacterial/genetics ; }, abstract = {BACKGROUND: Tuberculosis is a serious infectious disease affecting millions of people. 
In spite of efforts to reduce the disease, increasing antibiotic resistance has contributed to its persistence among the top 10 causes of death worldwide. In fact, the increase in cases of multi (MDR) and extreme drug resistance (XDR) worldwide remains the main challenge for tuberculosis control. Whole genome sequencing is a powerful tool for predicting drug resistance-related variants, studying lineages, tracking transmission, and defining outbreaks. This study presents the identification and characterization of resistant clinical isolates of Mycobacterium tuberculosis including a phylogenetic and molecular resistance profile study by sequencing the complete genome of 24 strains from different provinces of Ecuador.

RESULTS: Genomic sequencing was used to identify the variants causing resistance. A total of 15/21 isolates were identified as MDR, 4/21 as pre-XDR and 2/21 as XDR, with three isolates discarded due to low quality; the main sub-lineages were LAM (61.9%) and Haarlem (19%), but clades X, T and S were also identified. Of the six pre-XDR and XDR strains, it is noteworthy that five come from females; four come from the LAM sub-lineage and two correspond to the X-class sub-lineage. A core genome of 3,750 genes, distributed in 295 subsystems, was determined. Among these, 64 proteins related to virulence and implicated in the pathogenicity of M. tuberculosis and 66 possible pharmacological targets stand out. Most variants result in nonsynonymous amino acid changes and the most frequent genotypes were identified as conferring resistance to rifampicin, isoniazid, ethambutol, para-aminosalicylic acid and streptomycin. However, an increase in the resistance to fluoroquinolones was detected.

CONCLUSION: This work shows for the first time the variability of circulating resistant strains between men and women in Ecuador, highlighting the usefulness of genomic sequencing for the identification of emerging resistance. In this regard, we found an increase in fluoroquinolone resistance. Further sampling effort is needed to determine the total variability and associations with the metadata obtained to generate better health policies.}, } @article {pmid36539044, year = {2023}, author = {Lima, A and Carolina Barbosa Caetano, A and Hurtado Castillo, R and Gonçalves Dos Santos, R and Lucas Neres Rodrigues, D and de Jesus Sousa, T and Kato, RB and Vinicius Canário Viana, M and Cybelle Pinto Gomide, A and Figueira Aburjaile, F and Tiwari, S and Jaiswal, A and Gala-García, A and Seyffert, N and Luiz de Paula Castro, T and Brenig, B and Matiuzzi da Costa, M and Maria Seles Dorneles, E and Le Loir, Y and Azevedo, V}, title = {Comparative genomic analysis of ovine and other host associated isolates of Staphylococcus aureus exhibit the important role of mobile genetic elements and virulence factors in host adaptation.}, journal = {Gene}, volume = {855}, number = {}, pages = {147131}, doi = {10.1016/j.gene.2022.147131}, pmid = {36539044}, issn = {1879-0038}, mesh = {Female ; Animals ; Cattle ; Sheep/genetics ; Humans ; Swine ; Virulence Factors/genetics ; Staphylococcus aureus/genetics ; Host Adaptation ; *Staphylococcal Infections/genetics/veterinary/microbiology ; Ruminants/genetics ; Genomics ; Interspersed Repetitive Sequences ; *Mastitis, Bovine/genetics/microbiology ; }, abstract = {Staphylococcus aureus is the main etiological agent of mastitis in small ruminants worldwide. This disease has a difficult cure and possible relapse, leading to significant economic losses in production, milk quality and livestock. This study performed comparative genomic analyses between 73 S. aureus genomes from different hosts (human, bovine, pig and others). 
This work isolated and sequenced 12 of these genomes from ovine. This study contributes to the knowledge of genomic specialization and the role of specific genes in establishing infection in ovine mastitis-associated S. aureus. The genomes of S. aureus isolated from sheep maintained a higher representation when grouped with clonal complexes 130 and 133. The genomes showed high genetic similarity, the species pan-genome consisting of 4200 genes (central = 2008, accessory = 1559 and unique = 634). Among these, 277 unique genes were related to the genomes isolated from sheep, with 39.6 % as hypothetical proteins, 6.4 % as phages, 6.4 % as toxins, 2.9 % as transporters, and 44.7 % as related to other proteins. Furthermore, at the pathogen level, they showed 80 genes associated with virulence factors and 19 with antibiotic resistance shared in almost all isolates. Although S. aureus isolated from ovine showed susceptibility to antimicrobials in vitro, ten genes were predicted to be associated with antibiotic inactivation and efflux pump, suggesting resistance to gentamicin and penicillin. 
This work may contribute to identifying genes acquired by horizontal transfer and their role in host adaptation, virulence, bacterial resistance, and characterization of strains affecting ovine.}, } @article {pmid36537824, year = {2023}, author = {Simoni, S and Leoni, F and Veschetti, L and Malerba, G and Carelli, M and Lleò, MM and Brenciani, A and Morroni, G and Giovanetti, E and Rocchegiani, E and Barchiesi, F and Vignaroli, C}, title = {The Emerging Nosocomial Pathogen Klebsiella michiganensis: Genetic Analysis of a KPC-3 Producing Strain Isolated from Venus Clam.}, journal = {Microbiology spectrum}, volume = {11}, number = {1}, pages = {e0423522}, pmid = {36537824}, issn = {2165-0497}, mesh = {Humans ; Anti-Bacterial Agents/pharmacology ; Phylogeny ; *Cross Infection ; *Klebsiella Infections/epidemiology ; Drug Resistance, Multiple, Bacterial/genetics ; Plasmids/genetics ; Klebsiella pneumoniae ; beta-Lactamases/genetics ; Carbapenems/pharmacology ; Hospitals ; Bacterial Proteins/genetics ; Microbial Sensitivity Tests ; }, abstract = {The recovery and characterization of a multidrug-resistant, KPC-3-producing Klebsiella michiganensis that was obtained from Venus clam samples is reported in this study. A whole-genome sequencing (WGS) analysis using Illumina and Nanopore technologies of the K. michiganensis 23999A2 isolate revealed that the strain belonged to the new sequence type 382 (ST382) and carried seven plasmid replicon sequences, including four IncF type plasmids (FII, FIIY, FIIk, and FIB), one IncHI1 plasmid, and two Col plasmids. The FIB and FIIk plasmids showed high homology to each other and to multireplicon pKpQIL-like plasmids that are found in epidemic KPC-K. pneumoniae clones worldwide. The strain carried multiple β-lactamase genes on the IncF plasmids: blaOXA-9 and blaTEM-1A on FIB, blaKPC-3 inserted in a Tn4401a on FIIK, and blaSHV-12 on FIIY. The IncHI1-ST11 harbored no resistance gene. 
The curing of the strain caused the loss of all of the bla genes and a rearrangement of the IncF plasmids. Conjugal transfer of the blaOXA-9, blaTEM-1A and blaKPC-3 genes occurred at a frequency of 5 × 10⁻⁷, using K. quasipneumoniae as a recipient, and all of the bla genes were transferred through a pKpQIL that originated from the recombination of the FIB and FIIk plasmids of the donor. A comparison with 31 K. michiganensis genomes that are available in the NCBI database showed that the closest phylogenetic relatives of K. michiganensis 23999A2 are an environmental isolate from soil in South Korea and a clinical isolate from human sputum in Japan. Finally, a pan-genome analysis showed a large accessory genome of the strain as well as the great genomic plasticity of the K. michiganensis species. IMPORTANCE Klebsiella michiganensis is an emerging nosocomial pathogen, and, so far, few studies describe isolates of clinical origin in the environment. This study contributes to the understanding of how the dissemination of carbapenem-resistance outside the hospital setting may be related to the circulation of pKpQIL-like plasmids that are derived from epidemic Klebsiella pneumoniae strains. The recovery of a carbapenem-resistant isolate in clams is of great concern, as bivalves could represent vehicles of transmission of pathogens and resistance genes to humans via the food chain. The study demonstrates the plasticity of K. michiganensis genome, which is probably useful to multiple environment adaptation and to the evolution of the species.}, } @article {pmid36536862, year = {2022}, author = {Cai, Q and Huang, Y and Zhou, L and Hu, N and Liu, Y and Guo, F and Liu, Q and Huang, X and Zhang, Y and Zeng, L}, title = {A Complete Genome of Nocardia terpenica NC_YFY_NT001 and Pan-Genomic Analysis Based on Different Sources of Nocardia spp. 
Isolates Reveal Possibly Host-Related Virulence Factors.}, journal = {Infection and drug resistance}, volume = {15}, number = {}, pages = {7259-7270}, pmid = {36536862}, issn = {1178-6973}, abstract = {OBJECTIVE: We aimed to identify the possible virulence genes associated with Nocardia NC_YFY_NT001 isolated by ourselves and other Nocardia spp.

METHODS: The genome of Nocardia terpenica NC_YFY_NT001 was completed by using PacBio and Illumina platforms. A pan-genomic analysis was applied to selected complete Nocardia genomes.

RESULTS: Nocardia terpenica NC_YFY_NT001 can cause death in healthy mice following tail intravenous injection. The genome of NT001 has one circular chromosome of 8,850,000 bp and one circular plasmid of 70,000 bp, with ~68% GC content. The chromosome and plasmid encode 7914 and 80 proteins, respectively. Furthermore, a pan-genomic analysis showed a total of 45,825 gene clusters, which were classified into 304 core, 21,045 shell and 24,476 cloud gene clusters using specific parameters. In addition, we found that catalases were more abundant in human isolates. Furthermore, we also found no significant differences in the MCE proteins between different strains from different sources. The pan-genomic analysis also showed that 67 genes could only be found in humoral isolates. ReX3 and DUF853 domain protein were found in all eight human isolates. The composition of unique genes in humoral isolate genomes indicated that the transcriptional regulators may be important when Nocardia invades the host, which allows them to survive in the new ecological system.

CONCLUSION: In this study, we confirmed that NT001 could cause infected animal death, and identified many possible virulence factors for our future studies. This study also provides new insight for our further study on Nocardia virulence mechanisms.}, } @article {pmid36536253, year = {2023}, author = {Sohn, JI and Choi, MH and Yi, D and Menon, VA and Kim, YJ and Lee, J and Park, JW and Kyung, S and Shin, SH and Na, B and Joung, JG and Ju, YS and Yeom, MS and Koh, Y and Yoon, SS and Baek, D and Kim, TM and Nam, JW}, title = {Ultrafast prediction of somatic structural variations by filtering out reads matched to pan-genome k-mer sets.}, journal = {Nature biomedical engineering}, volume = {7}, number = {7}, pages = {853-866}, pmid = {36536253}, issn = {2157-846X}, mesh = {Humans ; *High-Throughput Nucleotide Sequencing/methods ; Genome ; Sequence Analysis, DNA/methods ; *Neoplasms ; }, abstract = {Variant callers typically produce massive numbers of false positives for structural variations, such as cancer-relevant copy-number alterations and fusion genes resulting from genome rearrangements. Here we describe an ultrafast and accurate detector of somatic structural variations that reduces read-mapping costs by filtering out reads matched to pan-genome k-mer sets. The detector, which we named ETCHING (for efficient detection of chromosomal rearrangements and fusion genes), reduces the number of false positives by leveraging machine-learning classifiers trained with six breakend-related features (clipped-read count, split-reads count, supporting paired-end read count, average mapping quality, depth difference and total length of clipped bases). When benchmarked against six callers on reference cell-free DNA, validated biomarkers of structural variants, matched tumour and normal whole genomes, and tumour-only targeted sequencing datasets, ETCHING was 11-fold faster than the second-fastest structural-variant caller at comparable performance and memory use. 
The speed and accuracy of ETCHING may aid large-scale genome projects and facilitate practical implementations in precision medicine.}, } @article {pmid36534203, year = {2022}, author = {Jesus, HNR and Ramos, JN and Rocha, DJPG and Alves, DA and Silva, CS and Cruz, JVO and Vieira, VV and Souza, C and Santos, LS and Navas, J and Ramos, RTJ and Azevedo, V and Aguiar, ERGR and Mattos-Guaraldi, AL and Pacheco, LGC}, title = {The pan-genome of the emerging multidrug-resistant pathogen Corynebacterium striatum.}, journal = {Functional \& integrative genomics}, volume = {23}, number = {1}, pages = {5}, pmid = {36534203}, issn = {1438-7948}, support = {BOL0505/2018//Fundação de Amparo à Pesquisa do Estado da Bahia/ ; BOL0505/2018//Fundação de Amparo à Pesquisa do Estado da Bahia/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CNPq Nº 09/2018//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; CNPq Nº 09/2018//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; CNPq Nº 09/2018//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; MCT/FINEP/CT-INFRA01/2013//Financiadora de Estudos e Projetos/ ; }, mesh = {Humans ; *Corynebacterium ; *Anti-Bacterial Agents ; Phenotype ; Virulence Factors/genetics ; Drug Resistance, Multiple, Bacterial/genetics ; Microbial Sensitivity Tests ; }, abstract = {Corynebacterium striatum, a common constituent of the human skin microbiome, is now considered an emerging multidrug-resistant pathogen of immunocompromised and chronically ill patients. 
However, little is known about the molecular mechanisms in the transition from colonization to the multidrug-resistant (MDR) invasive phenotype in clinical isolates. This study performed a comprehensive pan-genomic analysis of C. striatum, including isolates from "normal skin microbiome" and from MDR infections, to gain insights into genetic factors contributing to pathogenicity and multidrug resistance in this species. For this, three novel genome sequences were obtained from clinical isolates of C. striatum of patients from Brazil, and other 24 complete or draft C. striatum genomes were retrieved from GenBank, including the ATCC6940 isolate from the Human Microbiome Project. Analysis of C. striatum strains demonstrated the presence of an open pan-genome (α = 0.852803) containing 3816 gene families, including 15 antimicrobial resistance (AMR) genes and 32 putative virulence factors. The core and accessory genomes included 1297 and 1307 genes, respectively. The identified AMR genes are primarily associated with resistance to aminoglycosides and tetracyclines. Of these, 66.6% are present in genomic islands, and four AMR genes, including aac(6')-ib7, are located in a class 1-integron. In conclusion, our data indicated that C. 
striatum possesses genomic characteristics favorable to the invasive phenotype, with high genomic plasticity, a robust genetic arsenal for iron acquisition, and important virulence determinants and AMR genes present in mobile genetic elements.}, } @article {pmid36534120, year = {2023}, author = {Gui, S and Martinez-Rivas, FJ and Wen, W and Meng, M and Yan, J and Usadel, B and Fernie, AR}, title = {Going broad and deep: sequencing-driven insights into plant physiology, evolution, and crop domestication.}, journal = {The Plant journal : for cell and molecular biology}, volume = {113}, number = {3}, pages = {446-459}, doi = {10.1111/tpj.16070}, pmid = {36534120}, issn = {1365-313X}, mesh = {*Domestication ; *Genome-Wide Association Study ; Genome, Plant/genetics ; Genomics ; Plants ; }, abstract = {Deep sequencing is a term that has become embedded in the plant genomic literature in recent years and with good reason. A torrent of (largely) high-quality genomic and transcriptomic data has been collected and most of this has been publicly released. Indeed, almost 1000 plant genomes have been reported (www.plabipd.de) and the 2000 Plant Transcriptomes Project has long been completed. The EarthBioGenome project will dwarf even these milestones. That said, massive progress in understanding plant physiology, evolution, and crop domestication has been made by sequencing broadly (across a species) as well as deeply (within a single individual). We will outline the current state of the art in genome and transcriptome sequencing before we briefly review the most visible of these broad approaches, namely genome-wide association and transcriptome-wide association studies, as well as the compilation of pangenomes. This will include both (i) the most commonly used methods reliant on single nucleotide polymorphisms and short InDels and (ii) more recent examples which consider structural variants. 
We will subsequently present case studies exemplifying how their application has brought insight into either plant physiology or evolution and crop domestication. Finally, we will provide conclusions and an outlook as to the perspective for the extension of such approaches to different species, tissues, and biological processes.}, } @article {pmid36533928, year = {2023}, author = {Wang, Z and Xu, S and Zheng, X and Zheng, X and Liu, M and Guo, G and Yu, Y and Han, X and Liu, Y and Wang, K and Zhang, W}, title = {Identification of Subunits for Novel Universal Vaccines against Three Predominant Serogroups and the Emerging O145 among Avian Pathogenic Escherichia coli by Pan-RV Pipeline.}, journal = {Applied and environmental microbiology}, volume = {89}, number = {1}, pages = {e0106122}, pmid = {36533928}, issn = {1098-5336}, mesh = {Animals ; Escherichia coli/genetics ; Serogroup ; *Escherichia coli Infections/prevention & control/veterinary ; Poultry ; Bacterial Vaccines ; *Escherichia coli Vaccines ; *Poultry Diseases/prevention & control/microbiology ; Chickens ; }, abstract = {Avian pathogenic Escherichia coli, a causative agent of avian colibacillosis, has been causing serious economic losses in the poultry industry. The increase in multidrug-resistant isolates and the complexity of the serotypes of this pathogen, especially the recently reported emergence of a newly predominant serogroup of O145, make the control of this disease difficult. To address this challenge, a high-throughput screening approach, called Pan-RV (Reverse vaccinology based on pangenome analysis), is proposed to search for universal protective antigens against the three traditional serogroups and the newly emerged O145. Using this approach, a total of 61 proteins regarded as probable antigens against the four important serogroups were screened from the core genome of 127 Avian pathogenic Escherichia coli (APEC) genomes, and six were verified by Western blots using antisera. 
Overall, our research will provide a foundation for the development of an APEC subunit vaccine against avian colibacillosis. Given the exponential growth of whole-genome sequencing (WGS) data, our Pan-RV pipeline will make screening of bacterial vaccine candidates inexpensive, rapid, and efficient. IMPORTANCE With the emergence of drug resistance and the newly predominant serogroup O145, the control of Avian pathogenic Escherichia coli is facing a serious challenge; an efficient immunological method is urgently needed. Here, for the first time, we propose a high-throughput screening approach to search for universal protective antigens against the three traditional serogroups and the newly emerged O145. Importantly, using this approach, a total of 61 proteins regarded as probable antigens against the four important serogroups were screened, and three were shown to be immunoreactive with all antisera (covering the four serogroups), thereby providing a foundation for the development of APEC subunit vaccines against avian colibacillosis. Further, our Pan-RV pipeline will provide immunological control strategies for pathogens with complex and variable genetic backgrounds such as Escherichia coli and will make screening of bacterial vaccine candidates more inexpensive, rapid, and efficient.}, } @article {pmid36533266, year = {2022}, author = {Usadel, B}, title = {Solanaceae pangenomes are coming of graphical age to bring heritability back.}, journal = {aBIOTECH}, volume = {3}, number = {4}, pages = {233-236}, pmid = {36533266}, issn = {2662-1738}, abstract = {Two recent articles describe a pangenome of potato and a graph-based pangenome for tomato, respectively. 
The latter improves our understanding of the tomato genomics architecture even further and the use of this graph-based pangenome versus a single reference dramatically improves heritability in tomato.}, } @article {pmid36532462, year = {2022}, author = {Cohn, AR and Orsi, RH and Carroll, LM and Liao, J and Wiedmann, M and Cheng, RA}, title = {Salmonella enterica serovar Cerro displays a phylogenetic structure and genomic features consistent with virulence attenuation and adaptation to cattle.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1005215}, pmid = {36532462}, issn = {1664-302X}, abstract = {Salmonella enterica subsp. enterica (S.) serovar Cerro is rarely isolated from human clinical cases of salmonellosis but represents the most common serovar isolated from cattle without clinical signs of illness in the United States. In this study, using a large, diverse set of 316 isolates, we utilized genomic methods to further elucidate the evolutionary history of S. Cerro and to identify genomic features associated with its apparent virulence attenuation in humans. Phylogenetic analyses showed that within this polyphyletic serovar, 98.4% of isolates (311/316) represent a monophyletic clade within section Typhi and the remaining 1.6% of isolates (5/316) form a monophyletic clade within subspecies enterica Clade A1. Of the section Typhi S. Cerro isolates, 93.2% of isolates (290/311) clustered into a large clonal clade comprised of predominantly sequence type (ST) 367 cattle and environmental isolates, while the remaining 6.8% of isolates (21/311), primarily from human clinical sources, clustered outside of this clonal clade. A tip-dated phylogeny of S. Cerro ST367 identified two major clades (I and II), one of which overwhelmingly consisted of cattle isolates that share a most recent common ancestor that existed circa 1975. Gene presence/absence and rarefaction curve analyses suggested that the pangenome of section Typhi S. 
Cerro is open, potentially reflecting the gain/loss of prophage; human isolates contained the most open pangenome, while cattle isolates had the least open pangenome. Hypothetically disrupted coding sequences (HDCs) displayed clade-specific losses of intact speC and sopA virulence genes within the large clonal S. Cerro clade, while loss of intact vgrG, araH, and vapC occurred in all section Typhi S. Cerro isolates. Further phenotypic analysis suggested that the presence of a premature stop codon in speC does not abolish ornithine decarboxylase activity in S. Cerro, likely due to the activity of the second ornithine decarboxylase encoded by speF, which remained intact in all isolates. Overall, our study identifies specific genomic features associated with S. Cerro's infrequent isolation from humans and its apparent adaptation to cattle, which has broader implications for informing our understanding of the evolutionary events facilitating host adaptation in Salmonella.}, } @article {pmid36529716, year = {2022}, author = {Cagirici, HB and Andorf, CM and Sen, TZ}, title = {Co-expression pan-network reveals genes involved in complex traits within maize pan-genome.}, journal = {BMC plant biology}, volume = {22}, number = {1}, pages = {595}, pmid = {36529716}, issn = {1471-2229}, mesh = {*Zea mays/genetics ; *Genome-Wide Association Study/methods ; Multifactorial Inheritance ; Phenotype ; Gene Regulatory Networks ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {BACKGROUND: With the advances in the high throughput next generation sequencing technologies, genome-wide association studies (GWAS) have identified a large set of variants associated with complex phenotypic traits at a very fine scale. Despite the progress in GWAS, identification of genotype-phenotype relationship remains challenging in maize due to its nature with dozens of variants controlling the same trait. 
As the causal variations result in changes in expression, gene expression analyses play a pivotal role in unraveling the transcriptional regulatory mechanisms behind the phenotypes.

RESULTS: To address these challenges, we incorporated the gene expression and GWAS-driven traits to extend the knowledge of genotype-phenotype relationships and transcriptional regulatory mechanisms behind the phenotypes. We constructed a large collection of gene co-expression networks and identified more than 2 million co-expressing gene pairs in the GWAS-driven pan-network which contains all the gene-pairs in individual genomes of the nested association mapping (NAM) population. We defined four sub-categories for the pan-network: (1) core-network contains the highest represented ~ 1% of the gene-pairs, (2) near-core network contains the next highest represented 1-5% of the gene-pairs, (3) private-network contains ~ 50% of the gene pairs that are unique to individual genomes, and (4) the dispensable-network contains the remaining 50-95% of the gene-pairs in the maize pan-genome. Strikingly, the private-network contained almost all the genes in the pan-network but lacked half of the interactions. We performed gene ontology (GO) enrichment analysis for the pan-, core-, and private- networks and compared the contributions of variants overlapping with genes and promoters to the GWAS-driven pan-network.

CONCLUSIONS: Gene co-expression networks revealed meaningful information about groups of co-regulated genes that play a central role in regulatory processes. Pan-network approach enabled us to visualize the global view of the gene regulatory network for the studied system that could not be well inferred by the core-network alone.}, } @article {pmid36526963, year = {2022}, author = {Abraha, HB and Lee, JW and Kim, G and Ferdiansyah, MK and Ramesha, RM and Kim, KP}, title = {Genomic diversity and comprehensive taxonomical classification of 61 Bacillus subtilis group member infecting bacteriophages, and the identification of ortholog taxonomic signature genes.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {835}, pmid = {36526963}, issn = {1471-2164}, support = {2021R1A2C2008022//National Research Foundation of Korea/ ; 2021R1A2C2008022//National Research Foundation of Korea/ ; 2021R1A2C2008022//National Research Foundation of Korea/ ; }, mesh = {*Bacteriophages/genetics ; *Bacillus/genetics ; Bacillus subtilis/genetics ; Genomics ; Genome, Viral ; Phylogeny ; }, abstract = {BACKGROUND: Despite the applications of Bacillus subtilis group species in various sectors, limited information is available regarding their phages. Here, 61 B. subtilis group species-infecting phages (BSPs) were studied for their taxonomic classification considering the genome-size, genomic diversity, and the host, followed by the identification of orthologs taxonomic signature genes.

RESULTS: BSPs have widely ranging genome sizes that can be bunched into groups to demonstrate correlations to family and subfamily classifications. Comparative analysis re-confirmed the existing, BSPs-containing 14 genera and 21 species and displayed inter-genera similarities within existing subfamilies. Importantly, it also revealed the need for the creation of new taxonomic classifications, including 28 species, nine genera, and two subfamilies (New subfamily1 and New subfamily2) to accommodate inter-genera relatedness. Following pangenome analysis, no ortholog shared by all BSPs was identified, while orthologs, namely, the tail fibers/spike proteins and poly-gamma-glutamate hydrolase, that are shared by more than two-thirds of the BSPs were identified. More importantly, major capsid protein (MCP) type I, MCP type II, MCP type III and peptidoglycan binding proteins that are distinctive orthologs for Herelleviridae, Salasmaviridae, New subfamily1, and New subfamily2, respectively, were identified and analyzed which could serve as signatures to distinguish BSP members of the respective taxon.

CONCLUSIONS: In this study, we show the genomic diversity and propose a comprehensive classification of 61 BSPs, including the proposition for the creation of two new subfamilies, followed by the identification of orthologs taxonomic signature genes, potentially contributing to phage taxonomy.}, } @article {pmid36523157, year = {2023}, author = {Shi, J and Tian, Z and Lai, J and Huang, X}, title = {Plant pan-genomics and its applications.}, journal = {Molecular plant}, volume = {16}, number = {1}, pages = {168-186}, doi = {10.1016/j.molp.2022.12.009}, pmid = {36523157}, issn = {1752-9867}, mesh = {*Genomics ; *Genome, Plant/genetics ; Chromosome Mapping ; }, abstract = {Plant genomes are so highly diverse that a substantial proportion of genomic sequences are not shared among individuals. The variable DNA sequences, along with the conserved core sequences, compose the more sophisticated pan-genome that represents the collection of all non-redundant DNA in a species. With rapid progress in genome sequencing technologies, pan-genome research in plants is now accelerating. Here we review recent advances in plant pan-genomics, including major driving forces of structural variations that constitute the variable sequences, methodological innovations for representing the pan-genome, and major successes in constructing plant pan-genomes. We also summarize recent efforts toward decoding the remaining dark matter in telomere-to-telomere or gapless plant genomes. 
These new genome resources, which have remarkable advantages over numerous previously assembled less-than-perfect genomes, are expected to become new references for genetic studies and plant breeding.}, } @article {pmid36516689, year = {2023}, author = {Hussain, J and Cohen, M and O'Malley, CJ and Mantri, N and Li, Y and Mueller, JF and Greaves, R and Wang, X}, title = {Detections of organophosphate and pyrethroid insecticide metabolites in urine and sweat obtained from women during infrared sauna and exercise: A pilot crossover study.}, journal = {International journal of hygiene and environmental health}, volume = {248}, number = {}, pages = {114091}, doi = {10.1016/j.ijheh.2022.114091}, pmid = {36516689}, issn = {1618-131X}, mesh = {Humans ; Female ; *Insecticides/urine ; Cross-Over Studies ; Sweat/chemistry/metabolism ; Organophosphates/urine ; *Steam Bath ; *Pyrethrins ; *Pesticides/urine ; Environmental Exposure/analysis ; }, abstract = {Synthetic pesticides such as organophosphates and pyrethroids are commonly used worldwide yet the metabolic and long-term human health effects of these environmental exposures are unclear. Urinary detections of metabolites involving both classes of insecticides have been documented in various global populations. However, reports documenting similar detections in human sweat are sparse. In this study, the concentrations of four insecticide metabolites were measured using liquid chromatography coupled with tandem mass spectrometry in repeated sweat and urine collections (n = 85) from 10 women undergoing three interventions (control, infrared sauna and indoor bicycling) within a single-blinded randomised crossover trial. The Friedman test with post-hoc two-way analysis of variance, the related-samples Wilcoxon signed rank test and the Spearman's rank-order correlation test were used to analyse the results. 
Organophosphate metabolites were detected in 84.6% (22/26) and pyrethroids in 26.9% (7/26) of the collected sweat samples (pooled per individual, per intervention). Urinary concentrations of three of the four metabolites marginally increased after infrared sauna bathing: 3,5,6-trichloro-2-pyridinol (z = 2.395, p = 0.017); 3-phenoxybenzoic acid (z = 2.599, p = 0.009); and trans-3-(2,2-dichlorovinyl)-2,2-dimethylcyclopropane-1-carboxylic acid (z = 2.090, p = 0.037). Urinary 3-phenoxybenzoic acid also increased after exercise (z = 2.073, p = 0.038) and demonstrated the most temporal variability (days to weeks) of any of the urinary metabolites. Definitive sweat/urine correlations were not demonstrated. These results indicate metabolites from organophosphate and pyrethroid pesticides can be detected in human sweat and this raises intriguing questions about perspiration and its role in the metabolism and excretion of synthetic pesticides.}, } @article {pmid36515536, year = {2023}, author = {Rumball, NA and Alm, EW and McLellan, SL}, title = {Genetic Determinants of Escherichia coli Survival in Beach Sand.}, journal = {Applied and environmental microbiology}, volume = {89}, number = {1}, pages = {e0142322}, pmid = {36515536}, issn = {1098-5336}, mesh = {Animals ; Humans ; *Sand ; Escherichia coli ; Lakes ; Michigan ; *Charadriiformes ; Environmental Monitoring/methods ; Feces ; Bathing Beaches ; Water Microbiology ; }, abstract = {Escherichia coli contain a high level of genetic diversity and are generally associated with the guts of warm-blooded animals but have also been isolated from secondary habitats outside hosts. We used E. coli isolates from previous in situ microcosm experiments conducted under actual beach conditions and performed population-level genomic analysis to identify accessory genes associated with survival within the beach sand environment. E. 
coli strains capable of surviving had been selected for by seeding isolates originating from sand, sewage, and gull waste (n = 528; 176 from each source) into sand, which was sealed in microcosm chambers and buried for 45 days in the backshore beach of Lake Michigan. In the current work, survival-associated genes were identified by comparing the pangenome of viable E. coli populations at the end of the microcosm experiment with the original isolate collection and identifying loci enriched in the output samples. We found that environmental survival was associated with a wide variety of genetic factors, with the majority corresponding to metabolism enzymes and transport proteins. Of the 414 unique functions identified, most were present across E. coli phylogroups, except B2 which is often associated with human pathogens. Gene modules that were enriched in surviving populations included a betaine biosynthesis pathway, which produces an osmoprotectant, and the GABA (gamma-aminobutyrate) biosynthesis pathway, which aids in pH homeostasis and nutrient use versatility. Overall, these results demonstrate that the genetic flexibility within this species allows for survival in the environment for extended periods. IMPORTANCE Escherichia coli is commonly used as an indicator of recent fecal pollution in recreational water despite its known ability to survive in secondary environments, such as beach sand. These long-term survivors from sand reservoirs can be introduced into the water column through wave action or runoff during precipitation events, thereby impacting the perception of local water quality. Current beach monitoring methods cannot differentiate long-term environmental survivors from E. coli derived from recent fecal input, resulting in inaccurate monitoring results and unnecessary beach closures. This work identified the genetic factors that are associated with long-term survivors, providing insight into the mechanistic basis for E. 
coli accumulation in beach sand. A greater understanding of the intrinsic ability of E. coli to survive long-term and conditions that promote such survival will provide evidence of the limitations of beach water quality assessments using this indicator.}, } @article {pmid36511689, year = {2023}, author = {Dillard, LR and Glass, EM and Lewis, AL and Thomas-White, K and Papin, JA}, title = {Metabolic Network Models of the Gardnerella Pangenome Identify Key Interactions with the Vaginal Environment.}, journal = {mSystems}, volume = {8}, number = {1}, pages = {e0068922}, pmid = {36511689}, issn = {2379-5077}, support = {R01 AI114635/AI/NIAID NIH HHS/United States ; }, mesh = {Female ; Humans ; *Vaginosis, Bacterial/genetics ; Gardnerella ; Gardnerella vaginalis/genetics ; Vagina/microbiology ; Bacteria ; Metabolic Networks and Pathways/genetics ; }, abstract = {Gardnerella is the primary pathogenic bacterial genus present in the polymicrobial condition known as bacterial vaginosis (BV). Despite BV's high prevalence and associated chronic and acute women's health impacts, the Gardnerella pangenome is largely uncharacterized at both the genetic and functional metabolic levels. Here, we used genome-scale metabolic models to characterize in silico the Gardnerella pangenome metabolic content. We also assessed the metabolic functional capacity in a BV-positive cervicovaginal fluid context. The metabolic capacity varied widely across the pangenome, with 38.15% of all reactions being core to the genus, compared to 49.60% of reactions identified as being unique to a smaller subset of species. We identified 57 essential genes across the pangenome via in silico gene essentiality screens within two simulated vaginal metabolic environments. Four genes, gpsA, fas, suhB, and psd, were identified as core essential genes critical for the metabolic function of all analyzed bacterial species of the Gardnerella genus. 
Further understanding these core essential metabolic functions could inform novel therapeutic strategies to treat BV. Machine learning applied to simulated metabolic network flux distributions showed limited clustering based on the sample isolation source, which further supports the presence of extensive core metabolic functionality across this genus. These data represent the first metabolic modeling of the Gardnerella pangenome and illustrate strain-specific interactions with the vaginal metabolic environment across the pangenome. IMPORTANCE Bacterial vaginosis (BV) is the most common vaginal infection among reproductive-age women. Despite its prevalence and associated chronic and acute women's health impacts, the diverse bacteria involved in BV infection remain poorly characterized. Gardnerella is the genus of bacteria most commonly and most abundantly represented during BV. In this paper, we use metabolic models, which are a computational representation of the possible functional metabolism of an organism, to investigate metabolic conservation, gene essentiality, and pathway utilization across 110 Gardnerella strains. These models allow us to investigate in silico how strains may differ with respect to their metabolic interactions with the vaginal-host environment.}, } @article {pmid36503997, year = {2023}, author = {Chan, C and Salomé, PA}, title = {What makes a good reference? 
First steps toward a Chlamydomonas pangenome.}, journal = {The Plant cell}, volume = {35}, number = {2}, pages = {628-629}, pmid = {36503997}, issn = {1532-298X}, mesh = {*Chlamydomonas/genetics ; Genomics ; }, } @article {pmid36494615, year = {2022}, author = {Johansson, P and Säde, E and Hultman, J and Auvinen, P and Björkroth, J}, title = {Pangenome and genomic taxonomy analyses of Leuconostoc gelidum and Leuconostoc gasicomitatum.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {818}, pmid = {36494615}, issn = {1471-2164}, support = {307855//Academy of Finland/ ; NNF20OC0061239//Novo Nordisk Fonden/ ; }, mesh = {Phylogeny ; *Leuconostoc/genetics ; *DNA ; Food Microbiology ; }, abstract = {BACKGROUND: Leuconostoc gelidum and Leuconostoc gasicomitatum have dual roles in foods. They may spoil cold-stored packaged foods but can also be beneficial in kimchi fermentation. The impact in food science as well as the limited number of publicly available genomes prompted us to create pangenomes and perform genomic taxonomy analyses starting from de novo sequencing of the genomes of 37 L. gelidum/L. gasicomitatum strains from our culture collection. Our aim was also to evaluate the recently proposed change in taxonomy as well as to study the genomes of strains with different lifestyles in foods.

METHODS: We selected as diverse a set of strains as possible in terms of sources, previous genotyping results and geographical distribution, and included also 10 publicly available genomes in our analyses. We studied genomic taxonomy using pairwise average nucleotide identity (ANI) and calculation of digital DNA-DNA hybridisation (dDDH) scores. Phylogeny analyses were done using the core gene set of 1141 single-copy genes and a set of housekeeping genes commonly used for lactic acid bacteria. In addition, the pangenome and core genome sizes as well as some properties, such as acquired antimicrobial resistance (AMR), important due to the growth in foods, were analysed.

RESULTS: Genome relatedness indices and phylogenetic analyses supported the recently suggested classification that restores the taxonomic position of L. gelidum subsp. gasicomitatum back to the species level as L. gasicomitatum. Genome properties, such as size and coding potential, revealed limited intraspecies variation and showed no attribution to the source of isolation. The distribution of the unique genes between species and subspecies was not associated with the previously documented lifestyle in foods. None of the strains carried any acquired AMR genes or genes associated with any known form of virulence.

CONCLUSION: Genome-wide examination of strains confirms that the proposition to restore the taxonomic position of L. gasicomitatum is justified. It further confirms that the distribution and lifestyle of L. gelidum and L. gasicomitatum in foods have not been driven by the evolution of functional and phylogenetic diversification detectable at the genome level.}, } @article {pmid36494611, year = {2022}, author = {Guardia, AE and Wagner, A and Busalmen, JP and Di Capua, C and Cortéz, N and Beligni, MV}, title = {The draft genome of Andean Rhodopseudomonas sp. strain AZUL predicts genome plasticity and adaptation to chemical homeostasis.}, journal = {BMC microbiology}, volume = {22}, number = {1}, pages = {297}, pmid = {36494611}, issn = {1471-2180}, mesh = {*Rhodopseudomonas/genetics ; Adaptation, Physiological/genetics ; Base Sequence ; Genomics ; Acclimatization ; Phylogeny ; }, abstract = {The genus Rhodopseudomonas comprises purple non-sulfur bacteria with extremely versatile metabolisms. Characterization of several strains revealed that each is a distinct ecotype highly adapted to its specific micro-habitat. Here we present the sequencing, genomic comparison and functional annotation of AZUL, a Rhodopseudomonas strain isolated from a high altitude Andean lagoon dominated by extreme conditions and fluctuating levels of chemicals. Average nucleotide identity (ANI) analysis of 39 strains of this genus showed that the genome of AZUL is 96.2% identical to that of strain AAP120, which suggests that they belong to the same species. ANI values also show clear separation at the species level with the rest of the strains, being more closely related to R. palustris. Pangenomic analyses revealed that the genus Rhodopseudomonas has an open pangenome and that its core genome represents roughly 5 to 12% of the total gene repertoire of the genus. 
Functional annotation showed that AZUL has genes that participate in conferring genome plasticity and that, in addition to sharing the basal metabolic complexity of the genus, it is also specialized in metal and multidrug resistance and in responding to nutrient limitation. Our results also indicate that AZUL might have evolved to use some of the mechanisms involved in resistance as redox reactions for bioenergetic purposes. Most of those features are shared with strain AAP120, and mainly involve the presence of additional orthologs responsible for the mentioned processes. Altogether, our results suggest that AZUL, one of the few bacteria from its habitat with a sequenced genome, is highly adapted to the extreme and changing conditions that constitute its niche.}, } @article {pmid36479628, year = {2022}, author = {Adsit, FG and Randall, TA and Locklear, J and Kurtz, DM}, title = {The emergence of the tetrathionate reductase operon in the Escherichia coli/Shigella pan-genome.}, journal = {MicrobiologyOpen}, volume = {11}, number = {6}, pages = {e1333}, pmid = {36479628}, issn = {2045-8827}, mesh = {*Escherichia coli/genetics ; *Shigella ; Virulence Factors/genetics ; }, abstract = {Escherichia coli pathogenic variants (pathovars) are generally characterized by defined virulence traits and are susceptible to the evolution of hybridized identities due to the considerable plasticity of the E. coli genome. We have isolated a strain from a purified diet intended for research animals that further demonstrates the ability of E. coli to acquire novel genetic elements leading potentially to emergent new pathovars. Utilizing next generation sequencing to obtain a whole genome profile, we report an atypical strain of E. coli, EcoFA807-17, possessing a tetrathionate reductase (ttr) operon, which enables the utilization of tetrathionate as an electron acceptor, thus facilitating respiration in anaerobic environments such as the mammalian gut. 
The ttr operon is a potent virulence factor for several enteric pathogens, most prominently Salmonella enterica. However, the presence of chromosomally integrated tetrathionate reductase genes does not appear to have been previously reported in wild-type E. coli or Shigella. Accordingly, it is possible that the appearance of this virulence factor may signal the evolution of new mechanisms of pathogenicity in E. coli and Shigella and may potentially alter the effectiveness of existing assays using tetrathionate reductase as a unique marker for the detection of Salmonella enterica.}, } @article {pmid36479579, year = {2022}, author = {Droc, G and Martin, G and Guignon, V and Summo, M and Sempéré, G and Durant, E and Soriano, A and Baurens, FC and Cenci, A and Breton, C and Shah, T and Aury, JM and Ge, XJ and Harrison, PH and Yahiaoui, N and D'Hont, A and Rouard, M}, title = {The banana genome hub: a community database for genomics in the Musaceae.}, journal = {Horticulture research}, volume = {9}, number = {}, pages = {uhac221}, pmid = {36479579}, issn = {2662-6810}, abstract = {The Banana Genome Hub provides centralized access for genome assemblies, annotations, and the extensive related omics resources available for bananas and banana relatives. A series of tools and unique interfaces are implemented to harness the potential of genomics in bananas, leveraging the power of comparative analysis, while recognizing the differences between datasets. Besides effective genomic tools like BLAST and the JBrowse genome browser, additional interfaces enable advanced gene search and gene family analyses including multiple alignments and phylogenies. A synteny viewer enables the comparison of genome structures between chromosome-scale assemblies. Interfaces for differential expression analyses, metabolic pathways and GO enrichment were also added. 
A catalogue of variants spanning the banana diversity is made available for exploration, filtering, and export to a wide variety of software. Furthermore, we implemented new ways to graphically explore gene presence-absence in pangenomes as well as genome ancestry mosaics for cultivated bananas. Besides, to guide the community in future sequencing efforts, we provide recommendations for nomenclature of locus tags and a curated list of public genomic resources (assemblies, resequencing, high density genotyping) and upcoming resources-planned, ongoing or not yet public. The Banana Genome Hub aims at supporting the banana scientific community for basic, translational, and applied research and can be accessed at https://banana-genome-hub.southgreen.fr.}, } @article {pmid36478861, year = {2022}, author = {Abou Abdallah, R and Million, M and Delerce, J and Anani, H and Diop, A and Caputo, A and Zgheib, R and Rousset, E and Sidi Boumedine, K and Raoult, D and Fournier, PE}, title = {Pangenomic analysis of Coxiella burnetii unveils new traits in genome architecture.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1022356}, pmid = {36478861}, issn = {1664-302X}, abstract = {Coxiella burnetii is the etiological agent of Q fever, a worldwide zoonosis able to cause large outbreaks. The disease is polymorphic. Symptomatic primary infection is named acute Q fever and is associated with hepatitis, pneumonia, fever, and auto-immune complications while persistent focalized infections, mainly endocarditis, and vascular infections, occur in a minority of patients but are potentially lethal. In order to evaluate the genomic features, genetic diversity, evolution, as well as genetic determinants of antibiotic resistance, pathogenicity, and ability to cause outbreaks of Q fever, we performed a pangenomic analysis and genomic comparison of 75 C. burnetii strains including 63 newly sequenced genomes. Our analysis demonstrated that C. 
burnetii has an open pangenome, unique genes being found in many strains. In addition, pathogenicity islands were detected in all genomes. In consequence C. burnetii has a high genomic plasticity, higher than that of other intracellular bacteria. The core- and pan-genomes are made of 1,211 and 4,501 genes, respectively (ratio 0.27). The core gene-based phylogenetic analysis matched that obtained from multi-spacer typing and the distribution of plasmid types. Genomic characteristics were associated to clinical and epidemiological features. Some genotypes were associated to specific clinical forms and countries. MST1 genotype strains were associated to acute Q fever. A significant association was also found between clinical forms and plasmids. Strains harboring the QpRS plasmid were never found in acute Q fever and were only associated to persistent focalized infections. The QpDV and QpH1 plasmids were associated to acute Q fever. In addition, the Guyanese strain CB175, the most virulent strain to date, exhibited a unique MST genotype, a distinct COG profile and an important variation in gene number that may explain its unique pathogenesis. 
Therefore, strain-specific factors play an important role in determining the epidemiological and clinical manifestations of Q fever alongside with host-specific factors (valvular and vascular defects notably).}, } @article {pmid36476389, year = {2022}, author = {Djeghout, B and Bloomfield, SJ and Rudder, S and Elumogo, N and Mather, AE and Wain, J and Janecko, N}, title = {Comparative genomics of Campylobacter jejuni from clinical campylobacteriosis stool specimens.}, journal = {Gut pathogens}, volume = {14}, number = {1}, pages = {45}, pmid = {36476389}, issn = {1757-4749}, support = {BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {BACKGROUND: Campylobacter jejuni is a pervasive pathogen of major public health concern with a complex ecology requiring accurate and informative approaches to define pathogen diversity during outbreak investigations. Source attribution analysis may be confounded if the genetic diversity of a C. jejuni population is not adequately captured in a single specimen. The aim of this study was to determine the genomic diversity of C. jejuni within individual stool specimens from four campylobacteriosis patients. Direct plating and pre-culture filtration of one stool specimen per patient was used to culture multiple isolates per stool specimen. Whole genome sequencing and pangenome level analysis were used to investigate genomic diversity of C. 
jejuni within a patient.

RESULTS: A total of 92 C. jejuni isolates were recovered from four patients presenting with gastroenteritis. The number of isolates ranged from 13 to 30 per patient stool. Three patients yielded a single C. jejuni multilocus sequence type: ST-21 (n = 26, patient 4), ST-61 (n = 30, patient 1) and ST-2066 (n = 23, patient 2). Patient 3 was infected with two different sequence types [ST-51 (n = 12) and ST-354 (n = 1)]. Isolates belonging to the same sequence type from the same patient specimen shared 12-43 core non-recombinant SNPs and 0-20 frameshifts with each other, and the pangenomes of each sequence type consisted of 1406-1491 core genes and 231-264 accessory genes. However, neither the mutation nor the accessory genes were connected to a specific functional gene category.

CONCLUSIONS: Our findings show that the C. jejuni population recovered from an individual patient's stool are genetically diverse even within the same ST and may have shared common ancestors before specimens were obtained. The population is unlikely to have evolved from a single isolate at the time point of initial patient infection, leading us to conclude that patients were likely infected with a heterogeneous C. jejuni population. The diversity of the C. jejuni population found within individual stool specimens can inform future methodological approaches to attribution and outbreak investigations.}, } @article {pmid36476074, year = {2022}, author = {Ullah, A and Ullah Khan, S and Haq, MU and Ahmad, S and Irfan, M and Asif, M and Muhseen, ZT and Alkeraidees, MS and Allemailem, KS and Alrumaihi, F and Almatroudi, A}, title = {Computational study to investigate Proteus mirabilis proteomes for multi-epitope vaccine construct design.}, journal = {Journal of biomolecular structure \& dynamics}, volume = {}, number = {}, pages = {1--12}, doi = {10.1080/07391102.2022.2153920}, pmid = {36476074}, issn = {1538-0254}, abstract = {Proteus mirabilis is a gram-negative bacterium particularly known for its unique swarming ability. The swarming gives the bacteria ability to enhance adherence to the catheter surface and epithelium cells of the urethra to cause catheter associated urinary tract infections. P. mirabilis has evolved resistant to antibiotics. Additionally, there is no approved vaccine against P. mirabilis, thus demanding for identification of new vaccine targets. This gram-negative bacterium consists of 19,502 core proteins, out of which 19,063 are redundant proteins and remaining 439 are non-redundant proteins. The non-redundant proteins have 21 proteins present on the cell surface out of which 11 proteins are virulent. Antigenicity analysis predicted only 2 proteins as antigenic (fimbrial biogenesis outer membrane usher protein and ligand-gated channel protein). 
Four and seven B-cells epitopes were predicted from the former and latter proteins, respectively. The predicted B-cells epitopes were used for T-cells epitopes prediction. The predicted epitopes were linked to each other through GPGPG linkers and joined with cholera toxin beta subunit adjuvant. A multi-epitopes vaccine construct consisting of 226 residues was docked with MHC-I, MHC-II and TLR-4. The best docked complex in each case has binding energy of -714.6, -744.6 and -829.5 kcal/mol, respectively. Moreover, the docking results were validated through molecular dynamics simulation and binding free energies estimation. The net energy of -137.2 kcal/mol was calculated for vaccine-MHC-I complex, -133.39 kcal/mol for vaccine-MHC-II and -158.68 kcal/mol for vaccine-TLR-4 complex. The designed vaccine construct could provoke immune responses against targeted pathogen and may be used in experimental testing. Communicated by Ramaswamy H. Sarma.}, } @article {pmid36474047, year = {2022}, author = {Wang, M and Li, J and Qi, Z and Long, Y and Pei, L and Huang, X and Grover, CE and Du, X and Xia, C and Wang, P and Liu, Z and You, J and Tian, X and Ma, Y and Wang, R and Chen, X and He, X and Fang, DD and Sun, Y and Tu, L and Jin, S and Zhu, L and Wendel, JF and Zhang, X}, title = {Genomic innovation and regulatory rewiring during evolution of the cotton genus Gossypium.}, journal = {Nature genetics}, volume = {54}, number = {12}, pages = {1959--1971}, pmid = {36474047}, issn = {1546-1718}, mesh = {*Gossypium/genetics ; *Genomics ; Chromatin ; }, abstract = {Phenotypic diversity and evolutionary innovation ultimately trace to variation in genomic sequence and rewiring of regulatory networks. Here, we constructed a pan-genome of the Gossypium genus using ten representative diploid genomes. We document the genomic evolutionary history and the impact of lineage-specific transposon amplification on differential genome composition. 
The pan-3D genome reveals evolutionary connections between transposon-driven genome size variation and both higher-order chromatin structure reorganization and the rewiring of chromatin interactome. We linked changes in chromatin structures to phenotypic differences in cotton fiber and identified regulatory variations that decode the genetic basis of fiber length, the latter enabled by sequencing 1,005 transcriptomes during fiber development. We showcase how pan-genomic, pan-3D genomic and genetic regulatory data serve as a resource for delineating the evolutionary basis of spinnable cotton fiber. Our work provides insights into the evolution of genome organization and regulation and will inform cotton improvement by enabling regulome-based approaches.}, } @article {pmid36469788, year = {2022}, author = {Yebra, G and Harling-Lee, JD and Lycett, S and Aarestrup, FM and Larsen, G and Cavaco, LM and Seo, KS and Abraham, S and Norris, JM and Schmidt, T and Ehlers, MM and Sordelli, DO and Buzzola, FR and Gebreyes, WA and Gonçalves, JL and Dos Santos, MV and Zakaria, Z and Rall, VLM and Keane, OM and Niedziela, DA and Paterson, GK and Holmes, MA and Freeman, TC and Fitzgerald, JR}, title = {Multiclonal human origin and global expansion of an endemic bacterial pathogen of livestock.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {119}, number = {50}, pages = {e2211217119}, pmid = {36469788}, issn = {1091-6490}, support = {201531/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Female ; Humans ; Cattle ; Animals ; *Staphylococcus aureus/genetics ; Livestock/genetics ; *Staphylococcal Infections/epidemiology/veterinary/genetics ; Genome ; Host Specificity ; }, abstract = {Most new pathogens of humans and animals arise via switching events from distinct host species. However, our understanding of the evolutionary and ecological drivers of successful host adaptation, expansion, and dissemination are limited. 
Staphylococcus aureus is a major bacterial pathogen of humans and a leading cause of mastitis in dairy cows worldwide. Here we trace the evolutionary history of bovine S. aureus using a global dataset of 10,254 S. aureus genomes including 1,896 bovine isolates from 32 countries in 6 continents. We identified 7 major contemporary endemic clones of S. aureus causing bovine mastitis around the world and traced them back to 4 independent host-jump events from humans that occurred up to 2,500 y ago. Individual clones emerged and underwent clonal expansion from the mid-19th to late 20th century coinciding with the commercialization and industrialization of dairy farming, and older lineages have become globally distributed via established cattle trade links. Importantly, we identified lineage-dependent differences in the frequency of host transmission events between humans and cows in both directions revealing high risk clones threatening veterinary and human health. Finally, pangenome network analysis revealed that some bovine S. aureus lineages contained distinct sets of bovine-associated genes, consistent with multiple trajectories to host adaptation via gene acquisition. 
Taken together, we have dissected the evolutionary history of a major endemic pathogen of livestock providing a comprehensive temporal, geographic, and gene-level perspective of its remarkable success.}, } @article {pmid36469554, year = {2022}, author = {Zhao, C and Goldman, M and Smith, BJ and Pollard, KS}, title = {Genotyping Microbial Communities with MIDAS2: From Metagenomic Reads to Allele Tables.}, journal = {Current protocols}, volume = {2}, number = {12}, pages = {e604}, pmid = {36469554}, issn = {2691-1299}, support = {R01 HL160862/HL/NHLBI NIH HHS/United States ; }, mesh = {*Metagenome/genetics ; Genotype ; Alleles ; *Microbiota/genetics ; Nucleotides ; }, abstract = {The Metagenomic Intra-Species Diversity Analysis System 2 (MIDAS2) is a scalable pipeline that identifies single nucleotide variants and gene copy number variants in metagenomes using comprehensive reference databases built from public microbial genome collections (metagenotyping). MIDAS2 is the first metagenotyping tool with functionality to control metagenomic read mapping filters and to customize the reference database to the microbial community, features that improve the precision and recall of detected variants. In this article we present four basic protocols for the most common use cases of MIDAS2, along with supporting protocols for installation and use. In addition, we provide in-depth guidance on adjusting command line parameters, editing the reference database, optimizing hardware utilization, and understanding the metagenotyping results. All the steps of metagenotyping, from raw sequencing reads to population genetic analysis, are demonstrated with example data in two downloadable sequencing libraries of single-end metagenomic reads representing a mixture of multiple bacterial species. 
This set of protocols empowers users to accurately genotype hundreds of species in thousands of samples, providing rich genetic data for studying the evolution and strain-level ecology of microbial communities. © 2022 The Authors. Current Protocols published by Wiley Periodicals LLC. Basic Protocol 1: Species prescreening Basic Protocol 2: Download MIDAS reference database Basic Protocol 3: Population single nucleotide variant calling Basic Protocol 4: Pan-genome copy number variant calling Support Protocol 1: Installing MIDAS2 Support Protocol 2: Command line inputs Support Protocol 3: Metagenotyping with a custom collection of genomes Support Protocol 4: Metagenotyping with advanced parameters.}, } @article {pmid36469480, year = {2022}, author = {Pais, AKL and Santos, LVSD and Albuquerque, GMR and Farias, ARG and Silva Junior, WJ and Balbino, VQ and Silva, AMF and Gama, MASD and Souza, EB}, title = {Comparative genomics and phylogenomics of the Ralstonia solanacearum Moko ecotype and its symptomatological variants.}, journal = {Genetics and molecular biology}, volume = {45}, number = {4}, pages = {e20220038}, pmid = {36469480}, issn = {1415-4757}, abstract = {Banana tree bacterial wilt is caused by the Ralstonia solanacearum Moko ecotype. These strains vary in their symptom progression in banana, and are classified as typical Moko variants (phylotype IIA and IIB strains from across Central and South America), Bugtok variant (Philippines), and Sergipe facies (the states of Sergipe and Alagoas, Brazil). This study used comparative genomic and phylogenomic approaches to identify a correlation between the symptom progression of the Moko ecotypes based on the analysis of 23 available genomes. Average nucleotide identity and in silico DNA-DNA hybridization revealed a high correlation (>96% and >78%, respectively) between the genomes of Moko variants. 
Pan-genome analysis identified 21.3% of inheritable regions between representatives of the typical Moko and Sergipe facies variants, which could be traced to an abundance of exclusive homolog clusters. Moko ecotype genomes shared 1,951 orthologous genes, but representatives with typical symptoms did not display unique orthologues. Moreover, Bugtok disease and Sergipe facies genomes did not share any unique genes, suggesting convergent evolution to a shared symptom progression. Overall, genomic and phylogenomic analyses were insufficient to differentiate the Moko variants based on symptom progression.}, } @article {pmid36467270, year = {2022}, author = {Lee, JH and Venkatesh, J and Jo, J and Jang, S and Kim, GW and Kim, JM and Han, K and Ro, N and Lee, HY and Kwon, JK and Kim, YM and Lee, TH and Choi, D and Van Deynze, A and Hill, T and Kfir, N and Freiman, A and Davila Olivas, NH and Elkind, Y and Paran, I and Kang, BC}, title = {High-quality chromosome-scale genomes facilitate effective identification of large structural variations in hot and sweet peppers.}, journal = {Horticulture research}, volume = {9}, number = {}, pages = {uhac210}, pmid = {36467270}, issn = {2662-6810}, abstract = {Pepper (Capsicum annuum) is an important vegetable crop that has been subjected to intensive breeding, resulting in limited genetic diversity, especially for sweet peppers. Previous studies have reported pepper draft genome assemblies using short read sequencing, but their capture of the extent of large structural variants (SVs), such as presence-absence variants (PAVs), inversions, and copy-number variants (CNVs) in the complex pepper genome falls short. In this study, we sequenced the genomes of representative sweet and hot pepper accessions by long-read and/or linked-read methods and advanced scaffolding technologies. 
First, we developed a high-quality reference genome for the sweet pepper cultivar 'Dempsey' and then used the reference genome to identify SVs in 11 other pepper accessions and constructed a graph-based pan-genome for pepper. We annotated an average of 42 972 gene families in each pepper accession, defining a set of 19 662 core and 23 115 non-core gene families. The new pepper pan-genome includes informative variants, 222 159 PAVs, 12 322 CNVs, and 16 032 inversions. Pan-genome analysis revealed PAVs associated with important agricultural traits, including potyvirus resistance, fruit color, pungency, and pepper fruit orientation. Comparatively, a large number of genes are affected by PAVs, which is positively correlated with the high frequency of transposable elements (TEs), indicating TEs play a key role in shaping the genomic landscape of peppers. The datasets presented herein provide a powerful new genomic resource for genetic analysis and genome-assisted breeding for pepper improvement.}, } @article {pmid36466678, year = {2022}, author = {Núñez-Montero, K and Rojas-Villalta, D and Barrientos, L}, title = {Antarctic Sphingomonas sp. So64.6b showed evolutive divergence within its genus, including new biosynthetic gene clusters.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1007225}, pmid = {36466678}, issn = {1664-302X}, abstract = {INTRODUCTION: The antibiotic crisis is a major human health problem. Bioprospecting screenings suggest that proteobacteria and other extremophile microorganisms have biosynthetic potential for the production novel antimicrobial compounds. An Antarctic Sphingomonas strain (So64.6b) previously showed interesting antibiotic activity and elicitation response, then a relationship between environmental adaptations and its biosynthetic potential was hypothesized. 
We aimed to determine the genomic characteristics in So64.6b strain related to evolutive traits for the adaptation to the Antarctic environment that could lead to its diversity of potentially novel antibiotic metabolites.

METHODS: The complete genome sequence of the Antarctic strain was obtained and mined for Biosynthetic Gene Clusters (BGCs) and other unique genes related to adaptation to extreme environments. Comparative genome analysis based on multi-locus phylogenomics, BGC phylogeny, and pangenomics were conducted within the closest genus, aiming to determine the taxonomic affiliation and differential characteristics of the Antarctic strain.

RESULTS AND DISCUSSION: The Antarctic strain So64.6b showed a closest identity with Sphingomonas alpina, however containing a significant genomic difference of ortholog cluster related to degradation of multiple pollutants. Strain So64.6b had a total of six BGCs, which were predicted with low to no similarity with other reported clusters; three were associated with potential novel antibiotic compounds using ARTS tool. Phylogenetic and synteny analysis of a common BGC showed great diversity between Sphingomonas genus but grouping in clades according to similar isolation environments, suggesting an evolution of BGCs that could be linked to the specific ecosystems. Comparative genomic analysis also showed that Sphingomonas species isolated from extreme environments had the greatest number of predicted BGCs and a higher percentage of genetic content devoted to BGCs than the isolates from mesophilic environments. In addition, some extreme-exclusive clusters were found related to oxidative and thermal stress adaptations, while pangenome analysis showed unique resistance genes on the Antarctic strain included in genetic islands. Altogether, our results showed the unique genetic content on Antarctic strain Sphingomonas sp. 
So64.6, -a probable new species of this genetically divergent genus-, which could have potentially novel antibiotic compounds acquired to cope with Antarctic poly-extreme conditions.}, } @article {pmid36466658, year = {2022}, author = {Jesus, HNR and Rocha, DJPG and Ramos, RTJ and Silva, A and Brenig, B and Góes-Neto, A and Costa, MM and Soares, SC and Azevedo, V and Aguiar, ERGR and Martínez-Martínez, L and Ocampo, A and Alibi, S and Dorta, A and Pacheco, LGC and Navas, J}, title = {Pan-genomic analysis of Corynebacterium amycolatum gives insights into molecular mechanisms underpinning the transition to a pathogenic phenotype.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1011578}, pmid = {36466658}, issn = {1664-302X}, abstract = {Corynebacterium amycolatum is a nonlipophilic coryneform which is increasingly being recognized as a relevant human and animal pathogen showing multidrug resistance to commonly used antibiotics. However, little is known about the molecular mechanisms involved in transition from colonization to the MDR invasive phenotype in clinical isolates. In this study, we performed a comprehensive pan-genomic analysis of C. amycolatum, including 26 isolates from different countries. We obtained the novel genome sequences of 8 of them, which are multidrug resistant clinical isolates from Spain and Tunisia. They were analyzed together with other 18 complete or draft C. amycolatum genomes retrieved from GenBank. The species C. amycolatum presented an open pan-genome (α = 0.854905), with 3,280 gene families, being 1,690 (51.52%) in the core genome, 1,121 related to accessory genes (34.17%), and 469 related to unique genes (14.29%). Although some classic corynebacterial virulence factors are absent in the species C. amycolatum, we did identify genes associated with immune evasion, toxin, and antiphagocytosis among the predicted putative virulence factors. 
Additionally, we found genomic evidence for extensive acquisition of antimicrobial resistance genes through genomic islands.}, } @article {pmid36466249, year = {2022}, author = {Park, J and Jung, H and Mannaa, M and Lee, SY and Lee, HH and Kim, N and Han, G and Park, DS and Lee, SW and Lee, SW and Seo, YS}, title = {Genome-guided comparative in planta transcriptome analyses for identifying cross-species common virulence factors in bacterial phytopathogens.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1030720}, pmid = {36466249}, issn = {1664-462X}, abstract = {Plant bacterial disease is a complex outcome achieved through a combination of virulence factors that are activated during infection. However, the common virulence factors across diverse plant pathogens are largely uncharacterized. Here, we established a pan-genome shared across the following plant pathogens: Burkholderia glumae, Ralstonia solanacearum, and Xanthomonas oryzae pv. oryzae. By overlaying in planta transcriptomes onto the pan-genome, we investigated the expression profiles of common genes during infection. We found over 70% of identical patterns for genes commonly expressed by the pathogens in different plant hosts or infection sites. Co-expression patterns revealed the activation of a signal transduction cascade to recognize and respond to external changes within hosts. Using mutagenesis, we uncovered a relationship between bacterial virulence and functions highly conserved and shared in the studied genomes of the bacterial phytopathogens, including flagellar biosynthesis protein, C4-dicarboxylate ABC transporter, 2-methylisocitrate lyase, and protocatechuate 3,4-dioxygenase (PCD). In particular, the disruption of PCD gene led to attenuated virulence in all pathogens and significantly affected phytotoxin production in B. glumae. This PCD gene was ubiquitously distributed in most plant pathogens with high homology. 
In conclusion, our results provide cross-species in planta models for identifying common virulence factors, which can be useful for the protection of crops against diverse pathogens.}, } @article {pmid36466237, year = {2022}, author = {Tirnaz, S and Zandberg, J and Thomas, WJW and Marsh, J and Edwards, D and Batley, J}, title = {Application of crop wild relatives in modern breeding: An overview of resources, experimental and computational methodologies.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1008904}, pmid = {36466237}, issn = {1664-462X}, abstract = {Global agricultural industries are under pressure to meet the future food demand; however, the existing crop genetic diversity might not be sufficient to meet this expectation. Advances in genome sequencing technologies and availability of reference genomes for over 300 plant species reveals the hidden genetic diversity in crop wild relatives (CWRs), which could have significant impacts in crop improvement. There are many ex-situ and in-situ resources around the world holding rare and valuable wild species, of which many carry agronomically important traits and it is crucial for users to be aware of their availability. Here we aim to explore the available ex-/in- situ resources such as genebanks, botanical gardens, national parks, conservation hotspots and inventories holding CWR accessions. In addition we highlight the advances in availability and use of CWR genomic resources, such as their contribution in pangenome construction and introducing novel genes into crops. We also discuss the potential and challenges of modern breeding experimental approaches (e.g. de novo domestication, genome editing and speed breeding) used in CWRs and the use of computational (e.g. 
machine learning) approaches that could speed up utilization of CWR species in breeding programs towards crop adaptability and yield improvement.}, } @article {pmid36466225, year = {2022}, author = {Ma, J and Wei, H and Yu, X and Lv, Y and Zhang, Y and Qian, Q and Shang, L and Guo, L}, title = {Compared analysis with a high-quality genome of weedy rice reveals the evolutionary game of de-domestication.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1065449}, pmid = {36466225}, issn = {1664-462X}, abstract = {The weedy rice (Oryza sativa f. spontanea) harbors large numbers of excellent traits and genetic diversities, which serves as a valuable germplasm resource and has been considered as a typical material for research about de-domestication. However, there are relatively few reference genomes on weedy rice that severely limit exploiting these genetic resources and revealing more details about de-domestication events. In this study, a high-quality genome (~376.4 Mb) of weedy rice A02 was assembled based on Nanopore ultra-long platform with a coverage depth of about 79.3× and 35,423 genes were predicted. Compared to Nipponbare genome, 5,574 structural variations (SVs) were found in A02. Based on super pan-genome graph, population SVs of 238 weedy rice and cultivated rice accessions were identified using public resequencing data. Furthermore, the de-domestication sites of weedy rice and domestication sites of wild rice were analyzed and compared based on SVs and single-nucleotide polymorphisms (SNPs). Interestingly, an average of 2,198 genes about de-domestication could only be found by FST analysis based on SVs (SV-FST) while not by FST analysis based on SNPs (SNP-FST) in divergent region. Additionally, there was a low overlap between domestication and de-domestication intervals, which demonstrated that two different mechanisms existed in these events. 
Our finding could facilitate pinpointing of the evolutionary events that had shaped the genomic architecture of wild, cultivated, and weedy rice, and provide a good foundation for cloning of the superior alleles for breeding.}, } @article {pmid36461252, year = {2022}, author = {Xiang, X and Diao, E and Shang, Y and Song, M and He, Y}, title = {Rapid quantitative detection of Vibrio parahaemolyticus via high-fidelity target-based microfluidic identification.}, journal = {Food research international (Ottawa, Ont.)}, volume = {162}, number = {Pt A}, pages = {112032}, doi = {10.1016/j.foodres.2022.112032}, pmid = {36461252}, issn = {1873-7145}, mesh = {*Vibrio parahaemolyticus/genetics ; Microfluidics ; DNA Primers ; Excipients ; Food ; }, abstract = {With the rapid development of logistics, a growing number of pathogenic microorganisms has the means to spread worldwide using food as a carrier; thus, there is an urgent need to develop effective detection strategies to ensure food safety. By combining novel markers identified by pan-genome analysis and a digital recombinase-aided amplification (RAA) detection method based on a microfluidic chip, a strategy of high-fidelity target-based microfluidic identification (HFTMI) has been developed. Herein, a proof-of-concept study of HFTMI for rapid pathogen detection of V. parahaemolyticus was investigated. Specific primers designed for the gene group_41170 identified in the pan-genome analysis showed high sensitivity and a broad spectrum for the detection of V. parahaemolyticus. Different power systems were investigated to increase the partition rate on specifically designed chamber-based digital chips. The performance of HFTMI was greatly improved compared with qPCR. 
Collectively, this novel HFTMI system provides more reliable guidance for food safety testing.}, } @article {pmid36461065, year = {2022}, author = {Marone, MP and Singh, HC and Pozniak, CJ and Mascher, M}, title = {A technical guide to TRITEX, a computational pipeline for chromosome-scale sequence assembly of plant genomes.}, journal = {Plant methods}, volume = {18}, number = {1}, pages = {128}, pmid = {36461065}, issn = {1746-4811}, support = {SHAPE II, FKZ 031B0884//Bundesministerium für Bildung und Forschung/ ; ERC Starting Grant TRANSFER 949873//European Commission/ ; }, abstract = {BACKGROUND: As complete and accurate genome sequences are becoming easier to obtain, more researchers wish to get one or more of them to support their research endeavors. Reliable and well-documented sequence assembly workflows find use in reference or pangenome projects.

RESULTS: We describe modifications to the TRITEX genome assembly workflow motivated by the rise of fast and easy long-read contig assembly of inbred plant genomes and the routine deployment of the toolchains in pangenome projects. New features include the use as surrogates of or complements to dense genetic maps and the introduction of user-editable tables to make the curation of contig placements easier and more intuitive.

CONCLUSION: Even maximally contiguous sequence assemblies of the telomere-to-telomere sort, and to a yet greater extent, the fragmented kind require validation, correction, and comparison to reference standards. As pangenomics is burgeoning, these tasks are bound to become more widespread and TRITEX is one tool to get them done. This technical guide is supported by a step-by-step computational tutorial accessible under https://tritexassembly.bitbucket.io/ . The TRITEX source code is hosted under this URL: https://bitbucket.org/tritexassembly .}, } @article {pmid36454681, year = {2023}, author = {Prondzinsky, P and Toyoda, S and McGlynn, SE}, title = {The methanogen core and pangenome: conservation and variability across biology's growth temperature extremes.}, journal = {DNA research : an international journal for rapid publication of reports on genes and genomes}, volume = {30}, number = {1}, pages = {}, pmid = {36454681}, issn = {1756-1663}, support = {JPMJSP2106//JST SPRING/ ; }, mesh = {Temperature ; Phylogeny ; *Archaea/genetics/metabolism ; *Hot Temperature ; Genomics ; }, abstract = {Temperature is a key variable in biological processes. However, a complete understanding of biological temperature adaptation is lacking, in part because of the unique constraints among different evolutionary lineages and physiological groups. Here we compared the genomes of cultivated psychrotolerant and thermotolerant methanogens, which are physiologically related and span growth temperatures from -2.5°C to 122°C. Despite being phylogenetically distributed amongst three phyla in the archaea, the genomic core of cultivated methanogens comprises about one-third of a given genome, while the genome fraction shared by any two organisms decreases with increasing phylogenetic distance between them. 
Increased methanogenic growth temperature is associated with reduced genome size, and thermotolerant organisms—which are distributed across the archaeal tree—have larger core genome fractions, suggesting that genome size is governed by temperature rather than phylogeny. Thermotolerant methanogens are enriched in metal and other transporters, and psychrotolerant methanogens are enriched in proteins related to structure and motility. Observed amino acid compositional differences between temperature groups include proteome charge, polarity and unfolding entropy. Our results suggest that in the methanogens, shared physiology maintains a large, conserved genomic core even across large phylogenetic distances and biology's temperature extremes.}, } @article {pmid36454044, year = {2023}, author = {Pham, HM and Le, DT and Le, LT and Chu, PTM and Tran, LH and Pham, TT and Nguyen, HM and Luu, TT and Hoang, H and Chu, HH}, title = {A highly quality genome sequence of Penicillium oxalicum species isolated from the root of Ixora chinensis in Vietnam.}, journal = {G3 (Bethesda, Md.)}, volume = {13}, number = {2}, pages = {}, pmid = {36454044}, issn = {2160-1836}, mesh = {Phylogeny ; Vietnam ; *Genome ; *Penicillium/genetics/metabolism ; }, abstract = {Penicillium oxalicum has been reported as a multienzyme-producing fungus and is widely used in industry due to great potential for cellulase release. Until now, there are only 10 available genome assemblies of P. oxalicum species deposited in the GenBank database. In this study, the genome of the I1R1 strain isolated from the root of Ixora chinensis was completely sequenced by Pacbio Sequel sequencing technology, assembled into 8 chromosomes with the genome size of 30.8 Mb, as well as a mitogenome of 26 kb. The structural and functional analyses of the I1R1 genome revealed gene model annotations encoding an enzyme set involved in significant metabolic processes, along with cytochrome P450s and secondary metabolite biosynthesis. 
The comparative analysis of the P. oxalicum species based on orthology and gene family duplications indicated their large and closed pan-genome of 9,500 orthologous groups. This is valuable data for future phylogenetic and population genomics studies.}, } @article {pmid36453992, year = {2022}, author = {Rabanal, FA and Gräff, M and Lanz, C and Fritschi, K and Llaca, V and Lang, M and Carbonell-Bejerano, P and Henderson, I and Weigel, D}, title = {Pushing the limits of HiFi assemblies reveals centromere diversity between two Arabidopsis thaliana genomes.}, journal = {Nucleic acids research}, volume = {50}, number = {21}, pages = {12309--12327}, pmid = {36453992}, issn = {1362-4962}, mesh = {Sequence Analysis, DNA ; *Arabidopsis/genetics ; High-Throughput Nucleotide Sequencing ; Centromere/genetics ; DNA, Ribosomal ; }, abstract = {Although long-read sequencing can often enable chromosome-level reconstruction of genomes, it is still unclear how one can routinely obtain gapless assemblies. In the model plant Arabidopsis thaliana, other than the reference accession Col-0, all other accessions de novo assembled with long-reads until now have used PacBio continuous long reads (CLR). Although these assemblies sometimes achieved chromosome-arm level contigs, they inevitably broke near the centromeres, excluding megabases of DNA from analysis in pan-genome projects. Since PacBio high-fidelity (HiFi) reads circumvent the high error rate of CLR technologies, albeit at the expense of read length, we compared a CLR assembly of accession Eyach15-2 to HiFi assemblies of the same sample. The use of five different assemblers starting from subsampled data allowed us to evaluate the impact of coverage and read length. We found that centromeres and rDNA clusters are responsible for 71% of contig breaks in the CLR scaffolds, while relatively short stretches of GA/TC repeats are at the core of >85% of the unfilled gaps in our best HiFi assemblies. 
Since the HiFi technology consistently enabled us to reconstruct gapless centromeres and 5S rDNA clusters, we demonstrate the value of the approach by comparing these previously inaccessible regions of the genome between the Eyach15-2 accession and the reference accession Col-0.}, } @article {pmid36453910, year = {2022}, author = {Belloso Daza, MV and Almeida-Santos, AC and Novais, C and Read, A and Alves, V and Cocconcelli, PS and Freitas, AR and Peixe, L}, title = {Distinction between Enterococcus faecium and Enterococcus lactis by a gluP PCR-Based Assay for Accurate Identification and Diagnostics.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0326822}, pmid = {36453910}, issn = {2165-0497}, mesh = {Humans ; Anti-Bacterial Agents ; *Enterococcus faecium/genetics/isolation & purification ; Genome, Bacterial ; *Gram-Positive Bacterial Infections/diagnosis/microbiology ; *Polymerase Chain Reaction ; *Enterococcus/genetics/isolation & purification ; }, abstract = {It was recently proposed that Enterococcus faecium colonizing the human gut (previous clade B) actually corresponds to Enterococcus lactis. Our goals were to develop a PCR assay to rapidly differentiate these species and to discuss the main phenotypic and genotypic differences from a clinical perspective. The pan-genome of 512 genomes of E. faecium and E. lactis strains was analyzed to assess diversity in genes between the two species. Sequences were aligned to find the best candidate gene for designing species-specific primers, and their accuracy was tested with a collection of 382 enterococci. E. lactis isolates from clinical origins were further characterized by whole-genome sequencing (Illumina). Pan-genome analysis resulted in 12 gene variants, with gene gluP (rhomboid protease) being selected as the candidate for species differentiation. 
The nucleotide sequence of gluP diverged by 90 to 92% between sets, which allowed species identification through PCR with 100% specificity and no cross-reactivity. E. lactis strains were greatly pan-susceptible and not host specific. Hospital E. lactis isolates were susceptible to clinically relevant antibiotics, lacked infection-associated virulence markers, and were associated with patients presenting risk factors for enhanced bacterial translocation. Here, we propose a PCR-based assay using gluP for easy routine differentiation between E. faecium and E. lactis that could be implemented in different public health contexts. We further suggest that E. lactis, a dominant human gut species, can cross the gut barrier in severely ill, immunodeficient, and surgical patients. Knowing that bacterial translocation may be a sepsis promoter, the relevance of infections caused by E. lactis strains, even if they are pan-susceptible, should be explored. IMPORTANCE Enterococcus faecium is a WHO priority pathogen that causes severe and hard-to-treat human infections. It was recently proposed that E. faecium colonizing the human gut (previous clade B) actually corresponds to Enterococcus lactis; therefore, some of the human infections occurring globally are being misidentified. In this work, we developed a PCR-based rapid identification method for the differentiation of E. faecium and E. lactis and discussed the main phenotypic and genotypic differences of these species from a clinical perspective. We identified the gluP gene as the best candidate, based on the phylogenomic analysis of 512 published pan-genomes, and validated the PCR assay with a comprehensive collection of 382 enterococci obtained from different sources. Further detailed analysis of clinical E. lactis strains showed that they are highly susceptible to antibiotics and lack the typical virulence markers of E. 
faecium but are able to cause severe human infections in immunosuppressed patients, possibly in part due to gut barrier translocation.}, } @article {pmid36451103, year = {2022}, author = {Sarkar, S and Kamke, A and Ward, K and Hartung, E and Ran, Q and Feehan, B and Galliart, M and Jumpponen, A and Johnson, L and Lee, STM}, title = {Pseudomonas cultivated from Andropogon gerardii rhizosphere show functional potential for promoting plant host growth and drought resilience.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {784}, pmid = {36451103}, issn = {1471-2164}, support = {OIA-1656006//National Science Foundation/ ; 2020-67019-31803//National Institute of Food and Agriculture/ ; }, mesh = {*Andropogon ; Rhizosphere ; Droughts ; Pseudomonas ; Phylogeny ; *Poa ; Nitrogen ; Nitrate Reductases ; }, abstract = {BACKGROUND: Climate change will result in more frequent droughts that can impact soil-inhabiting microbiomes (rhizobiomes) in the agriculturally vital North American perennial grasslands. Rhizobiomes have contributed to enhancing drought resilience and stress resistance properties in plant hosts. In the predicted events of more future droughts, how the changing rhizobiome under environmental stress can impact the plant host resilience needs to be deciphered. There is also an urgent need to identify and recover candidate microorganisms along with their functions, involved in enhancing plant resilience, enabling the successful development of synthetic communities.

RESULTS: In this study, we used the combination of cultivation and high-resolution genomic sequencing of bacterial communities recovered from the rhizosphere of a tallgrass prairie foundation grass, Andropogon gerardii. We cultivated the plant host-associated microbes under artificial drought-induced conditions and identified the microbe(s) that might play a significant role in the rhizobiome of Andropogon gerardii under drought conditions. Phylogenetic analysis of the non-redundant metagenome-assembled genomes (MAGs) identified a bacterial genome of interest - MAG-Pseudomonas. Further metabolic pathway and pangenome analyses recovered genes and pathways related to stress responses including ACC deaminase; nitrogen transformation including assimilatory nitrate reductase in MAG-Pseudomonas, which might be associated with enhanced drought tolerance and growth for Andropogon gerardii.

CONCLUSIONS: Our data indicated that the metagenome-assembled MAG-Pseudomonas has the functional potential to contribute to the plant host's growth during stressful conditions. Our study also suggested the nitrogen transformation potential of MAG-Pseudomonas that could impact Andropogon gerardii growth in a positive way. The cultivation of MAG-Pseudomonas sets the foundation to construct a successful synthetic community for Andropogon gerardii. To conclude, stress resilience mediated through genes ACC deaminase, nitrogen transformation potential through assimilatory nitrate reductase in MAG-Pseudomonas could place this microorganism as an important candidate of the rhizobiome aiding the plant host resilience under environmental stress. This study, therefore, provided insights into the MAG-Pseudomonas and its potential to optimize plant productivity under ever-changing climatic patterns, especially in frequent drought conditions.}, } @article {pmid36449159, year = {2023}, author = {Groza, C and Bourque, G and Goubert, C}, title = {A Pangenome Approach to Detect and Genotype TE Insertion Polymorphisms.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2607}, number = {}, pages = {85--94}, pmid = {36449159}, issn = {1940-6029}, mesh = {Humans ; *DNA Transposable Elements/genetics ; Genotype ; *Polymorphism, Genetic ; Haplotypes ; Genome, Human ; }, abstract = {Pangenome graphs are flexible data structures that contain the genetic variation that exists in a population of genomes and describe the sequences of the many possible ensuing haplotypes. Here, we use such a pangenome graph to represent and genotype transposable element (TE) polymorphisms. 
By combining the transposable element annotation (Alus, L1s, and SVAs) of the human genome reference with novel transposable element insertions observed in two high-quality assemblies (HG002 and HG00733), we show how to create a transposable element pangenome that consists of ~1.2 million reference and 2939 non-reference transposable elements. We then demonstrate this approach by aligning short-read sequencing data and genotyping transposable element deletions and insertions with reasonable specificity and sensitivity (0.85 F1-score).}, } @article {pmid36448683, year = {2023}, author = {Garrison, E and Guarracino, A}, title = {Unbiased pangenome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {1}, pages = {}, pmid = {36448683}, issn = {1367-4811}, support = {U01 DA047638/DA/NIDA NIH HHS/United States ; //NIH/ ; //Human Technopole in Milan/ ; #2118709//NSF PPoSS/ ; }, mesh = {Sequence Analysis, DNA ; *Algorithms ; *Software ; Genome ; Documentation ; }, abstract = {MOTIVATION: Pangenome variation graphs model the mutual alignment of collections of DNA sequences. A set of pairwise alignments implies a variation graph, but there are no scalable methods to generate such a graph from these alignments. Existing related approaches depend on a single reference, a specific ordering of genomes or a de Bruijn model based on a fixed k-mer length. A scalable, self-contained method to build pangenome graphs without such limitations would be a key step in pangenome construction and manipulation pipelines.

RESULTS: We design the seqwish algorithm, which builds a variation graph from a set of sequences and alignments between them. We first transform the alignment set into an implicit interval tree. To build up the variation graph, we query this tree-based representation of the alignments to reduce transitive matches into single DNA segments in a sequence graph. By recording the mapping from input sequence to output graph, we can trace the original paths through this graph, yielding a pangenome variation graph. We present an implementation that operates in external memory, using disk-backed data structures and lock-free parallel methods to drive the core graph induction step. We demonstrate that our method scales to very large graph induction problems by applying it to build pangenome graphs for several species.

seqwish is published as free software under the MIT open source license. Source code and documentation are available at https://github.com/ekg/seqwish. seqwish can be installed via Bioconda https://bioconda.github.io/recipes/seqwish/README.html or GNU Guix https://github.com/ekg/guix-genomics/blob/master/seqwish.scm.}, } @article {pmid36447475, year = {2022}, author = {Moniruzzaman, M and Erazo-Garcia, MP and Aylward, FO}, title = {Endogenous giant viruses contribute to intraspecies genomic variability in the model green alga Chlamydomonas reinhardtii.}, journal = {Virus evolution}, volume = {8}, number = {2}, pages = {veac102}, pmid = {36447475}, issn = {2057-1577}, support = {R35 GM147290/GM/NIGMS NIH HHS/United States ; }, abstract = {Chlamydomonas reinhardtii is a unicellular eukaryotic alga that has been studied as a model organism for decades. Despite an extensive history as a model system, phylogenetic and genetic characteristics of viruses infecting this alga have remained elusive. We analyzed high-throughput genome sequence data of C. reinhardtii field isolates, and in six we discovered sequences belonging to endogenous giant viruses that reach up to several 100 kb in length. In addition, we have also discovered the entire genome of a closely related giant virus that is endogenized within the genome of Chlamydomonas incerta, the closest sequenced relative of C. reinhardtii. Endogenous giant viruses add hundreds of new gene families to the host strains, highlighting their contribution to the pangenome dynamics and interstrain genomic variability of C. reinhardtii. 
Our findings suggest that the endogenization of giant viruses may have important implications for structuring the population dynamics and ecology of protists in the environment.}, } @article {pmid36445094, year = {2022}, author = {Yu, Y and Cheng, W and Chen, X and Guo, Q and Cao, H}, title = {Cyanobacterial Blooms Are Not a Result of Positive Selection by Freshwater Eutrophication.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0319422}, pmid = {36445094}, issn = {2165-0497}, mesh = {Phylogeny ; *Lakes/microbiology ; *Cyanobacteria/genetics ; Harmful Algal Bloom ; Water ; }, abstract = {Long-standing cyanobacterial harmful algal blooms (CyanoHABs) are known to result from synergistic interaction between elevated nutrients and superior ecophysiology of cyanobacteria. However, it remains to be determined whether CyanoHABs are a result of positive selection by eutrophic waters. To address this, we conducted molecular evolutionary analyses on the genomes of 9 bloom-forming cyanobacteria, combined with pangenomics and metatranscriptomics. The results showed no positive selection by water eutrophication. Instead, all homologous genes in the species are under strong purifying selection based on the ratio of divergence at nonsynonymous and synonymous sites (dN/dS) and phylogeny. The dN/dS < 0.85 (median = 0.3) for all homologous genes are similar between the genes in the pathways driving CyanoHABs and housekeeping functions. Phylogenetic support for non-positive selection comes from the mixed clustering of strains: strains of the same species from diverse geographic origins form the same clusters, while strains from the same origins form different clusters. Further support lies in the codon adaptation index (CAI) and single nucleotide polymorphism (SNP). The CAI ranged from 0.42 to 0.9 (mean = 0.75), which indicates high-level codon usage bias; the pathways for CyanoHABs and housekeeping functions showed a similar CAI. 
Interestingly, CAI was negatively correlated with gene expression in 3 metatranscriptomes. The numbers of SNPs were concentrated around 5 to 50. As the SNP number increases, the gene expression level decreases. These negative correlations agree with the population-level dN/dS and phylogeny in supporting purifying selection in bloom-forming cyanobacteria. In summary, superior ecophysiology appears to be acquired prior to water eutrophication. IMPORTANCE CyanoHABs are global environmental hazards, and their mechanisms of action are being intensively investigated. On an ecological scale, CyanoHABs are consequences of synergistic interactions between biological functions and elevated nutrients in eutrophic waters. On an evolutionary scale, one important question is how bloom-forming cyanobacteria acquire these superior biological functions. There are several possibilities, including adaptive evolution and horizontal gene transfer. Here, we explored the possibility of positive selection. We reasoned that there are two possible periods for cyanobacteria to acquire these functions: before the onset of water eutrophication or during water eutrophication. Either way, there should be molecular signatures in protein sequences for positive selection. 
Interestingly, we found no positive selection by water eutrophication, but strong purifying selection instead on nearly all the genes, suggesting these superior functions aiding CyanoHABs are acquired prior to water eutrophication.}, } @article {pmid36445082, year = {2022}, author = {Cheng, S and Fleres, G and Chen, L and Liu, G and Hao, B and Newbrough, A and Driscoll, E and Shields, RK and Squires, KM and Chu, TY and Kreiswirth, BN and Nguyen, MH and Clancy, CJ}, title = {Within-Host Genotypic and Phenotypic Diversity of Contemporaneous Carbapenem-Resistant Klebsiella pneumoniae from Blood Cultures of Patients with Bacteremia.}, journal = {mBio}, volume = {13}, number = {6}, pages = {e0290622}, pmid = {36445082}, issn = {2150-7511}, mesh = {Animals ; Mice ; Klebsiella pneumoniae/genetics ; Blood Culture ; Anti-Bacterial Agents/therapeutic use ; Carbapenems ; *Carbapenem-Resistant Enterobacteriaceae/genetics ; *Bacteremia/microbiology ; *Sepsis/drug therapy ; *Klebsiella Infections/microbiology ; Microbial Sensitivity Tests ; beta-Lactamases ; }, abstract = {It is unknown whether bacterial bloodstream infections (BSIs) are commonly caused by single organisms or mixed microbial populations. We hypothesized that contemporaneous carbapenem-resistant Klebsiella pneumoniae (CRKP) strains from blood cultures of individual patients are genetically and phenotypically distinct. We determined short-read whole-genome sequences of 10 sequence type 258 (ST258) CRKP strains from blood cultures in each of 6 patients (Illumina HiSeq). Strains clustered by patient by core genome and pan-genome phylogeny. In 5 patients, there was within-host strain diversity by gene mutations, presence/absence of antibiotic resistance or virulence genes, and/or plasmid content. Accessory gene phylogeny revealed strain diversity in all 6 patients. Strains from 3 patients underwent long-read sequencing for genome completion (Oxford Nanopore) and phenotypic testing. 
Genetically distinct strains within individuals exhibited significant differences in carbapenem and other antibiotic responses, capsular polysaccharide (CPS) production, mucoviscosity, and/or serum killing. In 2 patients, strains differed significantly in virulence during mouse BSIs. Genetic or phenotypic diversity was not observed among strains recovered from blood culture bottles seeded with index strains from the 3 patients and incubated in vitro at 37°C. In conclusion, we identified genotypic and phenotypic variant ST258 CRKP strains from blood cultures of individual patients with BSIs, which were not detected by the clinical laboratory or in seeded blood cultures. The data suggest a new paradigm of CRKP population diversity during BSIs, at least in some patients. If validated for BSIs caused by other bacteria, within-host microbial diversity may have implications for medical, microbiology, and infection prevention practices and for understanding antibiotic resistance and pathogenesis. IMPORTANCE The long-standing paradigm for pathogenesis of bacteremia is that, in most cases, a single organism passes through a bottleneck and establishes itself in the bloodstream (single-organism hypothesis). In keeping with this paradigm, standard practice in processing positive microbiologic cultures is to test single bacterial strains from morphologically distinct colonies. This study is the first genome-wide analysis of within-host diversity of Klebsiella pneumoniae strains recovered from individual patients with bloodstream infections (BSIs). Our finding that positive blood cultures comprised genetically and phenotypically heterogeneous carbapenem-resistant K. pneumoniae strains challenges the single-organism hypothesis and suggests that at least some BSIs are caused by mixed bacterial populations that are unrecognized by the clinical laboratory. 
The data support a model of pathogenesis in which pressures in vivo select for strain variants with particular antibiotic resistance or virulence attributes and raise questions about laboratory protocols and treatment decisions directed against single strains.}, } @article {pmid36445077, year = {2022}, author = {Conde, C and Thézé, J and Cochard, T and Rossignol, MN and Fourichon, C and Delafosse, A and Joly, A and Guatteo, R and Schibler, L and Bannantine, JP and Biet, F}, title = {Genetic Features of Mycobacterium avium subsp. paratuberculosis Strains Circulating in the West of France Deciphered by Whole-Genome Sequencing.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0339222}, pmid = {36445077}, issn = {2165-0497}, mesh = {Animals ; Cattle ; *Mycobacterium avium subsp. paratuberculosis/genetics ; *Paratuberculosis/epidemiology/microbiology ; Phylogeny ; Longitudinal Studies ; Ruminants ; }, abstract = {Paratuberculosis is a chronic infection of the intestine, mainly the ileum, caused by Mycobacterium avium subsp. paratuberculosis in cattle and other ruminants. This enzootic disease is present worldwide and has a negative impact on the dairy cattle industry. For this subspecies, the current genotyping tools do not provide the needed resolution to investigate the genetic diversity of closely related strains. These limitations can be overcome by the application of whole-genome sequencing (WGS), particularly for clonal populations such as M. avium subsp. paratuberculosis. The purpose of the present study was to undertake a WGS analysis with a panel of 200 animal field M. avium subsp. paratuberculosis strains selected based on a previous large-scale longitudinal study of Prim'Holstein and Normande dairy breeds naturally infected with M. avium subsp. paratuberculosis in the West of France. The pangenome analysis revealed that M. avium subsp. paratuberculosis has a closed pangenome. 
The phylogeny, based on alignment of 2,786 nonhomoplasic single nucleotide polymorphisms (SNPs), showed that the strain population is structured into three clades independently of the cattle breed or geographic distribution. The increased resolution of phylogeny obtained by WGS confirmed the homoplasic nature of the markers variable-number tandem repeat (VNTR) and short sequence repeat (SSR) used for M. avium subsp. paratuberculosis genotyping. These phylogenetic data also revealed independent introductions of the different genotypes in two main waves since at least 2003. WGS applied to this sampling demonstrated the presence of mixed infections in herds and at the individual animal level. Collectively, the phylogeny results inferred with French isolates compared to M. avium subsp. paratuberculosis isolates from around the world suggest introductions of M. avium subsp. paratuberculosis genotypes through the animal trade. Relationships between genetic traits and epidemiological data can now be investigated to better understand transmission dynamics of the disease. IMPORTANCE Mycobacterium avium subsp. paratuberculosis causes Johne's disease in ruminants, which is present worldwide and has significant negative impacts on the dairy cattle industry and animal welfare. Prevention and control of M. avium subsp. paratuberculosis infection are hampered by knowledge gaps in strain virulence, genotype distribution, and transmission dynamics. This work has revealed new insights into M. avium subsp. paratuberculosis strains currently circulating in western France and how they are related to strains circulating globally. We applied whole-genome sequencing (WGS) to obtain comprehensive information on genome evolution and discrimination of closely related strains. This approach revealed the history of M. avium subsp. paratuberculosis infection in France, refined the pangenomic characteristics of M. avium subsp. 
paratuberculosis, and demonstrated the existence of mixed infection in animals. Finally, this study identified predominant genotypes, which allow a better understanding of disease transmission dynamics. This information will facilitate tracking of this pathogen on farms and across agricultural regions, thus informing transmission pathways and disease control points.}, } @article {pmid36437921, year = {2022}, author = {Singh, V and Pandey, S and Bhardwaj, A}, title = {From the reference human genome to human pangenome: Premise, promise and challenge.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {1042550}, pmid = {36437921}, issn = {1664-8021}, abstract = {The Reference Human Genome remains the single most important resource for mapping genetic variations and assessing their impact. However, it is monophasic, incomplete and not representative of the variation that exists in the population. Given the extent of ethno-geographic diversity and the consequent diversity in clinical manifestations of these variations, population specific references were developed overtime. The dramatically plummeting cost of sequencing whole genomes and the advent of third generation long range sequencers allowing accurate, error free, telomere-to-telomere assemblies of human genomes present us with a unique and unprecedented opportunity to develop a more composite standard reference consisting of a collection of multiple genomes that capture the maximal variation existing in the population, with the deepest annotation possible, enabling a realistic, reliable and actionable estimation of clinical significance of specific variations. The Human Pangenome Project thus is a logical next step promising a more accurate and global representation of genomic variations. The pangenome effort must be reciprocally complemented with precise variant discovery tools and exhaustive annotation to ensure unambiguous clinical assessment of the variant in ethno-geographical context. 
Here we discuss a broad roadmap, the challenges and way forward in developing a universal pangenome reference including data visualization techniques and integration of prior knowledge base in the new graph based architecture and tools to submit, compare, query, annotate and retrieve relevant information from the pangenomes. The biggest challenge, however, will be the ethical, legal and social implications and the training of human resource to the new reference paradigm.}, } @article {pmid36436132, year = {2022}, author = {Zoaiter, M and Magdy Wasfy, R and Caputo, A and Fenollar, F and Zeaiter, Z and Fournier, PE and Houhamdi, L}, title = {Streptococcus bouchesdurhonensis sp. nov. isolated from a bronchoalveolar lavage of a patient with pneumonia.}, journal = {Archives of microbiology}, volume = {205}, number = {1}, pages = {3}, pmid = {36436132}, issn = {1432-072X}, mesh = {Humans ; Aged ; RNA, Ribosomal, 16S/genetics ; Phylogeny ; *Genome, Bacterial ; DNA, Bacterial/genetics ; Streptococcus/genetics ; Bronchoalveolar Lavage ; *Pneumonia/genetics ; }, abstract = {Strain Marseille-Q6994 was isolated from a 72-year-old patient with pneumonia from Bouches-du-Rhône department, in France. Cells were Gram positive, non-motile, catalase and oxidase-negative cocci. The major fatty acids were hexadecanoic (47.4%) and tetradecanoic acids (28.3%). 16S rRNA gene sequence comparison suggested that strain Marseille-Q6994 was affiliated to the Streptococcus genus. GroEL phylogenetic analysis separated strain Marseille-Q6994 in a distinct branch from the closely related Streptococcus-type strains with standing in nomenclature. Whole genome sequencing-based methods (OrthoAverage Nucleotide Identity, digital DNA-DNA hybridization and pangenome analysis) supported the classification of the strain into a novel species. Therefore, based on the phenotypic, genomic, and phylogenetic analyses, we propose the name Streptococcus bouchesdurhonensis sp. 
nov for which strain Marseille-Q6994[T] (CSUR Marseille-Q6994 = DSMZ 113892) is the type strain.}, } @article {pmid36432770, year = {2022}, author = {Jha, UC and Nayyar, H and von Wettberg, EJB and Naik, YD and Thudi, M and Siddique, KHM}, title = {Legume Pangenome: Status and Scope for Crop Improvement.}, journal = {Plants (Basel, Switzerland)}, volume = {11}, number = {22}, pages = {}, pmid = {36432770}, issn = {2223-7747}, abstract = {In the last decade, legume genomics research has seen a paradigm shift due to advances in genome sequencing technologies, assembly algorithms, and computational genomics that enabled the construction of high-quality reference genome assemblies of major legume crops. These advances have certainly facilitated the identification of novel genetic variants underlying the traits of agronomic importance in many legume crops. Furthermore, these robust sequencing technologies have allowed us to study structural variations across the whole genome in multiple individuals and at the species level using 'pangenome analysis.' 
This review updates the progress of constructing pangenome assemblies for various legume crops and discusses the prospects for these pangenomes and how to harness the information to improve various traits of economic importance through molecular breeding to increase genetic gain in legumes and tackle the increasing global food crisis.}, } @article {pmid36429532, year = {2022}, author = {Almuhayawi, MS and Al Jaouni, SK and Selim, S and Alkhalifah, DHM and Marc, RA and Aslam, S and Poczai, P}, title = {Integrated Pangenome Analysis and Pharmacophore Modeling Revealed Potential Novel Inhibitors against Enterobacter xiangfangensis.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {22}, pages = {}, pmid = {36429532}, issn = {1660-4601}, mesh = {*Bacterial Proteins/genetics/metabolism ; *Enterobacter/genetics/metabolism ; Genome, Bacterial ; Uridine Diphosphate ; }, abstract = {Enterobacter xiangfangensis is a novel, multidrug-resistant pathogen belonging to the Enterobacter genus and has the ability to acquire resistance to multiple antibiotic classes. However, there is currently no registered E. xiangfangensis drug on the market that has been shown to be effective. Hence, there is an urgent need to identify novel therapeutic targets and effective treatments for E. xiangfangensis. In the current study, a bacterial pan genome analysis and subtractive proteomics approach was employed to the core proteomes of six strains of E. xiangfangensis using several bioinformatic tools, software, and servers. However, 2611 nonredundant proteins were predicted from the 21,720 core proteins of core proteome. Out of 2611 nonredundant proteins, 372 were obtained from Geptop2.0 as essential proteins. After the subtractive proteomics and subcellular localization analysis, only 133 proteins were found in cytoplasm. 
All cytoplasmic proteins were examined using BLASTp against the virulence factor database, which classifies 20 therapeutic targets as virulent. Out of these 20, 3 cytoplasmic proteins: ferric iron uptake transcriptional regulator (FUR), UDP-2,3diacylglucosamine diphosphatase (UDP), and lipid-A-disaccharide synthase (lpxB) were chosen as potential drug targets. These drug targets are important for bacterial survival, virulence, and growth and could be used as therapeutic targets. More than 2500 plant chemicals were used to molecularly dock these proteins. Furthermore, the lowest-binding energetic docked compounds were found. The top five hit compounds, Adenine, Mollugin, Xanthohumol C, Sakuranetin, and Toosendanin demonstrated optimum binding against all three target proteins. Furthermore, molecular dynamics simulations and MM/GBSA analyses validated the stability of ligand-protein complexes and revealed that these compounds could serve as potential E. xiangfangensis replication inhibitors. Consequently, this study marks a significant step forward in the creation of new and powerful drugs against E. xiangfangensis. Future studies should validate these targets experimentally to prove their function in E. xiangfangensis survival and virulence.}, } @article {pmid36427110, year = {2022}, author = {González-Castillo, A and Carballo, JL and Bautista-Guerrero, E}, title = {Genomics, Phylogeny, and in Silico Phenotyping of Nitrosopumilus Genus.}, journal = {Current microbiology}, volume = {80}, number = {1}, pages = {3}, pmid = {36427110}, issn = {1432-0991}, support = {254806//CONACYT-SEP/ ; }, mesh = {Animals ; Phylogeny ; *Genomics ; Archaea ; *Porifera ; Multilocus Sequence Typing ; }, abstract = {The present study reports the first genome of Nitrosopumilus extracted from the marine sponge Thoosa mismalolli. The genomic study of Nitrosopumilus genus using seven genomes type strains (N. maritimus, N. piranensis, N. zosterae, N. ureiphilus, N. adriaticus, N. 
oxyclinae and N. cobalaminigenes), four genomes Candidatus species (Ca. N. koreensis, Ca. N. sp. AR2, Ca. N. salaria BD31, and SZUA-335), and six reference genomes (SI075, SI0036, SI0060, SI0034, SI0048, and bin36o) isolated from marine sponge, a tropical marine fish tank, dimly lit deep coastal waters, the lower euphotic zone of coastal waters, near-surface sediment, and MAG N. sp NMAG03 isolated from Thoosa mismalolli was performed. These genomes were characterized by means of a polyphasic approach comprising multilocus sequence analysis (MLSA) of 139 single-copy genes (SCG), core-pangenome, ANI, and in silico phenotypic characterization. We found that the genomes of the Nitrosopumilus genus formed three separate clusters (A, B, and C) based in 139 SCG sequence similarity. The genomes showed values between 75.2 and 99.5% for ANI, the core genome consisted of 168 gene families and the pangenome of 6,011 gene families. Based on the genomic analyses performed, the cluster A may contain a potential new species (NMAG03), and the cluster C could be represented by three new species of the genus. Finally, based on the results shown in this polyphasic approach, we support the use of the integrated approach for genomic analysis of poorly studied genera.}, } @article {pmid36425027, year = {2022}, author = {Gtari, M}, title = {Taxogenomic status of phylogenetically distant Frankia clusters warrants their elevation to the rank of genus: A description of Protofrankia gen. nov., Parafrankia gen. nov., and Pseudofrankia gen. nov. as three novel genera within the family Frankiaceae.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1041425}, pmid = {36425027}, issn = {1664-302X}, abstract = {The genus Frankia is at present the sole genus in the family Frankiaceae and encompasses filamentous, sporangia-forming actinomycetes principally isolated from root nodules of taxonomically disparate dicotyledonous hosts named actinorhizal plants. 
Multiple independent phylogenetic analyses agree with the division of the genus Frankia into four well-supported clusters. Within these clusters, Frankia strains are well defined based on host infectivity range, mode of infection, morphology, and their behaviour in culture. In this study, phylogenomics, overall genome related indices (OGRI), together with available data sets for phenotypic and host-plant ranges available for the type strains of Frankia species, were considered. The robustness and the deep radiation observed in Frankia at the subgeneric level, fulfilling the primary principle of phylogenetic systematics, were strengthened by establishing genome criteria for new genus demarcation boundaries. Therefore, the taxonomic elevation of the Frankia clusters to the rank of the genus is proposed. The genus Frankia should be revised to encompass cluster 1 species only and three novel genera, Protofrankia gen. nov., Parafrankia gen. nov., and Pseudofrankia gen. nov., are proposed to accommodate clusters 2, 3, and 4 species, respectively. New combinations for validly named species are also provided.}, } @article {pmid36423113, year = {2022}, author = {Swetha, RG and Basu, S and Ramaiah, S and Anbarasu, A}, title = {Multi-Epitope Vaccine for Monkeypox Using Pan-Genome and Reverse Vaccinology Approaches.}, journal = {Viruses}, volume = {14}, number = {11}, pages = {}, pmid = {36423113}, issn = {1999-4915}, mesh = {Child ; Humans ; Vaccinology ; *Monkeypox ; Molecular Docking Simulation ; Epitopes, B-Lymphocyte ; *Vaccines ; }, abstract = {Outbreaks of monkeypox virus infections have imposed major health concerns worldwide, with high morbidity threats to children and immunocompromised adults. Although repurposed drugs and vaccines are being used to curb the disease, the evolving traits of the virus, exhibiting considerable genetic dynamicity, challenge the limits of a targeted treatment. 
A pan-genome-based reverse vaccinology approach can provide fast and efficient solutions to resolve persistent inconveniences in experimental vaccine design during an outbreak-exigency. The approach encompassed screening of available monkeypox whole genomes (n = 910) to identify viral targets. From 102 screened viral targets, viral proteins L5L, A28, and L5 were finalized based on their location, solubility, and antigenicity. The potential T-cell and B-cell epitopes were extracted from the proteins using immunoinformatics tools and algorithms. Multiple vaccine constructs were designed by combining the epitopes. Based on immunological properties, chemical stability, and structural quality, a novel multi-epitopic vaccine construct, V4, was finalized. Flexible-docking and coarse-dynamics simulation portrayed that the V4 had high binding affinity towards human HLA-proteins (binding energy < -15.0 kcal/mol) with low conformational fluctuations (<1 Å). Thus, the vaccine construct (V4) may act as an efficient vaccine to induce immunity against monkeypox, which encourages experimental validation and similar approaches against emerging viral infections.}, } @article {pmid36421834, year = {2022}, author = {Jalil, M and Quddos, F and Anwer, F and Nasir, S and Rahman, A and Alharbi, M and Alshammari, A and Alshammari, HK and Ali, A}, title = {Comparative Pan-Genomic Analysis Revealed an Improved Multi-Locus Sequence Typing Scheme for Staphylococcus aureus.}, journal = {Genes}, volume = {13}, number = {11}, pages = {}, pmid = {36421834}, issn = {2073-4425}, mesh = {Humans ; Multilocus Sequence Typing/methods ; Staphylococcus aureus/genetics ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Phylogeny ; *Staphylococcal Infections/epidemiology ; Genomics ; }, abstract = {The growing prevalence of antibiotic-resistant Staphylococcus aureus strains mandates selective susceptibility testing and epidemiological investigations. 
It also draws attention to an efficient typing strategy. Whole genome sequencing helps in genetic comparison, strain differentiation, and typing; however, it is not that cost-effective. In comparison, Multi-Locus Sequence Typing (MLST) is an efficient typing method employed for bacterial strain typing and characterizations. In this paper, a comprehensive pangenome and phylogenetic analysis of 502/1279 S. aureus genomes is carried out to understand the species divergence. Additionally, the current Multi-Locus Sequence Typing (MLST) scheme was evaluated, and genes were excluded or substituted by alternative genes based on reported shortcomings, genomic data, and statistical scores calculated. The data generated were helpful in devising a new Multi-Locus Sequence Typing (MLST) scheme for the efficient typing of S. aureus strains. The revised scheme is now a blend of previously used genes and new candidate genes. The genes yQil, aroE, and gmk are replaced with better gene candidates, opuCC, aspS, and rpiB, based on their genome localization, representation, and statistical scores. 
Therefore, the proposed Multi-Locus Sequence Typing (MLST) method offers a greater resolution with 58 sequence types (STs) in comparison to the prior scheme's 42 STs.}, } @article {pmid36420896, year = {2023}, author = {Frankish, A and Carbonell-Sala, S and Diekhans, M and Jungreis, I and Loveland, JE and Mudge, JM and Sisu, C and Wright, JC and Arnan, C and Barnes, I and Banerjee, A and Bennett, R and Berry, A and Bignell, A and Boix, C and Calvet, F and Cerdán-Vélez, D and Cunningham, F and Davidson, C and Donaldson, S and Dursun, C and Fatima, R and Giorgetti, S and Giron, CG and Gonzalez, JM and Hardy, M and Harrison, PW and Hourlier, T and Hollis, Z and Hunt, T and James, B and Jiang, Y and Johnson, R and Kay, M and Lagarde, J and Martin, FJ and Gómez, LM and Nair, S and Ni, P and Pozo, F and Ramalingam, V and Ruffier, M and Schmitt, BM and Schreiber, JM and Steed, E and Suner, MM and Sumathipala, D and Sycheva, I and Uszczynska-Ratajczak, B and Wass, E and Yang, YT and Yates, A and Zafrulla, Z and Choudhary, JS and Gerstein, M and Guigo, R and Hubbard, TJP and Kellis, M and Kundaje, A and Paten, B and Tress, ML and Flicek, P}, title = {GENCODE: reference annotation for the human and mouse genomes in 2023.}, journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D942-D949}, pmid = {36420896}, issn = {1362-4962}, support = {R01 HG004037/HG/NHGRI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Humans ; Animals ; Mice ; Molecular Sequence Annotation ; *Computational Biology/methods ; *Genome, Human/genetics ; Transcriptome/genetics ; Gene Expression Profiling ; Databases, Genetic ; }, abstract = {GENCODE produces high quality gene and transcript annotation for the human and mouse genomes. All GENCODE annotation is supported by experimental data and serves as a reference for genome biology and clinical genomics. 
The GENCODE consortium generates targeted experimental data, develops bioinformatic tools and carries out analyses that, along with externally produced data and methods, support the identification and annotation of transcript structures and the determination of their function. Here, we present an update on the annotation of human and mouse genes, including developments in the tools, data, analyses and major collaborations which underpin this progress. For example, we report the creation of a set of non-canonical ORFs identified in GENCODE transcripts, the LRGASP collaboration to assess the use of long transcriptomic data to build transcript models, the progress in collaborations with RefSeq and UniProt to increase convergence in the annotation of human and mouse protein-coding genes, the propagation of GENCODE across the human pan-genome and the development of new tools to support annotation of regulatory features by GENCODE. Our annotation is accessible via Ensembl, the UCSC Genome Browser and https://www.gencodegenes.org.}, } @article {pmid36420160, year = {2022}, author = {Tripodi, P}, title = {Next generation sequencing technologies to explore the diversity of germplasm resources: Achievements and trends in tomato.}, journal = {Computational and structural biotechnology journal}, volume = {20}, number = {}, pages = {6250-6258}, pmid = {36420160}, issn = {2001-0370}, abstract = {Tomato is one of the major vegetable crops grown worldwide and a model species for genetic and biological research. Progress in genomic technologies made possible the development of forefront methods for high-scale sequencing, providing comprehensive insight into the genetic architecture of germplasm resources. This review revisits next-generation sequencing strategies and applications to investigate the diversity of tomato, describing the common platforms used for SNP genotyping of large collections, de novo sequencing, and whole genome resequencing. 
Significant findings in evolutionary history are outlined, thus discussing how genomics has provided new hints about the processes behind domestication. Finally, achievement and perspectives on pan-genome construction and graphical pan-genome development toward precise mining of the natural variation to be exploited for breeding purposes are presented.}, } @article {pmid36419435, year = {2022}, author = {Wang, Q and Zhang, L and Zhang, Y and Chen, H and Song, J and Lyu, M and Chen, R and Zhang, L}, title = {Comparative genomic analyses reveal genetic characteristics and pathogenic factors of Bacillus pumilus HM-7.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1008648}, pmid = {36419435}, issn = {1664-302X}, abstract = {Bacillus pumilus plays an important role in industrial application and biocontrol activities, as well as causing humans and plants disease, leading to economic losses and biosafety concerns. However, until now, the pathogenesis and underlying mechanisms of B. pumilus strains remain unclear. In our previous study, one representative isolate of B. pumilus named HM-7 has been recovered and proved to be the causal agent of fruit rot on muskmelon (Cucumis melo). Herein, we present a complete and annotated genome sequence of HM-7 that contains 4,111 coding genes in a single 3,951,520 bp chromosome with 41.04% GC content. A total of 3,481 genes were functionally annotated with the GO, COG, and KEGG databases. Pan-core genome analysis of HM-7 and 20 representative B. pumilus strains, as well as six closely related Bacillus species, discovered 740 core genes and 15,205 genes in the pan-genome of 21 B. pumilus strains, in which 485 specific-genes were identified in HM-7 genome. The average nucleotide identity (ANI), and whole-genome-based phylogenetic analysis revealed that HM-7 was most closely related to the C4, GR8, MTCC-B6033, TUAT1 and SH-B11 strains, but evolutionarily distinct from other strains in B. pumilus. 
Collinearity analysis of the six similar B. pumilus strains showed high levels of synteny but also several divergent regions for each strains. In the HM-7 genome, we identified 484 genes in the carbohydrate-active enzymes (CAZyme) class, 650 genes encoding virulence factors, and 1,115 genes associated with pathogen-host interactions. Moreover, three HM-7-specific regions were determined, which contained 424 protein-coding genes. Further investigation of these genes showed that 19 pathogenesis-related genes were mainly associated with flagella formation and secretion of toxic products, which might be involved in the virulence of strain HM-7. Our results provided detailed genomic and taxonomic information for the HM-7 strain, and discovered its potential pathogenic mechanism, which lay a foundation for developing effective prevention and control strategies against this pathogen in the future.}, } @article {pmid36419432, year = {2022}, author = {Kumar, P and Rani, S and Dahiya, P and Kumar, A and Dang, AS and Suneja, P}, title = {Whole genome analysis for plant growth promotion profiling of Pantoea agglomerans CPHN2, a non-rhizobial nodule endophyte.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {998821}, pmid = {36419432}, issn = {1664-302X}, abstract = {Reduced agricultural production as well as issues like nutrient-depleted soils, eutrophication, and groundwater contamination have drawn attention to the use of endophyte-based bioformulations to restore soil fertility. Pantoea agglomerans CPHN2, a non-rhizobial nodule endophyte isolated from Cicer arietinum, exhibited a variety of plant growth-promoting traits. In this study, we used NextSeq500 technology to analyze whole-genome sequence information of this plant growth-promoting endophytic bacteria. The genome of P. agglomerans CPHN2 has a length of 4,839,532 bp and a G + C content of 55.2%. 
The whole genome comprises three different genomic fractions, comprising one circular chromosome and two circular plasmids. A comparative analysis between P. agglomerans CPHN2 and 10 genetically similar strains was performed using a bacterial pan-genome pipeline. All the predicted and annotated gene sequences for plant growth promotions (PGPs), such as phosphate solubilization, siderophore synthesis, nitrogen metabolism, and indole-3-acetic acid (IAA) of P. agglomerans CPHN2, were identified. The whole-genome analysis of P. agglomerans CPHN2 provides an insight into the mechanisms underlying PGP by endophytes and its potential applications as a biofertilizer.}, } @article {pmid36417612, year = {2022}, author = {Brito, LP and Santos, DS and Freitas, NSA and Medeiros, RS and Souza, PRE and Soares, MTCV and Porto, ALF}, title = {In silico evaluation of genomic characteristics of Streptococcus infantarius subsp. infantarius for application in fermentations.}, journal = {Anais da Academia Brasileira de Ciencias}, volume = {94}, number = {suppl 3}, pages = {e20211447}, doi = {10.1590/0001-3765202220211447}, pmid = {36417612}, issn = {1678-2690}, mesh = {Fermentation ; *Streptococcus/genetics ; *Genomics ; Sequence Analysis, DNA ; }, abstract = {This study aims to evaluate the in silico genomic characteristics of Streptococcus infantarius subsp. infantarius, isolated from Coalho cheese from Paraíba, Brazil, with a view to application in lactic fermentations. rRNA sequences from the 16S ribosomal region were used as input to GenBank, in the search for patterns that could reveal a non-pathogenic behavior of S. infantarius subsp. infantarius, comparing mobile genetic elements, antibiotic resistance genes, pan-genome analysis and multi-genome alignment among related species. S. infantarius subsp. 
infantarius CJ18 was the only complete genome reported by BLAST/NCBI with high similarity and after comparative genetics with complete genomes of Streptococcus agalactiae (SAG153, NJ1606) and Streptococcus thermophilus (ST106, CS18, IDCC2201, APC151) revealed that CJ18 showed a low number of transposases and integrases, infection by phage bacteria of the Streptococcus genus, absence of antibiotic resistance genes and presence of bacteriocin, folate and riboflavin producing genes. The genome alignment revealed that the collinear blocks of S. thermophilus ST106 and S. agalactiae SAG153 have inverted blocks when compared to the CJ18 genome due to gene positioning, insertions and deletions. Therefore, the strains of S. infantarius subsp. infantarius isolated from Coalho cheese from Paraíba showed genomic similarity with CJ18 and the mobility of genes analyzed in silico showed absence of pathogenicity throughout the genome of CJ18, indicating the potential of these strains for the dairy industry.}, } @article {pmid36416120, year = {2023}, author = {Yang, L and Yang, Y and Huang, L and Cui, X and Liu, Y}, title = {From single- to multi-omics: future research trends in medicinal plants.}, journal = {Briefings in bioinformatics}, volume = {24}, number = {1}, pages = {}, pmid = {36416120}, issn = {1477-4054}, support = {202102AA310034//Major Science and Technology Special Project of Yunnan Province/ ; 31960134//National Natural Science Foundation of China/ ; KKAN20222025//Yunnan Major Scientific and Technological Projects/ ; }, mesh = {*Plants, Medicinal/genetics/metabolism ; Multiomics ; Genomics ; Proteomics ; Computational Biology ; Metabolomics ; }, abstract = {Medicinal plants are the main source of natural metabolites with specialised pharmacological activities and have been widely examined by plant researchers. 
Numerous omics studies of medicinal plants have been performed to identify molecular markers of species and functional genes controlling key biological traits, as well as to understand biosynthetic pathways of bioactive metabolites and the regulatory mechanisms of environmental responses. Omics technologies have been widely applied to medicinal plants, including as taxonomics, transcriptomics, metabolomics, proteomics, genomics, pangenomics, epigenomics and mutagenomics. However, because of the complex biological regulation network, single omics usually fail to explain the specific biological phenomena. In recent years, reports of integrated multi-omics studies of medicinal plants have increased. Until now, there have few assessments of recent developments and upcoming trends in omics studies of medicinal plants. We highlight recent developments in omics research of medicinal plants, summarise the typical bioinformatics resources available for analysing omics datasets, and discuss related future directions and challenges. This information facilitates further studies of medicinal plants, refinement of current approaches and leads to new ideas.}, } @article {pmid36415217, year = {2022}, author = {Golchha, NC and Nighojkar, A and Nighojkar, S}, title = {Redefining genomic view of Clostridioides difficile through pangenome analysis and identification of drug targets from its core genome.}, journal = {Drug target insights}, volume = {16}, number = {}, pages = {17-24}, pmid = {36415217}, issn = {1177-3928}, abstract = {INTRODUCTION: Clostridioides difficile infection (CDI) is a leading cause of gastrointestinal infections and in the present day is a major concern for global health care system. The unavailability of specific antibiotics for CDI treatment and its emerging cases worldwide further broaden the challenge to control CDI.

METHODS: The availability of a large number of genome sequences for C. difficile and many bioinformatics tools for genome analysis provides the opportunity for in silico pangenomic analysis. In the present study, 97 strains of C. difficile were used for pangenomic studies and characterized for their phylogenomic and functional analysis.

RESULTS: Pangenome analysis reveals open pangenome of C. difficile and high genetic diversity. Sequence and interactome analysis of 1,481 core genes was done and eight potent drug targets are identified. Three drug targets, namely, aminodeoxychorismate synthase (PabB), D-alanyl-D-alanine carboxypeptidase (DD-CPase) and undecaprenyl diphospho-muramoyl pentapeptide beta-N-acetylglucosaminyl transferase (MurG transferase), have been reported as drug targets for other human pathogens, and five targets, namely, bifunctional diguanylate cyclase/phosphodiesterase (cyclic-diGMP), sporulation transcription factor (Spo0A), histidinol-phosphate transaminase (HisC), 3-deoxy-7-phosphoheptulonate synthase (DAHP synthase) and c-di-GMP phosphodiesterase (PdcA), are novel.

CONCLUSION: The suggested potent targets could act as broad-spectrum drug targets for C. difficile. However, further validation needs to be done before using them for lead compound discovery.}, } @article {pmid36412754, year = {2022}, author = {Sánchez-Suárez, J and Díaz, L and Coy-Barrera, E and Villamil, L}, title = {Specialized Metabolism of Gordonia Genus: An Integrated Survey on Chemodiversity Combined with a Comparative Genomics-Based Analysis.}, journal = {Biotech (Basel (Switzerland))}, volume = {11}, number = {4}, pages = {}, pmid = {36412754}, issn = {2673-6284}, support = {80740-168-2019//Ministerio de Ciencia, Tecnología e Innovación (Colombia)/ ; ING-175-2016//Universidad de La Sabana/ ; }, abstract = {Members of the phylum Actinomycetota (formerly Actinobacteria) have historically been the most prolific providers of small bioactive molecules. Although the genus Streptomyces is the best-known member for this issue, other genera, such as Gordonia, have shown interesting potential in their specialized metabolism. Thus, we combined herein the result of a comprehensive literature survey on metabolites derived from Gordonia strains with a comparative genomic analysis to examine the potential of the specialized metabolism of the genus Gordonia. Thirty Gordonia-derived compounds of different classes were gathered (i.e., alkaloids, amides, phenylpropanoids, and terpenoids), exhibiting antimicrobial and cytotoxic activities, and several were also isolated from Streptomyces (e.g., actinomycin, nocardamin, diolmycin A1). With the genome data, we estimated an open pan-genome of 57,901 genes, most of them being part of the cloud genome. Regarding the BGCs content, 531 clusters were found, including Terpenes, RiPP-like, and NRPS clusters as the most frequent clusters. Our findings demonstrated that Gordonia is a poorly studied genus in terms of its specialized metabolism production and potential applications. Nevertheless, given their BGCs content, Gordonia spp. 
are a valuable biological resource that could expand the chemical spectrum of the phylum Actinomycetota, involving novel BGCs for inspiring innovative outlines for synthetic biology and further use in biotechnological initiatives. Therefore, further studies and more efforts should be made to explore different environments and evaluate other bioactivities.}, } @article {pmid36409181, year = {2022}, author = {Mun, T and Vaddadi, NSK and Langmead, B}, title = {Pangenomic Genotyping with the Marker Array.}, journal = {Algorithms in bioinformatics : ... International Workshop, WABI ..., proceedings. WABI (Workshop)}, volume = {242}, number = {}, pages = {}, pmid = {36409181}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R35 GM139602/GM/NIGMS NIH HHS/United States ; }, abstract = {We present a new method and software tool called rowbowt that applies a pangenome index to the problem of inferring genotypes from short-read sequencing data. The method uses a novel indexing structure called the marker array. Using the marker array, we can genotype variants with respect from large panels like the 1000 Genomes Project while avoiding the reference bias that results when aligning to a single linear reference. 
rowbowt can infer accurate genotypes in less time and memory compared to existing graph-based methods.}, } @article {pmid36408900, year = {2023}, author = {Fullam, A and Letunic, I and Schmidt, TSB and Ducarmon, QR and Karcher, N and Khedkar, S and Kuhn, M and Larralde, M and Maistrenko, OM and Malfertheiner, L and Milanese, A and Rodrigues, JFM and Sanchis-López, C and Schudoma, C and Szklarczyk, D and Sunagawa, S and Zeller, G and Huerta-Cepas, J and von Mering, C and Bork, P and Mende, DR}, title = {proGenomes3: approaching one million accurately and consistently annotated high-quality prokaryotic genomes.}, journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D760-D766}, pmid = {36408900}, issn = {1362-4962}, mesh = {Databases, Genetic ; *Genome ; Genomics ; Molecular Sequence Annotation ; *Prokaryotic Cells ; Bacteria/classification/genetics ; }, abstract = {The interpretation of genomic, transcriptomic and other microbial 'omics data is highly dependent on the availability of well-annotated genomes. As the number of publicly available microbial genomes continues to increase exponentially, the need for quality control and consistent annotation is becoming critical. We present proGenomes3, a database of 907 388 high-quality genomes containing 4 billion genes that passed stringent criteria and have been consistently annotated using multiple functional and taxonomic databases including mobile genetic elements and biosynthetic gene clusters. proGenomes3 encompasses 41 171 species-level clusters, defined based on universal single copy marker genes, for which pan-genomes and contextual habitat annotations are provided. 
The database is available at http://progenomes.embl.de/.}, } @article {pmid36408592, year = {2022}, author = {Vij, S and Thakur, R and Rishi, P}, title = {Reverse engineering approach: a step towards a new era of vaccinology with special reference to Salmonella.}, journal = {Expert review of vaccines}, volume = {21}, number = {12}, pages = {1763-1785}, doi = {10.1080/14760584.2022.2148661}, pmid = {36408592}, issn = {1744-8395}, mesh = {Humans ; Vaccinology ; *Typhoid Fever/prevention & control ; Salmonella/genetics ; *Typhoid-Paratyphoid Vaccines ; Anti-Bacterial Agents ; Epitopes ; }, abstract = {INTRODUCTION: Salmonella is responsible for causing enteric fever, septicemia, and gastroenteritis in humans. Due to high disease burden and emergence of multi- and extensively drug-resistant Salmonella strains, it is becoming difficult to treat the infection with existing battery of antibiotics as we are not able to discover newer antibiotics at the same pace at which the pathogens are acquiring resistance. Though vaccines against Salmonella are available commercially, they have limited efficacy. Advancements in genome sequencing technologies and immunoinformatics approaches have solved the problem significantly by giving rise to a new era of vaccine designing, i.e. 'Reverse engineering.' Reverse engineering/vaccinology has expedited the vaccine identification process. Using this approach, multiple potential proteins/epitopes can be identified and constructed as a single entity to tackle enteric fever.

AREAS COVERED: This review provides details of reverse engineering approach and discusses various protein and epitope-based vaccine candidates identified using this approach against typhoidal Salmonella.

EXPERT OPINION: Reverse engineering approach holds great promise for developing strategies to tackle the pathogen(s) by overcoming the limitations posed by existing vaccines. Progressive advancements in the arena of reverse vaccinology, structural biology, and systems biology combined with an improved understanding of host-pathogen interactions are essential components to design new-generation vaccines.}, } @article {pmid36405966, year = {2022}, author = {Guo, Y and Zeng, C and Ma, C and Cai, H and Jiang, X and Zhai, S and Xu, X and Lin, M}, title = {Comparative genomics analysis of the multidrug-resistant Aeromonas hydrophila MX16A providing insights into antibiotic resistance genes.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {1042350}, pmid = {36405966}, issn = {2235-2988}, mesh = {*Aeromonas hydrophila/genetics ; *Anti-Bacterial Agents/pharmacology ; Drug Resistance, Microbial ; beta-Lactams ; Genomics ; }, abstract = {In this paper, the whole genome of the multidrug-resistant Aeromonas hydrophila MX16A was comprehensively analyzed and compared after sequencing by PacBio RS II. To shed light on the drug resistance mechanism of A. hydrophila MX16A, a Kirby-Bauer disk diffusion method was used to assess the phenotypic drug susceptibility. Importantly, resistance against β-lactam, sulfonamides, rifamycins, macrolides, tetracyclines and chloramphenicols was largely consistent with the prediction analysis results of drug resistance genes in the CARD database. The varied types of resistance genes identified from A. hydrophila MX16A revealed multiple resistance mechanisms, including enzyme inactivation, gene mutation and active effusion. The publicly available complete genomes of 35 Aeromonas hydrophila strains on NCBI, including MX16A, were downloaded for genomic comparison and analysis. 
The analysis of 33 genomes with ANI greater than 95% showed that the pan-genome consisted of 9556 genes, and the core genes converged to 3485 genes. In summary, the obtained results showed that A. hydrophila exhibited a great genomic diversity as well as diverse metabolic function and it is believed that frequent exchanges between strains lead to the horizontal transfer of drug resistance genes.}, } @article {pmid36404338, year = {2022}, author = {Orata, FD and Hussain, NAS and Liang, KYH and Hu, D and Boucher, YF}, title = {Genomes of Vibrio metoecus co-isolated with Vibrio cholerae extend our understanding of differences between these closely related species.}, journal = {Gut pathogens}, volume = {14}, number = {1}, pages = {42}, pmid = {36404338}, issn = {1757-4749}, abstract = {BACKGROUND: Vibrio cholerae, the causative agent of cholera, is a well-studied species, whereas Vibrio metoecus is a recently described close relative that is also associated with human infections. The availability of V. metoecus genomes provides further insight into its genetic differences from V. cholerae. Additionally, both species have been co-isolated from a cholera-free brackish coastal pond and have been suggested to interact with each other by horizontal gene transfer (HGT).

RESULTS: The genomes of 17 strains from each species were sequenced. All strains share a large core genome (2675 gene families) and very few genes are unique to each species (< 3% of the pan-genome of both species). This led to the identification of potential molecular markers—for nitrite reduction, as well as peptidase and rhodanese activities—to further distinguish V. metoecus from V. cholerae. Interspecies HGT events were inferred in 21% of the core genes and 45% of the accessory genes. A directional bias in gene transfer events was found in the core genome, where V. metoecus was a recipient of three times (75%) more genes from V. cholerae than it was a donor (25%).

CONCLUSION: V. metoecus was misclassified as an atypical variant of V. cholerae due to their resemblance in a majority of biochemical characteristics. More distinguishing phenotypic assays can be developed based on the discovery of potential gene markers to avoid any future misclassifications. Furthermore, differences in relative abundance or seasonality were observed between the species and could contribute to the bias in directionality of HGT.}, } @article {pmid36395320, year = {2022}, author = {Lofgren, LA and Ross, BS and Cramer, RA and Stajich, JE}, title = {The pan-genome of Aspergillus fumigatus provides a high-resolution view of its population structure revealing high levels of lineage-specific diversity driven by recombination.}, journal = {PLoS biology}, volume = {20}, number = {11}, pages = {e3001890}, pmid = {36395320}, issn = {1545-7885}, support = {R01 AI130128/AI/NIAID NIH HHS/United States ; S10 OD016290/OD/NIH HHS/United States ; T32 HL134598/HL/NHLBI NIH HHS/United States ; }, mesh = {*Antifungal Agents ; *Aspergillus fumigatus/genetics ; Drug Resistance, Fungal ; Genomics ; Recombination, Genetic/genetics ; }, abstract = {Aspergillus fumigatus is a deadly agent of human fungal disease where virulence heterogeneity is thought to be at least partially structured by genetic variation between strains. While population genomic analyses based on reference genome alignments offer valuable insights into how gene variants are distributed across populations, these approaches fail to capture intraspecific variation in genes absent from the reference genome. Pan-genomic analyses based on de novo assemblies offer a promising alternative to reference-based genomics with the potential to address the full genetic repertoire of a species. Here, we evaluate 260 genome sequences of A. fumigatus including 62 newly sequenced strains, using a combination of population genomics, phylogenomics, and pan-genomics. 
Our results offer a high-resolution assessment of population structure and recombination frequency, phylogenetically structured gene presence-absence variation, evidence for metabolic specificity, and the distribution of putative antifungal resistance genes. Although A. fumigatus disperses primarily via asexual conidia, we identified extraordinarily high levels of recombination with the lowest linkage disequilibrium decay value reported for any fungal species to date. We provide evidence for 3 primary populations of A. fumigatus, with recombination occurring only rarely between populations and often within them. These 3 populations are structured by both gene variation and distinct patterns of gene presence-absence with unique suites of accessory genes present exclusively in each clade. Accessory genes displayed functional enrichment for nitrogen and carbohydrate metabolism suggesting that populations may be stratified by environmental niche specialization. Similarly, the distribution of antifungal resistance genes and resistance alleles were often structured by phylogeny. Altogether, the pan-genome of A. fumigatus represents one of the largest fungal pan-genomes reported to date including many genes unrepresented in the Af293 reference genome. These results highlight the inadequacy of relying on a single-reference genome-based approach for evaluating intraspecific variation and the power of combined genomic approaches to elucidate population structure, genetic diversity, and putative ecological drivers of clinically relevant fungi.}, } @article {pmid36386637, year = {2022}, author = {Jiang, ZM and Deng, Y and Han, XF and Su, J and Wang, H and Yu, LY and Zhang, YQ}, title = {Geminicoccus flavidas sp. nov. and Geminicoccus harenae sp. 
nov., two IAA-producing novel rare bacterial species inhabiting desert biological soil crusts.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1034816}, pmid = {36386637}, issn = {1664-302X}, abstract = {Two Gram-staining negative strains (CPCC 101082[T] and CPCC 101083[T]) were isolated from biological sandy soil crusts samples collected from Badain Jaran desert, China. Both isolates were heterotrophic phototroph, could produce indole-3-acetic acid. The 16S rRNA gene sequences of these two strains were closely related to the members of the family Geminicoccaceae, showing high similarities with Geminicoccus roseus DSM 18922[T] (96.9%) and Arboricoccus pini B29T1[T] (90.1%), respectively. In phylogenetic tree based on 16S rRNA gene sequences, strain CPCC 101082[T] and CPCC 101083[T] formed a robust distinct clade with Geminicoccus roseus DSM 18922[T] within the family Geminicoccaceae, which indicated that these two isolates could be classified into the genus Geminicoccus. The growth of strain CPCC 101082[T] occurred at 15-42°C and pH 4.0-10.0 (optima at 28-37°C and pH 6.0-8.0). The growth of strain CPCC 101083[T] occurred at 4-45°C and pH 4.0-10.0 (optima at 25-30°C and pH 6.0-8.0). The major cellular fatty acids of CPCC 101082[T] and CPCC 101083[T] contained C18:1 ω7c/C18:1 ω6c, cyclo-C19:0 ω8c, and C16:0. Q-10 was detected as the sole respiratory quinone. Diphosphatidylglycerol, phosphatidylglycerol, phosphatidylcholine, phosphatidylethanolamine, an unidentified phospholipid and an unidentified aminolipid were tested in the polar lipids profile. The genomes of the two isolates were characterized as about 5.9 Mbp in size with the G + C content of nearly 68%. The IAA-producing encoding genes were predicated in both genomes. The values of average nucleotide identity were 80.6, 81.2 and 92.4% based on a pairwise comparison of the genomes of strains CPCC 101082[T] and CPCC 101083[T] and Geminicoccus roseus DSM 18922[T], respectively. 
On the basis of the genotypic, chemotaxonomic and phenotypic characteristics, the strains CPCC 101082[T] (=NBRC 113513[T] = KCTC 62853[T]) and CPCC 101083[T] (=NBRC 113514[T] = KCTC 62854[T]) are proposed to represent two novel species of the genus Geminicoccus with the names Geminicoccus flavidas sp. nov. and Geminicoccus harenae sp. nov.}, } @article {pmid36377929, year = {2023}, author = {Daware, A and Malik, A and Srivastava, R and Das, D and Ellur, RK and Singh, AK and Tyagi, AK and Parida, SK}, title = {Rice Pangenome Genotyping Array: an efficient genotyping solution for pangenome-based accelerated genetic improvement in rice.}, journal = {The Plant journal : for cell and molecular biology}, volume = {113}, number = {1}, pages = {26-46}, doi = {10.1111/tpj.16028}, pmid = {36377929}, issn = {1365-313X}, mesh = {Chromosome Mapping ; *Genome-Wide Association Study ; *Oryza/genetics ; Genotype ; Quantitative Trait Loci/genetics ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {The advent of the pangenome era has unraveled previously unknown genetic variation existing within diverse crop plants, including rice. This untapped genetic variation is believed to account for a major portion of phenotypic variation existing in crop plants. However, the use of conventional single reference-guided genotyping often fails to capture a large portion of this genetic variation leading to a reference bias. This makes it difficult to identify and utilize novel population/cultivar-specific genes for crop improvement. Thus, we developed a Rice Pangenome Genotyping Array (RPGA) harboring probes assaying 80K single-nucleotide polymorphisms (SNPs) and presence-absence variants spanning the entire 3K rice pangenome. This array provides a simple, user-friendly and cost-effective (60-80 USD per sample) solution for rapid pangenome-based genotyping in rice. 
The genome-wide association study (GWAS) conducted using RPGA-SNP genotyping data of a rice diversity panel detected a total of 42 loci, including previously known as well as novel genomic loci regulating grain size/weight traits in rice. Eight of these identified trait-associated loci (dispensable loci) could not be detected with conventional single reference genome-based GWAS. A WD repeat-containing PROTEIN 12 gene underlying one of such dispensable locus on chromosome 7 (qLWR7) along with other non-dispensable loci were subsequently detected using high-resolution quantitative trait loci mapping confirming authenticity of RPGA-led GWAS. This demonstrates the potential of RPGA-based genotyping to overcome reference bias. The application of RPGA-based genotyping for population structure analysis, hybridity testing, ultra-high-density genetic map construction and chromosome-level genome assembly, and marker-assisted selection was also demonstrated. A web application (http://www.rpgaweb.com) was further developed to provide an easy to use platform for the imputation of RPGA-based genotyping data using 3K rice reference panel and subsequent GWAS.}, } @article {pmid36377253, year = {2023}, author = {Tello, D and Gonzalez-Garcia, LN and Gomez, J and Zuluaga-Monares, JC and Garcia, R and Angel, R and Mahecha, D and Duarte, E and Leon, MDR and Reyes, F and Escobar-Velásquez, C and Linares-Vásquez, M and Cardozo, N and Duitama, J}, title = {NGSEP 4: Efficient and accurate identification of orthogroups and whole-genome alignment.}, journal = {Molecular ecology resources}, volume = {23}, number = {3}, pages = {712-724}, doi = {10.1111/1755-0998.13737}, pmid = {36377253}, issn = {1755-0998}, support = {80740-441-2020//Ministerio de Ciencia Tecnología e Innovación de Colombia/ ; //Universidad de los Andes/ ; }, mesh = {*Software ; *Genome ; Genomics/methods ; Algorithms ; Metagenomics ; }, abstract = {Whole-genome alignment allows researchers to understand the genomic 
structure and variation among genomes. Approaches based on direct pairwise comparisons of DNA sequences require large computational capacities. As a consequence, pipelines combining tools for orthologous gene identification and synteny have been developed. In this manuscript, we present the latest functionalities implemented in NGSEP 4, to identify orthogroups and perform whole genome alignments. NGSEP implements functionalities for identification of clusters of homologous genes, synteny analysis and whole genome alignment. Our results showed that the NGSEP algorithm for orthogroups identification has competitive accuracy and efficiency in comparison to commonly used tools. The implementation also includes a visualization of the whole genome alignment based on synteny of the orthogroups that were identified, and a reconstruction of the pangenome based on frequencies of the orthogroups among the genomes. NGSEP 4 also includes a new graphical user interface based on the JavaFX technology. We expect that these new developments will be very useful for several studies in evolutionary biology and population genomics.}, } @article {pmid36376589, year = {2023}, author = {Chivian, D and Jungbluth, SP and Dehal, PS and Wood-Charlson, EM and Canon, RS and Allen, BH and Clark, MM and Gu, T and Land, ML and Price, GA and Riehl, WJ and Sneddon, MW and Sutormin, R and Zhang, Q and Cottingham, RW and Henry, CS and Arkin, AP}, title = {Metagenome-assembled genome extraction and analysis from microbiomes using KBase.}, journal = {Nature protocols}, volume = {18}, number = {1}, pages = {208-238}, pmid = {36376589}, issn = {1750-2799}, mesh = {*Metagenome ; Phylogeny ; Genome, Bacterial ; *Microbiota/genetics ; Bacteria/genetics ; Metagenomics ; }, abstract = {Uncultivated Bacteria and Archaea account for the vast majority of species on Earth, but obtaining their genomes directly from the environment, using shotgun sequencing, has only become possible recently. 
To realize the hope of capturing Earth's microbial genetic complement and to facilitate the investigation of the functional roles of specific lineages in a given ecosystem, technologies that accelerate the recovery of high-quality genomes are necessary. We present a series of analysis steps and data products for the extraction of high-quality metagenome-assembled genomes (MAGs) from microbiomes using the U.S. Department of Energy Systems Biology Knowledgebase (KBase) platform (http://www.kbase.us/). Overall, these steps take about a day to obtain extracted genomes when starting from smaller environmental shotgun read libraries, or up to about a week from larger libraries. In KBase, the process is end-to-end, allowing a user to go from the initial sequencing reads all the way through to MAGs, which can then be analyzed with other KBase capabilities such as phylogenetic placement, functional assignment, metabolic modeling, pangenome functional profiling, RNA-Seq and others. While portions of such capabilities are available individually from other resources, the combination of the intuitive usability, data interoperability and integration of tools in a freely available computational resource makes KBase a powerful platform for obtaining MAGs from microbiomes. 
While this workflow offers tools for each of the key steps in the genome extraction process, it also provides a scaffold that can be easily extended with additional MAG recovery and analysis tools, via the KBase software development kit (SDK).}, } @article {pmid36375718, year = {2023}, author = {Santos, RGD and Hurtado, R and Rodrigues, DLN and Lima, A and Dos Anjos, WF and Rifici, C and Attili, AR and Tiwari, S and Jaiswal, AK and Spier, SJ and Mazzullo, G and Morais-Rodrigues, F and Gomide, ACP and de Jesus, LCL and Aburjaile, FF and Brenig, B and Cuteri, V and Castro, TLP and Seyffert, N and Santos, A and Góes-Neto, A and de Jesus Sousa, T and Azevedo, V}, title = {Comparative genomic analysis of the Dietzia genus: an insight into genomic diversity, and adaptation.}, journal = {Research in microbiology}, volume = {174}, number = {3}, pages = {103998}, doi = {10.1016/j.resmic.2022.103998}, pmid = {36375718}, issn = {1769-7123}, mesh = {Sequence Analysis, DNA ; Phylogeny ; *Genomics ; Genome, Bacterial/genetics ; Base Sequence ; *Actinomycetales/genetics ; }, abstract = {Dietzia strains are widely distributed in the environment, presenting an opportunistic role, and some species have undetermined taxonomic characteristics. Here, we propose the existence of errors in the classification of species in this genus using comparative genomics. We performed ANI, dDDH, pangenome and genomic plasticity analyses better to elucidate the phylogenomic relationships between Dietzia strains. For this, we used 55 genomes of Dietzia downloaded from public databases that were combined with a newly sequenced. Sequence analysis of a phylogenetic tree based on genome similarity comparisons and dDDH, ANI analyses supported grouping different Dietzia species into four distinct groups. 
The pangenome analysis corroborated the classification of these groups, supporting the idea that some species of Dietzia could be reassigned in a possible classification into three distinct species, each containing less variability than that found within the global pangenome of all strains. Additionally, analysis of genomic plasticity based on groups containing Dietzia strains found differences in the presence and absence of symbiotic Islands and pathogenic islands related to their isolation site. We propose that the comparison of pangenome subsets together with phylogenomic approaches can be used as an alternative for the classification and differentiation of new species of the genus Dietzia.}, } @article {pmid36375370, year = {2022}, author = {Islam, J and Sarkar, H and Hoque, H and Hasan, MN and Jewel, GMNA}, title = {In-silico approach of identifying novel therapeutic targets against Yersinia pestis using pan and subtractive genomic analysis.}, journal = {Computational biology and chemistry}, volume = {101}, number = {}, pages = {107784}, doi = {10.1016/j.compbiolchem.2022.107784}, pmid = {36375370}, issn = {1476-928X}, mesh = {Humans ; *Yersinia pestis/genetics ; *Plague/drug therapy/genetics/microbiology ; Genomics ; Genome, Bacterial ; Virulence Factors ; }, abstract = {The magnitude of human affliction brought about by bacterial infections has been on the rise since the mid-5th century. Yersinia pestis is one such notable, gram-negative bacterium that inflicted havoc around the globe three times throughout different millenniums by causing deadly plagues. Despite the unremitting efforts by scientists, different strains of Yersinia pestis are still affecting the populations in various parts of the world by growing resistant to existing antimicrobial agents owing to their overuse. The current scenario, therefore, calls for new therapeutics to further combat the disease. 
In this study, 3105 core, 387 pathogen-specific unique, 536 choke-point, 796 virulence factors, and 115 antimicrobial resistant proteins were found using a pan-genomic and subtractive genome analysis of nine Yersinia pestis strains that could be instrumental in the development of drugs against Yersinia pestis. Subsequently, 1461 and 1114 essential proteins were identified as non-homologous to human and gut microflora. 535 and 30 proteins were predicted as cytoplasmic and broad-spectrum targets respectively. Finally, four potential targets were selected for their high connectivity in protein-protein interaction network. These selected target proteins are associated with one of the major lipopolysaccharide biosynthesis pathways. Therefore, dismantling their activity might indicate a probable strategy for developing therapeutics to combat bacterial infection caused by Yersinia pestis. However, further experimental validation in the laboratory is needed to consolidate the research findings.}, } @article {pmid36367506, year = {2022}, author = {Qu, L and Li, Y and Wang, W and Shao, Z and Gao, Z and Lai, Q}, title = {Aestuarium zhoushanense is a later heterotypic synonym of Marivivens donghaensis, and transfer of Paradonghicola geojensis to the genus Marivivens as Marivivens geojensis comb. nov.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {72}, number = {11}, pages = {}, doi = {10.1099/ijsem.0.005564}, pmid = {36367506}, issn = {1466-5034}, mesh = {RNA, Ribosomal, 16S/genetics ; Phylogeny ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; Base Composition ; Sequence Analysis, DNA ; *Fatty Acids/chemistry ; Nucleic Acid Hybridization ; }, abstract = {The 16S rRNA genes of Aestuarium zhoushanense G7[T] and Paradonghicola geojensis FJ12[T] shared 100 % sequence identity with Marivivens donghaensis AM-4[T]. 
Phylogeny of 16S rRNA gene sequences showed that the three type strains formed a monophyletic clade within the genus Marivivens. Whole genome sequence comparisons showed that three type strains shared 46.7-69.7 % digital DNA-DNA hybridization, 92.1-96.4 % average nucleotide identity and 96.2-98.1 % average amino acid identity. The high 16S rRNA gene similarity values show that three type strains should belong to the same genus. The pan-genome of the five strains contained 5754 genes including 1877 core genes. Based on the principle of priority, we propose that A. zhoushanense Yu et al. 2019 is a later heterotypic synonym of M. donghaensis Park et al. 2016, and P. geojensis should be reclassified as Marivivens geojensis comb. nov., respectively.}, } @article {pmid36366394, year = {2022}, author = {Mushtaq, M and Khan, S and Hassan, M and Al-Harbi, AI and Hameed, AR and Khan, K and Ismail, S and Irfan, M and Ahmad, S}, title = {Computational Design of a Chimeric Vaccine against Plesiomonas shigelloides Using Pan-Genome and Reverse Vaccinology.}, journal = {Vaccines}, volume = {10}, number = {11}, pages = {}, pmid = {36366394}, issn = {2076-393X}, abstract = {The swift emergence of antibiotic resistance (AR) in bacterial pathogens to make themselves adaptable to changing environments has become an alarming health issue. To prevent AR infection, many ways can be accomplished such as by decreasing the misuse of antibiotics in human and animal medicine. Among these AR bacterial species, Plesiomonas shigelloides is one of the etiological agents of intestinal infection in humans. It is a gram-negative rod-shaped bacterium that is highly resistant to several classes of antibiotics, and no licensed vaccine against the aforementioned pathogen is available. Hence, substantial efforts are required to screen protective antigens from the pathogen whole genome that can be subjected easily to experimental evaluations. 
Here, we employed a reverse vaccinology (RV) approach to design a multi-antigenic epitopes based vaccine against P. shigelloides. The complete genomes of P. shigelloides were retrieved from the National Center for Biotechnological Information (NCBI) that on average consist of 5226 proteins. The complete proteomes were subjected to different subtractive proteomics filters, and in the results of that analysis, out of total proteins, 2399 were revealed as non-redundant and 2827 as redundant proteins. The non-redundant proteins were further checked for subcellular localization analysis, in which three were localized in the extracellular matrix, eight were outer membrane, and 13 were found in the periplasmic membrane. All surface localized proteins were found to be virulent. Out of a total of 24 virulent proteins, three proteins (flagellar hook protein (FlgE), hypothetical protein, and TonB-dependent hemoglobin/transferrin/lactoferrin family receptor protein) were considered as potential vaccine targets and subjected to epitopes prediction. The predicted epitopes were further examined for antigenicity, toxicity, and solubility. A total of 10 epitopes were selected (GFKESRAEF, VQVPTEAGQ, KINENGVVV, ENKALSQET, QGYASANDE, RLNPTDSRW, TLDYRLNPT, RVTKKQSDK, GEREGKNRP, RDKKTNQPL). The selected epitopes were linked with each other via specific GPGPG linkers in order to design a multi-epitopes vaccine construct, and linked with cholera toxin B subunit adjuvant to make the designed vaccine construct more efficient in terms of antigenicity. The 3D structure of the vaccine construct was modeled ab initio as no appropriate template was available. Furthermore, molecular docking was carried out to check the interaction affinity of the designed vaccine with major histocompatibility complex (MHC-)I (PDB ID: 1L1Y), MHC-II (1KG0), and toll-like receptor 4 ((TLR-4) (PDB: 4G8A). Molecular dynamic simulation was applied to evaluate the dynamic behavior of vaccine-receptor complexes. 
Lastly, the binding free energies of the vaccine with receptors were estimated by using MMPB/GBSA methods. All of the aforementioned analyses concluded that the designed vaccine molecule is a good candidate to be used in experimental studies to disclose its immune protective efficacy in animal models.}, } @article {pmid36363712, year = {2022}, author = {Murr, L and Huber, I and Pavlovic, M and Guertler, P and Messelhaeusser, U and Weiss, M and Ehrmann, M and Tuschak, C and Bauer, H and Wenning, M and Busch, U and Bretschneider, N}, title = {Whole-Genome Sequence Comparisons of Listeria monocytogenes Isolated from Meat and Fish Reveal High Inter- and Intra-Sample Diversity.}, journal = {Microorganisms}, volume = {10}, number = {11}, pages = {}, pmid = {36363712}, issn = {2076-2607}, support = {72577//Bavarian State Ministry for Environment and Consumer Protection (StMUV)/ ; }, abstract = {Interpretation of whole-genome sequencing (WGS) data for foodborne outbreak investigations is complex, as the genetic diversity within processing plants and transmission events need to be considered. In this study, we analyzed 92 food-associated Listeria monocytogenes isolates by WGS-based methods. We aimed to examine the genetic diversity within meat and fish production chains and to assess the applicability of suggested thresholds for clustering of potentially related isolates. Therefore, meat-associated isolates originating from the same samples or processing plants as well as fish-associated isolates were analyzed as distinct sets. In silico serogrouping, multilocus sequence typing (MLST), core genome MLST (cgMLST), and pangenome analysis were combined with screenings for prophages and genetic traits. Isolates of the same subtypes (cgMLST types (CTs) or MLST sequence types (STs)) were additionally compared by SNP calling. This revealed the occurrence of more than one CT within all three investigated plants and within two samples. 
Analysis of the fish set resulted in predominant assignment of isolates from pangasius catfish and salmon to ST2 and ST121, respectively, potentially indicating persistence within the respective production chains. The approach not only allowed the detection of distinct subtypes but also the determination of differences between closely related isolates, which need to be considered when interpreting WGS data for surveillance.}, } @article {pmid36362240, year = {2022}, author = {Khoder, M and Osman, M and Kassem, II and Rafei, R and Shahin, A and Fournier, PE and Rolain, JM and Hamze, M}, title = {Whole Genome Analyses Accurately Identify Neisseria spp. and Limit Taxonomic Ambiguity.}, journal = {International journal of molecular sciences}, volume = {23}, number = {21}, pages = {}, pmid = {36362240}, issn = {1422-0067}, support = {N/A//Azm & Saade Association/ ; N/A//Erasmus Mundus/ ; N/A//Cornell Atkinson Postdoctoral Fellowship/ ; }, mesh = {Male ; Humans ; Phylogeny ; *Neisseria/genetics ; Neisseria gonorrhoeae/genetics ; *Neisseria meningitidis/genetics ; Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization ; DNA ; Genome, Bacterial ; }, abstract = {Genome sequencing facilitates the study of bacterial taxonomy and allows the re-evaluation of the taxonomic relationships between species. Here, we aimed to analyze the draft genomes of four commensal Neisseria clinical isolates from the semen of infertile Lebanese men. To determine the phylogenetic relationships among these strains and other Neisseria spp. and to confirm their identity at the genomic level, we compared the genomes of these four isolates with the complete genome sequences of Neisseria gonorrhoeae and Neisseria meningitidis and the draft genomes of Neisseria flavescens, Neisseria perflava, Neisseria mucosa, and Neisseria macacae that are available in the NCBI Genbank database. 
Our findings revealed that the WGS analysis accurately identified and corroborated the matrix-assisted laser desorption ionization-time of flight (MALDI-TOF) species identities of the Neisseria isolates. The combination of three well-established genome-based taxonomic tools (in silico DNA-DNA Hybridization, Ortho Average Nucleotide identity, and pangenomic studies) proved to be relatively the best identification approach. Notably, we also discovered that some Neisseria strains that are deposited in databases contain many taxonomical errors. The latter is very important and must be addressed to prevent misdiagnosis and missing emerging etiologies. We also highlight the need for robust cut-offs to delineate the species using genomic tools.}, } @article {pmid36362207, year = {2022}, author = {Hameed, A and Poznanski, P and Nadolska-Orczyk, A and Orczyk, W}, title = {Graph Pangenomes Track Genetic Variants for Crop Improvement.}, journal = {International journal of molecular sciences}, volume = {23}, number = {21}, pages = {}, pmid = {36362207}, issn = {1422-0067}, support = {2019/35/B/NZ9/00323//National Science Center/ ; }, mesh = {Humans ; *Genome-Wide Association Study ; *Quantitative Trait Loci ; Polymorphism, Single Nucleotide ; Plant Breeding ; Multifactorial Inheritance ; Crops, Agricultural/genetics ; }, abstract = {Global climate change and the urgency to transform crops require an exhaustive genetic evaluation. The large polyploid genomes of food crops, such as cereals, make it difficult to identify candidate genes with confirmed hereditary. Although genome-wide association studies (GWAS) have been proficient in identifying genetic variants that are associated with complex traits, the resolution of acquired heritability faces several significant bottlenecks such as incomplete detection of structural variants (SV), genetic heterogeneity, and/or locus heterogeneity. Consequently, a biased estimate is generated with respect to agronomically complex traits. 
The graph pangenomes have resolved this missing heritability and provide significant details in terms of specific loci segregating among individuals and evolving to variations. The graph pangenome approach facilitates crop improvements through genome-linked fast breeding.}, } @article {pmid36358771, year = {2022}, author = {Cinque, A and Minnei, R and Floris, M and Trevisani, F}, title = {The Clinical and Molecular Features in the VHL Renal Cancers; Close or Distant Relatives with Sporadic Clear Cell Renal Cell Carcinoma?}, journal = {Cancers}, volume = {14}, number = {21}, pages = {}, pmid = {36358771}, issn = {2072-6694}, abstract = {Von Hippel-Lindau (VHL) disease is an autosomal dominant inherited cancer syndrome caused by germline mutations in the VHL tumor suppressor gene, characterized by the susceptibility to a wide array of benign and malign neoplasms, including clear-cell renal cell carcinoma. Moreover, VHL somatic inactivation is a crucial molecular event also in sporadic ccRCCs tumorigenesis. While systemic biomarkers in the VHL syndrome do not currently play a role in clinical practice, a new promising class of predictive biomarkers, microRNAs, has been increasingly studied. Lots of pan-genomic studies have deeply investigated the possible biological role of microRNAs in the development and progression of sporadic ccRCC; however, few studies have investigated the miRNA profile in VHL patients. 
Our review summarizes all the new insights related to clinical and molecular features in VHL renal cancers, with a particular focus on the overlap with sporadic ccRCC.}, } @article {pmid36358219, year = {2022}, author = {Moglad, E and Alanazi, N and Altayb, HN}, title = {Genomic Study of Chromosomally and Plasmid-Mediated Multidrug Resistance and Virulence Determinants in Klebsiella Pneumoniae Isolates Obtained from a Tertiary Hospital in Al-Kharj, KSA.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {11}, number = {11}, pages = {}, pmid = {36358219}, issn = {2079-6382}, support = {IF-PSAU-2021/03/17707//Prince Sattam Bin Abdulaziz University/ ; }, abstract = {Klebsiella pneumoniae is an emergent pathogen causing respiratory tract, bloodstream, and urinary tract infections in humans. This study defines the genomic sequence data, genotypic and phenotypic characterization of K. pneumoniae clinically isolated from Al-Kharj, KSA. Whole-genome analysis of four K. pneumoniae strains was performed, including de novo assembly, functional annotation, whole-genome-phylogenetic analysis, antibiotic-resistant gene identification, prophage regions, virulent factor, and pan-genome analysis. The results showed that K6 and K7 strains were MDR and ESBL producers, K16 was an ESBL producer, and K8 was sensitive to all tested drugs except ampicillin. K6 and K7 were identified with sequence type (ST) 23, while K16 and K8 were identified with STs 353 and 592, respectively. K6 and K7 were identified with the K1 (wzi1 genotype) capsule and O1 serotype, while K8 was identified with the K57 (wzi206 genotype) capsule and O3b. K6 isolates harbored 10 antimicrobial resistance genes (ARGs) associated with four different plasmids; the chloramphenicol acetyltransferase (catB3), blaOXA-1 and aac(6')-Ib-cr genes were detected in plasmid pB-8922_OXA-48. 
K6 and K7 also carried a similar gene cassette in plasmid pC1K6P0122-2; the gene cassettes were the trimethoprim-resistant gene (dfrA14), integron integrase (IntI1), insertion sequence (IS1), transposase protein, and replication initiation protein (RepE). Two hypervirulent plasmids were reported in isolates K6 and K7 that carried synthesis genes (iucA, iucB, iucC, iucD, and iutA) and iron siderophore genes (iroB, iroC, iroD, and iroN). The presence of these plasmids in high-risk clones suggests their dissemination in our region, which represents a serious health problem.}, } @article {pmid36353749, year = {2022}, author = {Oren, E and Dafna, A and Tzuri, G and Halperin, I and Isaacson, T and Elkabetz, M and Meir, A and Saar, U and Ohali, S and La, T and Romay, C and Tadmor, Y and Schaffer, AA and Buckler, ES and Cohen, R and Burger, J and Gur, A}, title = {Pan-genome and multi-parental framework for high-resolution trait dissection in melon (Cucumis melo).}, journal = {The Plant journal : for cell and molecular biology}, volume = {112}, number = {6}, pages = {1525-1542}, pmid = {36353749}, issn = {1365-313X}, mesh = {*Cucumis melo/genetics ; *Cucurbitaceae/genetics ; Plant Breeding ; Chromosome Mapping ; Phenotype ; }, abstract = {Linking genotype with phenotype is a fundamental goal in biology and requires robust data for both. Recent advances in plant-genome sequencing have expedited comparisons among multiple-related individuals. The abundance of structural genomic within-species variation that has been discovered indicates that a single reference genome cannot represent the complete sequence diversity of a species, leading to the expansion of the pan-genome concept. For high-resolution forward genetics, this unprecedented access to genomic variation should be paralleled and integrated with phenotypic characterization of genetic diversity. 
We developed a multi-parental framework for trait dissection in melon (Cucumis melo), leveraging a novel pan-genome constructed for this highly variable cucurbit crop. A core subset of 25 diverse founders (MelonCore25), consisting of 24 accessions from the two widely cultivated subspecies of C. melo, encompassing 12 horticultural groups, and 1 feral accession was sequenced using a combination of short- and long-read technologies, and their genomes were assembled de novo. The construction of this melon pan-genome exposed substantial variation in genome size and structure, including detection of ~300 000 structural variants and ~9 million SNPs. A half-diallel derived set of 300 F2 populations, representing all possible MelonCore25 parental combinations, was constructed as a framework for trait dissection through integration with the pan-genome. We demonstrate the potential of this unified framework for genetic analysis of various melon traits, including rind color intensity and pattern, fruit sugar content, and resistance to fungal diseases. 
We anticipate that utilization of this integrated resource will enhance genetic dissection of important traits and accelerate melon breeding.}, } @article {pmid36350178, year = {2022}, author = {Dong, X and Zhu, M and Li, Y and Huang, D and Wang, L and Yan, C and Zhang, L and Dong, F and Lu, J and Lin, X and Li, K and Bao, Q and Cong, C and Pan, W}, title = {Whole-Genome Sequencing-Based Species Classification, Multilocus Sequence Typing, and Antimicrobial Resistance Mechanism Analysis of the Enterobacter cloacae Complex in Southern China.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0216022}, pmid = {36350178}, issn = {2165-0497}, mesh = {Humans ; Multilocus Sequence Typing ; *Anti-Bacterial Agents/pharmacology ; Enterobacter cloacae ; Retrospective Studies ; Drug Resistance, Bacterial/genetics ; *Enterobacteriaceae Infections/epidemiology/microbiology ; beta-Lactamases/genetics ; Bacterial Proteins/genetics ; China/epidemiology ; Microbial Sensitivity Tests ; Plasmids ; }, abstract = {Members of the Enterobacter cloacae complex (ECC) are important opportunistic nosocomial pathogens that are associated with a great variety of infections. Due to limited data on the genome-based classification of species and investigation of resistance mechanisms, in this work, we collected 172 clinical ECC isolates between 2019 and 2020 from three hospitals in Zhejiang, China and performed a retrospective whole-genome sequencing to analyze their population structure and drug resistance mechanisms. Of the 172 ECC isolates, 160 belonged to 9 classified species, and 12 belonged to unclassified species based on ANI analysis. Most isolates belonged to E. hormaechei (45.14%) followed by E. kobei (13.71%), which contained 126 STs, including 62 novel STs, as determined by multilocus sequence typing (MLST) analysis. 
Pan-genome analysis of the two ECC species showed that they have an "open" tendency, which indicated that their Pan-genome increased considerably with the addition of new genomes. A total of 80 resistance genes associated with 11 antimicrobial agent categories were identified in the genomes of all the isolates. The most prevailing resistance genes (12/29, 41.38%) were related to β-lactams followed by aminoglycosides. A total of 247 β-lactamase genes were identified, of which the blaACT genes were the most dominant (145/247, 58.70%), followed by the blaTEM genes (21/247, 8.50%). The inherent ACT type β-lactamase genes differed among different species. blaACT-2 and blaACT-3 were only present in E. asburiae, while blaACT-9, blaACT-12, and blaACT-6 exclusively appeared in E. kobei, E. ludwigii, and E. mori. Among the six carbapenemase-encoding genes (blaNDM-1, blaNDM-5, blaIMP-1, blaIMP-4, blaIMP-26, and blaKPC-2) identified, two (blaNDM-1 and blaIMP-1) were identified in an ST78 E. hormaechei isolate. Comparative genomic analysis of the carbapenemase gene-related sequences was performed, and the corresponding genetic structure of these resistance genes was analyzed. Genome-wide molecular characterization of the ECC population and resistance mechanism would offer valuable insights into the effective management of ECC infection in clinical settings. IMPORTANCE The presence and emergence of multiple species/subspecies of ECC have led to diversity and complications at the taxonomic level, which impedes our further understanding of the epidemiology and clinical significance of species/subspecies of ECC. Accurate identification of ECC species is extremely important. Also, it is of great importance to study the carbapenem-resistant genes in ECC and to further understand the mechanism of horizontal transfer of the resistance genes by analyzing the surrounding environment around the genes. 
The occurrence of ECC carrying two MBL genes also indicates that the selection pressure of bacteria is further increased, suggesting that we need to pay special attention to the emergence of such bacteria in the clinic.}, } @article {pmid36344558, year = {2022}, author = {Otani, H and Udwary, DW and Mouncey, NJ}, title = {Comparative and pangenomic analysis of the genus Streptomyces.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {18909}, pmid = {36344558}, issn = {2045-2322}, support = {DE-AC02-05CH11231//U.S. Department of Energy/ ; }, mesh = {*Streptomyces/metabolism ; Biosynthetic Pathways/genetics ; Secondary Metabolism/genetics ; Sequence Analysis, DNA ; *Polyketides/metabolism ; Multigene Family ; }, abstract = {Streptomycetes are highly metabolically gifted bacteria with the abilities to produce bioproducts that have profound economic and societal importance. These bioproducts are produced by metabolic pathways including those for the biosynthesis of secondary metabolites and catabolism of plant biomass constituents. Advancements in genome sequencing technologies have revealed a wealth of untapped metabolic potential from Streptomyces genomes. Here, we report the largest Streptomyces pangenome generated by using 205 complete genomes. Metabolic potentials of the pangenome and individual genomes were analyzed, revealing degrees of conservation of individual metabolic pathways and strains potentially suitable for metabolic engineering. Of them, Streptomyces bingchenggensis was identified as a potent degrader of plant biomass. Polyketide, non-ribosomal peptide, and gamma-butyrolactone biosynthetic enzymes are primarily strain specific while ectoine and some terpene biosynthetic pathways are highly conserved. A large number of transcription factors associated with secondary metabolism are strain-specific while those controlling basic biological processes are highly conserved. 
Although the majority of genes involved in morphological development are highly conserved, there are strain-specific varieties which may contribute to fine tuning the timing of cellular differentiation. Overall, these results provide insights into the metabolic potential, regulation and physiology of streptomycetes, which will facilitate further exploitation of these important bacteria.}, } @article {pmid36340844, year = {2022}, author = {Lynch, T and Nandi, T and Jayaprakash, T and Gregson, D and Church, DL}, title = {Genomic analysis of group A Streptococcus isolated during a correctional facility outbreak of MRSA in 2004.}, journal = {Journal of the Association of Medical Microbiology and Infectious Disease Canada = Journal officiel de l'Association pour la microbiologie medicale et l'infectiologie Canada}, volume = {7}, number = {1}, pages = {23-35}, pmid = {36340844}, issn = {2371-0888}, abstract = {BACKGROUND: In 2004-2005, an outbreak of impetigo occurred at a correctional facility during a sentinel outbreak of methicillin-resistant Staphylococcus aureus (MRSA) in Alberta, Canada. Next-generation sequencing (NGS) was used to characterize the group A Streptococcus (GAS) isolates and evaluate whether genomic biomarkers could distinguish between those recovered alone and those co-isolated with S. aureus.

METHODS: Superficial wound swabs collected from all adults with impetigo during this outbreak were cultured using standard methods. NGS was used to characterize and compare all of the GAS and S. aureus genomes.

RESULTS: Fifty-three adults were culture positive for GAS, with a subset of specimens also positive for MRSA (n = 5) or methicillin-sensitive S. aureus (n = 3). Seventeen additional MRSA isolates from this facility from the same time frame (no GAS co-isolates) were also included. All 78 bacterial genomes were analyzed for the presence of known virulence factors, plasmids, and antimicrobial resistance (AMR) genes. Among the GAS isolates were 12 emm types, the most common being 41.2 (n = 27; 51%). GAS genomes were phylogenetically compared with local and public datasets of invasive and non-invasive isolates. GAS genomes had diverse profiles for virulence factors, plasmids, and AMR genes. Pangenome analysis did not identify horizontally transferred genes in the co-infection versus single infections.

CONCLUSIONS: GAS recovered from invasive and non-invasive sources were not genetically distinguishable. Virulence factors, plasmids, and AMR profiles grouped by emm type, and no genetic changes were identified that predict co-infection or horizontal gene transfer between GAS and S. aureus.}, } @article {pmid36336469, year = {2022}, author = {Weigert, S and Perez-Garcia, P and Gisdon, FJ and Gagsteiger, A and Schweinshaut, K and Ullmann, GM and Chow, J and Streit, WR and Höcker, B}, title = {Investigation of the halophilic PET hydrolase PET6 from Vibrio gazogenes.}, journal = {Protein science : a publication of the Protein Society}, volume = {31}, number = {12}, pages = {e4500}, pmid = {36336469}, issn = {1469-896X}, mesh = {Humans ; *Hydrolases/chemistry ; Plastics ; Microplastics ; *Vibrio/genetics ; }, abstract = {The handling of plastic waste and the associated ubiquitous occurrence of microplastic poses one of the biggest challenges of our time. Recent investigations of plastic degrading enzymes have opened new prospects for biological microplastic decomposition as well as recycling applications. For polyethylene terephthalate, in particular, several natural and engineered enzymes are known to have such promising properties. From a previous study that identified new PETase candidates by homology search, we chose the candidate PET6 from the globally distributed, halophilic organism Vibrio gazogenes for further investigation. By mapping the occurrence of Vibrios containing PET6 homologs we demonstrated their ubiquitous prevalence in the pangenome of several Vibrio strains. The biochemical characterization of PET6 showed that PET6 has a comparatively lower activity than other enzymes but also revealed a superior turnover at very high salt concentrations. The crystal structure of PET6 provides structural insights into this adaptation to saline environments. 
By grafting only a few beneficial mutations from other PET degrading enzymes onto PET6, we increased the activity up to three-fold, demonstrating the evolutionary potential of the enzyme. MD simulations of the variant helped rationalize the mutational effects of those mutants and elucidate the interaction of the enzyme with a PET substrate. With tremendous amounts of plastic waste in the Ocean and the prevalence of Vibrio gazogenes in marine biofilms and estuarine marshes, our findings suggest that Vibrio and the PET6 enzyme are worthy subjects to study the PET degradation in marine environments.}, } @article {pmid36333324, year = {2022}, author = {Luo, X and Kang, X and Schönhuth, A}, title = {VeChat: correcting errors in long reads using variation graphs.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {6657}, pmid = {36333324}, issn = {2041-1723}, mesh = {Sequence Analysis, DNA/methods ; *Algorithms ; *Nanopores ; Haplotypes ; Data Analysis ; High-Throughput Nucleotide Sequencing ; Software ; }, abstract = {Error correction is the canonical first step in long-read sequencing data analysis. Current self-correction methods, however, are affected by consensus sequence induced biases that mask true variants in haplotypes of lower frequency showing in mixed samples. Unlike consensus sequence templates, graph-based reference systems are not affected by such biases, so do not mistakenly mask true variants as errors. We present VeChat, as an approach to implement this idea: VeChat is based on variation graphs, as a popular type of data structure for pangenome reference systems. Extensive benchmarking experiments demonstrate that long reads corrected by VeChat contain 4 to 15 (Pacific Biosciences) and 1 to 10 times (Oxford Nanopore Technologies) less errors than when being corrected by state of the art approaches. Further, using VeChat prior to long-read assembly significantly improves the haplotype awareness of the assemblies. 
VeChat is an easy-to-use open-source tool and publicly available at https://github.com/HaploKit/vechat .}, } @article {pmid36330071, year = {2022}, author = {Alsowayeh, N and Albutti, A}, title = {Designing a novel chimeric multi-epitope vaccine against Burkholderia pseudomallei, a causative agent of melioidosis.}, journal = {Frontiers in medicine}, volume = {9}, number = {}, pages = {945938}, pmid = {36330071}, issn = {2296-858X}, abstract = {Burkholderia pseudomallei, a gram-negative soil-dwelling bacterium, is primarily considered a causative agent of melioidosis infection in both animals and humans. Despite the severity of the disease, there is currently no licensed vaccine on the market. The development of an effective vaccine against B. pseudomallei could help prevent the spread of infection. The purpose of this study was to develop a multi-epitope-based vaccine against B. pseudomallei using advanced bacterial pan-genome analysis. A total of four proteins were prioritized for epitope prediction by using multiple subtractive proteomics filters. Following that, a multi-epitopes based chimeric vaccine construct was modeled and joined with an adjuvant to improve the potency of the designed vaccine construct. The structure of the construct was predicted and analyzed for flexibility. A population coverage analysis was performed to evaluate the broad-spectrum applicability of B. pseudomallei. The computed combined world population coverage was 99.74%. Molecular docking analysis was applied further to evaluate the binding efficacy of the designed vaccine construct with the human toll-like receptors-5 (TLR-5). Furthermore, the dynamic behavior and stability of the docked complexes were investigated using molecular dynamics simulation, and the binding free energy determined for Vaccine-TLR-5 was delta total -168.3588. The docking result revealed that the vaccine construct may elicit a suitable immunological response within the host body. 
Hence, we believe that the designed in-silico vaccine could be helpful for experimentalists in the formulation of a highly effective vaccine for B. pseudomallei.}, } @article {pmid36326919, year = {2022}, author = {Amulyasai, B and Anusha, R and Sasikala, C and Ramana, CV}, title = {Phylogenomic analysis of a metagenome-assembled genome indicates a new taxon of an anoxygenic phototroph bacterium in the family Chromatiaceae and the proposal of "Candidatus Thioaporhodococcus" gen. nov.}, journal = {Archives of microbiology}, volume = {204}, number = {12}, pages = {688}, pmid = {36326919}, issn = {1432-072X}, mesh = {Phylogeny ; *Metagenome ; RNA, Ribosomal, 16S/genetics ; DNA, Bacterial/genetics ; Sequence Analysis, DNA ; *Chromatiaceae ; Bacterial Typing Techniques ; Fatty Acids/analysis ; }, abstract = {In this study, three metagenome-assembled genomes of a sediment sample were constructed. A Bin1 (JB001) genome was identified as a photo-litho-auto/heterotroph (purple sulfur bacteria) bacterium with the ability to fix nitrogen, tolerate salt, and to produce bacteriochlorophyll a. It has a genome length of 4.1 Mb and a G + C content of 64.9%. Phylogenetic studies based on concatenated 92 core genes and photosynthetic genes (pufLM and bchY) showed that Bin JB001 is related to Thiococcus pfennigii, "Thioflavicoccus mobilis" and to the Lamprocystis purpurea lineage. Bin JB001 and its closely related members were subjected to the genome-based study of phenotypic and phylogenomic analysis. Genomic similarity indices (dDDH and ANI) showed that Bin JB001 could be defined as a novel species. The average amino acid identity (AAI) and percentage of conserved proteins (POCP) values were below 60 and 50%, respectively. The pan-genome analysis indicated that the pan-genome was an open type wherein Bin JB001 had 855 core genes. 
This study shows that the binned genome, Bin JB001, could represent a novel species of a new genus under the family Chromatiaceae, for which the name "Candidatus Thioaporhodococcus sediminis" gen. nov. sp. nov. is proposed.}, } @article {pmid36326658, year = {2022}, author = {Kittiwan, N and Calland, JK and Mourkas, E and Hitchings, MD and Murray, S and Tadee, P and Tadee, P and Duangsonk, K and Meric, G and Sheppard, SK and Patchanee, P and Pascoe, B}, title = {Genetic diversity and variation in antimicrobial-resistance determinants of non-serotype 2 Streptococcus suis isolates from healthy pigs.}, journal = {Microbial genomics}, volume = {8}, number = {11}, pages = {}, pmid = {36326658}, issn = {2057-5858}, support = {MR/T030062/1//Medical Research Council/United Kingdom ; MR/L015080/1//Medical Research Council/United Kingdom ; }, mesh = {Swine ; Animals ; *Streptococcus suis/genetics ; *Streptococcal Infections/veterinary/genetics ; Anti-Bacterial Agents/pharmacology ; Genetic Variation ; }, abstract = {Streptococcus suis is a leading cause of bacterial meningitis in South-East Asia, with frequent zoonotic transfer to humans associated with close contact with pigs. A small number of invasive lineages are responsible for endemic infection in the swine industry, causing considerable global economic losses. A lack of surveillance and a rising trend in clinical treatment failure has raised concerns of growing antimicrobial resistance (AMR) among invasive S. suis. Gene flow between healthy and disease isolates is poorly understood and, in this study, we sample and sequence a collection of isolates predominantly from healthy pigs in Chiang Mai province, Northern Thailand. Pangenome characterization identified extensive genetic diversity and frequent AMR carriage in isolates from healthy pigs. Multiple AMR genes were identified, conferring resistance to aminoglycosides, lincosamides, tetracycline and macrolides. 
All isolates were non-susceptible to three or more different antimicrobial classes, and 75 % of non-serotype 2 isolates were non-susceptible to six or more classes (compared to 37.5 % of serotype 2 isolates). AMR genes were found on integrative and conjugative elements previously observed in other species, suggesting a mobile gene pool that can be accessed by invasive disease isolates. This article contains data hosted by Microreact.}, } @article {pmid36324059, year = {2023}, author = {Amas, JC and Thomas, WJW and Zhang, Y and Edwards, D and Batley, J}, title = {Key Advances in the New Era of Genomics-Assisted Disease Resistance Improvement of Brassica Species.}, journal = {Phytopathology}, volume = {113}, number = {5}, pages = {771-785}, doi = {10.1094/PHYTO-08-22-0289-FI}, pmid = {36324059}, issn = {0031-949X}, mesh = {*Brassica/genetics ; Disease Resistance/genetics ; Genome, Plant/genetics ; Plant Diseases/genetics ; Plant Breeding ; Genomics ; }, abstract = {Disease resistance improvement remains a major focus in breeding programs as diseases continue to devastate Brassica production systems due to intensive cultivation and climate change. Genomics has paved the way to understand the complex genomes of Brassicas, which has been pivotal in the dissection of the genetic underpinnings of agronomic traits driving the development of superior cultivars. The new era of genomics-assisted disease resistance breeding has been marked by the development of high-quality genome references, accelerating the identification of disease resistance genes controlling both qualitative (major) gene and quantitative resistance. This facilitates the development of molecular markers for marker assisted selection and enables genome editing approaches for targeted gene manipulation to enhance the genetic value of disease resistance traits. 
This review summarizes the key advances in the development of genomic resources for Brassica species, focusing on improved genome references, based on long-read sequencing technologies and pangenome assemblies. This is further supported by the advances in pathogen genomics, which have resulted in the discovery of pathogenicity factors, complementing the mining of disease resistance genes in the host. Recognizing the co-evolutionary arms race between the host and pathogen, it is critical to identify novel resistance genes using crop wild relatives and synthetic cultivars or through genetic manipulation via genome-editing to sustain the development of superior cultivars. Integrating these key advances with new breeding techniques and improved phenotyping using advanced data analysis platforms will make disease resistance improvement in Brassica species more efficient and responsive to current and future demands.}, } @article {pmid36322504, year = {2022}, author = {Maynard-Smith, L and Derrick, JP and Borrow, R and Lucidarme, J and Maiden, MCJ and Heyderman, RS and Harrison, OB}, title = {Genome-Wide Association Studies Identify an Association of Transferrin Binding Protein B Variation and Invasive Serogroup Y Meningococcal Disease in Older Adults.}, journal = {The Journal of infectious diseases}, volume = {226}, number = {12}, pages = {2204-2214}, pmid = {36322504}, issn = {1537-6613}, support = {ACF-2015-18-029/DH_/Department of Health/United Kingdom ; 218205/Z/19/Z/WT_/Wellcome Trust/United Kingdom ; PR-OD-101720007/DH_/Department of Health/United Kingdom ; 214374/Z/18/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Humans ; Aged ; Neisseria meningitidis, Serogroup Y/genetics ; Transferrin-Binding Protein B/genetics ; Genome-Wide Association Study ; Serogroup ; Phylogeny ; *Meningococcal Infections/genetics/microbiology ; *Neisseria meningitidis ; Iron ; *Meningococcal Vaccines ; }, abstract = {BACKGROUND: Neisseria meningitidis serogroup Y, especially ST-23 
clonal complex (Y:cc23), represents a larger proportion of invasive meningococcal disease (IMD) in older adults compared to younger individuals. This study explored the meningococcal genetic variation underlying this association.

METHODS: Maximum-likelihood phylogenies and the pangenome were analyzed using whole-genome sequence (WGS) data from 200 Y:cc23 isolates in the Neisseria PubMLST database. Genome-wide association studies (GWAS) were performed on WGS data from 250 Y:cc23 isolates from individuals with IMD aged ≥65 years versus <65 years.

RESULTS: Y:cc23 meningococcal variants did not cluster by age group or disease phenotype in phylogenetic analyses. Pangenome comparisons found no differences in presence or absence of genes in IMD isolates from the different age groups. GWAS identified differences in nucleotide polymorphisms within the transferrin-binding protein B (tbpB) gene in isolates from individuals ≥65 years of age. TbpB structure modelling suggests these may impact binding of human transferrin.

CONCLUSIONS: These data suggest differential iron scavenging capacity amongst Y:cc23 meningococci isolated from older compared to younger patients. Iron acquisition is essential for many bacterial pathogens including the meningococcus. These polymorphisms may facilitate colonization, thereby increasing the risk of disease in vulnerable older people with altered nasopharyngeal microbiomes and nutritional status.}, } @article {pmid36318249, year = {2023}, author = {Martin, FJ and Amode, MR and Aneja, A and Austine-Orimoloye, O and Azov, AG and Barnes, I and Becker, A and Bennett, R and Berry, A and Bhai, J and Bhurji, SK and Bignell, A and Boddu, S and Branco Lins, PR and Brooks, L and Ramaraju, SB and Charkhchi, M and Cockburn, A and Da Rin Fiorretto, L and Davidson, C and Dodiya, K and Donaldson, S and El Houdaigui, B and El Naboulsi, T and Fatima, R and Giron, CG and Genez, T and Ghattaoraya, GS and Martinez, JG and Guijarro, C and Hardy, M and Hollis, Z and Hourlier, T and Hunt, T and Kay, M and Kaykala, V and Le, T and Lemos, D and Marques-Coelho, D and Marugán, JC and Merino, GA and Mirabueno, LP and Mushtaq, A and Hossain, SN and Ogeh, DN and Sakthivel, MP and Parker, A and Perry, M and Piližota, I and Prosovetskaia, I and Pérez-Silva, JG and Salam, AIA and Saraiva-Agostinho, N and Schuilenburg, H and Sheppard, D and Sinha, S and Sipos, B and Stark, W and Steed, E and Sukumaran, R and Sumathipala, D and Suner, MM and Surapaneni, L and Sutinen, K and Szpak, M and Tricomi, FF and Urbina-Gómez, D and Veidenberg, A and Walsh, TA and Walts, B and Wass, E and Willhoft, N and Allen, J and Alvarez-Jarreta, J and Chakiachvili, M and Flint, B and Giorgetti, S and Haggerty, L and Ilsley, GR and Loveland, JE and Moore, B and Mudge, JM and Tate, J and Thybert, D and Trevanion, SJ and Winterbottom, A and Frankish, A and Hunt, SE and Ruffier, M and Cunningham, F and Dyer, S and Finn, RD and Howe, KL and Harrison, PW and Yates, AD and Flicek, P}, title = {Ensembl 2023.}, 
journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D933-D941}, pmid = {36318249}, issn = {1362-4962}, support = {U41 HG010972/HG/NHGRI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; U24 HG007234/HG/NHGRI NIH HHS/United States ; R01 HG010485/HG/NHGRI NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Humans ; *Software ; *Databases, Genetic ; Molecular Sequence Annotation ; Genomics ; Genome ; }, abstract = {Ensembl (https://www.ensembl.org) has produced high-quality genomic resources for vertebrates and model organisms for more than twenty years. During that time, our resources, services and tools have continually evolved in line with both the publicly available genome data and the downstream research and applications that utilise the Ensembl platform. In recent years we have witnessed a dramatic shift in the genomic landscape. There has been a large increase in the number of high-quality reference genomes through global biodiversity initiatives. In parallel, there have been major advances towards pangenome representations of higher species, where many alternative genome assemblies representing different breeds, cultivars, strains and haplotypes are now available. In order to support these efforts and accelerate downstream research, it is our goal at Ensembl to create high-quality annotations, tools and services for species across the tree of life. 
Here, we report our resources for popular reference genomes, the dramatic growth of our annotations (including haplotypes from the first human pangenome graphs), updates to the Ensembl Variant Effect Predictor (VEP), interactive protein structure predictions from AlphaFold DB, and the beta release of our new website.}, } @article {pmid36318042, year = {2022}, author = {Liu, N and Liu, D and Li, K and Hu, S and He, Z}, title = {Pan-Genome Analysis of Staphylococcus aureus Reveals Key Factors Influencing Genomic Plasticity.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0311722}, pmid = {36318042}, issn = {2165-0497}, mesh = {Humans ; *Staphylococcus aureus ; Multilocus Sequence Typing/methods ; Phylogeny ; *Staphylococcal Infections/microbiology ; Genome, Bacterial ; Genomics ; }, abstract = {The massive quantities of bacterial genomic data being generated have facilitated in-depth analyses of bacteria for pan-genomic studies. However, the pan-genome compositions of one species differed significantly between different studies, so we used Staphylococcus aureus as a model organism to explore the influences driving bacterial pan-genome composition. We selected a series of diverse strains for pan-genomic analysis to explore the pan-genomic composition of S. aureus at the species level and the actual contribution of influencing factors (sequence type [ST], source of isolation, country of isolation, and date of collection) to pan-genome composition. We found that the distribution of core genes in bacterial populations restrained under different conditions differed significantly and showed "local core gene regions" in the same ST. Therefore, we propose that ST may be a key factor driving the dynamic distribution of bacterial genomes and that phylogenetic analyses using whole-genome alignment are no longer appropriate in populations containing multiple ST strains. 
Pan-genomic analysis showed that some of the housekeeping genes of multilocus sequence typing (MLST) are carried at less than 60% in S. aureus strains. Consequently, we propose a new set of marker genes for the classification of S. aureus, which provides a reference for finding a new set of housekeeping genes to apply to MLST. In this study, we explored the role of driving factors influencing pan-genome composition, providing new insights into the study of bacterial pan-genomes. IMPORTANCE We sought to explore the impact of driving factors influencing pan-genome composition using Staphylococcus aureus as a model organism to provide new insights for the study of bacterial pan-genomes. We believe that the sequence type (ST) of the strains under consideration plays a significant role in the dynamic distribution of bacterial genes. Our findings indicate that there are a certain number of essential genes in Staphylococcus aureus; however, the number of core genes is not as high as previously thought. The new classification method proposed herein suggests that a new set of housekeeping genes more suitable for Staphylococcus aureus must be identified to improve the current classification status of this species.}, } @article {pmid36317888, year = {2022}, author = {Yuan, Y and Seif, Y and Rychel, K and Yoo, R and Chauhan, S and Poudel, S and Al-Bulushi, T and Palsson, BO and Sastry, AV}, title = {Pan-Genome Analysis of Transcriptional Regulation in Six Salmonella enterica Serovar Typhimurium Strains Reveals Their Different Regulatory Structures.}, journal = {mSystems}, volume = {7}, number = {6}, pages = {e0046722}, pmid = {36317888}, issn = {2379-5077}, mesh = {Humans ; *Salmonella enterica/genetics ; Serogroup ; Salmonella typhimurium/genetics ; Gene Expression Regulation ; Gene Expression Profiling ; }, abstract = {Establishing transcriptional regulatory networks (TRNs) in bacteria has been limited to well-characterized model strains. 
Using machine learning methods, we established the transcriptional regulatory networks of six Salmonella enterica serovar Typhimurium strains from their transcriptomes. By decomposing a compendium of RNA sequencing (RNA-seq) data with independent component analysis, we obtained 400 independently modulated sets of genes, called iModulons. We (i) performed pan-genome analysis of the phylogroup structure of S. Typhimurium and analyzed the iModulons against this background, (ii) revealed different genetic signatures in pathogenicity islands that explained phenotypes, (iii) discovered three transport iModulons linked to antibiotic resistance, (iv) described concerted responses to cationic antimicrobial peptides, and (v) uncovered new regulons. Thus, by combining pan-genome and transcriptomic analytics, we revealed variations in TRNs across six strains of serovar Typhimurium. IMPORTANCE Salmonella enterica serovar Typhimurium is a pathogen involved in human nontyphoidal infections. Treating S. Typhimurium infections is difficult due to the species's dynamic adaptation to its environment, which is dictated by a complex transcriptional regulatory network (TRN) that is different across strains. In this study, we describe the use of independent component analysis to characterize the differential TRNs across the S. Typhimurium pan-genome using a compendium of high-quality RNA-seq data. This approach provided unprecedented insights into the differences between regulation of key cellular functions and pathogenicity in the different strains. 
The study provides an impetus to initiate a large-scale effort to reveal the TRN differences between the major phylogroups of the pathogenic bacteria, which could fundamentally impact personalizing treatments of bacterial pathogens.}, } @article {pmid36314968, year = {2022}, author = {Hur, JI and Kim, J and Ryu, S and Jeon, B}, title = {Phylogenetic Association and Genetic Factors in Cold Stress Tolerance in Campylobacter jejuni.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0268122}, pmid = {36314968}, issn = {2165-0497}, mesh = {Animals ; Humans ; *Campylobacter jejuni/genetics ; Phylogeny ; Multilocus Sequence Typing ; Cold-Shock Response/genetics ; Cold Temperature ; Chickens ; *Campylobacter Infections ; }, abstract = {Campylobacter jejuni is a major foodborne pathogen transmitted to humans primarily via contaminated poultry meat. Since poultry meat is generally processed, distributed, and stored in the cold chain, the survival of C. jejuni at refrigeration temperatures crucially affects human exposure to C. jejuni. Here, we investigated genetic factors associated with cold stress tolerance in C. jejuni. Seventy-nine C. jejuni strains isolated from retail raw chicken exhibited different survival levels at 4°C for 21 days. Multilocus sequence typing (MLST) clonal complex 21 (CC-21) and CC-443 were dominant among cold stress-tolerant strains, whereas CC-45 was common among cold stress-sensitive strains. Genome-wide average nucleotide identity (ANI) analysis identified a phylogenetic cluster associated with cold stress tolerance. Moreover, a pangenome analysis revealed 58 genes distinctively present in the cold stress-tolerant phylogenetic cluster. Among these 58 genes, cfrA, encoding the ferric enterobactin receptor involved in ion transport and metabolism, was selected for further analysis. 
Remarkably, the viability of a ΔcfrA mutant at 4°C was significantly decreased, while the levels of total reactive oxygen species and intracellular iron exceeded those of the wild type. Additionally, a knockout mutation of cfrA also significantly decreased the viability of three cold stress-tolerant isolates at 4°C, confirming the role of cfrA in cold stress tolerance. The results of this study demonstrate that unique phylogenetic clusters of C. jejuni associated with cold stress tolerance exist and that cfrA is a genetic factor contributing to cold stress tolerance in C. jejuni. IMPORTANCE The tolerance of foodborne pathogens to environmental stresses significantly affects food safety. Several studies have demonstrated that C. jejuni survives extended exposures to low temperatures, but the mechanisms of cold stress tolerance are not fully understood. Here, we demonstrate that C. jejuni strains in certain phylogenetic groups exhibit increased tolerance to cold stress. Notably, cfrA is present in the phylogenetic cluster associated with cold stress tolerance and plays a role in the survival of C. jejuni at low temperatures by alleviating oxidative stress. This is the first study to discover phylogenetic associations involving cold stress tolerance and to identify genetic elements conferring cold stress tolerance to C. jejuni.}, } @article {pmid36307757, year = {2022}, author = {Chen, Y and Miao, Y and Bai, W and Lin, K and Pang, E}, title = {Characteristics and potential functional effects of long insertions in Asian butternuts.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {732}, pmid = {36307757}, issn = {1471-2164}, support = {31571361//the National Natural Science Foundation of China/ ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Genome ; *Asian People ; }, abstract = {BACKGROUND: Structural variants (SVs) play important roles in adaptation evolution and species diversification. 
Especially, in plants, many phenotypes of response to the environment were found to be associated with SVs. Despite the prevalence and significance of SVs, long insertions remain poorly detected and studied in all but model species.

RESULTS: We used whole-genome resequencing of paired reads from 80 Asian butternuts to detect long insertions and further analyse their characteristics and potential functional effects. By combining mapping-based and de novo assembly-based methods, we obtained a multiple related species pangenome representing higher taxonomic groups. We obtained 89,312 distinct contigs totaling 147,773,999 base pairs (bp) of new sequences, of which 347 were putative long insertions placed in the reference genome. Most of the putative long insertions appeared in multiple species; in contrast, only 62 putative long insertions appeared in one species, which may be involved in the response to the environment. 65 putative long insertions fell into 61 distinct protein-coding genes involved in plant development, and 105 putative long insertions fell upstream of 106 distinct protein-coding genes involved in cellular respiration. 3,367 genes were annotated in 2,606 contigs. We propose PLAINS (https://github.com/CMB-BNU/PLAINS.git), a streamlined, comprehensive pipeline for the prediction and analysis of long insertions using whole-genome resequencing.

CONCLUSIONS: Our study lays down an important foundation for further whole-genome long insertion studies, allowing the investigation of their effects by experiments.}, } @article {pmid36303546, year = {2022}, author = {Zia, K and Rao, MJ and Sadaqat, M and Azeem, F and Fatima, K and Tahir Ul Qamar, M and Alshammari, A and Alharbi, M}, title = {Pangenome-wide analysis of cyclic nucleotide-gated channel (CNGC) gene family in citrus Spp. Revealed their intraspecies diversity and potential roles in abiotic stress tolerance.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {1034921}, pmid = {36303546}, issn = {1664-8021}, abstract = {Cyclic nucleotide-gated channels (CNGC) gene family has been found to be involved in physiological processes including signaling pathways, environmental stresses, plant growth, and development. This gene family of non-selective cation channels is known to regulate the uptake of calcium and is reported in several plant species. The pangenome-wide studies enable researchers to understand the genetic diversity comprehensively; as a comparative analysis of multiple plant species or member of a species at once helps to better understand the evolutionary relationships and diversity present among them. In the current study, pangenome-wide analysis of the CNGC gene family has been performed on five Citrus species. As a result, a total of 32 genes in Citrus sinensis, 27 genes in Citrus reticulata, 30 genes in Citrus grandis, 31 genes in Atalantia buxifolia, and 30 genes in Poncirus trifoliata were identified. In addition, two unique genes CNGC13 and CNGC14 were identified, which may have potential roles. All the identified CNGC genes were unevenly distributed on 9 chromosomes except P. trifoliata had genes distributed on 7 chromosomes and were classified into four major groups and two sub-groups namely I, II, III, IV-A, and IV-B. 
Cyclic nucleotide binding (CNB) motif, calmodulin-binding motif (CaMB), and motif for IQ-domain were conserved in Citrus Spp. Intron exon structures of citrus species were not exactly the same as the gene structures of Arabidopsis. The majority of cis-regulatory elements (CREs) were light responsive and others include growth, development, and stress-related indicating potential roles of the CNGC gene family in these functions. Both segmental and tandem duplication were involved in the expansion of the CNGC gene family in Citrus Spp. The miRNAs are involved in the response of CsCNGC genes towards drought stress along with having regulatory association in the expression of these genes. Protein-protein interaction (PPI) analysis also showed the interaction of CNGC proteins with other CNGCs which suggested their potential role in pathways regulating different biological processes. GO enrichment revealed that CNGC genes were involved in the transport of ions across membranes. Furthermore, tissue-specific expression patterns of leaves sample of C. sinensis were studied under drought stress. Out of 32 genes of C. sinensis 3 genes i.e., CsCNGC1.4, CsCNGC2.1, and CsCNGC4.2 were highly up-regulated, and only CsCNGC4.6 was highly down-regulated. The qRT-PCR analysis also showed that CNGC genes were highly expressed after treatment with drought stress, while gene expression was lower under controlled conditions. This work includes findings based on multiple genomes instead of one, therefore, this will provide more genomic information rather than single genome-based studies. 
These findings will serve as a basis for further functional insights into the CNGC gene family.}, } @article {pmid36303348, year = {2023}, author = {Zhang, J and Xu, J and Lei, H and Liang, H and Li, X and Li, B}, title = {The development of variation-based rifampicin resistance in Staphylococcus aureus deciphered through genomic and transcriptomic study.}, journal = {Journal of hazardous materials}, volume = {442}, number = {}, pages = {130112}, doi = {10.1016/j.jhazmat.2022.130112}, pmid = {36303348}, issn = {1873-3336}, mesh = {*Rifampin/pharmacology ; *Staphylococcus aureus/genetics ; Drug Resistance, Bacterial/genetics ; Transcriptome ; DNA-Directed RNA Polymerases/genetics/pharmacology ; Microbial Sensitivity Tests ; Anti-Bacterial Agents/pharmacology ; Mutation ; Genomics ; Bacterial Proteins/genetics ; }, abstract = {Rifampicin (RIF) resistance imposes a challenge on the antimicrobial treatment of pathogen infections. Figuring out the development mechanism of RIF resistance is critical to improving antimicrobial therapy strategy in clinics and biological treatment strategy of RIF polluted sewage in environmental engineering. The RIF resistance development of Staphylococcus aureus (S. aureus) with exposure to RIF at sub-inhibitory concentrations was comprehensively investigated via genomic and transcriptomic approaches in this study. RIF minimal inhibitory concentration (MIC) for S. aureus rapidly increased from 0.032 to 256 mg/L. Membrane permeability decrease, biofilm formation enhancement, and ROS production increase associated with RIF resistance were observed in RIF-induced strains. Through comparative genomic analysis, mutations in rpoB and rpoC were considered to be associated with RIF resistance in S. aureus mutants. Pan-genome-wide single-nucleotide variant analysis indicated that mutations at rpoB-1412, rpoB-1451, and rpoB-1457 were prevalent in 13849 public genomes of S. 
aureus, while mutations at rpoB-2256, and rpoC-3092 were first discovered in this study. The panorama of adaptative alteration of cellular physiological processes was observed via transcriptomic analysis. The oxidation pressure responses, metabolism, transporters, virulence factors, and multiple steps of DNA and RNA machinery were found to be perturbed by RIF in S. aureus.}, } @article {pmid36301610, year = {2022}, author = {Leigh, RJ and McKenna, C and McWade, R and Lynch, B and Walsh, F}, title = {Comparative genomics and pangenomics of vancomycin-resistant and susceptible Enterococcus faecium from Irish hospitals.}, journal = {Journal of medical microbiology}, volume = {71}, number = {10}, pages = {}, doi = {10.1099/jmm.0.001590}, pmid = {36301610}, issn = {1473-5644}, mesh = {Humans ; *Enterococcus faecium/genetics ; Vancomycin Resistance/genetics ; Vancomycin/pharmacology ; *Gram-Positive Bacterial Infections/epidemiology ; Hospitals ; Genomics ; Anti-Bacterial Agents/pharmacology ; *Vancomycin-Resistant Enterococci/genetics ; Bacterial Proteins/genetics ; }, abstract = {Introduction. Enterococcus faecium has emerged as an important nosocomial pathogen, which is increasingly difficult to treat due to the genetic acquisition of vancomycin resistance. Ireland has a recalcitrant vancomycin-resistant bloodstream infection rate compared to other developed countries. Hypothesis/Gap statement. Vancomycin resistance rates persist amongst E. faecium isolates from Irish hospitals. The evolutionary genomics governing these trends have not been fully elucidated. Methodology. A set of 28 vancomycin-resistant isolates was sequenced to construct a dataset alongside 61 other publicly available Irish genomes. This dataset was extensively analysed using in silico methodologies (comparative genomics, pangenomics, phylogenetics, genotypics and comparative functional analyses) to uncover distinct evolutionary, coevolutionary and clinically relevant population trends. Results. 
These results suggest that a stable (in terms of genome size, GC% and number of genes), yet genetically diverse population (in terms of gene content) of E. faecium persists in Ireland with acquired resistance arising via plasmid acquisition (vanA) or, to a lesser extent, chromosomal recombination (vanB). Population analysis revealed five clusters with one cluster partitioned into four clades which transcend isolation dates. Pangenomic and recombination analyses revealed an open (whole genome and chromosomal specific) pangenome illustrating a rampant evolutionary pattern. Comparative resistomics and virulomics uncovered distinct chromosomal and mobilomal propensity for multidrug resistance, widespread chromosomal point-mutation-mediated resistance and chromosomally harboured arsenals of virulence factors. Interestingly, a potential difference in biofilm formation strategies was highlighted by coevolutionary analysis, suggesting differential biofilm genotypes between vanA and vanB isolates. Conclusions. These results highlight the evolutionary history of Irish E. faecium isolates and may provide insight into underlying infection dynamics in a clinical setting. Due to the apparent ease of vancomycin resistance acquisition over time, susceptible E. 
faecium should be concurrently reduced in Irish hospitals to mitigate potential resistant infections.}, } @article {pmid36298594, year = {2022}, author = {Nawaz, M and Ullah, A and Al-Harbi, AI and Haq, MU and Hameed, AR and Ahmad, S and Aziz, A and Raziq, K and Khan, S and Irfan, M and Muhammad, R}, title = {Genome-Based Multi-Antigenic Epitopes Vaccine Construct Designing against Staphylococcus hominis Using Reverse Vaccinology and Biophysical Approaches.}, journal = {Vaccines}, volume = {10}, number = {10}, pages = {}, pmid = {36298594}, issn = {2076-393X}, abstract = {Staphylococcus hominis is a Gram-positive bacterium from the staphylococcus genus; it is also a member of coagulase-negative staphylococci because of its opportunistic nature and ability to cause life-threatening bloodstream infections in immunocompromised patients. Gram-positive and opportunistic bacteria have become a major concern for the medical community. It has also drawn the attention of scientists due to the evaluation of immune evasion tactics and the development of multidrug-resistant strains. This prompted the need to explore novel therapeutic approaches as an alternative to antibiotics. The current study aimed to develop a broad-spectrum, multi-epitope vaccine to control bacterial infections and reduce the burden on healthcare systems. A computational framework was designed to filter the immunogenic potent vaccine candidate. This framework consists of pan-genomics, subtractive proteomics, and immunoinformatics approaches to prioritize vaccine candidates. A total of 12,285 core proteins were obtained using a pan-genome analysis of all strains. The screening of the core proteins resulted in the selection of only two proteins for the next epitope prediction phase. Eleven B-cell derived T-cell epitopes were selected that met the criteria of different immunoinformatics approaches such as allergenicity, antigenicity, immunogenicity, and toxicity. 
A vaccine construct was formulated using EAAAK and GPGPG linkers and a cholera toxin B subunit. This formulated vaccine construct was further used for downward analysis. The vaccine was loop refined and improved for structure stability through disulfide engineering. For an efficient expression, the codons were optimized as per the usage pattern of the E. coli (K12) expression system. The top three refined docked complexes of the vaccine that docked with the MHC-I, MHC-II, and TLR-4 receptors were selected, which proved the best binding potential of the vaccine with immune receptors; this was followed by molecular dynamic simulations. The results indicate the best intermolecular bonding between immune receptors and vaccine epitopes and that they are exposed to the host's immune system. Finally, the binding energies were calculated to confirm the binding stability of the docked complexes. This work aimed to provide a manageable list of immunogenic and antigenic epitopes that could be used as potent vaccine candidates for experimental in vivo and in vitro studies.}, } @article {pmid36296313, year = {2022}, author = {Liu, Y and Cui, X and Yang, R and Zhang, Y and Xu, Y and Liu, G and Zhang, B and Wang, J and Wang, X and Zhang, W and Chen, T and Zhang, G}, title = {Genomic Insights into the Radiation-Resistant Capability of Sphingomonas qomolangmaensis S5-59[T] and Sphingomonas glaciei S8-45[T], Two Novel Bacteria from the North Slope of Mount Everest.}, journal = {Microorganisms}, volume = {10}, number = {10}, pages = {}, pmid = {36296313}, issn = {2076-2607}, abstract = {Mount Everest provides natural advantages to finding radiation-resistant extremophiles that are functionally mechanistic and possess commercial significance. (1) Background: Two bacterial strains, designated S5-59[T] and S8-45[T], were isolated from moraine samples collected from the north slope of Mount Everest at altitudes of 5700 m and 5100 m above sea level. 
(2) Methods: The present study investigated the polyphasic features and genomic characteristics of S5-59[T] and S8-45[T]. (3) Results: The major fatty acids and the predominant respiratory menaquinone of S5-59[T] and S8-45[T] were summed as feature 3 (comprising C16:1 ω6c and/or C16:1 ω7c) and ubiquinone-10 (Q-10). Phylogenetic analyses based on 16S rRNA sequences and average nucleotide identity values among these two strains and their reference type strains were below the species demarcation thresholds of 98.65% and 95%. Strains S5-59[T] and S8-45[T] harbored great radiation resistance. The genomic analyses showed that DNA damage repair genes, such as mutL, mutS, radA, radC, recF, recN, etc., were present in the S5-59[T] and S8-45[T] strains. Additionally, strain S5-59[T] possessed more genes related to DNA protection proteins. The pan-genome analysis and horizontal gene transfers revealed that strains of Sphingomonas had a consistently homologous genetic evolutionary radiation resistance. Moreover, enzymatic antioxidative proteins also served critical roles in converting ROS into harmless molecules that resulted in resistance to radiation. Further, pigments and carotenoids such as zeaxanthin and alkylresorcinols of the non-enzymatic antioxidative system were also predicted to protect them from radiation. (4) Conclusions: Type strains S5-59[T] (=JCM 35564T =GDMCC 1.3193T) and S8-45[T] (=JCM 34749T =GDMCC 1.2715T) represent two novel species of the genus Sphingomonas with the proposed name Sphingomonas qomolangmaensis sp. nov. and Sphingomonas glaciei sp. nov. 
The type strains, S5-59[T] and S8-45[T], were assessed in a deeply genomic study of their radiation-resistant mechanisms and this thus resulted in a further understanding of their greater potential application for the development of anti-radiation protective drugs.}, } @article {pmid36290512, year = {2022}, author = {Zhang, Z and Guo, Y and Yang, F and Li, J}, title = {Pan-Genome Analysis Reveals Functional Divergences in Gut-Restricted Gilliamella and Snodgrassella.}, journal = {Bioengineering (Basel, Switzerland)}, volume = {9}, number = {10}, pages = {}, pmid = {36290512}, issn = {2306-5354}, abstract = {Gilliamella and Snodgrassella, members of core gut microbiota in corbiculate bees, have high species diversity and adaptability to a wide range of hosts. In this study, we performed species taxonomy and phylogenetic analysis for Gilliamella and Snodgrassella strains that we isolated in our laboratory, in combination with published whole-genome. Functional effects of accessory and unique genes were investigated by KEGG category and pathway annotation in pan-genome analysis. Consequently, in Gilliamella, we inferred the importance of carbohydrate metabolism, amino acid metabolism, membrane transport, energy metabolism, and metabolism of cofactors and vitamins in accessory or unique genes. The pathway mentioned above, plus infectious disease, lipid metabolism, nucleotide metabolism as well as replication and repair exert a pivotal role in accessory or unique genes of Snodgrassella. Further analysis revealed the existence of functional differentiation of accessory and unique genes among Apis-derived genomes and Bombus-derived genomes. We also identified eight and four biosynthetic gene clusters in all Gilliamella and Snodgrassella genomes, respectively. 
Our study provides a good insight to better understand how host heterogeneity influences the bacterial speciation and affects the versatility of the genome of the gut bacteria.}, } @article {pmid36288801, year = {2023}, author = {McInerney, JO}, title = {Prokaryotic Pangenomes Act as Evolving Ecosystems.}, journal = {Molecular biology and evolution}, volume = {40}, number = {1}, pages = {}, pmid = {36288801}, issn = {1537-1719}, mesh = {Phylogeny ; *Ecosystem ; *Evolution, Molecular ; Prokaryotic Cells ; Biological Evolution ; }, abstract = {Understanding adaptation to the local environment is a central tenet and a major focus of evolutionary biology. But this is only part of the adaptionist story. In addition to the external environment, one of the main drivers of genome composition is genetic background. In this perspective, I argue that there is a growing body of evidence that intra-genomic selective pressures play a significant part in the composition of prokaryotic genomes and play a significant role in the origin, maintenance and structuring of prokaryotic pangenomes.}, } @article {pmid36288260, year = {2022}, author = {Sun, X and Chen, Z and Kong, T and Chen, Z and Dong, Y and Kolton, M and Cao, Z and Zhang, X and Zhang, H and Liu, G and Gao, P and Yang, N and Lan, L and Xu, Y and Sun, W}, title = {Mycobacteriaceae Mineralizes Micropolyethylene in Riverine Ecosystems.}, journal = {Environmental science & technology}, volume = {56}, number = {22}, pages = {15705-15717}, doi = {10.1021/acs.est.2c05346}, pmid = {36288260}, issn = {1520-5851}, mesh = {Plastics/analysis ; Ecosystem ; Environmental Monitoring ; *Water Pollutants, Chemical/analysis ; *Mycobacteriaceae ; Carbon Dioxide/analysis ; Rivers/chemistry ; }, abstract = {Microplastic (MP) contamination is a serious global environmental problem. Plastic contamination has attracted extensive attention during the past decades. 
While physiochemical weathering may influence the properties of MPs, biodegradation by microorganisms could ultimately mineralize plastics into CO2. Compared to the well-studied marine ecosystems, the MP biodegradation process in riverine ecosystems, however, is less understood. The current study focuses on the MP biodegradation in one of the world's most plastic contaminated rivers, Pearl River, using micropolyethylene (mPE) as a model substrate. Mineralization of [13]C-labeled mPE into [13]CO2 provided direct evidence of mPE biodegradation by indigenous microorganisms. Several Actinobacteriota genera were identified as putative mPE degraders. Furthermore, two Mycobacteriaceae isolates related to the putative mPE degraders, Mycobacterium sp. mPE3 and Nocardia sp. mPE12, were retrieved, and their ability to mineralize [13]C-mPE into [13]CO2 was confirmed. Pangenomic analysis reveals that the genes related to the proposed mPE biodegradation pathway are shared by members of Mycobacteriaceae. While both Mycobacterium and Nocardia are known for their pathogenicity, these populations on the plastisphere in this study were likely nonpathogenic as they lacked virulence factors. 
The current study provided direct evidence for MP mineralization by indigenous biodegraders and predicted their biodegradation pathway, which may be harnessed to improve bioremediation of MPs in urban rivers.}, } @article {pmid36284702, year = {2022}, author = {Rodrigues Blanco, I and José Luduverio Pizauro, L and Victor Dos Anjos Almeida, J and Miguel Nóbrega Mendonça, C and de Mello Varani, A and Pinheiro de Souza Oliveira, R}, title = {Pan-genomic and comparative analysis of Pediococcus pentosaceus focused on the in silico assessment of pediocin-like bacteriocins.}, journal = {Computational and structural biotechnology journal}, volume = {20}, number = {}, pages = {5595-5606}, pmid = {36284702}, issn = {2001-0370}, abstract = {Bacteriocins are antimicrobial peptides produced by different species of bacteria, especially the Gram-positive lactic acid bacteria (LAB). Pediococcus pentosaceus is widely applied in the industry and stands out as Bacteriocin-Like Inhibitory Substances (BLIS) producer known to inhibit pathogens commonly considered a concern in the food industries. This study aimed to perform in silico comparisons of P. pentosaceus genomes available in the public GenBank database focusing on their pediocin-like bacteriocins repertoire. The pan-genome analysis evidenced a temporal signal in the pattern of gene gain and loss, supporting the hypothesis that the complete genetic repertoire of this group of bacteria is still uncovered. Thirteen bacteriocin genes from Class II and III were predicted in the accessory genome. Four pediocin-like bacteriocins (54% of the detected bacteriocin repertoire) and their accompanying immunity genes are highlighted: penocin A, coagulin A, pediocin PA-1, and plantaricin 423. Additionally, in silico modeling of the pediocin-like bacteriocins revealed different configurations of the helix motif compared to other physically determined pediocin-like structures. 
Comparative and phylogenomic analyses support the hypothesis that a dynamic mechanism of bacteriocin acquisition and purging is not dependent on the bacterial isolation source origin. Synteny analysis revealed that while coagulin A, pediocin PA-1, and Plantaricin 423 loci are associated with insertion sequences mainly from the IS30 family and are likely of plasmid origin, penocin A lies in a conserved chromosomal locus. The results presented here provide insights into the unique pediocin-like bacteriocin peptide fold, genomic diversity, and the evolution of the bacteriocin genetic repertoire of P. pentosaceus, shedding new insights into the role of these biomolecules for application in inhibiting bacterial pathogens, and suggesting that prospecting and sequencing new strains is still an alternative to mining for new probiotic compounds.}, } @article {pmid36282844, year = {2022}, author = {Chia, CT and Bender, AT and Lillis, L and Sullivan, BP and Martin, CD and Burke, W and Landis, C and Boyle, DS and Posner, JD}, title = {Rapid detection of hepatitis C virus using recombinase polymerase amplification.}, journal = {PloS one}, volume = {17}, number = {10}, pages = {e0276582}, pmid = {36282844}, issn = {1932-6203}, mesh = {Humans ; Recombinases/genetics ; Hepacivirus/genetics ; Antiviral Agents ; *Hepatitis C, Chronic/diagnosis ; *Hepatitis C/diagnosis ; Nucleic Acid Amplification Techniques ; Sensitivity and Specificity ; RNA ; RNA, Viral/genetics ; }, abstract = {Over 71 million people are infected with hepatitis C virus (HCV) worldwide, and approximately 400,000 global deaths result from complications of untreated chronic HCV. Pan-genomic direct-acting antivirals (DAAs) have recently become widely available and feature high cure rates in less than 12 weeks of treatment. The rollout of DAAs is reliant on diagnostic tests for HCV RNA to identify eligible patients with viremic HCV infections. 
Current PCR-based HCV RNA assays are restricted to well-resourced central laboratories, and there remains a prevailing clinical need for expanded access to decentralized HCV RNA testing to provide rapid chronic HCV diagnosis and linkage to DAAs in outpatient clinics. This paper reports a rapid, highly accurate, and minimally instrumented assay for HCV RNA detection using reverse transcription recombinase polymerase amplification (RT-RPA). The assay detects all HCV genotypes with a limit of detection of 25 copies per reaction for genotype 1, the most prevalent in the United States and worldwide. The clinical sensitivity and specificity of the RT-RPA assay were both 100% when evaluated using 78 diverse clinical serum specimens. The accuracy, short runtime, and low heating demands of RT-RPA may enable implementation in a point-of-care HCV test to expand global access to effective treatment via rapid chronic HCV diagnosis.}, } @article {pmid36280878, year = {2022}, author = {Gourlie, R and McDonald, M and Hafez, M and Ortega-Polo, R and Low, KE and Abbott, DW and Strelkov, SE and Daayf, F and Aboukhaddour, R}, title = {The pangenome of the wheat pathogen Pyrenophora tritici-repentis reveals novel transposons associated with necrotrophic effectors ToxA and ToxB.}, journal = {BMC biology}, volume = {20}, number = {1}, pages = {239}, pmid = {36280878}, issn = {1741-7007}, mesh = {Plant Diseases/microbiology ; Phylogeny ; *Mycotoxins/genetics ; *Ascomycota/genetics ; }, abstract = {BACKGROUND: In fungal plant pathogens, genome rearrangements followed by selection pressure for adaptive traits have facilitated the co-evolutionary arms race between hosts and their pathogens. Pyrenophora tritici-repentis (Ptr) has emerged recently as a foliar pathogen of wheat worldwide and its populations consist of isolates that vary in their ability to produce combinations of different necrotrophic effectors. These effectors play vital roles in disease development. 
Here, we sequenced the genomes of a global collection (40 isolates) of Ptr to gain insights into its gene content and genome rearrangements.

RESULTS: A comparative genome analysis revealed an open pangenome, with an abundance of accessory genes (~ 57%) reflecting Ptr's adaptability. A clear distinction between pathogenic and non-pathogenic genomes was observed in size, gene content, and phylogenetic relatedness. Chromosomal rearrangements and structural organization, specifically around effector coding genes, were detailed using long-read assemblies (PacBio RS II) generated in this work in addition to previously assembled genomes. We also discovered the involvement of large mobile elements associated with Ptr's effectors: ToxA, the gene encoding for the necrosis effector, was found as a single copy within a 143-kb 'Starship' transposon (dubbed 'Horizon') with a clearly defined target site and target site duplications. 'Horizon' was located on different chromosomes in different isolates, indicating mobility, and the previously described ToxhAT transposon (responsible for horizontal transfer of ToxA) was nested within this newly identified Starship. Additionally, ToxB, the gene encoding the chlorosis effector, was clustered as three copies on a 294-kb element, which is likely a different putative 'Starship' (dubbed 'Icarus') in a ToxB-producing isolate. ToxB and its putative transposon were missing from the ToxB non-coding reference isolate, but the homolog toxb and 'Icarus' were both present in a different non-coding isolate. This suggests that ToxB may have been mobile at some point during the evolution of the Ptr genome which is contradictory to the current assumption of ToxB vertical inheritance. Finally, the genome architecture of Ptr was defined as 'one-compartment' based on calculated gene distances and evolutionary rates.

CONCLUSIONS: These findings together reflect on the highly plastic nature of the Ptr genome which has likely helped to drive its worldwide adaptation and has illuminated the involvement of giant transposons in facilitating the evolution of virulence in Ptr.}, } @article {pmid36278460, year = {2022}, author = {Suryaletha, K and Savithri, AV and Nayar, SA and Asokan, S and Rajeswary, D and Thomas, S}, title = {Demystifying Bacteriocins of Human Microbiota by Genome Guided Prospects: An Impetus to Rekindle the Antimicrobial Research.}, journal = {Current protein \& peptide science}, volume = {23}, number = {12}, pages = {811--822}, pmid = {36278460}, issn = {1875-5550}, mesh = {Humans ; *Bacteriocins/genetics/pharmacology ; Anti-Bacterial Agents/pharmacology ; *Microbiota ; Bacteria/genetics ; }, abstract = {The human microbiome is a reservoir of potential bacteriocins that can counteract multidrug resistant bacterial pathogens. Unlike antibiotics, bacteriocins selectively inhibit a spectrum of competent bacteria and are said to safeguard gut commensals, reducing the chance of dysbiosis. Bacteriocinogenic probiotics or bacteriocins of human origin will be more pertinent in human physiological conditions for therapeutic applications to act against invading pathogens. Recent advancement in the omics approach enables the mining of diverse and novel bacteriocins by identifying biosynthetic gene clusters from the human microbial genome, pangenome or shotgun metagenome, which is a breakthrough in the discovery line of novel bacteriocins. This review summarizes the most recent trends and therapeutic potential of bacteriocins of human microbial origin, the advancement in the in silico algorithms and databases in the discovery of novel bacteriocin, and how to bridge the gap between the discovery of bacteriocin genes from big datasets and their in vitro production. 
Besides, the later part of the review discussed the various impediments in their clinical applications and possible solution to bring them into the frontline therapeutics to control infections, thereby meeting the challenges of global antimicrobial resistance.}, } @article {pmid36265748, year = {2023}, author = {González-Torres, B and González-Gómez, JP and Ramírez, K and Castro-Del Campo, N and González-López, I and Garrido-Palazuelos, LI and Chaidez, C and Medrano-Félix, JA}, title = {Population structure of the Salmonella enterica serotype Oranienburg reveals similar virulence, regardless of isolation years and sources.}, journal = {Gene}, volume = {851}, number = {}, pages = {146966}, doi = {10.1016/j.gene.2022.146966}, pmid = {36265748}, issn = {1879-0038}, mesh = {Serogroup ; Virulence/genetics ; *Salmonella enterica/genetics ; Salmonella ; Anti-Bacterial Agents ; }, abstract = {Salmonella enterica serotype Oranienburg is a multi-host, ubiquitous, and prevalent Non-typhoidal Salmonella (NTS) in subtropical rivers, particularly in sediments; little studied so far possible the adaptation and establishment of this microorganism based on its genetic content. This study was focused on the first five genomes of S. Oranienburg in sediments through whole-genome sequencing (WGS) and 61 river water genomes isolated in previous studies. Results showed an open pangenome with 5,594 gene clusters (GCs), and the division of their categories showed; 3,303 core genes, 741 persistent genes, 1,282 accessory genes, and 268 unique genes. Additionally, it showed three main subclades within the same serotype and showed a conserved genetic content, suggesting the display of different adaptation strategies to its establishment. Nine genes for antimicrobial resistance were detected: aac (6') - Iy, H-NS, golS, marA, mdsABC, mdtK, and sdiA, and a mutation in the parC gene p. T57S generating a resistance. 
In addition, virulence genes and pathogenicity islands (SPI's) were analyzed, finding 92 genes and an identity above 80 % in the SPI's 1 to 5, and the centisomes 54 and 63. The environmental strains of S. Oranienburg do not represent a concern as multidrug resistance (MDR) bacterium; however, virulence genes remain a potential health risk. This study contributes to understanding its adaptation to aquatic environments in Mexico.}, } @article {pmid36263788, year = {2022}, author = {Dyrhage, K and Garcia-Montaner, A and Tamarit, D and Seeger, C and Näslund, K and Olofsson, TC and Vasquez, A and Webster, MT and Andersson, SGE}, title = {Genome Evolution of a Symbiont Population for Pathogen Defense in Honeybees.}, journal = {Genome biology and evolution}, volume = {14}, number = {11}, pages = {}, pmid = {36263788}, issn = {1759-6653}, mesh = {Bees/genetics ; Animals ; *Genome, Bacterial ; *Gastrointestinal Microbiome ; Bacteria ; Evolution, Molecular ; }, abstract = {The honeybee gut microbiome is thought to be important for bee health, but the role of the individual members is poorly understood. Here, we present closed genomes and associated mobilomes of 102 Apilactobacillus kunkeei isolates obtained from the honey crop (foregut) of honeybees sampled from beehives in Helsingborg in the south of Sweden and from the islands Gotland and Åland in the Baltic Sea. Each beehive contained a unique composition of isolates and repeated sampling of similar isolates from two beehives in Helsingborg suggests that the bacterial community is stably maintained across bee generations during the summer months. The sampled bacterial population contained an open pan-genome structure with a high genomic density of transposons. 
A subset of strains affiliated with phylogroup A inhibited growth of the bee pathogen Melissococcus plutonius, all of which contained a 19.5 kb plasmid for the synthesis of the antimicrobial compound kunkecin A, while a subset of phylogroups B and C strains contained a 32.9 kb plasmid for the synthesis of a putative polyketide antibiotic. This study suggests that the mobile gene pool of A. kunkeei plays a key role in pathogen defense in honeybees, providing new insights into the evolutionary dynamics of defensive symbiont populations.}, } @article {pmid36261518, year = {2022}, author = {Jarvis, ED and Formenti, G and Rhie, A and Guarracino, A and Yang, C and Wood, J and Tracey, A and Thibaud-Nissen, F and Vollger, MR and Porubsky, D and Cheng, H and Asri, M and Logsdon, GA and Carnevali, P and Chaisson, MJP and Chin, CS and Cody, S and Collins, J and Ebert, P and Escalona, M and Fedrigo, O and Fulton, RS and Fulton, LL and Garg, S and Gerton, JL and Ghurye, J and Granat, A and Green, RE and Harvey, W and Hasenfeld, P and Hastie, A and Haukness, M and Jaeger, EB and Jain, M and Kirsche, M and Kolmogorov, M and Korbel, JO and Koren, S and Korlach, J and Lee, J and Li, D and Lindsay, T and Lucas, J and Luo, F and Marschall, T and Mitchell, MW and McDaniel, J and Nie, F and Olsen, HE and Olson, ND and Pesout, T and Potapova, T and Puiu, D and Regier, A and Ruan, J and Salzberg, SL and Sanders, AD and Schatz, MC and Schmitt, A and Schneider, VA and Selvaraj, S and Shafin, K and Shumate, A and Stitziel, NO and Stober, C and Torrance, J and Wagner, J and Wang, J and Wenger, A and Xiao, C and Zimin, AV and Zhang, G and Wang, T and Li, H and Garrison, E and Haussler, D and Hall, I and Zook, JM and Eichler, EE and Phillippy, AM and Paten, B and Howe, K and Miga, KH}, title = {Semi-automated assembly of high-quality diploid human reference genomes.}, journal = {Nature}, volume = {611}, number = {7936}, pages = {519--531}, pmid = {36261518}, issn = {1476-4687}, support = 
{R01 HG006677/HG/NHGRI NIH HHS/United States ; U01 HG010961/HG/NHGRI NIH HHS/United States ; R35 GM130151/GM/NIGMS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; R01 HG010040/HG/NHGRI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Chromosome Mapping/standards ; *Diploidy ; *Genome, Human/genetics ; Haplotypes/genetics ; High-Throughput Nucleotide Sequencing/methods/standards ; Sequence Analysis, DNA/methods/standards ; Reference Standards ; *Genomics/methods/standards ; Chromosomes, Human/genetics ; Genetic Variation/genetics ; }, abstract = {The current human reference genome, GRCh38, represents over 20 years of effort to generate a high-quality assembly, which has benefitted society[1,2]. However, it still has many gaps and errors, and does not represent a biological genome as it is a blend of multiple individuals[3,4]. Recently, a high-quality telomere-to-telomere reference, CHM13, was generated with the latest long-read technologies, but it was derived from a hydatidiform mole cell line with a nearly homozygous genome[5]. To address these limitations, the Human Pangenome Reference Consortium formed with the goal of creating high-quality, cost-effective, diploid genome assemblies for a pangenome reference that represents human genetic diversity[6]. Here, in our first scientific report, we determined which combination of current genome sequencing and assembly approaches yield the most complete and accurate diploid genome assembly with minimal manual curation. Approaches that used highly accurate long reads and parent-child data with graph-based haplotype phasing during assembly outperformed those that did not. 
Developing a combination of the top-performing methods, we generated our first high-quality diploid reference assembly, containing only approximately four gaps per chromosome on average, with most chromosomes within ±1% of the length of CHM13. Nearly 48% of protein-coding genes have non-synonymous amino acid changes between haplotypes, and centromeric regions showed the highest diversity. Our findings serve as a foundation for assembling near-complete diploid human genomes at scale for a pangenome reference to capture global genetic variation from single nucleotides to structural rearrangements.}, } @article {pmid36258067, year = {2022}, author = {Abram, KZ and Jun, SR and Udaondo, Z}, title = {Pseudomonas aeruginosa Pangenome: Core and Accessory Genes of a Highly Resourceful Opportunistic Pathogen.}, journal = {Advances in experimental medicine and biology}, volume = {1386}, number = {}, pages = {3--28}, pmid = {36258067}, issn = {0065-2598}, mesh = {*Pseudomonas aeruginosa/genetics ; *Genome, Bacterial ; Anti-Bacterial Agents ; Amino Acids ; Carbohydrates ; Phylogeny ; }, abstract = {In this chapter, we leverage a novel approach to assess the seamless population structure of Pseudomonas aeruginosa, using the full repertoire of genomes sequenced to date (GenBank, April 6, 2020). In order to assess the set of core functions that represents the species as well as the differences in these core functions among the phylogroups observed in the population structure analysis, we performed pangenome analyses at the species level and at the phylogroup level. The existence of the phylogroups described in the population structure analyses was supported by their different profiles of antibiotic-resistant determinants. Finally, we utilized a presence/absence matrix of protein families from the entire species to evaluate if P. aeruginosa phylogroups can be differentiated according to their accessory genomic content. Our analysis shows that the core genome of P. 
aeruginosa is approximately 62% of the average gene content for the species, and it is highly enriched with pathways related to the metabolism of carbohydrates and amino acids as well as cellular processes and cell maintenance. The analysis of the accessory genome of P. aeruginosa performed in this chapter confirmed not only the existence of the three phylogroups previously described in the population structure analysis, but also of 29 genetic substructures (subgroups) within the main phylogroups. Our work illustrates the utility of populations genomics pipelines to better understand highly complex bacterial species such as P. aeruginosa.}, } @article {pmid36255144, year = {2023}, author = {Wang, S and Qian, YQ and Zhao, RP and Chen, LL and Song, JM}, title = {Graph-based pan-genomes: increased opportunities in plant genomics.}, journal = {Journal of experimental botany}, volume = {74}, number = {1}, pages = {24--39}, doi = {10.1093/jxb/erac412}, pmid = {36255144}, issn = {1460-2431}, mesh = {*Genomics ; *Genome, Plant/genetics ; Sequence Analysis, DNA ; Polymorphism, Single Nucleotide ; }, abstract = {Due to the development of sequencing technology and the great reduction in sequencing costs, an increasing number of plant genomes have been assembled, and numerous genomes have revealed large amounts of variations. However, a single reference genome does not allow the exploration of species diversity, and therefore the concept of pan-genome was developed. A pan-genome is a collection of all sequences available for a species, including a large number of consensus sequences, large structural variations, and small variations including single nucleotide polymorphisms and insertions/deletions. A simple linear pan-genome does not allow these structural variations to be intuitively characterized, so graph-based pan-genomes have been developed. 
These pan-genomes store sequence and structural variation information in the form of nodes and paths to store and display species variation information in a more intuitive manner. The key role of graph-based pan-genomes is to expand the coordinate system of the linear reference genome to accommodate more regions of genetic diversity. Here, we review the origin and development of graph-based pan-genomes, explore their application in plant research, and further highlight the application of graph-based pan-genomes for future plant breeding.}, } @article {pmid36250060, year = {2022}, author = {Monshizadeh, M and Zomorodi, S and Mortensen, K and Ye, Y}, title = {Revealing bacteria-phage interactions in human microbiome through the CRISPR-Cas immune systems.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {933516}, pmid = {36250060}, issn = {2235-2988}, support = {R01 AI143254/AI/NIAID NIH HHS/United States ; }, mesh = {Bacteria/genetics ; *Bacteriophages/genetics ; CRISPR-Cas Systems ; Humans ; Immune System ; *Microbiota/genetics ; }, abstract = {The human gut microbiome is composed of a diverse consortium of microorganisms. Relatively little is known about the diversity of the bacteriophage population and their interactions with microbial organisms in the human microbiome. Due to the persistent rivalry between microbial organisms (hosts) and phages (invaders), genetic traces of phages are found in the hosts' CRISPR-Cas adaptive immune system. Mobile genetic elements (MGEs) found in bacteria include genetic material from phage and plasmids, often resultant from invasion events. We developed a computational pipeline (BacMGEnet), which can be used for inference and exploratory analysis of putative interactions between microbial organisms and MGEs (phages and plasmids) and their interaction network. 
Given a collection of genomes as the input, BacMGEnet utilizes computational tools we have previously developed to characterize CRISPR-Cas systems in the genomes, which are then used to identify putative invaders from publicly available collections of phage/prophage sequences. In addition, BacMGEnet uses a greedy algorithm to summarize identified putative interactions to produce a bacteria-MGE network in a standard network format. Inferred networks can be utilized to assist further examination of the putative interactions and for discovery of interaction patterns. Here we apply the BacMGEnet pipeline to a few collections of genomic/metagenomic datasets to demonstrate its utilities. BacMGEnet revealed a complex interaction network of the Phocaeicola vulgatus pangenome with its phage invaders, and the modularity analysis of the resulted network suggested differential activities of the different P. vulgatus' CRISPR-Cas systems (Type I-C and Type II-C) against some phages. Analysis of the phage-bacteria interaction network of human gut microbiome revealed a mixture of phages with a broad host range (resulting in large modules with many bacteria and phages), and phages with narrow host range. We also showed that BacMGEnet can be used to infer phages that invade bacteria and their interactions in wound microbiome. We anticipate that BacMGEnet will become an important tool for studying the interactions between bacteria and their invaders for microbiome research.}, } @article {pmid36238595, year = {2022}, author = {Palevich, N and Palevich, FP and Gardner, A and Brightwell, G and Mills, J}, title = {Genome collection of Shewanella spp. isolated from spoiled lamb.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {976152}, pmid = {36238595}, issn = {1664-302X}, abstract = {The diversity of the genus Shewanella and their roles across a variety of ecological niches is largely unknown highlighting the phylogenetic diversity of these bacteria. 
From a food safety perspective, Shewanella species have been recognized as causative spoilage agents of vacuum-packed meat products. However, the genetic basis and metabolic pathways for the spoilage mechanism are yet to be explored due to the unavailability of relevant Shewanella strains and genomic resources. In this study, whole-genome sequencing of 32 Shewanella strains isolated from vacuum-packaged refrigerated spoiled lamb was performed to examine their roles in meat spoilage. Phylogenomic reconstruction revealed their genomic diversity with 28 Shewanella spp. strains belonging to the same putative novel species, two Shewanella glacialipiscicola strains (SM77 and SM91), Shewanella xiamenensis NZRM825, and Shewanella putrefaciens DSM 50426 (ATCC 8072) isolated from butter. Genome-wide clustering of orthologous gene families revealed functional groupings within the major Shewanella cluster but also considerable plasticity across the different species. Pan-genome analysis revealed conserved occurrence of spoilage genes associated with sulfur and putrescine metabolism, while the complete set of trimethylamine metabolism genes was observed in only Shewanella sp. SM74, S. glacialipiscicola SM77 and SM91 strains. Through comparative genomics, some variations were also identified pertaining to genes associated with adaptation to environmental cues such as temperature, osmotic, salt, oxidative, antimicrobial peptide, and drug resistance stresses. 
Here we provide a reference collection of draft Shewanella genomes for subsequent species descriptions and future investigations into the molecular spoilage mechanisms for further applications in the meat industry.}, } @article {pmid36226968, year = {2022}, author = {Jana, B and Keppel, K and Fridman, CM and Bosis, E and Salomon, D}, title = {Multiple T6SSs, Mobile Auxiliary Modules, and Effectors Revealed in a Systematic Analysis of the Vibrio parahaemolyticus Pan-Genome.}, journal = {mSystems}, volume = {7}, number = {6}, pages = {e0072322}, pmid = {36226968}, issn = {2379-5077}, mesh = {Animals ; Humans ; *Type VI Secretion Systems/genetics ; *Vibrio parahaemolyticus/genetics ; Bacterial Proteins/genetics ; Bacteria/metabolism ; Anti-Bacterial Agents/metabolism ; }, abstract = {Type VI secretion systems (T6SSs) play a major role in interbacterial competition and in bacterial interactions with eukaryotic cells. The distribution of T6SSs and the effectors they secrete vary between strains of the same bacterial species. Therefore, a pan-genome investigation is required to better understand the T6SS potential of a bacterial species of interest. Here, we performed a comprehensive, systematic analysis of T6SS gene clusters and auxiliary modules found in the pan-genome of Vibrio parahaemolyticus, an emerging pathogen widespread in marine environments. We identified 4 different T6SS gene clusters within genomes of this species; two systems appear to be ancient and widespread, whereas the other 2 systems are rare and appear to have been more recently acquired via horizontal gene transfer. In addition, we identified diverse T6SS auxiliary modules containing putative effectors with either known or predicted toxin domains. Many auxiliary modules are possibly horizontally shared between V. parahaemolyticus genomes, since they are flanked by DNA mobility genes. 
We further investigated a DUF4225-containing protein encoded on an Hcp auxiliary module, and we showed that it is an antibacterial T6SS effector that exerts its toxicity in the bacterial periplasm, leading to cell lysis. Computational analyses of DUF4225 revealed a widespread toxin domain associated with various toxin delivery systems. Taken together, our findings reveal a diverse repertoire of T6SSs and auxiliary modules in the V. parahaemolyticus pan-genome, as well as novel T6SS effectors and toxin domains that can play a major role in the interactions of this species with other cells. IMPORTANCE Gram-negative bacteria employ toxin delivery systems to mediate their interactions with neighboring cells. Vibrio parahaemolyticus, an emerging pathogen of humans and marine animals, was shown to deploy antibacterial toxins into competing bacteria via the type VI secretion system (T6SS). Here, we analyzed 1,727 V. parahaemolyticus genomes and revealed the pan-genome T6SS repertoire of this species, including the T6SS gene clusters, horizontally shared auxiliary modules, and toxins. 
We also identified a role for a previously uncharacterized domain, DUF4225, as a widespread antibacterial toxin associated with diverse toxin delivery systems.}, } @article {pmid36223424, year = {2022}, author = {Wang, F and Guo, Y and Liu, Z and Wang, Q and Jiang, Y and Zhao, G}, title = {New insights into the novel sequences of the chicken pan-genome by liquid chip.}, journal = {Journal of animal science}, volume = {100}, number = {12}, pages = {}, pmid = {36223424}, issn = {1525-3163}, support = {32072708//National Natural Science Foundation of China/ ; 2022JQ-171//Natural Science Basic Research Program of Shaanxi Province/ ; }, mesh = {Animals ; *Chickens/genetics ; Genotype ; *Polymorphism, Single Nucleotide ; Genome ; Oligonucleotide Array Sequence Analysis/veterinary ; }, abstract = {Increasing evidence indicates that the missing sequences and genes in the chicken reference genome are involved in many crucial biological pathways, including metabolism and immunity. The low detection rate of novel sequences by resequencing hindered the acquisition of these sequences and the exploration of the relationship between new genes and economic traits. To improve the capture ratio of novel sequences, a 48K liquid chip including 25K from the reference sequence and 23K from the novel sequence was designed. The assay was tested on a panel of 218 animals from 5 chicken breeds. The average capture ratio of the reference sequence was 99.55%, and the average sequencing depth of the target sites was approximately 187X, indicating a good performance and successful application of liquid chips in farm animals. For the target region in the novel sequence, the average capture ratio was 33.15% and the average sequencing depth of target sites was approximately 60X, both of which were higher than that of resequencing. However, the different capture ratios and capture regions among varieties and individuals proved the difficulty of capturing these regions with complex structures. 
After genotyping, GWAS showed variations in novel sequences potentially relevant to immune-related traits. For example, a SNP close to the differentiation of lymphocyte-related gene IGHV3-23-like was associated with the H/L ratio. These results suggest that targeted capture sequencing is a preferred method to capture these sequences with complex structures and genes potentially associated with immune-related traits.}, } @article {pmid36223396, year = {2022}, author = {Wagner, DM and Birdsell, DN and McDonough, RF and Nottingham, R and Kocos, K and Celona, K and Özsürekci, Y and Öhrman, C and Karlsson, L and Myrtennäs, K and Sjödin, A and Johansson, A and Keim, PS and Forsman, M and Sahl, JW}, title = {Genomic characterization of Francisella tularensis and other diverse Francisella species from complex samples.}, journal = {PloS one}, volume = {17}, number = {10}, pages = {e0273273}, pmid = {36223396}, issn = {1932-6203}, mesh = {Animals ; *Anti-Infective Agents ; DNA, Bacterial/genetics ; *Francisella tularensis/genetics ; Genomics ; Humans ; Phylogeny ; RNA ; *Tularemia/microbiology ; }, abstract = {Francisella tularensis, the bacterium that causes the zoonosis tularemia, and its genetic near neighbor species, can be difficult or impossible to cultivate from complex samples. Thus, there is a lack of genomic information for these species that has, among other things, limited the development of robust detection assays for F. tularensis that are both specific and sensitive. The objective of this study was to develop and validate approaches to capture, enrich, sequence, and analyze Francisella DNA present in DNA extracts generated from complex samples. RNA capture probes were designed based upon the known pan genome of F. tularensis and other diverse species in the family Francisellaceae. 
Probes that targeted genomic regions also present in non-Francisellaceae species were excluded, and probes specific to particular Francisella species or phylogenetic clades were identified. The capture-enrichment system was then applied to diverse, complex DNA extracts containing low-level Francisella DNA, including human clinical tularemia samples, environmental samples (i.e., animal tissue and air filters), and whole ticks/tick cell lines, which was followed by sequencing of the enriched samples. Analysis of the resulting data facilitated rigorous and unambiguous confirmation of the detection of F. tularensis or other Francisella species in complex samples, identification of mixtures of different Francisella species in the same sample, analysis of gene content (e.g., known virulence and antimicrobial resistance loci), and high-resolution whole genome-based genotyping. The benefits of this capture-enrichment system include: even very low target DNA can be amplified; it is culture-independent, reducing exposure for research and/or clinical personnel and allowing genomic information to be obtained from samples that do not yield isolates; and the resulting comprehensive data not only provide robust means to confirm the presence of a target species in a sample, but also can provide data useful for source attribution, which is important from a genomic epidemiology perspective.}, } @article {pmid36219094, year = {2022}, author = {Bista, PK and Pillai, D and Roy, C and Scaria, J and Narayanan, SK}, title = {Comparative Genomic Analysis of Fusobacterium necrophorum Provides Insights into Conserved Virulence Genes.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0029722}, pmid = {36219094}, issn = {2165-0497}, mesh = {Animals ; Cattle ; Humans ; *Fusobacterium necrophorum/genetics ; Virulence/genetics ; Base Composition ; Phylogeny ; Sequence Analysis, DNA ; RNA, Ribosomal, 16S/genetics ; *Genomics ; }, abstract = {Fusobacterium necrophorum is a 
Gram-negative, filamentous anaerobe prevalent in the mucosal flora of animals and humans. It causes necrotic infections in cattle, resulting in a substantial economic impact on the cattle industry. Although infection severity and management differ within F. necrophorum species, little is known about F. necrophorum speciation and the genetic virulence determinants between strains. To characterize the clinical isolates, we performed whole-genome sequencing of four bovine isolates (8L1, 212, B17, and SM1216) and one human isolate (MK12). To determine the phylogenetic relationship and evolution pattern and investigate the presence of antimicrobial resistance genes (ARGs) and potential virulence genes of F. necrophorum, we also performed comparative genomics with publicly available Fusobacterium genomes. Using up-to-date bacterial core gene (UBCG) set analysis, we uncovered distinct Fusobacterium species and F. necrophorum subspecies clades. Pangenome analyses revealed a high level of diversity among Fusobacterium strains down to species levels. The output also identified 14 and 26 genes specific to F. necrophorum subsp. necrophorum and F. necrophorum subsp. funduliforme, respectively, which could be essential for bacterial survival under different environmental conditions. ClonalFrameML-based recombination analysis suggested that extensive recombination among accessory genes led to species divergence. Furthermore, the only strain of F. necrophorum with ARGs was F. necrophorum subsp. funduliforme B35, with acquired macrolide and tetracycline resistance genes. Our custom search revealed common virulence genes, including toxins, adhesion proteins, outer membrane proteins, cell envelope, type IV secretion system, ABC (ATP-binding cassette) transporters, and transporter proteins. A focused study on these genes could help identify major virulence genes and inform effective vaccination strategies against fusobacterial infections. 
IMPORTANCE Fusobacterium necrophorum is an anaerobic bacterium that causes liver abscesses in cattle with an annual incidence rate of 10% to 20%, resulting in a substantial economic impact on the cattle industry. The lack of definite biochemical tests makes it difficult to distinguish F. necrophorum subspecies phenotypically, where genomic characterization plays a significant role. However, due to the lack of a good reference genome for comparison, F. necrophorum subspecies-level identification represents a significant challenge. To overcome this challenge, we used comparative genomics to validate clinical test strains for subspecies-level identification. The findings of our study help predict specific clades of previously uncharacterized strains of F. necrophorum. Our study identifies both general and subspecies-specific virulence genes through a custom search-based analysis. The virulence genes identified in this study can be the focus of future studies aimed at evaluating their potential as vaccine targets to prevent fusobacterial infections in cattle.}, } @article {pmid36214662, year = {2022}, author = {Moolhuijzen, PM and See, PT and Shi, G and Powell, HR and Cockram, J and Jørgensen, LN and Benslimane, H and Strelkov, SE and Turner, J and Liu, Z and Moffat, CS}, title = {A global pangenome for the wheat fungal pathogen Pyrenophora tritici-repentis and prediction of effector protein structural homology.}, journal = {Microbial genomics}, volume = {8}, number = {10}, pages = {}, pmid = {36214662}, issn = {2057-5858}, mesh = {Ascomycota ; Host-Pathogen Interactions/genetics ; *Mycotoxins/genetics/metabolism ; Plant Diseases/microbiology ; Structural Homology, Protein ; *Triticum/genetics/metabolism/microbiology ; }, abstract = {The adaptive potential of plant fungal pathogens is largely governed by the gene content of a species, consisting of core and accessory genes across the pathogen isolate repertoire. 
To approximate the complete gene repertoire of a globally significant crop fungal pathogen, a pan genomic analysis was undertaken for Pyrenophora tritici-repentis (Ptr), the causal agent of tan (or yellow) spot disease in wheat. In this study, 15 new Ptr genomes were sequenced, assembled and annotated, including isolates from three races not previously sequenced. Together with 11 previously published Ptr genomes, a pangenome for 26 Ptr isolates from Australia, Europe, North Africa and America, representing nearly all known races, revealed a conserved core-gene content of 57 % and presents a new Ptr resource for searching natural homologues (orthologues not acquired by horizontal transfer from another species) using remote protein structural homology. Here, we identify for the first time a non-synonymous mutation in the Ptr necrotrophic effector gene ToxB, multiple copies of the inactive toxb within an isolate, a distant natural Pyrenophora homologue of a known Parastagonospora nodorum necrotrophic effector (SnTox3), and clear genomic break points for the ToxA effector horizontal transfer region. This comprehensive genomic analysis of Ptr races includes nine isolates sequenced via long read technologies. Accordingly, these resources provide a more complete representation of the species, and serve as a resource to monitor variations potentially involved in pathogenicity.}, } @article {pmid36212836, year = {2022}, author = {Kim, E and Yang, SM and Kim, IS and Lee, SY and Kim, HY}, title = {Identification of Leuconostoc species based on novel marker genes identified using real-time PCR via computational pangenome analysis.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1014872}, pmid = {36212836}, issn = {1664-302X}, abstract = {Leuconostoc species are important microorganisms in food fermentation but also cause food spoilage. 
Although these species are commercially important, their taxonomy is still based on inaccurate identification methods. Here, we used computational pangenome analysis to develop a real-time PCR-based method for identifying and differentiating the 12 major Leuconostoc species found in food. Analysis of pan and core-genome phylogenies showed clustering of strains into 12 distinct groups according to the species. Pangenome analysis of 130 Leuconostoc genomes from these 12 species enabled the identification of each species-specific gene. In silico testing of the species-specific genes against 143 publicly available Leuconostoc and 100 other lactic acid bacterial genomes showed that all the assays had 100% inclusivity/exclusivity. We also verified the specificity for each primer pair targeting each specific gene using 23 target and 124 non-target strains and found high specificity (100%). The sensitivity of the real-time PCR method was 10² colony forming units (CFUs)/ml in pure culture and spiked food samples. All standard curves showed good linear correlations, with an R² value of ≥0.996, suggesting that screened targets have good specificity and strong anti-interference ability from food sample matrices and non-target strains. 
The real-time PCR method can be potentially used to determine the taxonomic status and identify the Leuconostoc species in foods.}, } @article {pmid36209154, year = {2022}, author = {Marcet-Houben, M and Alvarado, M and Ksiezopolska, E and Saus, E and de Groot, PWJ and Gabaldón, T}, title = {Chromosome-level assemblies from diverse clades reveal limited structural and gene content variation in the genome of Candida glabrata.}, journal = {BMC biology}, volume = {20}, number = {1}, pages = {226}, pmid = {36209154}, issn = {1741-7007}, mesh = {*Candida glabrata/chemistry/genetics ; Chromosomes ; *Fungal Proteins/genetics ; Genome, Fungal ; Plastics ; }, abstract = {BACKGROUND: Candida glabrata is an opportunistic yeast pathogen thought to have a large genetic and phenotypic diversity and a highly plastic genome. However, the lack of chromosome-level genome assemblies representing this diversity limits our ability to accurately establish how chromosomal structure and gene content vary across strains.

RESULTS: Here, we expanded publicly available assemblies by using long-read sequencing technologies in twelve diverse strains, obtaining a final set of twenty-one chromosome-level genomes spanning the known C. glabrata diversity. Using comparative approaches, we inferred variation in chromosome structure and determined the pan-genome, including an analysis of the adhesin gene repertoire. Our analysis uncovered four new adhesin orthogroups and inferred a rich ancestral adhesion repertoire, which was subsequently shaped through a still ongoing process of gene loss, gene duplication, and gene conversion.

CONCLUSIONS: C. glabrata has a largely stable pan-genome except for a highly variable subset of genes encoding cell wall-associated functions. Adhesin repertoire was established for each strain and showed variability among clades.}, } @article {pmid36209064, year = {2022}, author = {Harling-Lee, JD and Gorzynski, J and Yebra, G and Angus, T and Fitzgerald, JR and Freeman, TC}, title = {A graph-based approach for the visualisation and analysis of bacterial pangenomes.}, journal = {BMC bioinformatics}, volume = {23}, number = {1}, pages = {416}, pmid = {36209064}, issn = {1471-2105}, support = {ETM/421/CSO_/Chief Scientist Office/United Kingdom ; 201531/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; BB/PO13740/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P013732/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Bacteria/genetics ; *Genome, Bacterial ; *High-Throughput Nucleotide Sequencing/methods ; Sequence Analysis, DNA/methods ; }, abstract = {BACKGROUND: The advent of low cost, high throughput DNA sequencing has led to the availability of thousands of complete genome sequences for a wide variety of bacterial species. Examining and interpreting genetic variation on this scale represents a significant challenge to existing methods of data analysis and visualisation.

RESULTS: Starting with the output of standard pangenome analysis tools, we describe the generation and analysis of interactive, 3D network graphs to explore the structure of bacterial populations, the distribution of genes across a population, and the syntenic order in which those genes occur, in the new open-source network analysis platform, Graphia. Both the analysis and the visualisation are scalable to datasets of thousands of genome sequences.

CONCLUSIONS: We anticipate that the approaches presented here will be of great utility to the microbial research community, allowing faster, more intuitive, and flexible interaction with pangenome datasets, thereby enhancing interpretation of these complex data.}, } @article {pmid36205822, year = {2022}, author = {Deb, S}, title = {Pan-genome evolution and its association with divergence of metabolic functions in Bifidobacterium genus.}, journal = {World journal of microbiology & biotechnology}, volume = {38}, number = {12}, pages = {231}, pmid = {36205822}, issn = {1573-0972}, mesh = {*Bifidobacterium/genetics ; Carbohydrates ; Evolution, Molecular ; *Genome, Bacterial/genetics ; Humans ; Phylogeny ; }, abstract = {Previous studies were mainly focused on genomic evolution and diversity of type species of Bifidobacterium genus due to their health-promoting effect on host. However, those studies were mainly based on species-level taxonomic resolution, adaptation, and characterization of carbohydrate metabolic features of the bifidobacterial species. Here, a comprehensive analysis of the type strain genome unveils the association of pan-genome evolution with the divergence of metabolic function of the Bifidobacterium genus. This study has also demonstrated that horizontal gene transfer, as well as genome expansion and reduction events, leads to the divergence of metabolic functions in Bifidobacterium genus. Furthermore, the genome-based search of probiotic traits among all the available bifidobacterial type strains gives hints on type species, that could confer health benefits to nutrient-deficient individuals. 
Altogether, the present study provides insight into the developments of genomic evolution, functional divergence, and potential probiotic type species of the Bifidobacterium genus.}, } @article {pmid36204049, year = {2022}, author = {Nisar, T and Tahir, MHN and Iqbal, S and Sajjad, M and Nadeem, MA and Qanmber, G and Baig, A and Khan, Z and Zhao, Z and Geng, Z and Ur Rehman, S}, title = {Genome-wide characterization and sequence polymorphism analyses of cysteine-rich poly comb-like protein in Glycine max.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {996265}, pmid = {36204049}, issn = {1664-462X}, abstract = {Cysteine-rich poly comb-like protein (CPP) is a member of cysteine-rich transcription factors that regulates plant growth and development. In the present work, we characterized twelve CPP transcription factors encoding genes in soybean (Glycine max). Phylogenetic analyses classified CPP genes into six clades. Sequence logos analyses between G. max and G. soja amino acid residues exhibited high conservation. The presence of growth and stress-related cis-acting elements in the upstream regions of GmCPPs highlight their role in plant development and tolerance against abiotic stress. Ka/Ks levels showed that GmCPPs experienced limited selection pressure with limited functional divergence arising from segmental or whole genome duplication events. By using the PAN-genome of soybean, a single nucleotide polymorphism was identified in GmCPP-6. To perform high throughput genotyping, a kompetitive allele-specific PCR (KASP) marker was developed. Association analyses indicated that GmCPP-6-T allele of GmCPP-6 (in exon region) was associated with higher thousand seed weight under both water regimes (well-water and water-limited). 
Taken together, these results provide vital information to further decipher the biological functions of CPP genes in soybean molecular breeding.}, } @article {pmid36201155, year = {2022}, author = {Torres-Miranda, A and Melis-Arcos, F and Garrido, D}, title = {Characterization and Identification of Probiotic Features in Lacticaseibacillus Paracasei Using a Comparative Genomic Analysis Approach.}, journal = {Probiotics and antimicrobial proteins}, volume = {14}, number = {6}, pages = {1211-1224}, pmid = {36201155}, issn = {1867-1314}, mesh = {Inulin/metabolism ; Phylogeny ; *Lacticaseibacillus paracasei/metabolism ; *Probiotics ; Genomics ; }, abstract = {Lacticaseibacillus paracasei species are widely used for their health-promoting properties in food and agricultural applications. These bacteria have been isolated from various habitats such as the oral cavity, cereals, vegetables, meats, and dairy products conferring them the ability to consume different carbohydrates. Two subspecies are recognized, Lacticaseibacillus paracasei subsp. paracasei and Lacticaseibacillus paracasei subsp. tolerans according to their acid production from carbohydrates. Some strains are currently used as probiotics. In this study, we performed a comparative genomic analysis of 181 genomes of the Lacticaseibacillus paracasei species to reveal genomic differences at the subspecies level and to reveal adaptive and probiotic features, and special emphasis is given to inulin consumption. No clear distinction at the subspecies level for L. paracasei was shown using a phylogenetic tree with orthologous genes from the core-genome set. In general, a good correlation was observed between genomic distance and isolation origin, suggesting that L. paracasei strains are adapted to their natural habitat, giving rise to genetic differences at the genomic level. 
A low frequency of undesirable characteristics such as plasmids, prophages, antibiotic resistance genes, absence of virulence factors, and frequent bacteriocin production supports these species being good candidates for use as probiotics. Lastly, we found that the inulin gene cluster in L. paracasei strains seems to differ slightly in the presence or absence of some genes but maintains a core defined by at least three fructose-PTS proteins, one hypothetical protein, and extracellular β-fructosidase. Finally, we conclude that further work has to be done for L. paracasei subspecies classification. Improving outgroup selection criteria is a key factor for their correct subspecies assignation.}, } @article {pmid36201053, year = {2022}, author = {Karthik, K and Anbazhagan, S and Chitra, MA and Sridhar, R}, title = {First report of the whole genome of Moraxella bovoculi genotype 1 from India and comparative genomics of Moraxella bovoculi to identify genotype-specific markers.}, journal = {Archives of microbiology}, volume = {204}, number = {11}, pages = {663}, pmid = {36201053}, issn = {1432-072X}, mesh = {Animals ; Cattle ; *Cattle Diseases/diagnosis ; Fimbriae Proteins ; Genomics ; Genotype ; *Keratoconjunctivitis ; *Keratoconjunctivitis, Infectious ; Moraxella/genetics ; *Moraxellaceae Infections/diagnosis/veterinary ; Vaccines, Synthetic ; Virulence Factors/genetics ; }, abstract = {Moraxella bovoculi has been isolated frequently from cattle with Infectious bovine keratoconjunctivitis (IBK). Two diverse genotypes of M. bovoculi, 1 and 2 were identified based on whole genome sequence analysis. It is essential to discriminate between the two genotypes to frame prevention and control measures. The whole genome of M. bovoculi TN7 was sequenced and compared to other M. bovoculi strains available in the NCBI database. M. 
bovoculi TN7 was found to be genotype 1, had an RTX toxin operon and pilA gene that are the known virulence factors in related Moraxella sp., but lacked antimicrobial resistance genes. M. bovoculi was found to have an open pangenome with 4051 (75.31%) accessory genes, and the addition of each new genome adds 18 genes to the pangenome. Comparison of pilin protein amino acid sequences revealed three new sequence types. Furthermore, the presence of linX, nagL, swrC and mdtA genes was found to be genotype 1 specific, whereas hyaD, garR, gbsA, yhdG, gabT, iclR, higB2, hmuU, hmuT and hemS were found only in genotype 2. Polymerase Chain Reaction (PCR) primers were designed and evaluated on strain TN7 plus seven additional strains accessible to us that had not been whole genome sequenced. This initial evaluation of the designed primers for the linX and hyaD genes produced the expected banding patterns on PCR gels for genotypes 1 and 2, respectively, among the 8 strains. The genotype-specific genes identified in this study can be used as markers for accurate diagnosis of genotype 1 isolates and this can aid in the development of autogenous or other molecular vaccines for treatment of infectious bovine keratoconjunctivitis (IBK) in resource-limited research settings.}, } @article {pmid36200898, year = {2022}, author = {Hildebrand, RE and Chandrasekar, SS and Riel, M and Touray, BJB and Aschenbroich, SA and Talaat, AM}, title = {Superinfection with SARS-CoV-2 Has Deleterious Effects on Mycobacterium bovis BCG Immunity and Promotes Dissemination of Mycobacterium tuberculosis.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0307522}, pmid = {36200898}, issn = {2165-0497}, mesh = {Mice ; Animals ; *Mycobacterium tuberculosis ; *Mycobacterium bovis ; Interleukin-17 ; SARS-CoV-2 ; BCG Vaccine ; *Superinfection ; *COVID-19 ; *Tuberculosis, Lymph Node ; Cytokines ; }, abstract = {An estimated one-third of the world's population is infected with 
Mycobacterium tuberculosis, with the majority being vaccinated with Mycobacterium bovis BCG. Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) remains a threat, and we must understand how SARS-CoV-2 can modulate both BCG immunity and tuberculosis pathogenesis. Interestingly, neither BCG vaccination nor tuberculosis infection resulted in differences in clinical outcomes associated with SARS-CoV-2 in transgenic mice. Surprisingly, earlier M. tuberculosis infection resulted in lower SARS-CoV-2 viral loads, mediated by the heightened immune microenvironment of the murine lungs, unlike vaccination with BCG, which had no impact. In contrast, M. tuberculosis-infected tissues had increased bacterial loads and decreased histiocytic inflammation in the lungs following SARS-CoV-2 superinfection. SARS-CoV-2 modulated BCG-induced type 17 responses while decreasing type 1 and increasing type 2 cytokines in M. tuberculosis-infected mice. These findings challenge initial findings of BCG's positive impact on SARS-CoV-2 infection and suggest potential ramifications for M. tuberculosis reactivation upon SARS-CoV-2 superinfection. IMPORTANCE Prior to SARS-CoV-2, M. tuberculosis was the leading infectious disease killer, with an estimated one-third of the world's population infected and 1.7 million deaths a year. Here, we show that SARS-CoV-2 superinfection caused increased bacterial dissemination in M. tuberculosis-infected mice along with immune and pathological changes. SARS-CoV-2 also impacted the immunity of BCG-vaccinated mice, resulting in decreased interleukin-17 (IL-17) levels, while offering no protective effect against SARS-CoV-2. These results demonstrate that SARS-CoV-2 may have a deleterious effect on the ongoing M. 
tuberculosis pandemic and potentially limit BCG's efficacy.}, } @article {pmid36200250, year = {2023}, author = {Srivastava, P and Jain, CK}, title = {Computer Aided Reverse Vaccinology: A Game-changer Approach for Vaccine Development.}, journal = {Combinatorial chemistry & high throughput screening}, volume = {26}, number = {10}, pages = {1813-1821}, doi = {10.2174/1386207325666220930124013}, pmid = {36200250}, issn = {1875-5402}, mesh = {*Artificial Intelligence ; Vaccinology/methods ; *Vaccines ; Computational Biology/methods ; Computers ; Vaccine Development ; }, abstract = {One of the most dynamic approaches in biotechnology is reverse vaccinology, which plays a huge role in today's developing vaccines. It has the capability of exploring and identifying the most potent vaccine candidate in a limited period of time. The first successful novel approach of reverse vaccinology was observed in Neisseria meningitidis serogroup B, which has revolutionised the whole field of computational biology. In this review, we have summarized the application of reverse vaccinology for different infectious diseases, discussed epitope prediction and various available bioinformatic tools, and explored the advantages, limitations and necessary elements of this approach. Some of the modifications in the reverse vaccinology approach, like pan-genome and comparative reverse vaccinology, are also outlined. Vaccines for illnesses like AIDS and hepatitis C have not yet been developed. Computer Aided Reverse vaccinology has the potential to be a game-changer in this area. The use of computational tools, pipelines and advanced soft-computing methods, such as artificial intelligence and deep learning, and exploitation of available omics data in integration have paved the way for speedy and effective vaccine designing. Is reverse vaccinology a viable option for developing vaccines against such infections, or is it a myth? 
Vaccine development gained momentum after the spread of various infections, resulting in numerous deaths; these vaccines are developed using the traditional technique, which includes inactivated microorganisms. As a result, reverse vaccinology may be a far superior technique for creating an effective vaccine.}, } @article {pmid36195962, year = {2022}, author = {Grytten, I and Dagestad Rand, K and Sandve, GK}, title = {KAGE: fast alignment-free graph-based genotyping of SNPs and short indels.}, journal = {Genome biology}, volume = {23}, number = {1}, pages = {209}, pmid = {36195962}, issn = {1474-760X}, mesh = {Algorithms ; Bayes Theorem ; Genome, Human ; Genotype ; Genotyping Techniques ; High-Throughput Nucleotide Sequencing ; Humans ; *INDEL Mutation ; *Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; }, abstract = {Genotyping is a core application of high-throughput sequencing. We present KAGE, a genotyper for SNPs and short indels that is inspired by recent developments within graph-based genome representations and alignment-free methods. KAGE uses a pan-genome representation of the population to efficiently and accurately predict genotypes. Two novel ideas improve both the speed and accuracy: a Bayesian model incorporates genotypes from thousands of individuals to improve prediction accuracy, and a computationally efficient method leverages correlation between variants. 
We show that the accuracy of KAGE is at par with the best existing alignment-free genotypers, while being an order of magnitude faster.}, } @article {pmid36190436, year = {2022}, author = {Xu, Z and Yuan, C}, title = {Molecular Epidemiology of Staphylococcus aureus in China Reveals the Key Gene Features Involved in Epidemic Transmission and Adaptive Evolution.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0156422}, pmid = {36190436}, issn = {2165-0497}, mesh = {Humans ; Animals ; Staphylococcus aureus ; Molecular Epidemiology/methods ; Phylogeny ; *Staphylococcal Infections/epidemiology ; *Anti-Infective Agents ; Anti-Bacterial Agents ; Microbial Sensitivity Tests ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Evolution, Molecular ; }, abstract = {Staphylococcus aureus is a Gram-positive pathogen that causes various infections in humans and domestic animals. In China, S. aureus is the most common Gram-positive pathogen that causes clinical infections. However, there are few comprehensive genome-based molecular epidemiology studies to investigate the genotypic background of the major S. aureus clones that are epidemic in China. Here, four S. aureus isolates that were recovered from hospital personnel were sequenced. In combination with whole-genome sequencing (WGS) data of 328 S. aureus strains as references, we performed a comprehensive molecular epidemiology study to reveal the molecular epidemic characterization of S. aureus that is epidemic in China. It was found that 332 S. aureus isolates were phylogenetically categorized into 4 major epidemic groups with different epidemiology phenotypes. Each group has exclusive features in virulence genotypic profiles, antimicrobial resistance genotypic profiles, core and pangenome features representing the differences involved in genetic features, evolutionary processes, and potential future evolutionary directions. Moreover, a comparative core genome analysis of 332 S. 
aureus isolates indicated several key genes that contributed to differences in molecular epidemic characterization and promoted the adaptive evolutionary process of each group. This study provides a comprehensive understanding of molecular epidemiological characteristics and adaptive evolutionary directions of major S. aureus clones that are epidemic in China. IMPORTANCE Staphylococcus aureus is an important Gram-positive pathogen that is epidemic worldwide and causes various infections in humans and domestic animals. However, there has been relatively little research on comprehensive molecular epidemiology in China. In this research, we reconstructed the phylogenetic relationship based on whole-genome data of strains almost all over China, screened for resistance and virulence genes, and took core and pan genome analysis to perform a comprehensive molecular epidemiology study of S. aureus that is epidemic in China. Our results highlight that there are 4 major epidemic groups with different epidemiology phenotypes after phylogenetic categorization with exclusive genetic features in virulence genotypic profiles, antimicrobial-resistance genotypic profiles, and core and pangenome features, and we found key gene features involved in epidemic transmission and adaptive evolution. Our findings are critical in describing molecular characteristic profiles of S. 
aureus infection, which could update existing preventive measures and take appropriate strategies.}, } @article {pmid36190419, year = {2022}, author = {Park, M and Kim, J and Horn, L and Haan, J and Strickland, A and Lappi, V and Boxrud, D and Hedberg, C and Ryu, S and Jeon, B}, title = {Sugar Modification of Wall Teichoic Acids Determines Serotype-Dependent Strong Biofilm Production in Listeria monocytogenes.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0276922}, pmid = {36190419}, issn = {2165-0497}, mesh = {Humans ; *Listeria monocytogenes/genetics ; Serogroup ; Teichoic Acids ; Phylogeny ; Sugars ; Rhamnose ; Biofilms ; Serotyping ; Food Microbiology ; }, abstract = {Biofilm production is responsible for persistent food contamination by Listeria monocytogenes, threatening food safety and public health. Human infection and food contamination with L. monocytogenes are caused primarily by serotypes 1/2a, 1/2b, and 4b. However, the association of biofilm production with phylogenic lineage and serotype has not yet been fully understood. In this study, we measured the levels of biofilm production in 98 clinical strains of L. monocytogenes at 37°C, 25°C, and 4°C. The phylogenetic clusters grouped by core genome multilocus sequence typing (cgMLST) exhibited association between biofilm production and phylogenetic lineage and serotype. Whereas clusters 1 and 3 consisting of serotype 4b strains exhibited weak biofilm production, clusters 2 (serotype 1/2b) and 4 (serotype 1/2a) were composed of strong biofilm formers. Particularly, cluster 2 (serotype 1/2b) strains exhibited the highest levels of biofilm production at 37°C, and the levels of biofilm production of cluster 4 (serotype 1/2a) strains were significantly elevated at all tested temperatures. Pan-genome analysis identified 22 genes unique to strong biofilm producers, most of which are related to the synthesis and modification of teichoic acids. 
Notably, a knockout mutation of the rml genes related to the modification of wall teichoic acids with l-rhamnose, which is specific to serogroup 1/2, significantly reduced the level of biofilm production by preventing biofilm maturation. Here, the results of our study show that biofilm production in L. monocytogenes is related to phylogeny and serotype and that the modification of wall teichoic acids with l-rhamnose is responsible for serotype-specific strong biofilm formation in L. monocytogenes. IMPORTANCE Biofilm formation on the surface of foods or food-processing facilities by L. monocytogenes is a serious food safety concern. Here, our data demonstrate that the level of biofilm production differs among serotypes 1/2a, 1/2b, and 4b depending on the temperature. Furthermore, sugar decoration of bacterial cell walls with l-rhamnose is responsible for strong biofilm production in serotypes 1/2a and 1/2b, commonly isolated from foods and listeriosis cases. The findings in this study improve our understanding of the association of biofilm production with phylogenetic lineage and serotype in L. monocytogenes.}, } @article {pmid36189364, year = {2022}, author = {Lv, Z and Chen, Y and Zhou, H and Chen, Z and Yao, Q and Ren, J and Liu, X and Liu, S and Deng, X and Pang, Y and Chen, W and Yang, H and Xu, P}, title = {Genomic characterization of two metagenome-assembled genomes of Tropheryma whipplei from China.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {947486}, pmid = {36189364}, issn = {2235-2988}, mesh = {Fluoroquinolones ; Genomics ; *Metagenome ; Phylogeny ; *Tropheryma/genetics ; Virulence Factors ; }, abstract = {Whipple's disease is a rare chronic systemic disease that affects almost any organ system of the body caused by the intracellular bacterium Tropheryma whipplei, which is found ubiquitously in the environment. Sequencing of the T. 
whipplei genome has revealed that it has a reduced genome (0.93 Mbp), a characteristic shared with other intracellular bacteria. Until our research started, 19 T. whipplei strains had been sequenced from cultures originated in France, Canada, and Germany. The genome of T. whipplei bacterium has not been studied in Asia yet. Here, two metagenome-assembled genomes (MAGs) of T. whipplei from China were reconstructed through metagenomic next-generation sequencing (mNGS) and genome binning. We also provided genomic insights into the geographical role and genomic features by analyzing the whole genome. The whole-genome phylogenetic tree was constructed based on single-nucleotide polymorphism (SNP) distance calculations and then grouped by distance similarity. The phylogenetic tree shows inconsistencies with geographic origins, thus suggesting that the variations in geographical origins cannot explain the phylogenetic relationships among the 21 T. whipplei strains. The two Chinese strains were closely related to each other, and also found to be related to strains from Germany (T. whipplei TW08/27) and France (T. whipplei Bcu26 and T. whipplei Neuro1). Furthermore, the Average Nucleotide Identity (ANI) matrix also showed no association between geographic origins and genomic similarities. The pan-genome analysis revealed that T. whipplei has a closed pan-genome composed of big core-genomes and small accessory genomes, like other intracellular bacteria. By examining the genotypes of the sequenced strains, all 21 T. whipplei strains were found to be resistant to fluoroquinolones, due to the genetic mutations in genes gyrA, gyrB, parC, and parE. The 21 T. whipplei strains shared the same virulence factors, except for the alpC gene, which existed in 7 out of the 21 T. whipplei strains. When comparing 21 entire T. 
whipplei pan-genomes from various nations, it was discovered that the bacterium also possessed a closed genome, which was a trait shared by intracellular pathogens.}, } @article {pmid36187939, year = {2022}, author = {Awori, RM}, title = {Nematophilic bacteria associated with entomopathogenic nematodes and drug development of their biomolecules.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {993688}, pmid = {36187939}, issn = {1664-302X}, abstract = {The importance of Xenorhabdus and Photorhabdus symbionts to their respective Steinernema and Heterorhabditis nematode hosts is that they not only contribute to their entomopathogenicity but also to their fecundity through the production of small molecules. Thus, this mini-review gives a brief introductory overview of these nematophilic bacteria. Specifically, their type species, nematode hosts, and geographic region of isolations are tabulated. The use of nucleotide sequence-based techniques for their species delineation and how pangenomes can improve this are highlighted. Using the Steinernema-Xenorhabdus association as an example, the bacterium-nematode lifecycle is visualized with an emphasis on the role of bacterial biomolecules. Those currently in drug development are discussed, and two potential antimalarial lead compounds are highlighted. Thus, this mini-review tabulates forty-eight significant nematophilic bacteria and visualizes the ecological importance of their biomolecules. It further discusses three of these biomolecules that are currently in drug development. 
Through it, one is introduced to Xenorhabdus and Photorhabdus bacteria, their natural production of biomolecules in the nematode-bacterium lifecycle, and how these molecules are useful in developing novel therapies.}, } @article {pmid36186424, year = {2022}, author = {Walker, AR and Shields, RC}, title = {Investigating CRISPR spacer targets and their impact on genomic diversification of Streptococcus mutans.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {997341}, pmid = {36186424}, issn = {1664-8021}, support = {R03 DE029882/DE/NIDCR NIH HHS/United States ; }, abstract = {CRISPR-Cas is a bacterial immune system that restricts the acquisition of mobile DNA elements. These systems provide immunity against foreign DNA by encoding CRISPR spacers that help target DNA if it re-enters the cell. In this way, CRISPR spacers are a type of molecular tape recorder of foreign DNA encountered by the host microorganism. Here, we extracted ∼8,000 CRISPR spacers from a collection of over three hundred Streptococcus mutans genomes. Phage DNA is a major target of S. mutans spacers. S. mutans strains have also generated immunity against mobile DNA elements such as plasmids and integrative and conjugative elements. There may also be considerable immunity generated against bacterial DNA, although the relative contribution of self-targeting versus bona fide intra- or inter-species targeting needs to be investigated further. While there was clear evidence that these systems have acquired immunity against foreign DNA, there appeared to be minimal impact on horizontal gene transfer (HGT) constraints on a species-level. There was little or no impact on genome size, GC content and 'openness' of the pangenome when comparing between S. mutans strains with low or high CRISPR spacer loads. In summary, while there is evidence of CRISPR spacer acquisition against self and foreign DNA, CRISPR-Cas does not act as a barrier on the expansion of the S. 
mutans accessory genome.}, } @article {pmid36185029, year = {2022}, author = {Wang, S and Sun, L and Narsing Rao, MP and Fang, BZ and Li, WJ}, title = {Comparative Genome Analysis of a Novel Alkaliphilic Actinobacterial Species Nesterenkonia haasae.}, journal = {Polish journal of microbiology}, volume = {71}, number = {3}, pages = {453-461}, pmid = {36185029}, issn = {2544-4646}, mesh = {Adenosine Triphosphate ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids ; Membrane Transport Proteins/genetics ; *Nitrates ; Nitrite Reductases/genetics ; Nucleic Acid Hybridization ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Sulfites ; *Thiosulfates ; Urea ; }, abstract = {In the present study, a comparative genome analysis of the novel alkaliphilic actinobacterial Nesterenkonia haasae with other members of the genus Nesterenkonia was performed. The genome size of Nesterenkonia members ranged from 2,188,008 to 3,676,111 bp. N. haasae and Nesterenkonia members of the present study encode the essential glycolysis and pentose phosphate pathway genes. In addition, some Nesterenkonia members encode the crucial genes for Entner-Doudoroff pathways. Some Nesterenkonia members possess the genes responsible for sulfate/thiosulfate transport system permease protein/ ATP-binding protein and conversion of sulfate to sulfite. Nesterenkonia members also encode the genes for assimilatory nitrate reduction, nitrite reductase, and the urea cycle. All Nesterenkonia members have the genes to overcome environmental stress and produce secondary metabolites. The present study helps to understand N. haasae and Nesterenkonia members' environmental adaptation and niches specificity based on their specific metabolic properties. 
Further, based on genome analysis, we propose reclassifying Nesterenkonia jeotgali as a later heterotypic synonym of Nesterenkonia sandarakina.}, } @article {pmid36183960, year = {2022}, author = {Sharma, V and Sood, A and Ray, P and Angrup, A}, title = {Comparative genomics reveals the evolution of antimicrobial resistance in Bacteroides nordii.}, journal = {Microbial pathogenesis}, volume = {173}, number = {Pt A}, pages = {105811}, doi = {10.1016/j.micpath.2022.105811}, pmid = {36183960}, issn = {1096-1208}, mesh = {*Anti-Bacterial Agents/pharmacology ; Drug Resistance, Multiple, Bacterial ; Metronidazole ; Drug Resistance, Bacterial/genetics ; Genomics ; *Anti-Infective Agents/pharmacology ; }, abstract = {Bacteroides nordii, is an understudied member of the pathogenic B. fragilis group which comprises several multidrug-resistant (MDR) strains. Thus, it is of great interest to study the genome biology of Bacteroides nordii. However, no detailed study is available that characterized B. nordii at the genetic level and explored its role as a potential pathogen. We isolated an MDR strain viz., B. nordii PGMM4098 from the pus sample and subjected it to whole genome sequencing using Illumina technology. The draft genome was de-novo assembled and annotated, followed by comprehensive comparative genomics analyses using the publicly available genome dataset of B. nordii. The pan-genome analysis revealed the open nature of B. nordii, indicating the continuous accumulation of novel genes in non-core components leading to the emergence of new strains of this species. The thirteen antimicrobial resistance (AMR) genes identified in the genomes of all B. nordii strains were part of the non-core component of the pan-genome. Of these, four AMR genes, nimE, aadS, mef(En2), and ermB/F/G were found to be acquired via the process of horizontal gene transfer (HGT) from anaerobic Bacteroidetes. 
Importantly, the nimE gene conferring metronidazole resistance was found to be present only in B. nordii PGMM4098, which harbors five other AMR genes encoded in its genome. Of these, nimE (metronidazole resistance), ermB/F/G (macrolide-lincosamide-streptogramin B resistance), and cfxA2/A3 (class A β-lactam resistance) genes were further validated using targeted polymerase chain reaction assay. Notably, these three genes were also found to be under the operation of positive selective pressure suggesting the diversification of these genes, which might lead to the emergence of new MDR strains of B. nordii in the near future. Our study reported and characterized the genome of the first MDR strain of B. nordii and revealed the AMR evolution in this species using a comprehensive comparative genomics approach.}, } @article {pmid36183957, year = {2022}, author = {Yan, S and Liu, X and Li, C and Jiang, Z and Li, D and Zhu, L}, title = {Genomic virulence genes profile analysis of Salmonella enterica isolates from animal and human in China from 2004 to 2019.}, journal = {Microbial pathogenesis}, volume = {173}, number = {Pt A}, pages = {105808}, doi = {10.1016/j.micpath.2022.105808}, pmid = {36183957}, issn = {1096-1208}, mesh = {Animals ; Humans ; *Salmonella enterica ; Virulence/genetics ; *Salmonella Infections, Animal/epidemiology ; Virulence Factors/genetics ; Salmonella ; Genomics ; }, abstract = {Salmonella is a momentously zoonotic and food-borne pathogen that seriously threatens human and animal health around the world. Salmonella pathogenicity is closely related to its virulence genes profile. However, conventional virulence gene analysis methods cannot truly reveal whole virulence genes carried by Salmonella. In this study, whole genome sequencing in combination with Virulence Factor Database were applied to investigate whole virulence gene profiles of 243 Salmonella isolates from animals and humans in China from 2004 to 2019. 
The results showed that a total of 670 virulence genes were identified in Salmonella, among them, 319 virulence genes were found in all the Salmonella tested isolates, and 9 virulence genes were unique to Salmonella. The 670 virulence genes were classified into 14 categories according to their functions, and the genes related to adherence, effector delivery system, immune modulation, motility and nutritional/metabolic factors accounted for 84.63%. Relationships between virulence genes and serovars, sequence types indicated that strains belonged to the same serovar or sequence type had similar virulence genes profiles, however, isolates from different sources, years and locations of isolation had variable virulence gene profiles. In addition, copy number of virulence genes and homologous virulence genes shared with other pathogens were also analyzed in this study. In summary, we investigated pan-genomic virulence gene profiles and molecular epidemiology of Salmonella isolates from humans and animals in China from 2004 to 2019. These findings are beneficial for pathogenic monitoring, investigation of virulence evolution as well as prevention and control of Salmonella.}, } @article {pmid36179091, year = {2022}, author = {Sirén, J and Paten, B}, title = {GBZ file format for pangenome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {22}, pages = {5012-5018}, pmid = {36179091}, issn = {1367-4811}, support = {R01 HG010485/HG/NHGRI NIH HHS/United States ; U01HG010961/NH/NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; OT2 OD026682/OD/NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; }, mesh = {High-Throughput Nucleotide Sequencing ; Software ; *Data Compression ; *Libraries ; }, abstract = {MOTIVATION: Pangenome graphs representing aligned genome assemblies are being shared in the text-based Graphical Fragment Assembly format. 
As the number of assemblies grows, there is a need for a file format that can store the highly repetitive data space efficiently.

RESULTS: We propose the GBZ file format based on data structures used in the Giraffe short-read aligner. The format provides good compression, and the files can be efficiently loaded into in-memory data structures. We provide compression and decompression tools and libraries for using GBZ graphs, and we show that they can be efficiently used on a variety of systems.

AVAILABILITY AND IMPLEMENTATION: C++ and Rust implementations are available at https://github.com/jltsiren/gbwtgraph and https://github.com/jltsiren/gbwt-rs, respectively.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid36169590, year = {2022}, author = {Abbas, M and Abid, MA and Meng, Z and Abbas, M and Wang, P and Lu, C and Askari, M and Akram, U and Ye, Y and Wei, Y and Wang, Y and Guo, S and Liang, C and Zhang, R}, title = {Integrating advancements in root phenotyping and genome-wide association studies to open the root genetics gateway.}, journal = {Physiologia plantarum}, volume = {174}, number = {6}, pages = {e13787}, doi = {10.1111/ppl.13787}, pmid = {36169590}, issn = {1399-3054}, support = {31771850//National Natural Science Foundation of China/ ; 32072115//National Natural Science Foundation of China/ ; //National Program for Support of Top-Notch Young Professionals/ ; //"Outstanding Agricultural Science Talent" of CAAS/ ; //Agricultural Science and Technology Innovation Program of Chinese Academy of Agricultural Sciences/ ; }, mesh = {*Quantitative Trait Loci ; *Genome-Wide Association Study ; Plant Roots/genetics ; Phenotype ; Plants/genetics ; }, abstract = {Plant adaptation to challenging environmental conditions around the world has made root growth and development an important research area for plant breeders and scientists. Targeted manipulation of root system architecture (RSA) to increase water and nutrient use efficiency can minimize the adverse effects of climate change on crop production. However, phenotyping of RSA is a major bottleneck since the roots are hidden in the soil. Recently the development of 2- and 3D root imaging techniques combined with the genome-wide association studies (GWASs) have opened up new research tools to identify the genetic basis of RSA. These approaches provide a comprehensive understanding of the RSA, by accelerating the identification and characterization of genes involved in root growth and development. 
This review summarizes the latest developments in phenotyping techniques and GWAS for RSA, which are used to map important genes regulating various aspects of RSA under varying environmental conditions. Furthermore, we discussed the state-of-the-art image analysis tools integrated with various phenotyping platforms for investigating and quantifying root traits with the highest phenotypic plasticity in both artificial and natural environments which were used for large scale association mapping studies, leading to the identification of RSA phenotypes and their underlying genetics with the greatest potential for RSA improvement. In addition, challenges in root phenotyping and GWAS are also highlighted, along with future research directions employing machine learning and pan-genomics approaches.}, } @article {pmid36169225, year = {2023}, author = {Zhang, Y and Zhang, H and Zhang, Z and Qian, Q and Zhang, Z and Xiao, J}, title = {ProPan: a comprehensive database for profiling prokaryotic pan-genome dynamics.}, journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D767-D776}, pmid = {36169225}, issn = {1362-4962}, mesh = {Archaea/genetics ; Bacteria/genetics ; *Genome ; Genome, Bacterial ; Genomics ; *Prokaryotic Cells ; *Databases, Genetic ; }, abstract = {Compared with conventional comparative genomics, the recent studies in pan-genomics have provided further insights into species genomic dynamics, taxonomy and identification, pathogenicity and environmental adaptation. To better understand genome characteristics of species of interest and to fully excavate key metabolic and resistant genes and their conservations and variations, here we present ProPan (https://ngdc.cncb.ac.cn/propan), a public database covering 23 archaeal species and 1,481 bacterial species (in a total of 51,882 strains) for comprehensively profiling prokaryotic pan-genome dynamics. 
By analyzing and integrating these massive datasets, ProPan offers three major aspects for the pan-genome dynamics of the species of interest: 1) the evaluations of various species' characteristics and composition in pan-genome dynamics; 2) the visualization of map association, the functional annotation and presence/absence variation for all contained species' gene clusters; 3) the typical characteristics of the environmental adaptation, including resistance genes prediction of 126 substances (biocide, antimicrobial drug and metal) and evaluation of 31 metabolic cycle processes. Besides, ProPan develops a very user-friendly interface, flexible retrieval and multi-level real-time statistical visualization. Taken together, ProPan will serve as a weighty resource for the studies of prokaryotic pan-genome dynamics, taxonomy and identification as well as environmental adaptation.}, } @article {pmid36166154, year = {2023}, author = {Duarte, IO and Hissa, DC and Quintela, BCSF and Rabelo, MC and Oliveira, FADS and Lima, NCB and Melo, VMM}, title = {Genomic Analysis of Surfactant-Producing Bacillus vallismortis TIM68: First Glimpse at Species Pangenome and Prediction of New Plipastatin-Like Lipopeptide.}, journal = {Applied biochemistry and biotechnology}, volume = {195}, number = {2}, pages = {753-771}, pmid = {36166154}, issn = {1559-0291}, support = {312462/2017-4//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; }, mesh = {*Lipopeptides/pharmacology ; Surface-Active Agents/pharmacology/chemistry ; *Bacillus/genetics ; Genomics ; }, abstract = {Surfactants are applied in several industrial processes when the modification of interface activity and the stability of colloidal systems are required. Lipopeptides are a class of microbial biosurfactants produced by species of the Bacillus genus. The present study aimed at assembling and analyzing the genome of a new Bacillus vallismortis strain, TIM68, that was shown to produce surfactant lipopeptides. 
The draft genome was also screened for common virulence factors and antibiotics resistance genes to investigate the strain biosafety. Comparative genomics analyses, i.e., synteny, average nucleotide identity (ANI), and pangenome, were also carried out using strain TIM68 and publicly available B. vallismortis complete and partial genomes. Three peptide synthetase operons were found in TIM68 genome, and they were surfactin A, mojavensin, and a novel plipastatin-like lipopeptide named vallisin. No virulence factors that render pathogenicity to the strain have been identified, but a region of prophage, that may contain unknown pathogenic factors, has been predicted. The pangenome of the species was characterized as closed, with 57% of genes integrating the core genome. The results obtained here on the genetic potential of TIM68 strain should contribute to its exploration in biotechnological applications.}, } @article {pmid36159272, year = {2022}, author = {Cherchame, E and Ilango, G and Noël, V and Cadel-Six, S}, title = {Polyphyly in widespread Salmonella enterica serovars and using genomic proximity to choose the best reference genome for bioinformatics analyses.}, journal = {Frontiers in public health}, volume = {10}, number = {}, pages = {963188}, pmid = {36159272}, issn = {2296-2565}, mesh = {Computational Biology ; Genomics ; Humans ; Multilocus Sequence Typing ; Phylogeny ; Salmonella ; *Salmonella enterica/genetics ; Serogroup ; United States ; }, abstract = {Salmonella is the most common cause of gastroenteritis in the world. Over the past 5 years, whole-genome analysis has led to the high-resolution characterization of clinical and foodborne Salmonella responsible for typhoid fever, foodborne illness or contamination of the agro-food chain. 
Whole-genome analyses are simplified by the availability of high-quality, complete genomes for mapping analysis and for calculating the pairwise distance between genomes, but unfortunately some difficulties may still remain. For some serovars, the complete genome is not available, or some serovars are polyphyletic and knowing the serovar alone is not sufficient for choosing the most appropriate reference genome. For these serovars, it is essential to identify the genetically closest complete genome to be able to carry out precise genome analyses. In this study, we explored the genomic proximity of 650 genomes of the 58 Salmonella enterica subsp. enterica serovars most frequently isolated in humans and from the food chain in the United States (US) and in Europe (EU), with a special focus on France. For each serovar, to take into account their genomic diversity, we included all the multilocus sequence type (MLST) profiles represented in EnteroBase with 10 or more genomes (on 19 July 2021). A phylogenetic analysis using both core- and pan-genome approaches was carried out to identify the genomic proximity of all the Salmonella studied and 20 polyphyletic serovars that have not yet been described in the literature. This study determined the genetic proximity between all 58 serovars studied and revealed polyphyletic serovars, their genomic lineages and MLST profiles. Finally, we enhanced the open-access databases with 73 new genomes and produced a list of high-quality complete reference genomes for 48 S. enterica subsp. 
enterica serovars among the most isolated in the US, EU, and France.}, } @article {pmid36156193, year = {2022}, author = {Smith-Zaitlik, T and Shibu, P and McCartney, AL and Foster, G and Hoyles, L and Negus, D}, title = {Extended genomic analyses of the broad-host-range phages vB_KmiM-2Di and vB_KmiM-4Dii reveal slopekviruses have highly conserved genomes.}, journal = {Microbiology (Reading, England)}, volume = {168}, number = {9}, pages = {}, doi = {10.1099/mic.0.001247}, pmid = {36156193}, issn = {1465-2080}, support = {MR/L01632X/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {*Anti-Infective Agents ; *Bacteriophages/genetics ; Endonucleases ; Genome, Viral ; Genomics/methods ; Host Specificity ; Sewage ; Water ; }, abstract = {High levels of antimicrobial resistance among members of the Klebsiella oxytoca complex (KoC) have led to renewed interest in the use of bacteriophage (phage) therapy to tackle infections caused by these bacteria. In this study we characterized two lytic phages, vB_KmiM-2Di and vB_KmiM-4Dii, that were isolated from sewage water against two GES-5-positive Klebsiella michiganensis strains (PS_Koxy2 and PS_Koxy4, respectively). ViPTree analysis showed both phages belonged to the genus Slopekvirus. rpoB gene-based sequence analysis of 108 presumptive K. oxytoca isolates (n=59 clinical, n=49 veterinary) found K. michiganensis to be more prevalent (46 % clinical and 43 % veterinary, respectively) than K. oxytoca (40 % clinical and 6 % veterinary, respectively). Host range analysis against these 108 isolates found both vB_KmiM-2Di and vB_KmiM-4Dii showed broad lytic activity against KoC species. Several hypothetical homing endonuclease genes were encoded within the genomes of both phages, which may contribute to their broad host range. Differences in the tail fibre protein may explain the non-identical host range of the two phages. 
Pangenome analysis of 24 slopekviruses found that genomes within this genus are highly conserved, with more than 50 % of all predicted coding sequences representing core genes at ≥95 % identity and ≥70 % coverage. Given their broad host ranges, our results suggest vB_KmiM-2Di and vB_KmiM-4Dii represent attractive potential therapeutics. In addition, current recommendations for phage-based pangenome analyses may require revision.}, } @article {pmid36153338, year = {2022}, author = {Tong, X and Han, MJ and Lu, K and Tai, S and Liang, S and Liu, Y and Hu, H and Shen, J and Long, A and Zhan, C and Ding, X and Liu, S and Gao, Q and Zhang, B and Zhou, L and Tan, D and Yuan, Y and Guo, N and Li, YH and Wu, Z and Liu, L and Li, C and Lu, Y and Gai, T and Zhang, Y and Yang, R and Qian, H and Liu, Y and Luo, J and Zheng, L and Lou, J and Peng, Y and Zuo, W and Song, J and He, S and Wu, S and Zou, Y and Zhou, L and Cheng, L and Tang, Y and Cheng, G and Yuan, L and He, W and Xu, J and Fu, T and Xiao, Y and Lei, T and Xu, A and Yin, Y and Wang, J and Monteiro, A and Westhof, E and Lu, C and Tian, Z and Wang, W and Xiang, Z and Dai, F}, title = {High-resolution silkworm pan-genome provides genetic insights into artificial selection and ecological adaptation.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {5619}, pmid = {36153338}, issn = {2041-1723}, mesh = {Animals ; *Bombyx/genetics ; *Diapause ; Domestication ; Genomics ; Silk/genetics ; }, abstract = {The silkworm Bombyx mori is an important economic insect for producing silk, the "queen of fabrics". The currently available genomes limit the understanding of its genetic diversity and the discovery of valuable alleles for breeding. Here, we deeply re-sequence 1,078 silkworms and assemble long-read genomes for 545 representatives. We construct a high-resolution pan-genome dataset representing almost the entire genomic content in the silkworm. 
We find that the silkworm population harbors a high density of genomic variants and identify 7308 new genes, 4260 (22%) core genes, and 3,432,266 non-redundant structure variations (SVs). We reveal hundreds of genes and SVs that may contribute to the artificial selection (domestication and breeding) of silkworm. Further, we focus on four genes responsible, respectively, for two economic (silk yield and silk fineness) and two ecologically adaptive traits (egg diapause and aposematic coloration). Taken together, our population-scale genomic resources will promote functional genomics studies and breeding improvement for silkworm.}, } @article {pmid36146509, year = {2022}, author = {Phanse, Y and Puttamreddy, S and Loy, D and Ramirez, JV and Ross, KA and Alvarez-Castro, I and Mogler, M and Broderick, S and Rajan, K and Narasimhan, B and Bartholomay, LC}, title = {RNA Nanovaccine Protects against White Spot Syndrome Virus in Shrimp.}, journal = {Vaccines}, volume = {10}, number = {9}, pages = {}, pmid = {36146509}, issn = {2076-393X}, support = {2015-67021-23309//United States Department of Agriculture/ ; 114370//United States Department of Agriculture/ ; }, abstract = {In the last 15 years, crustacean fisheries have experienced billions of dollars in economic losses, primarily due to viral diseases caused by such pathogens as white spot syndrome virus (WSSV) in the Pacific white shrimp Litopenaeus vannamei and Asian tiger shrimp Penaeus monodon. To date, no effective measures are available to prevent or control disease outbreaks in these animals, despite their economic importance. Recently, double-stranded RNA-based vaccines have been shown to provide specific and robust protection against WSSV infection in cultured shrimp. However, the limited stability of double-stranded RNA is the most significant hurdle for the field application of these vaccines with respect to delivery within an aquatic system. 
Polyanhydride nanoparticles have been successfully used for the encapsulation and release of vaccine antigens. We have developed a double-stranded RNA-based nanovaccine for use in shrimp disease control with emphasis on the Pacific white shrimp L. vannamei. Nanoparticles based on copolymers of sebacic acid, 1,6-bis(p-carboxyphenoxy)hexane, and 1,8-bis(p-carboxyphenoxy)-3,6-dioxaoctane exhibited excellent safety profiles, as measured by shrimp survival and histological evaluation. Furthermore, the nanoparticles localized to tissue target replication sites for WSSV and persisted through 28 days postadministration. Finally, the nanovaccine provided ~80% protection in a lethal WSSV challenge model. This study demonstrates the exciting potential of a safe, effective, and field-applicable RNA nanovaccine that can be rationally designed against infectious diseases affecting aquaculture.}, } @article {pmid36144349, year = {2022}, author = {Gontijo, JB and Paula, FS and Venturini, AM and Mandro, JA and Bodelier, PLE and Tsai, SM}, title = {Insights into the Genomic Potential of a Methylocystis sp. from Amazonian Floodplain Sediments.}, journal = {Microorganisms}, volume = {10}, number = {9}, pages = {}, pmid = {36144349}, issn = {2076-2607}, support = {2014/50320-4, 2017/26138-0, 2018/14974-0, 2019/25924-7, and 2019/25931-3//São Paulo Research Foundation/ ; }, abstract = {Although floodplains are recognized as important sources of methane (CH4) in the Amazon basin, little is known about the role of methanotrophs in mitigating CH4 emissions in these ecosystems. Our previous data reported the genus Methylocystis as one of the most abundant methanotrophs in these floodplain sediments. However, information on the functional potential and life strategies of these organisms living under seasonal flooding is still missing. Here, we described the first metagenome-assembled genome (MAG) of a Methylocystis sp. 
recovered from Amazonian floodplains sediments, and we explored its functional potential and ecological traits through phylogenomic, functional annotation, and pan-genomic approaches. Both phylogenomics and pan-genomics identified the closest placement of the bin.170_fp as Methylocystis parvus. As expected for Type II methanotrophs, the Core cluster from the pan-genome comprised genes for CH4 oxidation and formaldehyde assimilation through the serine pathway. Furthermore, the complete set of genes related to nitrogen fixation is also present in the Core. Interestingly, the MAG singleton cluster revealed the presence of unique genes related to nitrogen metabolism and cell motility. The study sheds light on the genomic characteristics of a dominant, but as yet unexplored methanotroph from the Amazonian floodplains. By exploring the genomic potential related to resource utilization and motility capability, we expanded our knowledge on the niche breadth of these dominant methanotrophs in the Amazonian floodplains.}, } @article {pmid36142806, year = {2022}, author = {Guche, MD and Pilati, S and Trenti, F and Dalla Costa, L and Giorni, P and Guella, G and Marocco, A and Lanubile, A}, title = {Functional Study of Lipoxygenase-Mediated Resistance against Fusarium verticillioides and Aspergillus flavus Infection in Maize.}, journal = {International journal of molecular sciences}, volume = {23}, number = {18}, pages = {}, pmid = {36142806}, issn = {1422-0067}, support = {protocol n. 
5385//Cremona Agri-Food Technologies (CRAFT)/ ; 35th PhD in AgriFood and Environmental Sciences//Fondazione Edmund Mach/ ; }, mesh = {Aspergillus flavus/genetics/metabolism ; *Fumonisins ; *Fusarium/metabolism ; Lipoxygenase/genetics/metabolism ; *Mycotoxins/metabolism ; Oxylipins/metabolism ; Plant Diseases/genetics/microbiology ; Triglycerides/metabolism ; Zea mays/metabolism ; }, abstract = {Mycotoxin contamination of maize kernels by fungal pathogens like Fusarium verticillioides and Aspergillus flavus is a chronic global challenge impacting food and feed security, health, and trade. Maize lipoxygenase genes (ZmLOXs) synthetize oxylipins that play defense roles and govern host-fungal interactions. The current study investigated the involvement of ZmLOXs in maize resistance against these two fungi. A considerable intraspecific genetic and transcript variability of the ZmLOX family was highlighted by in silico analysis comparing publicly available maize pan-genomes and pan-transcriptomes, respectively. Then, phenotyping and expression analysis of ZmLOX genes along with key genes involved in oxylipin biosynthesis were carried out in a maize mutant carrying a Mu transposon insertion in the ZmLOX4 gene (named UFMulox4) together with Tzi18, Mo17, and W22 inbred lines at 3- and 7-days post-inoculation with F. verticillioides and A. flavus. Tzi18 showed the highest resistance to the pathogens coupled with the lowest mycotoxin accumulation, while UFMulox4 was highly susceptible to both pathogens with the most elevated mycotoxin content. F. verticillioides inoculation determined a stronger induction of ZmLOXs and maize allene oxide synthase genes as compared to A. flavus. Additionally, oxylipin analysis revealed prevalent linoleic (18:2) peroxidation by 9-LOXs, the accumulation of 10-oxo-11-phytoenoic acid (10-OPEA), and triglyceride peroxidation only in F. 
verticillioides inoculated kernels of resistant genotypes.}, } @article {pmid36142798, year = {2022}, author = {Lee, JH and Lee, SR and Han, S and Lee, PC}, title = {Comparative Genomic Analysis of Agarolytic Flavobacterium faecale WV33[T].}, journal = {International journal of molecular sciences}, volume = {23}, number = {18}, pages = {}, pmid = {36142798}, issn = {1422-0067}, support = {2020R1A2C3008889//National Research Foundation of Korea/ ; 20220258//Ministry of Oceans and Fisheries/ ; }, mesh = {Agar ; DNA, Bacterial/genetics ; Fatty Acids/metabolism ; *Flavobacteriaceae ; *Flavobacterium/genetics ; Genomics ; Nucleotides/metabolism ; Phylogeny ; Plastics/metabolism ; RNA/metabolism ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Flavobacteria are widely dispersed in a variety of environments and produce various polysaccharide-degrading enzymes. Here, we report the complete genome of Flavobacterium faecale WV33[T], an agar-degrading bacterium isolated from the stools of Antarctic penguins. The sequenced genome of F. faecale WV33[T] represents a single circular chromosome (4,621,116 bp, 35.2% G + C content), containing 3984 coding DNA sequences and 85 RNA-coding genes. The genome of F. faecale WV33[T] contains 154 genes that encode carbohydrate-active enzymes (CAZymes). Among the CAZymes, seven putative genes encoding agarases have been identified in the genome. Transcriptional analysis revealed that the expression of these putative agarases was significantly enhanced by the presence of agar in the culture medium, suggesting that these proteins are involved in agar hydrolysis. Pangenome analysis revealed that the genomes of the 27 Flavobacterium type strains, including F. faecale WV33[T], tend to be very plastic, and Flavobacterium strains are unique species with a tiny core genome and a large non-core region. The average nucleotide identity and phylogenomic analysis of the 27 Flavobacterium-type strains showed that F. 
faecale WV33[T] was positioned in a unique clade in the evolutionary tree.}, } @article {pmid36141842, year = {2022}, author = {Ismail, S and Alsowayeh, N and Abbasi, HW and Albutti, A and Tahir Ul Qamar, M and Ahmad, S and Raza, RZ and Sadia, K and Abbasi, SW}, title = {Pan-Genome-Assisted Computational Design of a Multi-Epitopes-Based Vaccine Candidate against Helicobacter cinaedi.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {18}, pages = {}, pmid = {36141842}, issn = {1660-4601}, mesh = {Computational Biology ; Cytokines ; Epitopes, B-Lymphocyte/chemistry ; Epitopes, T-Lymphocyte/chemistry ; *Helicobacter/genetics ; Interferons ; Molecular Docking Simulation ; Toll-Like Receptor 5 ; *Type VI Secretion Systems ; *Vaccines ; }, abstract = {Helicobacter cinaedi is a Gram-negative bacterium from the family Helicobacteraceae and genus Helicobacter. The pathogen is a causative agent of gastroenteritis, cellulitis, and bacteremia. The increasing antibiotic resistance pattern of the pathogen prompts the efforts to develop a vaccine to prevent dissemination of the bacteria and stop the spread of antibiotic resistance (AR) determinants. Herein, a pan-genome analysis of the pathogen strains was performed to shed light on its core genome and its exploration for potential vaccine targets. In total, four vaccine candidates (TonB dependent receptor, flagellar hook protein FlgE, Hcp family type VI secretion system effector, flagellar motor protein MotB) were identified as promising vaccine candidates and subsequently subjected to an epitopes' mapping phase. These vaccine candidates are part of the pathogen core genome: they are essential, localized at the pathogen surface, and are antigenic. Immunoinformatics was further applied on the selected vaccine proteins to predict potential antigenic, non-allergic, non-toxic, virulent, and DRB*0101 epitopes. 
The selected epitopes were then fused using linkers to structure a multi-epitopes' vaccine construct. Molecular docking simulations were conducted to determine a designed vaccine binding stability with TLR5 innate immune receptor. Further, binding free energy by MMGB/PBSA and WaterSwap was employed to examine atomic level interaction energies. The designed vaccine also stimulated strong humoral and cellular immune responses as well as interferon and cytokines' production. In a nutshell, the designed vaccine is promising in terms of immune responses' stimulation and could be an ideal candidate for experimental analysis due to favorable physicochemical properties.}, } @article {pmid36140772, year = {2022}, author = {Socarras, KM and Haslund-Gourley, BS and Cramer, NA and Comunale, MA and Marconi, RT and Ehrlich, GD}, title = {Large-Scale Sequencing of Borreliaceae for the Construction of Pan-Genomic-Based Diagnostics.}, journal = {Genes}, volume = {13}, number = {9}, pages = {}, pmid = {36140772}, issn = {2073-4425}, mesh = {*Borrelia burgdorferi/genetics ; Genome, Bacterial ; Genomics/methods ; Humans ; *Lyme Disease/genetics/microbiology ; Phylogeny ; }, abstract = {The acceleration of climate change has been associated with an alarming increase in the prevalence and geographic range of tick-borne diseases (TBD), many of which have severe and long-lasting effects-particularly when treatment is delayed principally due to inadequate diagnostics and lack of physician suspicion. Moreover, there is a paucity of treatment options for many TBDs that are complicated by diagnostic limitations for correctly identifying the offending pathogens. This review will focus on the biology, disease pathology, and detection methodologies used for the Borreliaceae family which includes the Lyme disease agent Borreliella burgdorferi. 
Previous work revealed that Borreliaceae genomes differ from most bacteria in that they are composed of large numbers of replicons, both linear and circular, with the main chromosome being the linear with telomeric-like termini. While these findings are novel, additional gene-specific analyses of each class of these multiple replicons are needed to better understand their respective roles in metabolism and pathogenesis of these enigmatic spirochetes. Historically, such studies were challenging due to a dearth of both analytic tools and a sufficient number of high-fidelity genomes among the various taxa within this family as a whole to provide for discriminative and functional genomic studies. Recent advances in long-read whole-genome sequencing, comparative genomics, and machine-learning have provided the tools to better understand the fundamental biology and phylogeny of these genomically-complex pathogens while also providing the data for the development of improved diagnostics and therapeutics.}, } @article {pmid36139949, year = {2022}, author = {Hurtado, R and Barh, D and Weimer, BC and Viana, MVC and Profeta, R and Sousa, TJ and Aburjaile, FF and Quino, W and Souza, RP and Mestanza, O and Gavilán, RG and Azevedo, V}, title = {WGS-Based Lineage and Antimicrobial Resistance Pattern of Salmonella Typhimurium Isolated during 2000-2017 in Peru.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {11}, number = {9}, pages = {}, pmid = {36139949}, issn = {2079-6382}, abstract = {Salmonella Typhimurium is associated with foodborne diseases worldwide, including in Peru, and its emerging antibiotic resistance (AMR) is now a global public health problem. Therefore, country-specific monitoring of the AMR emergence is vital to control this pathogen, and in these aspects, whole genome sequence (WGS)—based approaches are better than gene-based analyses. 
Here, we performed the antimicrobial susceptibility test for ten widely used antibiotics and WGS-based various analyses of 90 S. Typhimurium isolates (human, animal, and environment) from 14 cities of Peru isolated from 2000 to 2017 to understand the lineage and antimicrobial resistance pattern of this pathogen in Peru. Our results suggest that the Peruvian isolates are of Typhimurium serovar and predominantly belong to sequence type ST19. Genomic diversity analyses indicate an open pan-genome, and at least ten lineages are circulating in Peru. A total of 48.8% and 31.0% of isolates are phenotypically and genotypically resistant to at least one antibiotic, while 12.0% are multi-drug resistant (MDR). Genotype−phenotype correlations for ten tested drugs show >80% accuracy, and >90% specificity. Sensitivity above 90% was only achieved for ciprofloxacin and ceftazidime. Two lineages exhibit the majority of the MDR isolates. A total of 63 different AMR genes are detected, of which 30 are found in 17 different plasmids. Transmissible plasmids such as IncI-gamma/k, IncI1-I(Alpha), Col(pHAD28), IncFIB, IncHI2, and IncI2 that carry AMR genes associated with third-generation antibiotics are also identified. Finally, three new non-synonymous single nucleotide variations (SNVs) for nalidixic acid and eight new SNVs for nitrofurantoin resistance are predicted using genome-wide association studies, comparative genomics, and functional annotation. Our analysis provides for the first time the WGS-based details of the circulating S. 
Typhimurium lineages and their antimicrobial resistance pattern in Peru.}, } @article {pmid36138232, year = {2022}, author = {Yang, T and Liu, R and Luo, Y and Hu, S and Wang, D and Wang, C and Pandey, MK and Ge, S and Xu, Q and Li, N and Li, G and Huang, Y and Saxena, RK and Ji, Y and Li, M and Yan, X and He, Y and Liu, Y and Wang, X and Xiang, C and Varshney, RK and Ding, H and Gao, S and Zong, X}, title = {Improved pea reference genome and pan-genome highlight genomic features and evolutionary characteristics.}, journal = {Nature genetics}, volume = {54}, number = {10}, pages = {1553-1563}, pmid = {36138232}, issn = {1546-1718}, mesh = {Biological Evolution ; Genomics ; *Peas/genetics ; *Plant Breeding ; Quantitative Trait Loci/genetics ; }, abstract = {Complete and accurate reference genomes and annotations provide fundamental resources for functional genomics and crop breeding. Here we report a de novo assembly and annotation of a pea cultivar ZW6 with contig N50 of 8.98 Mb, which features a 243-fold increase in contig length and evident improvements in the continuity and quality of sequence in complex repeat regions compared with the existing one. Genome diversity of 118 cultivated and wild pea demonstrated that Pisum abyssinicum is a separate species different from P. fulvum and P. sativum within Pisum. Quantitative trait locus analyses uncovered two known Mendel's genes related to stem length (Le/le) and seed shape (R/r) as well as some candidate genes for pod form studied by Mendel. A pan-genome of 116 pea accessions was constructed, and pan-genes preferred in P. abyssinicum and P. 
fulvum showed distinct functional enrichment, indicating the potential value of them as pea breeding resources in the future.}, } @article {pmid36131075, year = {2022}, author = {Izydorczyk, C and Waddell, BJ and Weyant, RB and Surette, MG and Somayaji, R and Rabin, HR and Conly, JM and Church, DL and Parkins, MD}, title = {The natural history and genetic diversity of Haemophilus influenzae infecting the airways of adults with cystic fibrosis.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {15765}, pmid = {36131075}, issn = {2045-2322}, mesh = {Adult ; *Cystic Fibrosis/complications ; Genetic Variation ; *Haemophilus Infections ; Haemophilus influenzae ; Humans ; Phylogeny ; Retrospective Studies ; }, abstract = {Haemophilus influenzae is a Gram-negative pathobiont, frequently recovered from the airways of persons with cystic fibrosis (pwCF). Previous studies of H. influenzae infection dynamics and transmission in CF predominantly used molecular methods, lacking resolution. In this retrospective cohort study, representative yearly H. influenzae isolates from all pwCF attending the Calgary Adult CF Clinic with H. influenzae positive sputum cultures between 2002 and 2016 were typed by pulsed-field gel electrophoresis. Isolates with shared pulsotypes common to ≥ 2 pwCF were sequenced by Illumina MiSeq. Phylogenetic and pangenomic analyses were used to assess genetic relatedness within shared pulsotypes, and epidemiological investigations were performed to assess potential for healthcare associated transmission. H. influenzae infection was observed to be common (33% of patients followed) and dynamic in pwCF. Most infected pwCF exhibited serial infections with new pulsotypes (75% of pwCF with ≥ 2 positive cultures), with up to four distinct pulsotypes identified from individual patients. Prolonged infection by a single pulsotype was only rarely observed. 
Intra-patient genetic diversity was observed at the single-nucleotide polymorphism and gene content levels. Seven shared pulsotypes encompassing 39% of pwCF with H. influenzae infection were identified, but there was no evidence, within our sampling scheme, of direct patient-to-patient infection transmission.}, } @article {pmid36125674, year = {2022}, author = {Chávez-Luzanía, RA and Montoya-Martínez, AC and Parra-Cota, FI and de Los Santos-Villalobos, S}, title = {Pangenomes-identified singletons for designing specific primers to identify bacterial strains in a plant growth-promoting consortium.}, journal = {Molecular biology reports}, volume = {49}, number = {11}, pages = {10489-10498}, pmid = {36125674}, issn = {1573-4978}, support = {PROFAPI 2022_0001//ITSON/ ; }, mesh = {*Plant Development ; *Bacteria/genetics ; Triticum/genetics/microbiology ; Whole Genome Sequencing ; Crops, Agricultural/genetics ; }, abstract = {BACKGROUND: The use of plant growth-promoting microorganisms represents a sustainable way to increase agricultural yields and plant health. Thus, the identification and tracking of these microorganisms are determinants for validating their positive effects on crops. Pangenomes allow the identification of singletons that can be used to design specific primers for the detection of the studied strains.

OBJECTIVE: This study aimed to establish a strategy based on the use of whole-genome sequencing and pangenomes for designing and validating primer sets for detecting Bacillus cabrialesii TE3[T], Priestia megaterium TRQ8, and Bacillus paralicheniformis TRQ65, a promising beneficial bacterial consortium for wheat.

METHODS AND RESULTS: The identification of singletons of TE3[T], TRQ8, and TRQ65 was performed by pangenomes using the Kbase platform and subsequently analyzed using BLAST®. The identified DNA regions were used for primer design in AlleleID version 7. Primers were validated by multiplex PCR using pure template DNA from each studied strain, combinations of two or three DNA from these strains, and DNA from agricultural soil samples enriched (and not) with the bacterial consortium. Here, we report the first design of primers capable of detecting and identifying the beneficial strains TE3[T], TRQ8, and TRQ65.

CONCLUSIONS: The use of pangenomes allowed the distinction of unique sequences that enable the design of primers for specific identification of the studied bacterial strains. This strategy can be widely used for the design of primer sets to detect other strains of interest for combating biopiracy, and commercial protection of biological products, among other applications.}, } @article {pmid36124775, year = {2022}, author = {Li, T and Yin, Y}, title = {Critical assessment of pan-genomic analysis of metagenome-assembled genomes.}, journal = {Briefings in bioinformatics}, volume = {23}, number = {6}, pages = {}, pmid = {36124775}, issn = {1477-4054}, support = {58-8042-7-089//United States Department of Agriculture/ ; R21 AI171952/AI/NIAID NIH HHS/United States ; DBI-1933521//National Science Foundation/ ; R01 GM140370/GM/NIGMS NIH HHS/United States ; 2019-YIN//Nebraska Tobacco Settlement Biomedical Research Enhancement Funds/ ; }, mesh = {*Metagenome ; Phylogeny ; *Genome, Bacterial ; Genomics ; Sequence Analysis, DNA/methods ; Metagenomics/methods ; }, abstract = {Pan-genome analyses of metagenome-assembled genomes (MAGs) may suffer from the known issues with MAGs: fragmentation, incompleteness and contamination. Here, we conducted a critical assessment of pan-genomics of MAGs, by comparing pan-genome analysis results of complete bacterial genomes and simulated MAGs. We found that incompleteness led to significant core gene (CG) loss. The CG loss remained when using different pan-genome analysis tools (Roary, BPGA, Anvi'o) and when using a mixture of MAGs and complete genomes. Contamination had little effect on core genome size (except for Roary due to its gene clustering issue) but had major influence on accessory genomes. Importantly, the CG loss was partially alleviated by lowering the CG threshold and using gene prediction algorithms that consider fragmented genes, but to a lesser degree when incompleteness was higher than 5%. 
The CG loss also led to incorrect pan-genome functional predictions and inaccurate phylogenetic trees. Our main findings were supported by a study of real MAG-isolate genome data. We conclude that lowering CG threshold and predicting genes in metagenome mode (as Anvi'o does with Prodigal) are necessary in pan-genome analysis of MAGs. Development of new pan-genome analysis tools specifically for MAGs is needed in future studies.}, } @article {pmid36123438, year = {2022}, author = {Vassallo, CN and Doering, CR and Littlehale, ML and Teodoro, GIC and Laub, MT}, title = {A functional selection reveals previously undetected anti-phage defence systems in the E. coli pangenome.}, journal = {Nature microbiology}, volume = {7}, number = {10}, pages = {1568-1579}, pmid = {36123438}, issn = {2058-5276}, support = {F32 GM139231/GM/NIGMS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Antiviral Agents ; *Bacteriophages/genetics ; CRISPR-Cas Systems ; Escherichia coli/genetics ; Prophages/genetics ; }, abstract = {The ancient, ongoing coevolutionary battle between bacteria and their viruses, bacteriophages, has given rise to sophisticated immune systems including restriction-modification and CRISPR-Cas. Many additional anti-phage systems have been identified using computational approaches based on genomic co-location within defence islands, but these screens may not be exhaustive. Here we developed an experimental selection scheme agnostic to genomic context to identify defence systems in 71 diverse E. coli strains. Our results unveil 21 conserved defence systems, none of which were previously detected as enriched in defence islands. Additionally, our work indicates that intact prophages and mobile genetic elements are primary reservoirs and distributors of defence systems in E. coli, with defence systems typically carried in specific locations or hotspots. 
These hotspots encode dozens of additional uncharacterized defence system candidates. Our findings reveal an extended landscape of antiviral immunity in E. coli and provide an approach for mapping defence systems in other species.}, } @article {pmid36109518, year = {2022}, author = {Yu, Y and Zhang, Z and Dong, X and Yang, R and Duan, Z and Xiang, Z and Li, J and Li, G and Yan, F and Xue, H and Jiao, D and Lu, J and Lu, H and Zhang, W and Wei, Y and Fan, S and Li, J and Jia, J and Zhang, J and Ji, J and Liu, P and Lu, H and Zhao, H and Chen, S and Wei, C and Chen, H and Zhu, Z}, title = {Pangenomic analysis of Chinese gastric cancer.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {5412}, pmid = {36109518}, issn = {2041-1723}, mesh = {Asian People/genetics ; China ; Genome, Human ; Humans ; Lectins/genetics ; Receptors, Cell Surface/genetics ; *Stomach Neoplasms/genetics ; }, abstract = {Pangenomic study might improve the completeness of human reference genome (GRCh38) and promote precision medicine. Here, we use an automated pipeline of human pangenomic analysis to build gastric cancer pan-genome for 185 paired deep sequencing data (370 samples), and characterize the gene presence-absence variations (PAVs) at whole genome level. Genes ACOT1, GSTM1, SIGLEC14 and UGT2B17 are identified as highly absent genes in gastric cancer population. A set of genes from unaligned sequences with GRCh38 are predicted. We successfully locate one of predicted genes GC0643 on chromosome 9q34.2. Overexpression of GC0643 significantly inhibits cell growth, cell migration and invasion, cell cycle progression, and induces cell apoptosis in cancer cells. The tumor suppressor functions can be reversed by shGC0643 knockdown. The GC0643 is approved by NCBI database (GenBank: MW194843.1). 
Collectively, the robust pan-genome strategy provides a deeper understanding of the gene PAVs in the human cancer genome.}, } @article {pmid36109150, year = {2022}, author = {Ruggieri, AA and Livraghi, L and Lewis, JJ and Evans, E and Cicconardi, F and Hebberecht, L and Ortiz-Ruiz, Y and Montgomery, SH and Ghezzi, A and Rodriguez-Martinez, JA and Jiggins, CD and McMillan, WO and Counterman, BA and Papa, R and Van Belleghem, SM}, title = {A butterfly pan-genome reveals that a large amount of structural variation underlies the evolution of chromatin accessibility.}, journal = {Genome research}, volume = {32}, number = {10}, pages = {1862-1875}, pmid = {36109150}, issn = {1549-5469}, support = {P20 GM103475/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; *Butterflies/genetics ; Chromatin/genetics ; DNA Transposable Elements/genetics ; Genomics ; INDEL Mutation ; Drosophila/genetics ; Evolution, Molecular ; }, abstract = {Despite insertions and deletions being the most common structural variants (SVs) found across genomes, not much is known about how much these SVs vary within populations and between closely related species, nor their significance in evolution. To address these questions, we characterized the evolution of indel SVs using genome assemblies of three closely related Heliconius butterfly species. Over the relatively short evolutionary timescales investigated, up to 18.0% of the genome was composed of indels between two haplotypes of an individual Heliconius charithonia butterfly and up to 62.7% included lineage-specific SVs between the genomes of the most distant species (11 Mya). Lineage-specific sequences were mostly characterized as transposable elements (TEs) inserted at random throughout the genome and their overall distribution was similarly affected by linked selection as single nucleotide substitutions. 
Using chromatin accessibility profiles (i.e., ATAC-seq) of head tissue in caterpillars to identify sequences with potential cis-regulatory function, we found that out of the 31,066 identified differences in chromatin accessibility between species, 30.4% were within lineage-specific SVs and 9.4% were characterized as TE insertions. These TE insertions were localized closer to gene transcription start sites than expected at random and were enriched for sites with significant resemblance to several transcription factor binding sites with known function in neuron development in Drosophila. We also identified 24 TE insertions with head-specific chromatin accessibility. Our results show high rates of structural genome evolution that were previously overlooked in comparative genomic studies and suggest a high potential for structural variation to serve as raw material for adaptive evolution.}, } @article {pmid36107145, year = {2022}, author = {Bhat, SV and Maughan, H and Cameron, ADS and Yost, CK}, title = {Phylogenomic analysis of the genus Delftia reveals distinct major lineages with ecological specializations.}, journal = {Microbial genomics}, volume = {8}, number = {9}, pages = {}, pmid = {36107145}, issn = {2057-5858}, mesh = {Animals ; DNA, Bacterial/genetics ; *Delftia/genetics ; Humans ; *Metals, Heavy ; Phylogeny ; Sequence Analysis, DNA ; Sewage ; Soil ; }, abstract = {Delftia is a diverse betaproteobacterial genus with many strains having agricultural and industrial relevance, including plant-growth promotion, bioremediation of hydrocarbon-contaminated soils, and heavy metal immobilization. Delftia spp. are broadly distributed in the environment, and have been isolated from plant hosts as well as healthy and diseased animal hosts, yet the genetic basis of this ecological versatility has not been characterized. 
Here, we present a phylogenomic comparison of published Delftia genomes and show that the genus is divided into two well-supported clades: one 'Delftia acidovorans' clade with isolates from soils and plant rhizospheres, and a second 'Delftia lacustris and Delftia tsuruhatensis' clade with isolates from humans and sludge. The pan-genome inferred from 61 Delftia genomes contained over 28 000 genes, of which only 884 were found in all genomes. Analysis of industrially relevant functions highlighted the ecological versatility of Delftia and supported their role as generalists.}, } @article {pmid36106979, year = {2022}, author = {Jiang, C and Kasai, H and Mino, S and Romalde, JL and Sawabe, T}, title = {The pan-genome of Splendidus clade species in the family Vibrionaceae: Insights into evolution, adaptation, and pathogenicity.}, journal = {Environmental microbiology}, volume = {24}, number = {10}, pages = {4587-4606}, doi = {10.1111/1462-2920.16209}, pmid = {36106979}, issn = {1462-2920}, mesh = {Animals ; Carbohydrates ; Evolution, Molecular ; Genome, Bacterial/genetics ; Phylogeny ; *Vibrionaceae/genetics ; Virulence/genetics ; Virulence Factors/genetics ; Genome ; }, abstract = {The Splendidus clade is the largest clade in Vibrionaceae, and its members are often related to mortality of marine animals with huge economic losses. The molecular bases of their pathogenicity and virulence, however, remain largely unknown. In particular, the complete genome sequences of the Splendidus clade species are rarely registered, which is one of the obstacles to predict core and/or unique genes responsible for their adaptation and pathogenicity, and to perform a fine scale meta-transcriptome during bacterial infection to their hosts. In this study, we obtained the complete genomes of all type strains in the Splendidus clade and revealed that (1) different genome sizes (4.4-5.9 Mb) with V. 
lentus the biggest and most of them had several big plasmids, likely because of the different features on mobilome elements; (2) the Splendidus clade consists of 19 species except V. cortegadensis, and 3 sub-clades (SC) were defined with the 15 most closely related members as SC1; (3) different carbohydrate degradation preferences may be the result of environmental adaptation; and (4) a broad prediction of virulence factors (VFs) revealed core and species unique VF genes.}, } @article {pmid36097170, year = {2022}, author = {Rosconi, F and Rudmann, E and Li, J and Surujon, D and Anthony, J and Frank, M and Jones, DS and Rock, C and Rosch, JW and Johnston, CD and van Opijnen, T}, title = {A bacterial pan-genome makes gene essentiality strain-dependent and evolvable.}, journal = {Nature microbiology}, volume = {7}, number = {10}, pages = {1580-1592}, pmid = {36097170}, issn = {2058-5276}, support = {R01 DE027850/DE/NIDCR NIH HHS/United States ; R21 AI117247/AI/NIAID NIH HHS/United States ; U01 AI124302/AI/NIAID NIH HHS/United States ; R01 AI110724/AI/NIAID NIH HHS/United States ; R01 GM034496/GM/NIGMS NIH HHS/United States ; }, mesh = {*DNA Transposable Elements ; Genes, Essential/genetics ; *Genome, Bacterial/genetics ; Streptococcus pneumoniae/genetics ; Whole Genome Sequencing ; }, abstract = {Many bacterial species are represented by a pan-genome, whose genetic repertoire far outstrips that of any single bacterial genome. Here we investigate how a bacterial pan-genome might influence gene essentiality and whether essential genes that are initially critical for the survival of an organism can evolve to become non-essential. By using Transposon insertion sequencing (Tn-seq), whole-genome sequencing and RNA-seq on a set of 36 clinical Streptococcus pneumoniae strains representative of >68% of the species' pan-genome, we identify a species-wide 'essentialome' that can be subdivided into universal, core strain-specific and accessory essential genes. 
By employing 'forced-evolution experiments', we show that specific genetic changes allow bacteria to bypass essentiality. Moreover, by untangling several genetic mechanisms, we show that gene essentiality can be highly influenced by and/or be dependent on: (1) the composition of the accessory genome, (2) the accumulation of toxic intermediates, (3) functional redundancy, (4) efficient recycling of critical metabolites and (5) pathway rewiring. While this functional characterization underscores the evolvability potential of many essential genes, we also show that genes with differential essentiality remain important antimicrobial drug target candidates, as their inactivation almost always has a severe fitness cost in vivo.}, } @article {pmid36097169, year = {2022}, author = {Beavan, AJS and McInerney, JO}, title = {Gene essentiality evolves across a pangenome.}, journal = {Nature microbiology}, volume = {7}, number = {10}, pages = {1510-1511}, pmid = {36097169}, issn = {2058-5276}, mesh = {*Genomics ; *Software ; }, } @article {pmid36094203, year = {2022}, author = {Batarseh, TN and Morales-Cruz, A and Ingel, B and Roper, MC and Gaut, BS}, title = {Using Genomes and Evolutionary Analyses to Screen for Host-Specificity and Positive Selection in the Plant Pathogen Xylella fastidiosa.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {18}, pages = {e0122022}, pmid = {36094203}, issn = {1098-5336}, mesh = {*Cellulases/genetics ; Histidine Kinase/genetics ; Host Specificity ; Phylogeny ; Plant Diseases/microbiology ; Plants/microbiology ; *Xylella/genetics ; }, abstract = {Xylella fastidiosa infects several economically important crops in the Americas, and it also recently emerged in Europe. 
Here, using a set of Xylella genomes reflective of the genus-wide diversity, we performed a pan-genome analysis based on both core and accessory genes for two purposes: (i) to test associations between genetic divergence and plant host species and (ii) to identify positively selected genes that are potentially involved in arms-race dynamics. For the former, tests yielded significant evidence for the specialization of X. fastidiosa to plant host species. This observation contributes to a growing literature suggesting that the phylogenetic history of X. fastidiosa lineages affects the host range. For the latter, our analyses uncovered evidence of positive selection across codons for 5.3% (67 of 1,257) of the core genes and 5.4% (201 of 3,691) of the accessory genes. These genes are candidates to encode interacting factors with plant and insect hosts. Most of these genes had unknown functions, but we did identify some tractable candidates, including nagZ_2, which encodes a beta-glucosidase that is important for Neisseria gonorrhoeae biofilm formation; cya, which modulates gene expression in pathogenic bacteria, and barA, a membrane associated histidine kinase that has roles in cell division, metabolism, and pili formation. IMPORTANCE: Xylella fastidiosa causes devastating diseases to several critical crops. Because X. fastidiosa colonizes and infects many plant species, it is important to understand whether the genome of X. fastidiosa has genetic determinants that underlie specialization to specific host plants. We analyzed genome sequences of X. fastidiosa to investigate evolutionary relationships and to test for evidence of positive selection on specific genes. We found a significant signal between genome diversity and host plants, consistent with bacterial specialization to specific plant hosts. 
By screening for positive selection, we identified both core and accessory genes that may affect pathogenicity, including genes involved in biofilm formation.}, } @article {pmid36087828, year = {2023}, author = {Irfan, M and Tariq, M and Basharat, Z and Abid Khan, RM and Jahanzaeb, M and Shakeel, M and Nisa, ZU and Shahzad, M and Jahanzaib, M and Moin, ST and Hassan, SS and Khan, IA}, title = {Genomic analysis of Chryseobacterium indologenes and conformational dynamics of the selected DD-peptidase.}, journal = {Research in microbiology}, volume = {174}, number = {1-2}, pages = {103990}, doi = {10.1016/j.resmic.2022.103990}, pmid = {36087828}, issn = {1769-7123}, mesh = {Humans ; *Serine-Type D-Ala-D-Ala Carboxypeptidase ; Anti-Bacterial Agents/pharmacology/therapeutic use ; *Chryseobacterium/genetics ; Genomics ; }, abstract = {Chryseobacterium indologenes is an emerging MDR pathogen that belongs to the family Flavobacteriaceae. The genome of the C. indologenes, isolated from the nephrotic patient, was sequenced through Illumina MiSeq. The pangenomics of available 56 C. indologenes strains using BPGA revealed an open pangenome (n=5553 CDS), core genome (2141), and accessory genome (2013). The CEG/DEG database identified 662 essential genes that drastically reduced to 68 genes after non-homology analyses towards human and gut microbiome. Further filtering the data for other drug target prioritizing parameters resulted in 32 putative targets. Keeping in view the crucial role played in cell wall biosynthesis, dacB was selected as the final target that encodes D-alanyl-D-alanine carboxypeptidase/endopeptidase (DD-peptidase). The 3D structure of dacB was modelled and rendered to docking analyses against two compound libraries of African plants (n=6842) and Tibetan medicines (n=52). The ADMET profiling exhibited the physicochemical properties of final compounds. 
The MD simulations showed the stability of inhibitor-DD-peptidase complex and interactions in terms of RMSD, RMSF, binding free energy calculation and H-bonding. We propose that the novel compounds Leptopene and ZINC95486338 from our findings might be potent DD-peptidase inhibitors that could aid in the development of new antibiotic-resistant therapy for the emerging MDR C. indologenes.}, } @article {pmid36086997, year = {2022}, author = {Fisher, CR and Wilson, M and Scott, JG}, title = {A chromosome-level assembly of the widely used Rockefeller strain of Aedes aegypti, the yellow fever mosquito.}, journal = {G3 (Bethesda, Md.)}, volume = {12}, number = {11}, pages = {}, pmid = {36086997}, issn = {2160-1836}, support = {R21 AI149121/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Humans ; *Aedes/genetics ; Mosquito Vectors/genetics ; *Yellow Fever/genetics ; Ecosystem ; Chromosomes ; }, abstract = {Aedes aegypti is the vector of important human diseases, and genomic resources are crucial in facilitating the study of A. aegypti and its ecosystem interactions. Several laboratory-acclimated strains of this mosquito have been established, but the most used strain in toxicology studies is "Rockefeller," which was originally collected and established in Cuba 130 years ago. A full-length genome assembly of another reference strain, "Liverpool," was published in 2018 and is the reference genome for the species (AaegL5). However, genetic studies with the Rockefeller strain are complicated by the availability of only the Liverpool strain as the reference genome. Differences between Liverpool and Rockefeller have been known for decades, particularly in the expression of genes relevant to mosquito behavior and vector control (e.g. olfactory). These differences indicate that AaegL5 is likely not fully representative of the Rockefeller genome, presenting potential impediments to research. 
Here, we present a chromosomal-level assembly and annotation of the Rockefeller genome and a comparative characterization vs the Liverpool genome. Our results set the stage for a pan-genomic approach to understanding evolution and diversity within this important disease vector.}, } @article {pmid36084857, year = {2022}, author = {Ribeiro, IDA and Bach, E and Passaglia, LMP}, title = {Alternative nitrogenase of Paenibacillus sonchi genomovar Riograndensis: An insight in the origin of Fe-nitrogenase in the Paenibacillaceae family.}, journal = {Molecular phylogenetics and evolution}, volume = {177}, number = {}, pages = {107624}, doi = {10.1016/j.ympev.2022.107624}, pmid = {36084857}, issn = {1095-9513}, mesh = {Nitrogen Fixation/genetics ; *Nitrogenase/genetics/metabolism ; *Paenibacillus/genetics/metabolism ; Phylogeny ; }, abstract = {Paenibacillus sonchi genomovar Riograndensis is a nitrogen-fixing bacteria isolated from wheat that displays diverse plant growth-promoting abilities. Beyond conventional Mo-nitrogenase, this organism also harbors an alternative Fe-nitrogenase, whose many aspects related to regulation, physiology, and evolution remain to be elucidated. In this work, the origins of this alternative system were investigated, exploring the distribution and diversification of nitrogenases in the Paenibacillaceae family. Our analysis showed that diazotrophs represent 17% of Paenibacillaceae genomes, of these, only 14.4% (2.5% of all Paenibacillaceae genomes) also contained Fe or V- nitrogenases. Diverse nif-like sequences were also described, occurring mainly in genomes that also harbor the alternative systems. The analysis of genomes containing Fe-nitrogenase showed a conserved cluster of nifEN anfHDGK across three genera: Gorillibacterium, Fontibacillus, and Paenibacillus. A phylogeny of anfHDGK separated the Fe-nitrogenases into three main groups. 
Our analysis suggested that Fe-nitrogenase was acquired by the ancestral lineage of Fontibacillus, Gorillibacterium, and Paenibacillus genera via horizontal gene transfer (HGT), and further events of transfer and gene loss marked the evolution of this alternative nitrogenase in these groups. The species phylogeny of N-fixing Paenibacillaceae separated the diazotrophs into five clades, one of these containing all occurrences of strains harboring alternative nitrogenases in the Paenibacillus genus. The pangenome of this clade is open and composed of more than 96% of accessory genes. Diverse functional categories were enriched in the flexible genome, including functions related to replication and repair. The latter involved diverse genes related to HGT, suggesting that such events may have an important role in the evolution of diazotrophic Paenibacillus. This study provided an insight into the organization, distribution, and evolution of alternative nitrogenase genes in Paenibacillaceae, considering different genomic aspects.}, } @article {pmid36083529, year = {2022}, author = {de Lima Ferreira, JK and de Mello Varani, A and Tótola, MR and Fernandes Almeida, M and de Sousa Melo, D and Ferreira Silva E Batista, C and Chalfun-Junior, A and Pimenta de Oliveira, KK and Wurdig Roesch, LF and Satler Pylro, V}, title = {Phylogenomic characterization and pangenomic insights into the surfactin-producing bacteria Bacillus subtilis strain RI4914.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {53}, number = {4}, pages = {2051--2063}, pmid = {36083529}, issn = {1678-4405}, support = {404651/2018-6//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; 303061/2019-7//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; 133550/2019-2//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; Finance Code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; 001//Brazilian 
Microbiome Project/ ; }, mesh = {*Bacillus subtilis/genetics/metabolism ; Phylogeny ; *Peptides, Cyclic/genetics/metabolism ; Lipopeptides ; Operon ; Bacterial Proteins/metabolism ; }, abstract = {Bacillus subtilis is a versatile bacterial species able to produce surfactin, a lipopeptide biosurfactant. We carried out the phylogenomic characterization and pangenomic analyses using available B. subtilis complete genomes. Also, we report the whole genome of the biosurfactant-producing B. subtilis strain RI4914 that was isolated from effluent water from an oil exploration field. We applied a hybrid sequencing approach using both long- and short-read sequencing technologies to generate a highly accurate, single-chromosome genome. The pangenomics analysis of 153 complete genomes classified as B. subtilis retrieved from the NCBI shows an open pangenome composed of 28,511 accessory genes, which agrees with the high genetic plasticity of the species. Also, this analysis suggests that surfactin production is a common trait shared by members of this species since the srfA operon is highly conserved among the B. subtilis strains found in most of the assemblies available. Finally, increased surfactin production corroborates the higher srfAA gene expression in B. subtilis strain RI4914.}, } @article {pmid36081802, year = {2022}, author = {Zhai, Y and Wei, C}, title = {Open pangenome of Lactococcus lactis generated by a combination of metagenome-assembled genomes and isolate genomes.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {948138}, pmid = {36081802}, issn = {1664-302X}, abstract = {Lactococcus lactis (L. lactis) is a well isolated and cultured lactic acid bacterium, but if utilizing the isolate genomes alone, the genome-based analysis of this taxon would be incomplete, because there are still uncultured strains in some ecological niches. In this study, we recovered 93 high-quality metagenome-assembled genomes (MAGs) of L. 
lactis from food and human gut metagenomes with a culture-independent method. We then constructed a unified genome catalog of L. lactis by integrating these MAGs with 70 publicly available isolated genomes. Having this comprehensive resource, we assessed the genomic diversity and phylogenetic relationships to further explore the genetic and functional properties of L. lactis. An open pangenome of L. lactis was generated using our genome catalog, consisting of 13,066 genes in total, from which 5,448 genes were not identified in the isolate genomes. The core genome-based phylogenetic analysis showed that L. lactis strains we collected were separated into two main subclades corresponding to two subspecies, with some uncultured phylogenetic lineages discovered. The species disparity was also indicated in PCA analysis based on accessory genes of our pangenome. These various analyzes shed further light on unexpectedly high diversity within the taxon at both genome and gene levels and gave clues about its population structure and evolution. Lactococcus lactis has a long history of safe use in food fermentations and is considered as one of the important probiotic microorganisms. Obtaining the complete genetic information of L. lactis is important to the food and health industry. However, it can naturally inhabit many environments other than dairy products, including drain water and human gut samples. Here we presented an open pan-genome of L. lactis constructed from 163 high-quality genomes obtained from various environments, including MAGs recovered from environmental metagenomes and isolate genomes. This study expanded the genetic information of L. lactis about one third, including more than 5,000 novel genes found in uncultured strains. This more complete gene repertoire of L. lactis is crucial to further understanding the genetic and functional properties. 
These properties may be harnessed to impart additional value to dairy fermentation or other industries.}, } @article {pmid36077108, year = {2022}, author = {Lau, NS and Heng, WL and Miswan, N and Azami, NA and Furusawa, G}, title = {Comparative Genomic Analyses of the Genus Photobacterium Illuminate Biosynthetic Gene Clusters Associated with Antagonism.}, journal = {International journal of molecular sciences}, volume = {23}, number = {17}, pages = {}, pmid = {36077108}, issn = {1422-0067}, support = {304.PCCB.6315625//Universiti Sains Malaysia/ ; }, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; *Fatty Acids/analysis ; Genomics ; Multigene Family ; *Photobacterium/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {The genus Photobacterium is known for its ecophysiological versatility encompassing free-living, symbiotic, and pathogenic lifestyles. Photobacterium sp. CCB-ST2H9 was isolated from estuarine sediment collected at Matang Mangrove, Malaysia. In this study, the genome of CCB-ST2H9 was sequenced, and the pan-genome of 37 Photobacterium strains was analysed. Phylogeny based on core genes showed that CCB-ST2H9 clustered with P. galatheae, forming a distinct clade with P. halotolerans, P. salinisoli, and P. arenosum. The core genome of Photobacterium was conserved in housekeeping functions, while the flexible genome was well represented by environmental genes related to energy production and carbohydrate metabolism. Genomic metrics including 16S rRNA sequence similarity, average nucleotide identity, and digital DNA-DNA hybridization values were below the cut-off for species delineation, implying that CCB-ST2H9 potentially represents a new species. Genome mining revealed that biosynthetic gene clusters (BGCs) involved in producing antimicrobial compounds such as holomycin in CCB-ST2H9 could contribute to the antagonistic potential. 
Furthermore, the EtOAc extract from the culture broth of CCB-ST2H9 exhibited antagonistic activity against Vibrio spp. Intriguingly, clustering based on BGCs profiles grouped P. galatheae, P. halotolerans, P. salinisoli, P. arenosum, and CCB-ST2H9 together in the heatmap by the presence of a large number of BGCs. These BGCs-rich Photobacterium strains represent great potential for bioactive secondary metabolites production and sources for novel compounds.}, } @article {pmid36076928, year = {2022}, author = {Ravin, NV and Rudenko, TS and Smolyakov, DD and Beletsky, AV and Gureeva, MV and Samylina, OS and Grabovich, MY}, title = {History of the Study of the Genus Thiothrix: From the First Enrichment Cultures to Pangenomic Analysis.}, journal = {International journal of molecular sciences}, volume = {23}, number = {17}, pages = {}, pmid = {36076928}, issn = {1422-0067}, support = {20-14-00137//Russian Science Foundation/ ; }, mesh = {DNA, Bacterial/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics/metabolism ; Sulfur/metabolism ; *Thiothrix/genetics/metabolism ; }, abstract = {Representatives of the genus Thiothrix are filamentous, sulfur-oxidizing bacteria found in flowing waters with counter-oriented sulfide and oxygen gradients. They were first described at the end of the 19th century, but the first pure cultures of this species only became available 100 years later. An increase in the number of described Thiothrix species at the beginning of the 21st century shows that the classical phylogenetic marker, 16S rRNA gene, is not informative for species differentiation, which is possible based on genome analysis. Pangenome analysis of the genus Thiothrix showed that the core genome includes genes for dissimilatory sulfur metabolism and central metabolic pathways, namely the Krebs cycle, Embden-Meyerhof-Parnas pathway, glyoxylate cycle, Calvin-Benson-Bassham cycle, and genes for phosphorus metabolism and amination. 
The shell part of the pangenome includes genes for dissimilatory nitrogen metabolism and nitrogen fixation, for respiration with thiosulfate. The dispensable genome comprises genes predicted to encode mainly hypothetical proteins, transporters, transcription regulators, methyltransferases, transposases, and toxin-antitoxin systems.}, } @article {pmid36076376, year = {2022}, author = {Yu, L and Zang, X and Chen, Y and Gao, Y and Pei, Z and Yang, B and Zhang, H and Narbad, A and Tian, F and Zhai, Q and Chen, W}, title = {Phenotype-genotype analysis of Latilactobacills curvatus from different niches: Carbohydrate metabolism, antibiotic resistance, bacteriocin, phage fragments and linkages with CRISPR-Cas systems.}, journal = {Food research international (Ottawa, Ont.)}, volume = {160}, number = {}, pages = {111640}, doi = {10.1016/j.foodres.2022.111640}, pmid = {36076376}, issn = {1873-7145}, support = {BBS/E/F/00044453/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/OS/NW/000006/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bacteriocins/genetics ; *Bacteriophages/genetics ; CRISPR-Cas Systems ; Carbohydrate Metabolism/genetics ; Drug Resistance, Microbial ; Genome, Bacterial/genetics ; Genotype ; Phenotype ; }, abstract = {The potential probiotic function of Latilactobacills curvatus has attracted the attention of researchers. To explore the differences in the genomes of L. curvatus, nine strains were isolated from various sources, including feces and fermented vegetables and compared with 25 strains from the NCBI database. The findings indicated that the average genome size, GC content, and CDS of L. curvatus were 1.94 MB, 41.9%, and 1825, respectively. Its core genome is associated with transcription, translation, carbohydrate transport and metabolism, and defense functions. The pan-genome of L. curvatus was in a closed state. The genetic diversity of L. 
curvatus is mainly manifested in its ability to use carbohydrates, antibiotic resistance, bacteriocin operon, and polymeric regularly interspaced short palindromic repeats (CRISPR)-Cas for bacterial immunity. The CRISPR system of 34 strains of L. curvatus was predominantly found to be of the IIA type with a few IIC and IE types. These findings will contribute to a better understanding of this species.}, } @article {pmid36073311, year = {2022}, author = {McLean, AR and Torres-Morales, J and Dewhirst, FE and Borisy, GG and Mark Welch, JL}, title = {Site-tropism of streptococci in the oral microbiome.}, journal = {Molecular oral microbiology}, volume = {37}, number = {6}, pages = {229--243}, pmid = {36073311}, issn = {2041-1014}, support = {R01 DE016937/DE/NIDCR NIH HHS/United States ; R01 DE022586/DE/NIDCR NIH HHS/United States ; R01 DE027958/DE/NIDCR NIH HHS/United States ; R01 DE030136/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; RNA, Ribosomal, 16S/genetics ; *Streptococcus/genetics ; *Microbiota/genetics ; Metagenome ; Bacteria/genetics ; Mouth/microbiology ; Tropism ; Phylogeny ; }, abstract = {A detailed understanding of where bacteria localize is necessary to advance microbial ecology and microbiome-based therapeutics. The site-specialist hypothesis predicts that most microbes in the human oral cavity have a primary habitat type within the mouth where they are most abundant. We asked whether this hypothesis accurately describes the distribution of the members of the genus Streptococcus, a clinically relevant taxon that dominates most oral sites. Prior analysis of 16S rRNA gene sequencing data indicated that some oral Streptococcus clades are site-specialists while others may be generalists. However, within complex microbial populations composed of numerous closely related species and strains, such as the oral streptococci, genome-scale analysis is necessary to provide the resolution to discriminate closely related taxa with distinct functional roles. 
Here, we assess whether individual species within this genus are specialists using publicly available genomic sequence data that provide species-level resolution. We chose a set of high-quality representative genomes for human oral Streptococcus species. Onto these genomes, we mapped shotgun metagenomic sequencing reads from supragingival plaque, tongue dorsum, and other sites in the oral cavity. We found that every abundant Streptococcus species in the healthy human oral cavity showed strong site-tropism and that even closely related species such as S. mitis, S. oralis, and S. infantis specialized in different sites. These findings indicate that closely related bacteria can have distinct habitat distributions in the absence of dispersal limitation and under similar environmental conditions and immune regimes. Substantial overlap between the core genes of these three species suggests that site-specialization is determined by subtle differences in genomic content.}, } @article {pmid36069574, year = {2022}, author = {Zhong, C and Qu, B and Hu, G and Ning, K}, title = {Pan-Genome Analysis of Campylobacter: Insights on the Genomic Diversity and Virulence Profile.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0102922}, pmid = {36069574}, issn = {2165-0497}, mesh = {Animals ; Humans ; *Campylobacter/genetics ; *Campylobacter Infections ; *Gastroenteritis ; Genome, Bacterial ; Genomics ; Phylogeny ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {The genus Campylobacter contains pathogens that cause bacterial gastroenteritis in humans and animals. Despite large-scale sequencing efforts to raise clinical awareness of Campylobacter, little is known about the diversity and functions of virulence factors. Here, we constructed the pan-genome of Campylobacter using 39 representative genomes, elucidating their genetic diversity, evolutionary characteristics, and virulence and resistance profiles. 
The Campylobacter pan-genome was open and showed extensive genome variability, with high levels of gene expansion and contraction as the organism evolved. These Campylobacter members had diverse virulence gene content, and six potential core virulence genes (porA, PEB4, cheY, htrB, Cj1135, and kpsF) have been identified. The conserved mechanisms for Campylobacter pathogenicity were related to adherence, motility, and immune modulation. We emphasized the relative importance of variable virulence genes. Many virulence genes have experienced expansion or contraction in specific lineages, which may be one of the factors causing differences in the content of virulence genes. Additionally, these Campylobacter genomes have a high prevalence of the cmeA and cmeC genes, which are linked to the CmeABC pump and contribute to multidrug resistance. The genomic variations, core and variable virulence factors, and resistance genes of Campylobacter characterized in this study would contribute to a better understanding of the virulence of Campylobacter and more effective use of candidates for drug development and prevention of Campylobacter infections. IMPORTANCE Pathogenic members of the genus Campylobacter are recognized as one of the major causative agents of human bacterial gastroenteritis. This study revealed the pan-genome of 39 Campylobacter species, provided the most updated reconstruction of the global virulence gene pool of 39 Campylobacter species, and identified species-related virulence differences. 
This study highlighted the basic conserved functionality and specificity of pathogenicity that are crucial to infection, which was critical for improving the diagnosis and prevention of Campylobacter infections.}, } @article {pmid36067550, year = {2022}, author = {Hitch, TCA and Bisdorf, K and Afrizal, A and Riedel, T and Overmann, J and Strowig, T and Clavel, T}, title = {A taxonomic note on the genus Prevotella: Description of four novel genera and emended description of the genera Hallella and Xylanibacter.}, journal = {Systematic and applied microbiology}, volume = {45}, number = {6}, pages = {126354}, doi = {10.1016/j.syapm.2022.126354}, pmid = {36067550}, issn = {1618-0984}, mesh = {Humans ; RNA, Ribosomal, 16S/genetics ; Phylogeny ; DNA, Bacterial/genetics ; Sequence Analysis, DNA ; *Ecosystem ; *Prevotella/genetics ; }, abstract = {The genus Prevotella comprises 55 species with validly published, and correct, names (at June 2021) that are phenotypically, ecologically and functionally diverse. This study used a range of comparative genome approaches (marker gene-based genome phylogeny, core genome phylogeny, average amino acid identity, percentage of conserved proteins and clade-specific marker genes) to identify large differences between the 53 species for which genomes are available, as well as two effectively published yet not validly named species and four novel species. These differences were consistent between the various analysis methods and justify the separation of Prevotella into multiple genera. While the distribution across 19 ecosystem types was unique for each species and inconsistent within clades, the functional repertoire based on the presence/absence of both PFAMs and CAZy families revealed distinct clustering based on the proposed genera. 
Based on the integration of all results, we propose the reclassification of species previously assigned to the genus Prevotella into seven genera, including four novel genera for which the names Segatella, Hoylesella, Leyella and Palleniella are proposed. In addition to the reclassification of Prevotella, this work describes four novel species, Hallella faecis, Xylanibacter rodentium, Xylanibacter muris, and Palleniella intestinalis.}, } @article {pmid36061813, year = {2022}, author = {Cai, K and Kuang, L and Yue, W and Xie, S and Xia, X and Zhang, G and Wang, J}, title = {Calmodulin and calmodulin-like gene family in barley: Identification, characterization and expression analyses.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {964888}, pmid = {36061813}, issn = {1664-462X}, abstract = {Calmodulin (CaM) and calmodulin-like (CML) proteins are Ca[2+] relays and play diverse and multiple roles in plant growth, development and stress responses. However, CaM/CML gene family has not been identified in barley (Hordeum vulgare). In the present study, 5 HvCaMs and 80 HvCMLs were identified through a genome-wide analysis. All HvCaM proteins possessed 4 EF-hand motifs, whereas HvCMLs contained 1 to 4 EF-hand motifs. HvCaM2, HvCaM3 and HvCaM5 coded the same polypeptide although they differed in nucleotide sequence, which was identical to the polypeptides coded by OsCaM1-1, OsCaM1-2 and OsCaM1-3. HvCaMs/CMLs were unevenly distributed over barley 7 chromosomes, and could be phylogenetically classified into 8 groups. HvCaMs/CMLs differed in gene structure, cis-acting elements and tissue expression patterns. Segmental and tandem duplication were observed among HvCaMs/CMLs during evolution. HvCML16, HvCML18, HvCML50 and HvCML78 were dispensable genes and the others were core genes in barley pan-genome. 
In addition, 14 HvCaM/CML genes were selected to examine their responses to salt, osmotic and low potassium stresses by qRT-PCR, and their expression were stress- and time-dependent. These results facilitate our understanding and further functional identification of HvCaMs/CMLs.}, } @article {pmid36058542, year = {2022}, author = {Ribeiro, M and Sousa, M and Borges, V and Gomes, JP and Duarte, S and Isidro, J and Vieira, L and Torres, C and Santos, H and Capelo, JL and Poeta, P and Igrejas, G}, title = {Bioinformatics study of expression from genomes of epidemiologically related MRSA CC398 isolates from human and wild animal samples.}, journal = {Journal of proteomics}, volume = {268}, number = {}, pages = {104714}, doi = {10.1016/j.jprot.2022.104714}, pmid = {36058542}, issn = {1876-7737}, mesh = {Aminoglycosides ; Animals ; Animals, Wild/microbiology ; Anti-Bacterial Agents/pharmacology ; Clindamycin ; Computational Biology ; Humans ; Immunoglobulins ; Livestock ; Macrolides ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Proteome ; *Staphylococcal Infections/epidemiology/veterinary ; Staphylococcus aureus/genetics ; *Transcriptome ; }, abstract = {One of the most important livestock-associated methicillin-resistant Staphylococcus aureus (LA-MRSA) genetic lineages is the clonal complex (CC) 398, which can cause typical S. aureus-associated infections in people. In this work, whole-genome sequencing, RNA-sequencing, and gel-based comparative proteomics were applied to study the genetic characteristics of three MRSA CC398 isolates recovered from humans (strains C5621 and C9017), and from an animal (strain OR418). Of the three strains, C9017 presented the broadest resistance genotype, including resistance to fluoroquinolone, clindamycin, tiamulin, macrolide and aminoglycoside antimicrobial classes. The scn, sak, and chp genes of the immune evasion cluster system were solely detected in OR418. 
Pangenome analysis showed a total of 288 strain-specific genes, most of which are hypothetical or phage-related proteins. OR418 had the most pronounced genetic differences. RNAIII (δ-hemolysin) gene was clearly the most expressed gene in OR418 and C5621, but it was not detected in C9017. Significant differences in the proteome profiles were found between strains. For example, the immunoglobulin-binding protein Sbi was more abundant in OR418. Considering that Sbi is a multifunctional immune evasion factor in S. aureus, the results point to OR418 strain having high zoonotic potential. Overall, multiomics biomarker signatures can assume an important role to advance precision medicine in the years to come. SIGNIFICANCE: MRSA is one of the most representative drug-resistant pathogens and its dissemination is increasing due to MRSA capability of establishing new reservoirs. LA-MRSA is considered an emerging problem worldwide and CC398 is one of the most important genetic lineages. In this study, three MRSA CC398 isolates recovered from humans and from a wild animal were analyzed through whole-genome sequencing, RNA-sequencing, and gel-based comparative proteomics in order to gather systems-wide omics data and better understand the genetic characteristics of this lineage to identify distinctive markers and genomic features of relevance to public health.}, } @article {pmid36053980, year = {2022}, author = {Goff, JL and Szink, EG and Thorgersen, MP and Putt, AD and Fan, Y and Lui, LM and Nielsen, TN and Hunt, KA and Michael, JP and Wang, Y and Ning, D and Fu, Y and Van Nostrand, JD and Poole, FL and Chandonia, JM and Hazen, TC and Stahl, DA and Zhou, J and Arkin, AP and Adams, MWW}, title = {Ecophysiological and genomic analyses of a representative isolate of highly abundant Bacillus cereus strains in contaminated subsurface sediments.}, journal = {Environmental microbiology}, volume = {24}, number = {11}, pages = {5546--5560}, pmid = {36053980}, issn = {1462-2920}, mesh = 
{RNA, Ribosomal, 16S/genetics ; *Bacillus cereus/genetics ; *Metals, Heavy ; Genomics ; Phylogeny ; }, abstract = {Bacillus cereus strain CPT56D-587-MTF (CPTF) was isolated from the highly contaminated Oak Ridge Reservation (ORR) subsurface. This site is contaminated with high levels of nitric acid and multiple heavy metals. Amplicon sequencing of the 16S rRNA genes (V4 region) in sediment from this area revealed an amplicon sequence variant (ASV) with 100% identity to the CPTF 16S rRNA sequence. Notably, this CPTF-matching ASV had the highest relative abundance in this community survey, with a median relative abundance of 3.77% and comprised 20%-40% of reads in some samples. Pangenomic analysis revealed that strain CPTF has expanded genomic content compared to other B. cereus species-largely due to plasmid acquisition and expansion of transposable elements. This suggests that these features are important for rapid adaptation to native environmental stressors. We connected genotype to phenotype in the context of the unique geochemistry of the site. These analyses revealed that certain genes (e.g. nitrate reductase, heavy metal efflux pumps) that allow this strain to successfully occupy the geochemically heterogenous microniches of its native site are characteristic of the B. 
cereus species while others such as acid tolerance are mobile genetic element associated and are generally unique to strain CPTF.}, } @article {pmid36051757, year = {2022}, author = {Dai, Z and Wu, T and Xu, S and Zhou, L and Tang, W and Hu, E and Zhan, L and Chen, M and Yu, G}, title = {Characterization of toxin-antitoxin systems from public sequencing data: A case study in Pseudomonas aeruginosa.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {951774}, pmid = {36051757}, issn = {1664-302X}, abstract = {The toxin-antitoxin (TA) system is a widely distributed group of genetic modules that play important roles in the life of prokaryotes, with mobile genetic elements (MGEs) contributing to the dissemination of antibiotic resistance gene (ARG). The diversity and richness of TA systems in Pseudomonas aeruginosa, as one of the bacterial species with ARGs, have not yet been completely demonstrated. In this study, we explored the TA systems from the public genomic sequencing data and genome sequences. A small scale of genomic sequencing data in 281 isolates was selected from the NCBI SRA database, reassembling the genomes of these isolates led to the findings of abundant TA homologs. Furthermore, remapping these identified TA modules on 5,437 genome/draft genomes uncovers a great diversity of TA modules in P. aeruginosa. Moreover, manual inspection revealed several TA systems that were not yet reported in P. aeruginosa including the hok-sok, cptA-cptB, cbeA-cbtA, tomB-hha, and ryeA-sdsR. Additional annotation revealed that a large number of MGEs were closely distributed with TA. Also, 16% of ARGs are located relatively close to TA. Our work confirmed a wealth of TA genes in the unexplored P. aeruginosa pan-genomes, expanded the knowledge on P. aeruginosa, and provided methodological tips on large-scale data mining for future studies. 
The co-occurrence of MGE, ARG, and TA may indicate a potential interaction in their dissemination.}, } @article {pmid36042298, year = {2022}, author = {}, key = {Nature Biotechnology}, title = {One pangenome to bind them all.}, journal = {Nature biotechnology}, volume = {40}, number = {9}, pages = {1301}, doi = {10.1038/s41587-022-01484-y}, pmid = {36042298}, issn = {1546-1696}, mesh = {*Genome, Bacterial ; *Genomics ; }, } @article {pmid36034705, year = {2022}, author = {Tian, C and Xing, M and Fu, L and Zhao, Y and Fan, X and Wang, S}, title = {Emergence of uncommon KL38-OCL6-ST220 carbapenem-resistant Acinetobacter pittii strain, co-producing chromosomal NDM-1 and OXA-820 carbapenemases.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {943735}, pmid = {36034705}, issn = {2235-2988}, mesh = {Acinetobacter ; *Acinetobacter Infections ; Bacterial Proteins ; DNA Transposable Elements ; Humans ; Meropenem ; Microbial Sensitivity Tests ; Multilocus Sequence Typing ; Phylogeny ; Virulence Factors ; beta-Lactamases ; }, abstract = {OBJECTIVE: To characterize one KL38-OCL6-ST220 carbapenem-resistant Acinetobacter pittii strain, co-producing chromosomal NDM-1 and OXA-820 carbapenemases.

METHODS: A. pittii TCM strain was isolated from a bloodstream infection (BSI). Antimicrobial susceptibility tests were conducted via disc diffusion and broth microdilution. Stability experiments of bla NDM-1 and bla OXA-820 carbapenemase genes were further performed. Whole-genome sequencing (WGS) was performed on the Illumina and Oxford Nanopore platforms. Multilocus sequence typing (MLST) was analyzed based on the Pasteur and Oxford schemes. Resistance genes, virulence factors, and insertion sequences (ISs) were identified with ABRicate based on ResFinder 4.0, virulence factor database (VFDB), and ISfinder. Capsular polysaccharide (KL), lipooligosaccharide outer core (OCL), and plasmid reconstruction were tested using Kaptive and PLACNETw. PHASTER was used to predict prophage regions. A comparative genomics analysis of all ST220 A. pittii strains from the public database was carried out. Point mutations, average nucleotide identity (ANI), DNA-DNA hybridization (DDH) distances, and pan-genome analysis were performed.

RESULTS: A. pittii TCM was ST220[Pas] and ST1818[Oxf] with KL38 and OCL6, respectively. It was resistant to imipenem, meropenem, and ciprofloxacin but still susceptible to amikacin, colistin, and tigecycline. WGS revealed that A. pittii TCM contained one circular chromosome and four plasmids. The Tn125 composite transposon, including bla NDM-1, was located in the chromosome with 3-bp target site duplications (TSDs). Many virulence factors and the bla OXA-820 carbapenemase gene were also identified. The stability assays revealed that bla NDM-1 and bla OXA-820 were stabilized by passage in an antibiotic-free medium. Moreover, 12 prophage regions were identified in the chromosome. Phylogenetic analysis showed that there are 11 ST220 A. pittii strains, and one collected from Anhui, China was closely related. All ST220 A. pittii strains presented high ANI and DDH values; they ranged from 99.85% to 100% for ANI and from 97.4% to 99.9% for DDH. Pan-genome analysis revealed 3,200 core genes, 0 soft core genes, 1,571 shell genes, and 933 cloud genes among the 11 ST220 A. pittii strains.

CONCLUSIONS: The coexistence of chromosomal NDM-1 and OXA-820 carbapenemases in A. pittii presents a huge challenge in healthcare settings. Increased surveillance of this species in hospital and community settings is urgently needed.}, } @article {pmid36029458, year = {2022}, author = {Hwang, CY and Cho, ES and Rhee, WJ and Kim, E and Seo, MJ}, title = {Genomic and physiological analysis of C50 carotenoid-producing novel Halorubrum ruber sp. nov.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {60}, number = {10}, pages = {1007-1020}, pmid = {36029458}, issn = {1976-3794}, mesh = {Amino Acids/genetics ; Antioxidants/analysis ; Bacterial Typing Techniques ; Carotenoids ; DNA, Archaeal/genetics ; DNA, Bacterial ; Fatty Acids/analysis ; Free Radicals ; Genomics ; *Halorubrum/genetics ; Mevalonic Acid ; Nucleic Acid Hybridization ; Nucleotides ; Phospholipids/analysis ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Sodium Chloride/metabolism ; Water ; }, abstract = {A novel haloarchaeal species designated as MBLA0099[T] was isolated from seawater near Yeongheung Island. Cells were Gram-negative, non-motile, red-pigmented, and rod-shaped. They grew at 10-45°C, within pH 5.5-9.0, and between 7.5% and 30% NaCl concentrations. Cells were able to grow without Mg[2+] and were lysed in distilled water. The size of the whole-genome and G + C content of DNA was 3.02 Mb and 68.9 mol%, respectively. Phylogenetic analysis shows that the strain MBLA0099[T] belongs to the genus Halorubrum. The average nucleotide and amino acid identity, and in silico DNA-DNA hybridization values were below the species delineation threshold. Pan-genomic analysis revealed that 3.2% of all genes present in strain MBLA0099[T] were unique to the strain. The red carotenoid produced by strain MBLA0099[T] was subjected to spectrometric and chromatographic analyses and confirmed to be bacterioruberin as C50 carotenoid. 
Mevalonic acid, terpenoid backbone, and carotenoid biosynthesis pathway were annotated for strain MBLA0099[T]. The C50 carotenoid production by strain MBLA0099[T] was also enhanced under various stress conditions including relatively neutral pH, high oxidative and salinity conditions. Additionally, the strain MBLA0099[T]-derived bacterioruberin showed the antioxidant activity with EC50 value of 12.29 µg/ml, based on the evaluation of DPPH free radical scavenging activity. The present study would be the first report on the identification of C50 carotenoid from the strain MBLA0099[T] representing a novel species of the genus Halorubrum, for which the name Halorubrum ruber sp. nov. is proposed. The type strain used was MBLA0099[T] (= KCTC 4296[T] = JCM 34701[T]).}, } @article {pmid36016080, year = {2022}, author = {Yousaf, M and Ullah, A and Sarosh, N and Abbasi, SW and Ismail, S and Bibi, S and Hasan, MM and Albadrani, GM and Talaat Nouh, NA and Abdulhakim, JA and Abdel-Daim, MM and Bin Emran, T}, title = {Design of Multi-Epitope Vaccine for Staphylococcus saprophyticus: Pan-Genome and Reverse Vaccinology Approach.}, journal = {Vaccines}, volume = {10}, number = {8}, pages = {}, pmid = {36016080}, issn = {2076-393X}, support = {PNURSP2022R30//This research was supported by Princess Nourah bint Abdulrahman University Researchers Supporting Project number (PNURSP2022R30), Princess Nourah bint Abdulrahman University, Riyadh, Saudi Arabia./ ; }, abstract = {Staphylococcus saprophyticus is a Gram-positive coccus responsible for the occurrence of cystitis in sexually active, young females. While effective antibiotics against this organism exist, resistant strains are on the rise. Therefore, prevention via vaccines appears to be a viable solution to address this problem. In comparison to traditional techniques of vaccine design, computationally aided vaccine development demonstrates marked specificity, efficiency, stability, and safety. 
In the present study, a novel, multi-epitope vaccine construct was developed against S. saprophyticus by targeting fully sequenced proteomes of its five different strains, which were examined using a pangenome and subtractive proteomic strategy to characterize prospective vaccination targets. The three immunogenic vaccine targets which were utilized to map the probable immune epitopes were verified by annotating the entire proteome. The predicted epitopes were further screened on the basis of antigenicity, allergenicity, water solubility, toxicity, virulence, and binding affinity towards the DRB*0101 allele, resulting in 11 potential epitopes, i.e., DLKKQKEKL, NKDLKKQKE, QDKLKDKSD, NVMDNKDLE, TSGTPDSQA, NANSDGSSS, GSDSSSSNN, DSSSSNNDS, DSSSSDRNN, SSSDRNNGD, and SSDDKSKDS. All these epitopes have the efficacy to cover 99.74% of populations globally. Finally, shortlisted epitopes were joined together with linkers and three different adjuvants to find the most stable and immunogenic vaccine construct. The top-ranked vaccine construct was further scrutinized on the basis of its physicochemical characterization and immunological profile. The non-allergenic and antigenic features of modeled vaccine constructs were initially validated and then subjected to docking with immune receptor major histocompatibility complex I and II (MHC-I and II), resulting in strong contact. In silico cloning validations yielded a codon adaptation index (CAI) value of 1 and an ideal percentage of GC contents (46.717%), indicating a putative expression of the vaccine in E. coli. Furthermore, immune simulation demonstrated that, after injecting the proposed MEVC, powerful antibodies were produced, resulting in the sharpest peaks of IgM + IgG formation (>11,500) within 5 to 15 days. Experimental testing against S. 
saprophyticus can evaluate the safety and efficacy of these prophylactic vaccination designs.}, } @article {pmid36014959, year = {2022}, author = {Jing, L and Xu, Z and Zhang, Y and Li, D and Song, Y and Hu, H and Fang, Y and Zhu, W}, title = {Metagenomic Insights into Pathogenic Characterization of ST410 Acinetobacter nosocomialis Prevalent in China.}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {8}, pages = {}, pmid = {36014959}, issn = {2076-0817}, support = {TYU-039F//Beijing Medical and Health Foundation/ ; }, abstract = {Acinetobacter nosocomialis is a prevalent opportunistic pathogen that causes hospital-acquired infections. The increasing threats from A. nosocomialis infections have led to attention from the scientific and medical communities. Metagenomic next-generation sequencing (mNGS) was performed for an exudate specimen collected from an ICU patient with wound infection, followed by sepsis, in Tongji Hospital. Three assembly strategies were employed to recover the genome of A. nosocomialis in the metagenomic sample. Together with publicly available genomes of A. nosocomialis, the features of population genetics and molecular epidemiology were deeply analyzed. A draft genome was reconstructed for the metagenomic strain WHM01, derived from the ST410 A. nosocomialis dominating the microbial community, thereby prompting its highly pathogenic risk, which is associated with infection and persistence. The structure of the bacterial pangenome was characterized, including the 1862 core and 11,815 accessory genes present in the 157 strains. The genetic diversity of the genes coding for the 128 virulence factors assigned to 14 functional categories was uncovered in this nosocomial pathogen, such as the lipooligosaccharide, capsule, type IV pilus, and outer membrane proteins. Our work revealed genomic properties of ST410 A. 
nosocomialis, which is prevalent in China, and further highlighted that metagenomic surveillance may be a prospective application for evaluating the pathogenic characteristics of the nosocomial opportunistic pathogens.}, } @article {pmid36013379, year = {2022}, author = {Zoclanclounon, YAB and Rostás, M and Chung, NJ and Mo, Y and Karlovsky, P and Dossa, K}, title = {Characterization of Peroxidase and Laccase Gene Families and In Silico Identification of Potential Genes Involved in Upstream Steps of Lignan Formation in Sesame.}, journal = {Life (Basel, Switzerland)}, volume = {12}, number = {8}, pages = {}, pmid = {36013379}, issn = {2075-1729}, support = {Ref 3.4 - 1202788 - 417 SEN - GF-P//Alexander von Humboldt Foundation/ ; }, abstract = {Peroxidases and laccases are oxidative enzymes involved in physiological processes in plants, covering responses to biotic and abiotic stress as well as biosynthesis of health-promoting specialized metabolites. Although they are thought to be involved in the biosynthesis of (+)-pinoresinol, a comprehensive investigation of this class of enzymes has not yet been conducted in the emerging oil crop sesame and no information is available regarding the potential (+)-pinoresinol synthase genes in this crop. In the present study, we conducted a pan-genome-wide identification of peroxidase and laccase genes coupled with transcriptome profiling of diverse sesame varieties. A total of 83 and 48 genes have been identified as coding for sesame peroxidase and laccase genes, respectively. Based on their protein domain and Arabidopsis thaliana genes used as baits, the genes were classified into nine and seven groups of peroxidase and laccase genes, respectively. The expression of the genes was evaluated using dynamic transcriptome sequencing data from six sesame varieties, including one elite cultivar, white vs black seed varieties, and high vs low oil content varieties. 
Two peroxidase genes (SiPOD52 and SiPOD63) and two laccase genes (SiLAC1 and SiLAC39), well conserved within the sesame pan-genome and exhibiting consistent expression patterns within sesame varieties matching the kinetic of (+)-pinoresinol accumulation in seeds, were identified as potential (+)-pinoresinol synthase genes. Cis-acting elements of the candidate genes revealed their potential involvement in development, hormonal signaling, and response to light and other abiotic triggers. Transcription factor enrichment analysis of promoter regions showed the predominance of MYB binding sequences. The findings from this study pave the way for lignans-oriented engineering of sesame with wide potential applications in food, health and medicinal domains.}, } @article {pmid36012871, year = {2022}, author = {Ogaji, YO and Lee, RC and Sawbridge, TI and Cocks, BG and Daetwyler, HD and Kaur, S}, title = {De Novo Long-Read Whole-Genome Assemblies and the Comparative Pan-Genome Analysis of Ascochyta Blight Pathogens Affecting Field Pea.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {8}, pages = {}, pmid = {36012871}, issn = {2309-608X}, abstract = {Ascochyta Blight (AB) is a major disease of many cool-season legumes globally. In field pea, three fungal pathogens have been identified to be responsible for this disease in Australia, namely Peyronellaea pinodes, Peyronellaea pinodella and Phoma koolunga. Limited genomic resources for these pathogens have been generated, which has hampered the implementation of effective management strategies and breeding for resistant cultivars. Using Oxford Nanopore long-read sequencing, we report the first high-quality, fully annotated, near-chromosome-level nuclear and mitochondrial genome assemblies for 18 isolates from the Australian AB complex. 
Comparative genome analysis was performed to elucidate the differences and similarities between species and isolates using phylogenetic relationships and functional diversity. Our data indicated that P. pinodella and P. koolunga are heterothallic, while P. pinodes is homothallic. More homology and orthologous gene clusters are shared between P. pinodes and P. pinodella compared to P. koolunga. The analysis of the repetitive DNA content showed differences in the transposable repeat composition in the genomes and their expression in the transcriptomes. Significant repeat expansion in P. koolunga's genome was seen, with strong repeat-induced point mutation (RIP) activity being evident. Phylogenetic analysis revealed that genetic diversity can be exploited for species marker development. This study provided the much-needed genetic resources and characterization of the AB species to further drive research in key areas such as disease epidemiology and host-pathogen interactions.}, } @article {pmid36011264, year = {2022}, author = {Woldegiorgis, ST and Wu, T and Gao, L and Huang, Y and Zheng, Y and Qiu, F and Xu, S and Tao, H and Harrison, A and Liu, W and He, H}, title = {Identification of Heat-Tolerant Genes in Non-Reference Sequences in Rice by Integrating Pan-Genome, Transcriptomics, and QTLs.}, journal = {Genes}, volume = {13}, number = {8}, pages = {}, pmid = {36011264}, issn = {2073-4425}, mesh = {Genes, Plant ; *Oryza/genetics ; Quantitative Trait Loci/genetics ; *Thermotolerance/genetics ; Transcriptome ; }, abstract = {The availability of large-scale genomic data resources makes it very convenient to mine and analyze genes that are related to important agricultural traits in rice. Pan-genomes have been constructed to provide insight into the genome diversity and functionality of different plants, which can be used in genome-assisted crop improvement. 
Thus, a pan-genome comprising all genetic elements is crucial for comprehensive variation study among the heat-resistant and -susceptible rice varieties. In this study, a rice pan-genome was firstly constructed by using 45 heat-tolerant and 15 heat-sensitive rice varieties. A total of 38,998 pan-genome genes were identified, including 37,859 genes in the reference and 1141 in the non-reference contigs. Genomic variation analysis demonstrated that a total of 76,435 SNPs were detected and identified as the heat-tolerance-related SNPs, which were specifically present in the highly heat-resistant rice cultivars and located in the genic regions or within 2 kbp upstream and downstream of the genes. Meanwhile, 3214 upregulated and 2212 downregulated genes with heat stress tolerance-related SNPs were detected in one or multiple RNA-seq datasets of rice under heat stress, among which 24 were located in the non-reference contigs of the rice pan-genome. We then mapped the DEGs with heat stress tolerance-related SNPs to the heat stress-resistant QTL regions. A total of 1677 DEGs, including 990 upregulated and 687 downregulated genes, were mapped to the 46 heat stress-resistant QTL regions, in which 2 upregulated genes with heat stress tolerance-related SNPs were identified in the non-reference sequences. This pan-genome resource is an important step towards the effective and efficient genetic improvement of heat stress resistance in rice to help meet the rapidly growing needs for improved rice productivity under different environmental stresses. 
These findings provide further insight into the functional validation of a number of non-reference genes and, especially, the two genes identified in the heat stress-resistant QTLs in rice.}, } @article {pmid36010855, year = {2022}, author = {Yamamoto, M and Takahashi, Y}, title = {Genetic and Epigenetic Pathogenesis of Acromegaly.}, journal = {Cancers}, volume = {14}, number = {16}, pages = {}, pmid = {36010855}, issn = {2072-6694}, abstract = {Acromegaly is caused by excessive secretion of GH and IGF-I mostly from somatotroph tumors. Various genetic and epigenetic factors are involved in the pathogenesis of somatotroph tumors. While somatic mutations of GNAS are the most prevalent cause of somatotroph tumors, germline mutations in various genes (AIP, PRKAR1A, GPR101, GNAS, MEN1, CDKN1B, SDHx, MAX) are also known as the cause of somatotroph tumors. Moreover, recent findings based on multiple perspectives of the pangenomic approach including genome, transcriptome, and methylome analyses, histological characterization, genomic instability, and possible involvement of miRNAs have gradually unveiled the whole landscape of the underlying mechanisms of somatotroph tumors. 
In this review, we will focus on the recent advances in genetic and epigenetic pathogenesis of somatotroph tumors.}, } @article {pmid36008774, year = {2022}, author = {Rodriguez Jimenez, A and Guiglielmoni, N and Goetghebuer, L and Dechamps, E and George, IF and Flot, JF}, title = {Comparative genome analysis of Vagococcus fluvialis reveals abundance of mobile genetic elements in sponge-isolated strains.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {618}, pmid = {36008774}, issn = {1471-2164}, support = {DISARM//Fonds De La Recherche Scientifique - FNRS/ ; DISARM//Fonds De La Recherche Scientifique - FNRS/ ; DISARM//Fonds De La Recherche Scientifique - FNRS/ ; }, mesh = {Animals ; Enterococcaceae/genetics ; Interspersed Repetitive Sequences/genetics ; Phylogeny ; *Porifera/genetics ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Vagococcus fluvialis is a species of lactic acid bacteria found both free-living in river and seawater and associated to hosts, such as marine sponges. This species has been greatly understudied, with no complete genome assembly available to date, which is essential for the characterisation of the mobilome.

RESULTS: We sequenced and assembled de novo the complete genome sequences of five V. fluvialis isolates recovered from marine sponges. Pangenome analysis of the V. fluvialis species (total of 17 genomes) showed a high intraspecific diversity, with 45.5% of orthologous genes found to be strain specific. Despite this diversity, analyses of gene functions clustered all V. fluvialis species together and separated them from other sequenced Vagococcus species. V. fluvialis strains from different habitats were highly similar in terms of functional diversity but the sponge-isolated strains were enriched in several functions related to the marine environment. Furthermore, sponge-isolated strains carried a significantly higher number of mobile genetic elements (MGEs) compared to previously sequenced V. fluvialis strains from other environments. Sponge-isolated strains carried up to 4 circular plasmids each, including a 48-kb conjugative plasmid. Three of the five strains carried an additional circular extrachromosomal sequence, assumed to be an excised prophage as it contained mainly viral genes and lacked plasmid replication genes. Insertion sequences (ISs) were up to five times more abundant in the genomes of sponge-isolated strains compared to the others, including several IS families found exclusively in these genomes.

CONCLUSIONS: Our findings highlight the dynamics and plasticity of the V. fluvialis genome. The abundance of mobile genetic elements in the genomes of sponge-isolated V. fluvialis strains suggests that the mobilome might be key to understanding the genomic signatures of symbiosis in bacteria.}, } @article {pmid36005754, year = {2022}, author = {Ashrafi, S and Kuzmanović, N and Patz, S and Lohwasser, U and Bunk, B and Spröer, C and Lorenz, M and Elhady, A and Frühling, A and Neumann-Schaal, M and Verbarg, S and Becker, M and Thünen, T}, title = {Two New Rhizobiales Species Isolated from Root Nodules of Common Sainfoin (Onobrychis viciifolia) Show Different Plant Colonization Strategies.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0109922}, pmid = {36005754}, issn = {2165-0497}, mesh = {Fertilizers ; Carbon Dioxide ; *Mesorhizobium/genetics ; *Fabaceae/microbiology ; *Rhizobium/genetics ; Symbiosis ; Nitrogen ; }, abstract = {Root nodules of legume plants are primarily inhabited by rhizobial nitrogen-fixing bacteria. Here, we propose two new Rhizobiales species isolated from root nodules of common sainfoin (Onobrychis viciifolia), as shown by core-gene phylogeny, overall genome relatedness indices, and pan-genome analysis. Mesorhizobium onobrychidis sp. nov. actively induces nodules and achieves atmospheric nitrogen and carbon dioxide fixation. This species appears to be depleted in motility genes and is enriched in genes for direct effects on plant growth performance. Its genome reveals functional and plant growth-promoting signatures, like a large unique chromosomal genomic island with high density of symbiotic genetic traits. Onobrychidicola muellerharveyae gen. nov. sp. nov. is described as a type species of the new genus Onobrychidicola in Rhizobiaceae. This species comprises unique genetic features and plant growth-promoting traits (PGPTs), which strongly indicate its function in biotic stress reduction and motility. 
We applied a newly developed bioinformatics approach for in silico prediction of PGPTs (PGPT-Pred), which supports the different lifestyles of the two new species and the plant growth-promoting performance of M. onobrychidis in the greenhouse trial. IMPORTANCE The intensive use of chemical fertilizers has a variety of negative effects on the environment. Increased utilization of biological nitrogen fixation (BNF) is one way to mitigate those negative impacts. In order to optimize BNF, suitable candidates for different legume species are required. Despite intensive search for new rhizobial bacteria associated with legumes, no new rhizobia have recently been identified from sainfoin (Onobrychis viciifolia). Here, we report on the discovery of two new rhizobial species associated with sainfoin, which are of high importance for the host and may help to increase sustainability in agricultural practices. We employed the combination of in silico prediction and in planta experiments, which is an effective way to detect promising plant growth-promoting bacteria.}, } @article {pmid36004795, year = {2022}, author = {Liu, H and Wang, X and Liu, S and Huang, Y and Guo, YX and Xie, WZ and Liu, H and Tahir Ul Qamar, M and Xu, Q and Chen, LL}, title = {Citrus Pan-Genome to Breeding Database (CPBD): A comprehensive genome database for citrus breeding.}, journal = {Molecular plant}, volume = {15}, number = {10}, pages = {1503-1505}, doi = {10.1016/j.molp.2022.08.006}, pmid = {36004795}, issn = {1752-9867}, mesh = {*Citrus/genetics ; Genome, Plant/genetics ; Plant Breeding ; }, } @article {pmid36003217, year = {2022}, author = {Mattock, J and Smith, AM and Keddy, KH and Manners, EJ and Duze, ST and Smouse, S and Tau, N and Baker, D and Chattaway, MA and Mather, AE and Wain, J and Langridge, GC}, title = {Genetic characterization of Salmonella Infantis from South Africa, 2004-2016.}, journal = {Access microbiology}, volume = {4}, number = {7}, pages = {acmi000371}, pmid = {36003217}, 
issn = {2516-8290}, support = {MC_PC_16093/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Salmonella Infantis is presenting an increasing risk to public health. Of particular concern are the reports of pESI, a multidrug resistance (MDR) encoding megaplasmid, in isolates from multiple countries, but little is known about its presence or diversity in South Africa. Whole genome sequences of 387 S. Infantis isolates from South Africa (2004-2020) were analysed for genetic phylogeny, recombination frequency, antimicrobial resistance (AMR) determinants, plasmid presence and overall gene content. The population structure of South African S. Infantis was substantially different to S. Infantis reported elsewhere; only two thirds of isolates belonged to eBG31, while the remainder were identified as eBG297, a much rarer group globally. Significantly higher levels of recombination were observed in the eBG297 isolates, which was associated with the presence of prophages. The majority of isolates were putatively susceptible to antimicrobials (335/387) and lacked any plasmids (311/387); the megaplasmid pESI was present in just one isolate. A larger proportion of eBG31 isolates, 19% (49/263), contained at least one AMR determinant, compared to eBG297 at 2% (3/124). Comparison of the pan-genomes of isolates from either eBG identified 943 genes significantly associated with eBG, with 43 found exclusively in eBG31 isolates and 34 in eBG297 isolates. This, along with the single nucleotide polymorphism distance and difference in resistance profiles, suggests that eBG31 and eBG297 isolates occupy different niches within South Africa. If antibiotic-resistant S. 
Infantis emerges in South Africa, probably through the spread of the pESI plasmid, treatment of this infection would be compromised.}, } @article {pmid36000891, year = {2022}, author = {Holm, MKA and Jørgensen, KM and Bagge, K and Worning, P and Pedersen, M and Westh, H and Monk, JM and Bartels, MD}, title = {Estimated Roles of the Carrier and the Bacterial Strain When Methicillin-Resistant Staphylococcus aureus Decolonization Fails: a Case-Control Study.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0129622}, pmid = {36000891}, issn = {2165-0497}, mesh = {Humans ; Adolescent ; *Methicillin-Resistant Staphylococcus aureus/genetics ; *Staphylococcal Infections/drug therapy/epidemiology/microbiology ; Case-Control Studies ; Quality of Life ; Anti-Bacterial Agents/therapeutic use ; Carrier State/epidemiology ; }, abstract = {Methicillin-resistant Staphylococcus aureus (MRSA) is a common bacterial pathogen that frequently colonizes healthy individuals, with potential to cause invasive infection. In Denmark, to keep the prevalence low, MRSA carriers are recommended to undergo decolonization treatments, but achieving decolonization is challenging. Knowledge about the factors contributing to decolonization is scarce. We aimed to identify bacterial genome and clinical factors influencing MRSA decolonization. We identified all new MRSA patients above 2 years of age within the Hvidovre catchment area, Copenhagen, Denmark, in 2017 and 2018. Carriers were defined as chronic carriers (cases) if they were MRSA positive after two or more treatments and as nonchronic carriers (controls) if they were MRSA free after the first or second treatment. Using whole-genome sequencing (WGS), we constructed a pangenome of bacterial strains. With the incorporation of bacterial genome and clinical patient data, machine learning and multivariate analyses were performed to determine the factors associated with decolonization. 
A total of 477 MRSA carriers were included. An age of ≥13 years was significantly associated with nonchronic carriage. We identified 278 bacterial genetic features that were statistically significantly associated with chronic carriage (P < 0.05 by Fisher's exact test). Chronic MRSA carriage was predicted with 68% accuracy using a combination of bacterial genome data and patient clinical data. Decolonization success is multifactorial. Apart from the 68% predicted accuracy found in this study, we estimate that the remaining 32% is a result of host factors and microbiome composition. IMPORTANCE Carriage of methicillin-resistant Staphylococcus aureus (MRSA) and other multiresistant bacteria is a prerequisite for infection and transmission. Successful decolonization treatment removes these risks. We aimed to identify bacterial genome and host clinical factors that influence MRSA decolonization to estimate the roles of the carrier and the bacterial strain, respectively, when decolonization fails. The long-term goal, beyond this study, is to optimize decolonization success, minimize MRSA transmission, and, ultimately, improve the quality of life of MRSA carriers.}, } @article {pmid35999561, year = {2022}, author = {Gui, S and Wei, W and Jiang, C and Luo, J and Chen, L and Wu, S and Li, W and Wang, Y and Li, S and Yang, N and Li, Q and Fernie, AR and Yan, J}, title = {A pan-Zea genome map for enhancing maize improvement.}, journal = {Genome biology}, volume = {23}, number = {1}, pages = {178}, pmid = {35999561}, issn = {1474-760X}, mesh = {Chromosome Mapping/methods ; Domestication ; *Genome, Plant ; Humans ; Plant Breeding/methods ; *Zea mays/genetics ; }, abstract = {BACKGROUND: Maize (Zea mays L.) is at the vanguard facing the upcoming breeding challenges. However, both a super pan-genome for the Zea genus and a comprehensive genetic variation map for maize breeding are still lacking.

RESULTS: Here, we construct an approximately 6.71-Gb pan-Zea genome that contains around 4.57-Gb non-B73 reference sequences from fragmented de novo assemblies of 721 pan-Zea individuals. We annotate a total of 58,944 pan-Zea genes and find around 44.34% of them are dispensable in the pan-Zea population. Moreover, 255,821 common structural variations are identified and genotyped in a maize association mapping panel. Further analyses reveal gene presence/absence variants and their potential roles during domestication of maize. Combining genetic analyses with multi-omics data, we demonstrate how structural variants are associated with complex agronomic traits.

CONCLUSIONS: Our results highlight the underexplored role of the pan-Zea genome and structural variations to further understand domestication of maize and explore their potential utilization in crop improvement.}, } @article {pmid35993719, year = {2022}, author = {Baker, JL}, title = {Using Nanopore Sequencing to Obtain Complete Bacterial Genomes from Saliva Samples.}, journal = {mSystems}, volume = {7}, number = {5}, pages = {e0049122}, pmid = {35993719}, issn = {2379-5077}, support = {K99 DE029228/DE/NIDCR NIH HHS/United States ; K99-DE029228//HHS | NIH | National Institute of Dental and Craniofacial Research (NIDCR)/ ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Nanopore Sequencing/methods ; Saliva ; Genome, Bacterial/genetics ; *Microbiota/genetics ; Bacteria/genetics ; }, abstract = {Obtaining complete, high-quality reference genomes is essential to the study of any organism. Recent advances in nanopore sequencing, as well as genome assembly and analysis methods, have made it possible to obtain complete bacterial genomes from metagenomic (i.e., multispecies) samples, including those from the human microbiome. In this study, methods are presented to obtain complete bacterial genomes from human saliva using complementary Oxford Nanopore (ONT) and Illumina sequencing. Applied to 3 human saliva samples, these methods resulted in 11 complete bacterial genomes: 3 Saccharibacteria clade G6 (also known as Ca. Nanogingivalaceae HMT-870), 1 Saccharibacteria clade G1 HMT-348, 2 Rothia mucilaginosa, 2 Actinomyces graevenitzii, 1 Mogibacterium diversum, 1 Lachnospiraceae HMT-096, and 1 Lancefieldella parvula; and one circular chromosome of Ruminococcaceae HMT-075 (which likely has at least 2 chromosomes). The 4 Saccharibacteria genomes, as well as the Actinomyces graevenitzii genomes, represented the first complete genomes from their respective bacterial taxa. 
Aside from the complete genomes, the assemblies contained 147 contigs of over 500,000 bp each and thousands of smaller contigs, together representing a myriad of additional draft genomes including many which are likely nearly complete. The complete genomes enabled highly accurate pangenome analysis, which identified unique and missing features of each genome compared to its closest relatives with complete genomes available in public repositories. These features provide clues as to the lifestyle and ecological role of these bacteria within the human oral microbiota, which will be particularly useful in designing future studies of the taxa that have never been isolated or cultivated. IMPORTANCE Obtaining complete and accurate genomes is crucial to the study of any organism. Previously, obtaining complete genomes of bacteria, including those of the human microbiome, frequently required isolation of the organism, as well as low-throughput, manual sequencing methods to resolve repeat regions. Advancements in long-read sequencing technologies, including Oxford Nanopore (ONT), have made it possible to obtain complete, closed bacterial genomes from metagenomic samples. This study reports methods to obtain complete genomes from the human oral microbiome using complementary ONT and Illumina sequencing of saliva samples. Eleven complete genomes were obtained from 3 human saliva samples, with genomes of Saccharibacteria HMT-870, Saccharibacteria HMT-348, and Actinomyces graevenitzii being the first complete genomes from their respective taxa. 
Obtaining complete bacterial genomes in a high-throughput manner will help illuminate the metabolic and ecological roles of important members of the human microbiota, particularly those that have remained recalcitrant to isolation and cultivation.}, } @article {pmid35991422, year = {2022}, author = {Jia, Y and Pradeep, K and Vance, WH and Zhang, X and Weir, B and Wei, H and Deng, Z and Zhang, Y and Xu, X and Zhao, C and Berger, JD and Bell, RW and Li, C}, title = {Identification of two chickpea multidrug and toxic compound extrusion transporter genes transcriptionally upregulated upon aluminum treatment in root tips.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {909045}, pmid = {35991422}, issn = {1664-462X}, abstract = {Aluminum (Al) toxicity poses a significant challenge for the yield improvement of chickpea, which is an economically important legume crop with high nutritional value in human diets. The genetic basis of Al-tolerance in chickpea remains unclear. Here, we assessed the Al-tolerance of 8 wild Cicer and one cultivated chickpea (PBA Pistol) accessions by measuring the root elongation in solution culture under control (0 μM Al[3+]) and Al treatments (15, 30 μM Al[3+]). Compared to PBA Pistol, the wild Cicer accessions displayed both tolerant and sensitive phenotypes, supporting wild Cicer as a potential genetic pool for Al-tolerance improvement. To identify potential genes related to Al-tolerance in chickpea, genome-wide screening of multidrug and toxic compound extrusion (MATE) encoding genes was performed. Fifty-six MATE genes were identified in total, which can be divided into 4 major phylogenetic groups. Four chickpea MATE genes (CaMATE1-4) were clustered with the previously characterized citrate transporters MtMATE66 and MtMATE69 in Medicago truncatula. Transcriptome data showed that CaMATE1-4 have diverse expression profiles, with CaMATE2 being root-specific. 
qRT-PCR analyses confirmed that CaMATE2 and CaMATE4 were highly expressed in root tips and were up-regulated upon Al treatment in all chickpea lines. Further measurement of carboxylic acids showed that malonic acid, instead of malate or citrate, is the major extruded acid by Cicer spp. root. Protein structural modeling analyses revealed that CaMATE2 has a divergent substrate-binding cavity from Arabidopsis AtFRD3, which may explain the different acid-secretion profile for chickpea. Pangenome survey showed that CaMATE1-4 have much higher genetic diversity in wild Cicer than that in cultivated chickpea. This first identification of CaMATE2 and CaMATE4 responsive to Al[3+] treatment in Cicer paves the way for future functional characterization of MATE genes in Cicer spp., and to facilitate future design of gene-specific markers for Al-tolerant line selection in chickpea breeding programs.}, } @article {pmid35977842, year = {2022}, author = {Zhou, Y and Yang, L and Han, X and Han, J and Hu, Y and Li, F and Xia, H and Peng, L and Boschiero, C and Rosen, BD and Bickhart, DM and Zhang, S and Guo, A and Van Tassell, CP and Smith, TPL and Yang, L and Liu, GE}, title = {Assembly of a pangenome for global cattle reveals missing sequences and novel structural variations, providing new insights into their diversity and evolutionary history.}, journal = {Genome research}, volume = {32}, number = {8}, pages = {1585-1601}, pmid = {35977842}, issn = {1549-5469}, abstract = {A cattle pangenome representation was created based on the genome sequences of 898 cattle representing 57 breeds. The pangenome identified 83 Mb of sequence not found in the cattle reference genome, representing 3.1% novel sequence compared with the 2.71-Gb reference. A catalog of structural variants developed from this cattle population identified 3.3 million deletions, 0.12 million inversions, and 0.18 million duplications. 
Estimates of breed ancestry and hybridization between cattle breeds using insertion/deletions as markers were similar to those produced by single nucleotide polymorphism-based analysis. Hundreds of deletions were observed to have stratification based on subspecies and breed. For example, an insertion of a Bov-tA1 repeat element was identified in the first intron of the APPL2 gene and correlated with cattle breed geographic distribution. This insertion falls within a segment overlapping predicted enhancer and promoter regions of the gene, and could affect important traits such as immune response, olfactory functions, cell proliferation, and glucose metabolism in muscle. The results indicate that pangenomes are a valuable resource for studying diversity and evolutionary history, and help to delineate how domestication, trait-based breeding, and adaptive introgression have shaped the cattle genome.}, } @article {pmid35976181, year = {2022}, author = {Sancho, R and Catalán, P and Contreras-Moreira, B and Juenger, TE and Des Marais, DL}, title = {Patterns of pan-genome occupancy and gene coexpression under water-deficit in Brachypodium distachyon.}, journal = {Molecular ecology}, volume = {31}, number = {20}, pages = {5285-5306}, pmid = {35976181}, issn = {1365-294X}, mesh = {*Brachypodium/genetics ; Droughts ; Genes, Plant ; Transcriptome/genetics ; Water ; }, abstract = {Natural populations are characterized by abundant genetic diversity driven by a range of different types of mutation. The tractability of sequencing complete genomes has allowed new insights into the variable composition of genomes, summarized as a species pan-genome. These analyses demonstrate that many genes are absent from the first reference genomes, whose analysis dominated the initial years of the genomic era. Our field now turns towards understanding the functional consequence of these highly variable genomes. 
Here, we analysed weighted gene coexpression networks from leaf transcriptome data for drought response in the purple false brome Brachypodium distachyon and the differential expression of genes putatively involved in adaptation to this stressor. We specifically asked whether genes with variable "occupancy" in the pan-genome - genes which are either present in all studied genotypes or missing in some genotypes - show different distributions among coexpression modules. Coexpression analysis united genes expressed in drought-stressed plants into nine modules covering 72 hub genes (87 hub isoforms), and genes expressed under controlled water conditions into 13 modules, covering 190 hub genes (251 hub isoforms). We find that low occupancy pan-genes are under-represented among several modules, while other modules are over-enriched for low-occupancy pan-genes. We also provide new insight into the regulation of drought response in B. distachyon, specifically identifying one module with an apparent role in primary metabolism that is strongly responsive to drought. Our work shows the power of integrating pan-genomic analysis with transcriptomic data using factorial experiments to understand the functional genomics of environmental response.}, } @article {pmid35974988, year = {2022}, author = {Haque, F and Jabeen, I and Keya, CA and Shuvo, SR}, title = {Whole-genome sequencing and comparative analysis of heavy metals tolerant Bacillus anthracis FHq strain isolated from tannery effluents in Bangladesh.}, journal = {AIMS microbiology}, volume = {8}, number = {2}, pages = {227-239}, pmid = {35974988}, issn = {2471-1888}, abstract = {Heavy metal contamination of the environment is a primary concern in Bangladesh. This study aims to characterize a novel heavy metal tolerant strain, Bacillus anthracis FHq, isolated from the tannery effluents of Savar, Bangladesh. 
The strain could tolerate up to 5 mM of lead nitrate, 2.5 mM of sodium arsenate, chromium chloride, cobalt chloride, 1.5 mM cadmium acetate, and 1 mM of sodium arsenite. Whole-genome sequencing analysis revealed that the genome of the strain is around 5.2 Mbp long, and the G + C content is 35.4%. Besides, FHq has genes cadC, zntA, arsCR, czcD, and chrA, which confer lead, arsenic, cobalt, and chromium resistance, respectively. A total of nineteen other closely related and completely sequenced B. anthracis strains were selected based on average nucleotide identity along with the FHq strain for phylogenomic and pan-genome analysis. The phylogenomic analysis predicted the inter-genomic evolutionary relationship of the strain isolated from Bangladesh, and it was closely related to a strain isolated from China. Pan-genome analysis revealed that the FHq strain possesses 6045 pan genes, 3802 core genes, and 152 unique genes in its genomic content. Hence, the genetic information and comparative analysis of the FHq strain might facilitate identifying the mechanisms conferring high resistance to lead in B. anthracis strains isolated from Bangladesh.}, } @article {pmid35974327, year = {2022}, author = {Garza, DR and von Meijenfeldt, FAB and van Dijk, B and Boleij, A and Huynen, MA and Dutilh, BE}, title = {Nutrition or nature: using elementary flux modes to disentangle the complex forces shaping prokaryote pan-genomes.}, journal = {BMC ecology and evolution}, volume = {22}, number = {1}, pages = {101}, pmid = {35974327}, issn = {2730-7182}, mesh = {Archaea/genetics ; Bacteria/genetics ; *Evolution, Molecular ; *Genome, Bacterial/genetics ; Genomics ; Humans ; Phylogeny ; Prokaryotic Cells ; }, abstract = {BACKGROUND: Microbial pan-genomes are shaped by a complex combination of stochastic and deterministic forces. Even closely related genomes exhibit extensive variation in their gene content. 
Understanding what drives this variation requires exploring the interactions of gene products with each other and with the organism's external environment. However, to date, conceptual models of pan-genome dynamics often represent genes as independent units and provide limited information about their mechanistic interactions.

RESULTS: We simulated the stochastic process of gene-loss using the pooled genome-scale metabolic reaction networks of 46 taxonomically diverse bacterial and archaeal families as proxies for their pan-genomes. The frequency by which reactions are retained in functional networks when stochastic gene loss is simulated in diverse environments allowed us to disentangle the metabolic reactions whose presence depends on the metabolite composition of the external environment (constrained by "nutrition") from those that are independent of the environment (constrained by "nature"). By comparing the frequency of reactions from the first group with their observed frequencies in bacterial and archaeal families, we predicted the metabolic niches that shaped the genomic composition of these lineages. Moreover, we found that the lineages that were shaped by a more diverse metabolic niche also occur in more diverse biomes as assessed by global environmental sequencing datasets.

CONCLUSION: We introduce a computational framework for analyzing and interpreting pan-reactomes that provides novel insights into the ecological and evolutionary drivers of pan-genome dynamics.}, } @article {pmid35972150, year = {2022}, author = {Wittmers, F and Needham, DM and Hehenberger, E and Giovannoni, SJ and Worden, AZ}, title = {Genomes from Uncultivated Pelagiphages Reveal Multiple Phylogenetic Clades Exhibiting Extensive Auxiliary Metabolic Genes and Cross-Family Multigene Transfers.}, journal = {mSystems}, volume = {7}, number = {5}, pages = {e0152221}, pmid = {35972150}, issn = {2379-5077}, mesh = {Humans ; Phylogeny ; Genome, Viral ; *Bacteriophages ; *Podoviridae ; Bacteria/genetics ; Myoviridae/genetics ; }, abstract = {For the abundant marine Alphaproteobacterium Pelagibacter (SAR11), and other bacteria, phages are powerful forces of mortality. However, little is known about the most abundant Pelagiphages in nature, such as the widespread HTVC023P-type, which is currently represented by two cultured phages. Using viral metagenomic data sets and fluorescence-activated cell sorting, we recovered 80 complete, undescribed Podoviridae genomes that form 10 phylogenomically distinct clades (herein, named Clades I to X) related to the HTVC023P-type. These expanded the HTVC023P-type pan-genome by 15-fold and revealed 41 previously unknown auxiliary metabolic genes (AMGs) in this viral lineage. Numerous instances of partner-AMGs (colocated and involved in related functions) were observed, including partners in nucleotide metabolism, DNA hypermodification, and Curli biogenesis. The Type VIII secretion system (T8SS) responsible for Curli biogenesis was identified in nine genomes and expanded the repertoire of T8SS proteins reported thus far in viruses. 
Additionally, the identified T8SS gene cluster contained an iron-dependent regulator (FecR), as well as a histidine kinase and adenylate cyclase that can be implicated in T8SS function but are not within T8SS operons in bacteria. While T8SS are lacking in known Pelagibacter, they contribute to aggregation and biofilm formation in other bacteria. Phylogenetic reconstructions of partner-AMGs indicate derivation from cellular lineages with a more recent transfer between viral families. For example, homologs of all T8SS genes are present in syntenic regions of distant Myoviridae Pelagiphages, and they appear to have alphaproteobacterial origins with a later transfer between viral families. The results point to an unprecedented multipartner-AMG transfer between marine Myoviridae and Podoviridae. Together with the expansion of known metabolic functions, our studies provide new prospects for understanding the ecology and evolution of marine phages and their hosts. IMPORTANCE One of the most abundant and diverse marine bacterial groups is Pelagibacter. Phages have roles in shaping Pelagibacter ecology; however, several Pelagiphage lineages are represented by only a few genomes. This paucity of data from even the most widespread lineages has imposed limits on the understanding of the diversity of Pelagiphages and their impacts on hosts. Here, we report 80 complete genomes, assembled directly from environmental data, which are from undescribed Pelagiphages and render new insights into the manipulation of host metabolism during infection. Notably, the viruses have functionally related partner genes that appear to be transferred between distant viruses, including a suite that encode a secretion system which both brings a new functional capability to the host and is abundant in phages across the ocean. 
Together, these functions have important implications for phage evolution and for how Pelagiphage infection influences host biology in manners extending beyond canonical viral lysis and mortality.}, } @article {pmid35966654, year = {2022}, author = {Xue, M and Huang, X and Xue, J and He, R and Liang, G and Liang, H and Liu, J and Wen, C}, title = {Comparative Genomic Analysis of Seven Vibrio alginolyticus Strains Isolated From Shrimp Larviculture Water With Emphasis on Chitin Utilization.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {925747}, pmid = {35966654}, issn = {1664-302X}, abstract = {The opportunistic pathogen Vibrio alginolyticus is gaining attention because of its disease-causing risks to aquatic animals and humans. In this study, seven Vibrio strains isolated from different shrimp hatcheries in Southeast China were subjected to genome sequencing and subsequent comparative analysis to explore their intricate relationships with shrimp aquaculture. The seven isolates had an average nucleotide identity of ≥ 98.3% with other known V. alginolyticus strains. The species V. alginolyticus had an open pan-genome, with the addition of ≥ 161 novel genes following each new genome for seven isolates and 14 publicly available V. alginolyticus strains. The percentages of core genes of the seven strains were up to 83.1-87.5%, indicating highly conserved functions, such as chitin utilization. Further, a total of 14 core genes involved in the chitin degradation pathway were detected on the seven genomes with a single copy, 12 of which had undergone significant purifying selection (dN/dS < 1). Moreover, the seven strains could utilize chitin as the sole carbon-nitrogen source. In contrast, mobile genetic elements (MGEs) were identified in seven strains, including plasmids, prophages, and genomic islands, which mainly encoded accessory genes annotated as hypothetical proteins. 
The infection experiment showed that four of the seven strains might be pathogenic because the survival rates of Litopenaeus vannamei postlarvae were significantly reduced (P < 0.05) when compared to the control. However, no obvious correlation was noted between the number of putative virulence factors and toxic effects of the seven strains. Collectively, the persistence of V. alginolyticus in various aquatic environments may be attributed to its high genomic plasticity via the acquisition of novel genes by various MGEs. In view of the strong capability of chitin utilization by diverse vibrios, the timely removal of massive chitin-rich materials thoroughly in shrimp culture systems may be a key strategy to inhibit proliferation of vibrios and subsequent infection of shrimp. In addition, transcontinental transfer of potentially pathogenic V. alginolyticus strains should receive great attention to avoid vibriosis.}, } @article {pmid35964310, year = {2022}, author = {Dmitriev, AA and Pushkova, EN and Melnikova, NV}, title = {[Plant Genome Sequencing: Modern Technologies and Novel Opportunities for Breeding].}, journal = {Molekuliarnaia biologiia}, volume = {56}, number = {4}, pages = {531-545}, doi = {10.31857/S0026898422040048}, pmid = {35964310}, issn = {0026-8984}, mesh = {Base Sequence ; Chromosome Mapping ; *Genome, Plant ; *Plant Breeding ; Plants/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {The investigation of plant genomes is of great importance for basic research and practical breeding. In 1977, F. Sanger proposed a DNA sequencing method, which allowed the complete sequences of a number of genomes to be determined. Then high-throughput and cost-effective next-generation/second-generation sequencing methods, producing up to billions of short reads, made it possible to sequence genomes of a significant number of species and provided a breakthrough in plant genetic studies. 
Finally, third-generation sequencing technologies allowed the determination of single-molecule sequences up to a million nucleotides in length, which is key for high-quality genome assemblies. An important task is to obtain a pan-genome, which includes an entire set of nucleotide sequences presented in various genotypes of the same species. The sequencing of plant genomes made it possible to assess intraspecific polymorphism, identify key genes influencing the formation of significant features, and develop molecular markers of economically valuable traits and this has become the basis for the development of marker-assisted and genomic selection. This review provides information on the latest advances in sequencing technologies and the assembly of plant genomes, as well as the opportunities that they open up for basic and applied works.}, } @article {pmid35958219, year = {2022}, author = {Hu, G and Cheng, L and Cheng, Y and Mao, W and Qiao, Y and Lan, Y}, title = {Pan-genome analysis of three main Chinese chestnut varieties.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {916550}, pmid = {35958219}, issn = {1664-462X}, abstract = {Chinese chestnut (Castanea mollissima Blume) is one of the earliest domesticated plants of high nutritional and ecological value, yet mechanisms of C. mollissima underlying its growth and development are poorly understood. Although individual chestnut species differ greatly, the molecular basis of the formation of their characteristic traits remains unknown. Though the draft genomes of chestnut have been previously released, the pan-genome of different variety needs to be studied. We report the genome sequence of three cultivated varieties of chestnut herein, namely Hei-Shan-Zhai-7 (H7, drought-resistant variety), Yan-Hong (YH, easy-pruning variety), and Yan-Shan-Zao-Sheng (ZS, early-maturing variety), to expedite convenience and efficiency in its genetics-based breeding. 
We obtained three chromosome-level chestnut genome assemblies through a combination of Oxford Nanopore technology, Illumina HiSeq X, and Hi-C mapping. The final genome assemblies are 671.99 Mb (YH), 790.99 Mb (ZS), and 678.90 Mb (H7), across 12 chromosomes, with scaffold N50 sizes of 50.50 Mb (YH), 65.05 Mb (ZS), and 52.16 Mb (H7). Through the identification of homologous genes and the cluster analysis of gene families, we found that H7, YH and ZS had 159, 131, and 91 unique gene families, respectively, and there were 13,248 single-copy direct homologous genes in the three chestnut varieties. For the convenience of research, the chestnut genome database was constructed. Based on the results of gene family identification, the presence/absence variations (PAVs) information of the three sample genes was calculated, and a total of 2,364, 2,232, and 1,475 unique genes were identified in H7, YH and ZS, respectively. Our results suggest that the GBSS II-b gene family underwent expansion in chestnut (relative to nearest source species). Overall, we developed high-quality and well-annotated genome sequences of three C. mollissima varieties, which will facilitate clarifying the molecular mechanisms underlying important traits, and shortening the breeding process.}, } @article {pmid35956427, year = {2022}, author = {Petereit, J and Bayer, PE and Thomas, WJW and Tay Fernandez, CG and Amas, J and Zhang, Y and Batley, J and Edwards, D}, title = {Pangenomics and Crop Genome Adaptation in a Changing Climate.}, journal = {Plants (Basel, Switzerland)}, volume = {11}, number = {15}, pages = {}, pmid = {35956427}, issn = {2223-7747}, support = {DP210100296//Australian Research Council/ ; DP200100762//Australian Research Council/ ; }, abstract = {During crop domestication and breeding, wild plant species have been shaped into modern high-yield crops and adapted to the main agro-ecological regions. 
However, climate change will impact crop productivity in these regions, and agriculture needs to adapt to support future food production. On a global scale, crop wild relatives grow in more diverse environments than crop species, and so may host genes that could support the adaptation of crops to new and variable environments. Through identification of individuals with increased climate resilience we may gain a greater understanding of the genomic basis for this resilience and transfer this to crops. Pangenome analysis can help to identify the genes underlying stress responses in individuals harbouring untapped genomic diversity in crop wild relatives. The information gained from the analysis of these pangenomes can then be applied towards breeding climate resilience into existing crops or to re-domesticating crops, combining environmental adaptation traits with crop productivity.}, } @article {pmid35945191, year = {2022}, author = {Xia, F and Jiang, M and Wen, Z and Wang, Z and Wang, M and Xu, Y and Zhuge, X and Dai, J}, title = {Complete genomic analysis of ST117 lineage extraintestinal pathogenic Escherichia coli (ExPEC) to reveal multiple genetic determinants to drive its global transmission: ST117 E. 
coli as an emerging multidrug-resistant foodborne ExPEC with zoonotic potential.}, journal = {Transboundary and emerging diseases}, volume = {69}, number = {6}, pages = {3256-3273}, doi = {10.1111/tbed.14678}, pmid = {35945191}, issn = {1865-1682}, support = {CX(21)3126//Jiangsu Agricultural Science and Technology Innovation Fund/ ; 32172855//National Natural Science Foundation of China/ ; }, mesh = {Animals ; Humans ; Escherichia coli/genetics ; *Extraintestinal Pathogenic Escherichia coli ; *Escherichia coli Infections/epidemiology/veterinary ; Birds ; Genomics ; *Poultry Diseases/epidemiology ; Phylogeny ; Chickens ; Virulence Factors/genetics ; }, abstract = {Avian pathogenic Escherichia coli (APEC) is recognized as a primary source of foodborne extraintestinal pathogenic E. coli (ExPEC), which poses a significant risk of extraintestinal infections in humans. The potential of human infection with ST117 lineage APEC/ExPEC from poultry is particularly concerning. However, relatively few whole-genome studies have focused on ST117 as an emerging ExPEC lineage. In this study, the complete genomes of 11 avian ST117 isolates and the draft genomes of 20 ST117 isolates in China were sequenced to reveal the genomic islands and large plasmid composition of ST117 APEC. With reference to the extensive E. coli genomes available in public databases, large-scale comprehensive genomic analysis of the ST117 lineage APEC/ExPEC was performed to reveal the features of the ST117 pan-genome and population. The high variability of the accessory genome emphasized the diversity and dynamic traits of the ST117 pan-genome. ST117 isolates recovered from different hosts and geographic sources were randomly located on a phylogeny tree, suggesting that ST117 E. coli lacked host specificity. A time-scaled phylogeny tree showed that ST117 was a recent E. coli lineage with a relatively short evolutionary period. 
Further characterization of a wide diversity of ExPEC-related virulence genes, pathogenicity islands (PAIs), and resistance genes of the ST117 pan-genome provided insights into the virulence and resistance of ST117 APEC/ExPEC. The results suggested zoonotic potential of ST117 APEC/ExPEC between birds and humans. Moreover, genomic analysis showed that a pool of diverse plasmids drove the virulence and multidrug resistance of ST117 APEC/ExPEC. Several types of large plasmids were scattered across the ST117 isolates, but there was no strong plasmid-clade adaptation. Combined with the pan-genome analysis, a double polymerase chain reaction (PCR) method was designed for rapid and cost-effective detection of ST117 isolates from various avian and human APEC/ExPEC isolates. Overall, this study addressed a gap in current knowledge about the ST117 APEC/ExPEC genome, with significant implications to understand the success and spread of ST117 APEC/ExPEC.}, } @article {pmid35944516, year = {2023}, author = {Goldman, AD and Kaçar, B}, title = {Very early evolution from the perspective of microbial ecology.}, journal = {Environmental microbiology}, volume = {25}, number = {1}, pages = {5-10}, doi = {10.1111/1462-2920.16144}, pmid = {35944516}, issn = {1462-2920}, mesh = {*Evolution, Molecular ; Genome ; Ecology ; Gene Transfer, Horizontal ; *Microbiota ; Phylogeny ; Biological Evolution ; }, abstract = {The universal ancestor at the root of the species tree of life depicts a population of organisms with a surprising degree of complexity, possessing genomes and translation systems much like that of microbial life today. As the first life forms were most likely to have been simple replicators, considerable evolutionary change must have taken place prior to the last universal common ancestor. 
It is often assumed that the lack of earlier branches on the tree of life is due to a prevalence of random horizontal gene transfer that obscured the delineations between lineages and hindered their divergence. Therefore, principles of microbial evolution and ecology may give us some insight into these early stages in the history of life. Here, we synthesize the current understanding of organismal and genome evolution from the perspective of microbial ecology and apply these evolutionary principles to the earliest stages of life on Earth. We focus especially on broad evolutionary modes pertaining to horizontal gene transfer, pangenome structure, and microbial mat communities.}, } @article {pmid35935202, year = {2022}, author = {Camargo, A and Guerrero-Araya, E and Castañeda, S and Vega, L and Cardenas-Alvarez, MX and Rodríguez, C and Paredes-Sabja, D and Ramírez, JD and Muñoz, M}, title = {Intra-species diversity of Clostridium perfringens: A diverse genetic repertoire reveals its pathogenic potential.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {952081}, pmid = {35935202}, issn = {1664-302X}, abstract = {Clostridium perfringens is the causative agent of many enterotoxic diseases in humans and animals, and it is present in diverse environments (soil, food, sewage, and water). Multilocus Sequence Typing (MLST) and Whole Genome Sequencing (WGS) have provided a general approach about genetic diversity of C. perfringens; however, those studies are limited to specific locations and often include a reduced number of genomes. In this study, 372 C. perfringens genomes from multiple locations and sources were used to assess the genetic diversity and phylogenetic relatedness of this pathogen. In silico MLST was used for typing the isolates, and the resulting sequence types (ST) were assigned to clonal complexes (CC) based on allelic profiles that differ from its founder by up to double-locus variants. 
A pangenome analysis was conducted, and a core genome-based phylogenetic tree was created to define phylogenetic groups. Additionally, key virulence factors, toxinotypes, and antibiotic resistance genes were identified using ABRicate against Virulence Factor Database (VFDB), TOXiper, and Resfinder, respectively. The majority of the C. perfringens genomes found in publicly available databases were derived from food (n = 85) and bird (n = 85) isolates. A total of 195 STs, some of them shared between sources such as food and human, horses and dogs, and environment and birds, were grouped in 25 CC and distributed along five phylogenetic groups. Fifty-three percent of the genomes were allocated to toxinotype A, followed by F (32%) and G (7%). The most frequently found virulence factors based on > 70% coverage and 99.95% identity were plc (100%), nanH (99%), ccp (99%), and colA (98%), which encode an alpha-toxin, a sialidase, an alpha-clostripain, and a collagenase, respectively, while tetA (39.5%) and tetB (36.2%), which mediate tetracycline resistance determinants, were the most common antibiotic resistance genes detected. The analyses conducted here showed a better view of the presence of this pathogen across several host species. They also confirm that the genetic diversity of C. perfringens is based on a large number of virulence factors that vary among phylogroups, and antibiotic resistance markers, especially to tetracyclines, aminoglycosides, and macrolides. Those characteristics highlight the importance of C. 
perfringens as one of the most common causes of foodborne illness.}, } @article {pmid35927368, year = {2022}, author = {Sotty, J and Bablon, P and Lekbaby, B and Augustin, J and Girier-Dufournier, M and Langlois, L and Dorival, C and Carrat, F and Pol, S and Fontaine, H and Sarica, N and Neuveut, C and Housset, C and Kremdsorf, D and Schnuriger, A and Soussan, P}, title = {Diversity of the nucleic acid forms of circulating HBV in chronically infected patients and its impact on viral cycle.}, journal = {Hepatology international}, volume = {16}, number = {6}, pages = {1259-1272}, pmid = {35927368}, issn = {1936-0541}, support = {ECTZ 103985//Agence Nationale de Recherches sur le Sida et les Hépatites Virales/ ; ECTZ 163186//Agence Nationale de Recherches sur le Sida et les Hépatites Virales/ ; EQU202003010517//FRM/ ; }, mesh = {Humans ; Hepatitis B virus/genetics ; *Nucleic Acids/therapeutic use ; Prospective Studies ; DNA, Viral/genetics ; *Hepatitis B, Chronic/drug therapy ; Virus Replication ; *Hepatitis B ; RNA ; RNA, Viral/analysis ; }, abstract = {BACKGROUND: Besides the prototypical hepatitis B virus (HBV) infectious particle, which contains a full-length double-stranded DNA (flDNA), additional circulating virus-like particles, which carry pregenomic RNA (pgRNA), spliced1RNA (sp1RNA) or spliced-derived DNA (defDNA) forms have been described. We aimed to determine the level of these four circulating forms in patients and to evaluate their impact on viral lifecycle.

METHODS: Chronic HBV untreated patients (n = 162), included in the HEPATHER cohort, were investigated. Pangenomic qPCRs were set up to quantify the four circulating forms of HBV nucleic acids (HBVnaf). In vitro infection assays were performed to address the impact of HBVnaf.

RESULTS: Hierarchical clustering individualized two clusters of HBVnaf diversity among patients: (1) cluster 1 (C1) showing a predominance of flDNA; (2) cluster 2 (C2) showing various proportions of the different forms. HBeAg-positive chronic hepatitis phase and higher viral load (7.0 ± 6.4 vs 6.6 ± 6.2 Log10 copies/ml; p < 0.001) characterized C2 compared to C1 patients. Among the different HBVnaf, pgRNA was more prevalent in C1 patients with high vs low HBV viral load (22.1% ± 2.5% vs 4.1% ± 1.8% of HBVnaf, p < 0.0001) but remained highly prevalent in C2 patients, whatever the level of replication. C2 patient samples used in infection assays showed that: (1) HBVnaf secretion was independent of the viral strain; (2) the viral cycle efficiency differed according to the proportion of HBVnaf in the inoculum, independently of cccDNA formation. Inoculum enrichment before infection suggests that pgRNA-containing particles drive this impact on viral replication.

CONCLUSION: Besides the critical role of HBV replication in circulating HBVnaf diversity, our data highlight an impact of this diversity on the dynamics of viral cycle.

CLINICAL TRIAL REGISTRATION: Patients were included from a prospective multicenter French national cohort (ANRS CO22 HEPATHER, NCT01953458).}, } @article {pmid35924489, year = {2022}, author = {Meleshko, D and Yang, R and Marks, P and Williams, S and Hajirasouliha, I}, title = {Efficient detection and assembly of non-reference DNA sequences with synthetic long reads.}, journal = {Nucleic acids research}, volume = {50}, number = {18}, pages = {e108}, pmid = {35924489}, issn = {1362-4962}, support = {R35 GM138152/GM/NIGMS NIH HHS/United States ; }, mesh = {Algorithms ; Base Sequence ; *Genome, Human ; *High-Throughput Nucleotide Sequencing/methods ; Humans ; Sequence Analysis, DNA/methods ; }, abstract = {Recent pan-genome studies have revealed an abundance of DNA sequences in human genomes that are not present in the reference genome. A lion's share of these non-reference sequences (NRSs) cannot be reliably assembled or placed on the reference genome. Improvements in long-read and synthetic long-read (aka linked-read) technologies have great potential for the characterization of NRSs. While synthetic long reads require less input DNA than long-read datasets, they are algorithmically more challenging to use. Except for computationally expensive whole-genome assembly methods, there is no synthetic long-read method for NRS detection. We propose a novel integrated alignment-based and local assembly-based algorithm, Novel-X, that uses the barcode information encoded in synthetic long reads to improve the detection of such events without a whole-genome de novo assembly. Our evaluations demonstrate that Novel-X finds many non-reference sequences that cannot be found by state-of-the-art short-read methods. We applied Novel-X to a diverse set of 68 samples from the Polaris HiSeq 4000 PGx cohort. Novel-X discovered 16 691 NRS insertions of size > 300 bp (total length 18.2 Mb). 
Many of them are population specific or may have a functional impact.}, } @article {pmid35916725, year = {2022}, author = {Dereeper, A and Summo, M and Meyer, DF}, title = {PanExplorer: a web-based tool for exploratory analysis and visualization of bacterial pan-genomes.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {18}, pages = {4412-4414}, pmid = {35916725}, issn = {1367-4811}, support = {//European Union on the Guadeloupe Region/ ; 2018-FED-1084//European Research and Development Funds/ ; }, mesh = {*Genome, Bacterial ; Genomics ; Software ; *Libraries ; Internet ; }, abstract = {MOTIVATION: As pan-genome approaches are largely employed for bacterial comparative genomics and evolution analyses, but still difficult to be carried out by non-bioinformatician biologists, there is a need for an innovative tool facilitating the exploration of bacterial pan-genomes.

RESULTS: PanExplorer is a web application providing various genomic analyses and reports, giving intuitive views that enable a better understanding of bacterial pan-genomes. As an example, we produced the pan-genome for 121 Anaplasmataceae strains (including 30 Ehrlichia, 15 Anaplasma, 68 Wolbachia).

PanExplorer is written in Perl CGI and relies on several JavaScript libraries for visualization (hotmap.js, MauveViewer, CircosJS). It is freely available at http://panexplorer.southgreen.fr. The source code has been released in a GitHub repository https://github.com/SouthGreenPlatform/PanExplorer. A documentation section is available on PanExplorer website.}, } @article {pmid35913193, year = {2022}, author = {Zheng, X and Dai, X and Zhu, Y and Yang, J and Jiang, H and Dong, H and Huang, L}, title = {(Meta)Genomic Analysis Reveals Diverse Energy Conservation Strategies Employed by Globally Distributed Gemmatimonadota.}, journal = {mSystems}, volume = {7}, number = {4}, pages = {e0022822}, pmid = {35913193}, issn = {2379-5077}, mesh = {Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Bacteria ; Genomics ; *Microbiota/genetics ; }, abstract = {Gemmatimonadota is a phylum-level lineage distributed widely but rarely reported. Only six representatives of Gemmatimonadota have so far been isolated and cultured in laboratory. The physiology, ecology, and evolutionary history of this phylum remain unknown. The 16S rRNA gene survey of our salt lake and deep-sea sediments, and Earth Microbiome Project (EMP) samples, reveals that Gemmatimonadota exist in diverse environments globally. In this study, we retrieved 17 metagenome-assembled genomes (MAGs) from salt lake sediments (12 MAGs) and deep-sea sediments (5 MAGs). Analysis of these MAGs and the nonredundant MAGs or genomes from public databases reveals Gemmatimonadota can degrade various complex organic substrates, and mainly employ heterotrophic pathways (e.g., glycolysis and tricarboxylic acid [TCA] cycle) for growth via aerobic respiration. And the processes of sufficient energy being stored in glucose through gluconeogenesis, followed by the synthesis of more complex compounds, are prevalent in Gemmatimonadota. 
A highly expandable pangenome for Gemmatimonadota has been observed, which presumably results from their adaptation to thriving in diverse environments. The enrichment of the Na[+]/H[+] antiporter in the SG8-23 order represents their adaptation to salty habitats. Notably, we identified a novel lineage of the SG8-23 order, which is potentially anoxygenic phototrophic. This lineage is not closely related to the phototrophs in the order of Gemmatimonadales. The two orders differ distinctly in the gene organization and phylogenetic relationship of their photosynthesis gene clusters, indicating photosystems in Gemmatimonadota have evolved in two independent routes. IMPORTANCE The phylum Gemmatimonadota is widely distributed in various environments. However, their physiology, ecology and evolutionary history remain unknown, primarily due to the limited cultured isolates and available genomes. We were intrigued to find out how widespread this phylum is, and how it can thrive under diverse conditions. Our results here expand the knowledge of the genetic and metabolic diversity of Gemmatimonadota, and shed light on the diverse energy conservation strategies (i.e., oxidative phosphorylation, substrate phosphorylation, and photosynthetic phosphorylation) responsible for their global distribution. 
Moreover, gene organization and phylogenetic analysis of photosynthesis gene clusters in Gemmatimonadota provide a valuable insight into the evolutionary history of photosynthesis.}, } @article {pmid35910650, year = {2022}, author = {Zhang, Y and Chu, H and Yu, L and He, F and Gao, Y and Tang, L}, title = {Analysis of the Taxonomy, Synteny, and Virulence Factors for Soft Rot Pathogen Pectobacterium aroidearum in Amorphophallus konjac Using Comparative Genomics.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {868709}, pmid = {35910650}, issn = {1664-302X}, abstract = {Bacterial soft rot is a devastating disease for a wide range of crops, vegetables, and ornamental plants including konjac (Amorphophallus konjac). However, the pangenome and genomic plasticity of the konjac soft rot pathogens is little explored. In this study, we reported the complete genome sequences of 11 bacterial isolates that can cause typical soft rot symptoms in konjac by in vitro and in vivo pathogenicity tests. Based on in silico DNA-DNA hybridization, average nucleotide identity and phylogenomic analysis, all 11 isolates were determined to be Pectobacterium aroidearum. In addition, synteny analysis of these genomes revealed considerable chromosomal inversions, one of which is triggered by homologous recombination of ribose operon. Pangenome analysis and COG enrichment analysis showed that the pangenome of P. aroidearum is open and that accessory genes are enriched in replication, recombination, and repair. Variations in type IV secretion system and type VI secretion system were found, while plant cell wall degrading enzymes were conserved. Furthermore, sequence analyses also provided evidence for the presence of a type V secretion system in Pectobacterium. These findings advance our understanding of the pathogenicity determinants, genomic plasticity, and evolution of P. 
aroidearum.}, } @article {pmid35909760, year = {2022}, author = {Wu, J and Xu, XD and Liu, L and Ma, L and Pu, Y and Wang, W and Hua, XY and Song, JM and Liu, K and Lu, G and Fang, Y and Li, X and Sun, W}, title = {A Chromosome Level Genome Assembly of a Winter Turnip Rape (Brassica rapa L.) to Explore the Genetic Basis of Cold Tolerance.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {936958}, pmid = {35909760}, issn = {1664-462X}, abstract = {Winter rapeseed (Brassica rapa L.) is an important overwintering oilseed crop that is widely planted in northwest China and suffers chronic low temperatures in winter. So the cold stress becomes one of the major constraints that limit its production. The currently existing genomes limit the understanding of the cold-tolerant genetic basis of rapeseed. Here we assembled a high-quality long-read genome of B. rapa "Longyou-7" cultivar, which has a cold-tolerant phenotype, and constructed a graph-based pan-genome to detect the structural variations within homologs of currently reported cold-tolerant related genes in the "Longyou-7" genome, which provides an additional elucidation of the cold-tolerant genetic basis of "Longyou-7" cultivar and promotes the development of cold-tolerant breeding in B. 
rapa.}, } @article {pmid35909191, year = {2022}, author = {Aurongzeb, M and Rashid, Y and Habib Ahmed Naqvi, S and Muhammad Talha Malik, H and Kamran Azim, M and Hassan, SS and Yasir, M and Karim, A}, title = {Insights into genome evolution, pan-genome, and phylogenetic implication through mitochondrial genome sequence of Naegleria fowleri species.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {13152}, pmid = {35909191}, issn = {2045-2322}, mesh = {DNA, Mitochondrial/genetics/metabolism ; DNA, Protozoan ; Evolution, Molecular ; *Genome, Mitochondrial/genetics ; *Naegleria/genetics ; *Naegleria fowleri/genetics ; Phylogeny ; }, abstract = {In the current study, we have systematically analysed the mitochondrial DNA (mtDNA) sequence of Naegleria fowleri (N. fowleri) isolate AY27, isolated from Karachi, Pakistan. The N. fowleri isolate AY27 has a circular mtDNA (49,541 bp), which harbours 69 genes (46 protein-coding genes, 21 tRNAs and 2 rRNAs). The pan-genome analysis of N. fowleri species showed a Bpan value of 0.137048, which implies that the pan-genome is open. KEGG classified core, accessory and unique gene clusters for human disease, metabolism, environmental information processing, genetic information processing and organismal system. Similarly, COG characterization of protein showed that core and accessory genes are involved in metabolism, information storages and processing, and cellular processes and signaling. The Naegleria species (n = 6) formed a total of 47 gene clusters; 42 single-copy gene clusters and 5 orthologous gene clusters. It was noted that 100% genes of Naegleria species were present in the orthogroups. We identified 44 single nucleotide polymorphisms (SNP) in the N. fowleri isolate AY27 mtDNA using N. fowleri strain V511 as a reference. Whole mtDNA phylogenetic tree analysis showed that N. fowleri isolates AY27 is closely related to N. fowleri (Accession no. JX174181.1). 
The ANI (Average Nucleotide Identity) values presented a much clearer grouping of the Naegleria species compared to the whole mtDNA based phylogenetic analysis. The current study gives a comprehensive understanding of mtDNA architecture as well as a comparison of Naegleria species (N. fowleri and N. gruberi species) at the mitochondrial genome sequence level.}, } @article {pmid35902069, year = {2022}, author = {Singh, PK and Rawal, HC and Panda, AK and Roy, J and Mondal, TK and Sharma, TR}, title = {Pan-genomic, transcriptomic, and miRNA analyses to decipher genetic diversity and anthocyanin pathway genes among the traditional rice landraces.}, journal = {Genomics}, volume = {114}, number = {5}, pages = {110436}, doi = {10.1016/j.ygeno.2022.110436}, pmid = {35902069}, issn = {1089-8646}, mesh = {Anthocyanins ; Gene Expression Regulation, Plant ; Genetic Variation ; Genomics ; India ; *MicroRNAs/genetics/metabolism ; *Oryza/genetics/metabolism ; Transcriptome ; }, abstract = {Black rice is famous for containing high anthocyanin while Joha rice is aromatic with low anthocyanin containing rice from the North-Eastern Region (NER) of India. However, there are limited reports on the anthocyanin biosynthesis in Manipur Black rice. Therefore, the present study was aimed to understand the origin, domestication and anthocyanin biosynthesis pathways in Black rice using the next generation sequencing approaches. With the sequencing data, various analyses were carried out for differential expression and construction of a pan-genome. Protein coding RNA and small RNA sequencing analysis aided in determining 7415 and 131 differentially expressed transcripts and miRNAs, respectively in NER rice. This is the first extensive study on identification and expression analysis of miRNAs and their target genes in regulating anthocyanin biosynthesis in NER rice. 
This study will aid in better understanding for decoding the theory of high or low anthocyanin content in different rice genotypes.}, } @article {pmid35893567, year = {2022}, author = {Hernández-Mendoza, A and Salgado-Morales, R and Morán-Vázquez, A and López-Torres, D and García-Gómez, BI and Dantán-González, E}, title = {Molecular Characterization of pBOq-IncQ and pBOq-95LK Plasmids of Escherichia coli BOq 01, a New Isolated Strain from Poultry Farming, Involved in Antibiotic Resistance.}, journal = {Microorganisms}, volume = {10}, number = {8}, pages = {}, pmid = {35893567}, issn = {2076-2607}, support = {CONACyT PDCPN 0247780//Consejo Nacional de Ciencia y Tecnología/ ; }, abstract = {The increase in antimicrobial resistance has raised questions about how to use these drugs safely, especially in veterinary medicine, animal nutrition, and agriculture. Escherichia coli is an important human and animal pathogen that frequently contains plasmids carrying antibiotic resistance genes. Extra chromosomal elements are required for various functions or conditions in microorganisms. Several phage-like plasmids have been identified, which are important in antibiotic resistance. In this work, the molecular characterization of the pBOq-IncQ (4.5 kb) and pBOq-95LK (95 kb) plasmids found in the E. coli strain BOq 01, a multidrug resistant bacteria isolated from a poultry farm, are considered. Plasmid pBOq-IncQ belongs to the incQ incompatibility plasmid family and is involved in sulfonamide resistance. Plasmid pBOq-95LK is a lytic phage-like plasmid that is involved in the lysis of the E. coli BOq 01 strain and carries a bleomycin resistance gene and a strain cured of this plasmid shows bleomycin sensitivity. Induction of the lytic cycle indicates that this phage-like plasmid is an active phage. This type of plasmid has been reported to acquire genes such as mcr-1, which codes for colistin resistance and bacterial persistence and is a significant public health threat. 
A genome comparison, a pangenomic and phylogenomic analysis with other phage-like plasmids reported in the literature were performed to understand better the evolution of this kind of plasmid in bacteria and its potential importance in antibiotic resistance.}, } @article {pmid35891291, year = {2022}, author = {Alshabrmi, FM and Alrumaihi, F and Alrasheedi, SF and Al-Megrin, WAI and Almatroudi, A and Allemailem, KS}, title = {An In-Silico Investigation to Design a Multi-Epitopes Vaccine against Multi-Drug Resistant Hafnia alvei.}, journal = {Vaccines}, volume = {10}, number = {7}, pages = {}, pmid = {35891291}, issn = {2076-393X}, abstract = {Antimicrobial resistance has become a significant health issue because of the misuse of antibiotics in our daily lives, resulting in high rates of morbidity and mortality. Hafnia alvei is a rod-shaped, Gram-negative and facultative anaerobic bacteria. The medical community has emphasized H. alvei's possible association with gastroenteritis. As of now, there is no licensed vaccine for H. alvei, and as such, computer aided vaccine design approaches could be an ideal approach to highlight the potential vaccine epitopes against this bacteria. By using bacterial pan-genome analysis (BPGA), we were able to study the entire proteomes of H. alvei with the aim of developing a vaccine. Based on the analysis, 20,370 proteins were identified as core proteins, which were further used in identifying potential vaccine targets based on several vaccine candidacy parameters. 
The prioritized vaccine targets against the bacteria are; type 1 fimbrial protein, flagellar hook length control protein (FliK), flagellar hook associated protein (FlgK), curli production assembly/transport protein (CsgF), fimbria/pilus outer membrane usher protein, fimbria/pilus outer membrane usher protein, molecular chaperone, flagellar filament capping protein (FliD), TonB-dependent hemoglobin /transferrin/lactoferrin family receptor, Porin (OmpA), flagellar basal body rod protein (FlgF) and flagellar hook-basal body complex protein (FliE). During the epitope prediction phase, different antigenic, immunogenic, non-Allergenic, and non-Toxic epitopes were predicted for the above-mentioned proteins. The selected epitopes were combined to generate a multi-epitope vaccine construct and a cholera toxin B subunit (adjuvant) was added to enhance the vaccine's antigenicity. Downward analyses of vaccines were performed using a vaccine three-dimensional model. Docking studies have confirmed that the vaccine strongly binds with MHC-I, MHC-II, and TLR-4 immune cell receptors. Additionally, molecular dynamics simulations confirmed that the vaccine epitopes were exposed to nature and to the host immune system and interpreted strong intermolecular binding between the vaccine and receptors. 
Based on the results of the study, the model vaccine construct seems to have the capacity to produce protective immune responses in the host, making it an attractive candidate for further in vitro and in vivo studies.}, } @article {pmid35889115, year = {2022}, author = {Jang, H and Eshwar, A and Lehner, A and Gangiredla, J and Patel, IR and Beaubrun, JJ and Chase, HR and Negrete, F and Finkelstein, S and Weinstein, LM and Ko, K and Addy, N and Ewing, L and Woo, J and Lee, Y and Seo, K and Jaradat, Z and Srikumar, S and Fanning, S and Stephan, R and Tall, BD and Gopinath, GR}, title = {Characterization of Cronobacter sakazakii Strains Originating from Plant-Origin Foods Using Comparative Genomic Analyses and Zebrafish Infectivity Studies.}, journal = {Microorganisms}, volume = {10}, number = {7}, pages = {}, pmid = {35889115}, issn = {2076-2607}, abstract = {Cronobacter sakazakii continues to be isolated from ready-to-eat fresh and frozen produce, flours, dairy powders, cereals, nuts, and spices, in addition to the conventional sources of powdered infant formulae (PIF) and PIF production environments. To understand the sequence diversity, phylogenetic relationship, and virulence of C. sakazakii originating from plant-origin foods, comparative molecular and genomic analyses, and zebrafish infection (ZI) studies were applied to 88 strains. Whole genome sequences of the strains were generated for detailed bioinformatic analysis. PCR analysis showed that all strains possessed a pESA3-like virulence plasmid similar to reference C. sakazakii clinical strain BAA-894. Core genome analysis confirmed a shared genomic backbone with other C. sakazakii strains from food, clinical and environmental strains. Emerging nucleotide diversity in these plant-origin strains was highlighted using single nucleotide polymorphic alleles in 2000 core genes. 
DNA hybridization analyses using a pan-genomic microarray showed that these strains clustered according to sequence types (STs) identified by multi-locus sequence typing (MLST). PHASTER analysis identified 185 intact prophage gene clusters encompassing 22 different prophages, including three intact Cronobacter prophages: ENT47670, ENT39118, and phiES15. AMRFinderPlus analysis identified the CSA family class C β-lactamase gene in all strains and a plasmid-borne mcr-9.1 gene was identified in three strains. ZI studies showed that some plant-origin C. sakazakii display virulence comparable to clinical strains. Finding virulent plant-origin C. sakazakii possessing significant genomic features of clinically relevant STs suggests that these foods can serve as potential transmission vehicles and supports widening the scope of continued surveillance for this important foodborne pathogen.}, } @article {pmid35889095, year = {2022}, author = {Melo-Bolívar, JF and Ruiz Pardo, RY and Junca, H and Sidjabat, HE and Cano-Lozano, JA and Villamil Díaz, LM}, title = {Competitive Exclusion Bacterial Culture Derived from the Gut Microbiome of Nile Tilapia (Oreochromis niloticus) as a Resource to Efficiently Recover Probiotic Strains: Taxonomic, Genomic, and Functional Proof of Concept.}, journal = {Microorganisms}, volume = {10}, number = {7}, pages = {}, pmid = {35889095}, issn = {2076-2607}, support = {ING181-2016//Universidad de La Sabana/ ; INGPHD-6-2017//Universidad de La Sabana/ ; INGPHD-9-2019//Universidad de La Sabana/ ; 808-2018-contract CT 329-2019//MINCIENCIAS- Patrimonio Autónomo Fondo Nacional de Financiamiento para la Ciencia, la Tecnología y la Innovación Francisco José de Caldas/ ; 727-2015, contract CT 122-2017//MINCIENCIAS/ ; }, abstract = {This study aims to mine a previously developed continuous-flow competitive exclusion culture (CFCEC) originating from the Tilapia gut microbiome as a rational and efficient autochthonous probiotic strain recovery source. 
Three isolated strains were tested on their adaptability to host gastrointestinal conditions, their antibacterial activities against aquaculture bacterial pathogens, and their antibiotic susceptibility patterns. Their genomes were fully sequenced, assembled, annotated, and relevant functions inferred, such as those related to pinpointed probiotic activities and phylogenomic comparative analyses to the closer reported strains/species relatives. The strains are possible candidates of novel genus/species taxa inside Lactococcus spp. and Priestia spp. (previously known as Bacillus spp.) These results were consistent with reports on strains inside these phyla exhibiting probiotic features, and the strains we found are expanding their known diversity. Furthermore, their pangenomes showed that these bacteria have indeed a set of so far uncharacterized genes that may play a role in the antagonism to competing strains or specific symbiotic adaptations to the fish host. In conclusion, CFCEC proved to effectively allow the enrichment and further pure culture isolation of strains with probiotic potential.}, } @article {pmid35889069, year = {2022}, author = {Bukhari, SAR and Irfan, M and Ahmad, I and Chen, L}, title = {Comparative Genomics and Pan-Genome Driven Prediction of a Reduced Genome of Akkermansia muciniphila.}, journal = {Microorganisms}, volume = {10}, number = {7}, pages = {}, pmid = {35889069}, issn = {2076-2607}, support = {RGP.02-87-43//King Khalid University/ ; 31572150//National Natural Science Foundation of China/ ; }, abstract = {Akkermansia muciniphila imparts important health benefits and is considered a next-generation probiotic. It is imperative to understand the genomic diversity and metabolic potential of the species for safer applications as probiotics. As it resides with both health-promoting and pathogenic bacteria, understanding the evolutionary patterns is crucial, but this area remains largely unexplored. 
Moreover, pan-genome has previously been established based on only a limited number of strains and without careful strain selection. The pan-genomics have become very important for understanding species diversity and evolution. In the current study, a systematic approach was used to find a refined pan-genome profile of A. muciniphila by excluding too-diverse strains based on average nucleotide identity-based species demarcation. The strains were divided into four phylogroups using a variety of clustering techniques. Horizontal gene transfer and recombination patterns were also elucidated. Evolutionary patterns revealed that different phylogroups were expanding differently. Furthermore, a comparative evaluation of the metabolic potential of the pan-genome and its subsections was performed. Lastly, the study combines functional annotation, persistent genome, and essential genes to devise an approach to determine a minimal genome that can systematically remove unwanted genes, including virulent factors. The selection of one strain to be used as a chassis for the prediction of a reduced genome was very carefully performed by analyzing several genomic parameters, including the number of unique genes and the resistance and pathogenic potential of the strains. 
The strategy could be applied to other microbes, including human-associated microbiota, towards a common goal of predicting a minimal or a reduced genome.}, } @article {pmid35887414, year = {2022}, author = {Maphosa, MN and Steenkamp, ET and Kanzi, AM and van Wyk, S and De Vos, L and Santana, QC and Duong, TA and Wingfield, BD}, title = {Intra-Species Genomic Variation in the Pine Pathogen Fusarium circinatum.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {7}, pages = {}, pmid = {35887414}, issn = {2309-608X}, support = {98353//South African Department of Science and Innovation's South African Research Chair Initiative/ ; 40945//DSI-NRF Centre of Excellence in Plant Health Biotechnology/ ; }, abstract = {Fusarium circinatum is an important global pathogen of pine trees. Genome plasticity has been observed in different isolates of the fungus, but no genome comparisons are available. To address this gap, we sequenced and assembled to chromosome level five isolates of F. circinatum. These genomes were analysed together with previously published genomes of F. circinatum isolates, FSP34 and KS17. Multi-sample variant calling identified a total of 461,683 micro variants (SNPs and small indels) and a total of 1828 macro structural variants of which 1717 were copy number variants and 111 were inversions. The variant density was higher on the sub-telomeric regions of chromosomes. Variant annotation revealed that genes involved in transcription, transport, metabolism and transmembrane proteins were overrepresented in gene sets that were affected by high impact variants. A core genome representing genomic elements that were conserved in all the isolates and a non-redundant pangenome representing all genomic elements is presented. Whole genome alignments showed that an average of 93% of the genomic elements were present in all isolates. 
The results of this study reveal that some genomic elements are not conserved within the isolates and some variants are high impact. The described genome-scale variations will help to inform novel disease management strategies against the pathogen.}, } @article {pmid35887246, year = {2022}, author = {Li, G and Shu, J and Jin, J and Shu, J and Feng, H and Chen, J and He, Y}, title = {Development of a Multi-Epitope Vaccine for Mycoplasma hyopneumoniae and Evaluation of Its Immune Responses in Mice and Piglets.}, journal = {International journal of molecular sciences}, volume = {23}, number = {14}, pages = {}, pmid = {35887246}, issn = {1422-0067}, support = {2022SNJF060, 2022C02031, LGF21C080001, LGC21C180001//This work was funded by Agricultural science and technology cooperation program of Zhejiang Province [2022SNJF060], Key Research and Development Program of Zhejiang Province [2022C02031], and Zhejiang Province Public Welfare Technology Application researc/ ; }, mesh = {Animals ; Bacterial Vaccines ; Epitopes ; Escherichia coli ; Immunity, Cellular ; Immunodominant Epitopes ; *Mycoplasma hyopneumoniae/genetics ; *Pneumonia of Swine, Mycoplasmal/prevention & control ; Swine ; }, abstract = {Mycoplasma hyopneumoniae (Mhp), the primary pathogen causing Mycoplasma pneumonia of swine (MPS), brings massive economic losses worldwide. Genomic variability and post-translational protein modification can enhance the immune evasion of Mhp, which makes MPS prone to recurrent outbreaks on farms, even with vaccination or other treatments. The reverse vaccinology pipeline has been developed as an attractive potential method for vaccine development due to its high efficiency and applicability. In this study, a multi-epitope vaccine for Mhp was developed, and its immune responses were evaluated in mice and piglets. 
Genomic core proteins of Mhp were retrieved through pan-genome analysis, and four immunodominant antigens were screened by host homologous protein removal, membrane protein screening, and virulence factor identification. One immunodominant antigen, AAV27984.1 (membrane nuclease), was expressed by E. coli and named rMhp597. For epitope prioritization, 35 B-cell-derived epitopes were identified from the four immunodominant antigens, and 10 MHC-I and 6 MHC-II binding epitopes were further identified. The MHC-I/II binding epitopes were merged and combined to produce recombinant proteins MhpMEV and MhpMEVC6His, which were used for animal immunization and structural analysis, respectively. Immunization of mice and piglets demonstrated that MhpMEV could induce humoral and cellular immune responses. The mouse serum antibodies could detect all 11 synthetic epitopes, and the piglet antiserum suppressed the nuclease activity of rMhp597. Moreover, piglet serum antibodies could also detect cultured Mhp strain 168. 
In summary, this study provides immunoassay results for a multi-epitope vaccine derived from the reverse vaccinology pipeline, and offers an alternative vaccine for MPS.}, } @article {pmid35886259, year = {2022}, author = {Rida, T and Ahmad, S and Ullah, A and Ismail, S and Tahir Ul Qamar, M and Afsheen, Z and Khurram, M and Saqib Ishaq, M and Alkhathami, AG and Alatawi, EA and Alrumaihi, F and Allemailem, KS}, title = {Pan-Genome Analysis of Oral Bacterial Pathogens to Predict a Potential Novel Multi-Epitopes Vaccine Candidate.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {14}, pages = {}, pmid = {35886259}, issn = {1660-4601}, mesh = {Base Composition ; *Computational Biology/methods ; *Epitopes, T-Lymphocyte/chemistry/genetics ; Molecular Docking Simulation ; Phylogeny ; RNA, Ribosomal, 16S ; Sequence Analysis, DNA ; Vaccines, Subunit/genetics ; }, abstract = {Porphyromonas gingivalis is a Gram-negative anaerobic bacterium, mainly present in the oral cavity and causes periodontal infections. Currently, no licensed vaccine is available against P. gingivalis and other oral bacterial pathogens. To develop a vaccine against P. gingivalis, herein, we applied a bacterial pan-genome analysis (BPGA) on the bacterial genomes that retrieved a total number of 4908 core proteins, which were further utilized for the identification of good vaccine candidates. After several vaccine candidacy analyses, three proteins, namely lytic transglycosylase domain-containing protein, FKBP-type peptidyl-prolyl cis-trans isomerase and superoxide dismutase, were shortlisted for epitopes prediction. In the epitopes prediction phase, different types of B and T-cell epitopes were predicted and only those with an antigenic, immunogenic, non-allergenic, and non-toxic profile were selected. 
Moreover, all the predicted epitopes were joined with each other to make a multi-epitopes vaccine construct, which was linked further to the cholera toxin B-subunit to enhance the antigenicity of the vaccine. For downward analysis, a three dimensional structure of the designed vaccine was modeled. The modeled structure was checked for binding potency with major histocompatibility complex I (MHC-I), major histocompatibility complex II (MHC-II), and Toll-like receptor 4 (TLR-4) immune cell receptors which revealed that the designed vaccine performed proper binding with respect to immune cell receptors. Additionally, the binding efficacy of the vaccine was validated through a molecular dynamic simulation that interpreted strong intermolecular vaccine-receptor binding and confirmed the exposed situation of vaccine epitopes to the host immune system. In conclusion, the study suggested that the model vaccine construct has the potency to generate protective host immune responses and that it might be a good vaccine candidate for experimental in vivo and in vitro studies.}, } @article {pmid35884840, year = {2022}, author = {Ezzeroug Ezzraimi, A and Hannachi, N and Mariotti, A and Rolland, C and Levasseur, A and Baron, SA and Rolain, JM and Camoin-Jau, L}, title = {The Antibacterial Effect of Platelets on Escherichia coli Strains.}, journal = {Biomedicines}, volume = {10}, number = {7}, pages = {}, pmid = {35884840}, issn = {2227-9059}, abstract = {Platelets play an important role in defense against pathogens; however, the interaction between Escherichia coli and platelets has not been well described and detailed. Our goal was to study the interaction between platelets and selected strains of E. coli in order to evaluate the antibacterial effect of platelets and to assess bacterial effects on platelet activation. 
Washed platelets and supernatants of pre-activated platelets were incubated with five clinical colistin-resistant and five laboratory colistin-sensitive strains of E. coli in order to study bacterial growth. Platelet activation was measured with flow cytometry by evaluating CD62P expression. To identify the difference in strain behavior toward platelets, a pangenome analysis using Roary and O-antigen serotyping was carried out. Both whole platelets and the supernatant of activated platelets inhibited growth of three laboratory colistin-sensitive strains. In contrast, platelets promoted growth of the other strains. There was a negative correlation between platelet activation and bacterial growth. The Roary results showed no logical clustering to explain the mechanism of platelet resistance. The diversity of the responses might be due to strains of different types of O-antigen. Our results show a bidirectional interaction between platelets and E. coli whose expression is dependent on the bacterial strain involved.}, } @article {pmid35879566, year = {2022}, author = {Suraby, EJ and Sruthi, KB and Antony, G}, title = {Genome-wide identification of type III effectors and other virulence factors in Ralstonia pseudosolanacearum causing bacterial wilt in ginger (Zingiber officinale).}, journal = {Molecular genetics and genomics : MGG}, volume = {297}, number = {5}, pages = {1371-1388}, pmid = {35879566}, issn = {1617-4623}, support = {PDF/2016/003228/LS//Science and Engineering Research Board/ ; }, mesh = {*Ginger ; Phylogeny ; Plant Diseases ; Ralstonia ; *Ralstonia solanacearum ; Virulence Factors ; }, abstract = {Ralstonia pseudosolanacearum causes bacterial wilt in ginger, reducing ginger production worldwide. We sequenced the whole genome of a highly virulent phylotype I, race 4, biovar 3 Ralstonia pseudosolanacearum strain GRsMep isolated from a severely infected ginger field in India. R. 
pseudosolanacearum GRsMep genome is organised into two replicons: chromosome and megaplasmid with a total genome size of 5,810,605 bp. This strain encodes approximately 72 effectors which include a combination of core effectors as well as highly variable, diverse repertoire of type III effectors. Comparative genome analysis with GMI1000 identified conservation in the genes involved in the general virulence mechanism. Our analysis identified type III effectors, RipBJ and RipBO as present in GRsMep but absent in the reported genomes of other strains infecting Zingiberaceae family. GRsMep contains 126 unique genes when compared to the pangenome of the Ralstonia strains that infect the Zingiberaceae family. The whole-genome data of R. pseudosolanacearum strain will serve as a resource for exploring the evolutionary processes that structure and regulate the virulence determinants of the strain. Pathogenicity testing of the transposon insertional mutant library of GRsMep through virulence assay on ginger plants identified a few candidate virulence determinants specific to bacterial wilt in ginger.}, } @article {pmid35879467, year = {2022}, author = {Dang, VH and Hill, CB and Zhang, XQ and Angessa, TT and McFawn, LA and Li, C}, title = {Multi-locus genome-wide association studies reveal novel alleles for flowering time under vernalisation and extended photoperiod in a barley MAGIC population.}, journal = {TAG. Theoretical and applied genetics. 
Theoretische und angewandte Genetik}, volume = {135}, number = {9}, pages = {3087-3102}, pmid = {35879467}, issn = {1432-2242}, support = {ITFS//Murdoch University/ ; MSS//Murdoch University/ ; }, mesh = {Alleles ; Edible Grain/genetics ; Genetic Markers ; *Genome-Wide Association Study ; *Hordeum/genetics ; Phenotype ; Photoperiod ; Plant Breeding ; Quantitative Trait Loci ; }, abstract = {Key genes controlling flowering and interactions of different photoperiod alleles with various environments were identified in a barley MAGIC population. A new candidate gene for vernalisation requirements was also detected. Optimal flowering time has a major impact on grain yield in crop species, including the globally important temperate cereal crop barley (Hordeum vulgare L.). Understanding the genetics of flowering is a key avenue to enhancing yield potential. Although bi-parental populations were used intensively to map genes controlling flowering, their lack of genetic diversity requires additional work to obtain desired gene combinations in the selected lines, especially when the two parental cultivars did not carry the genes. Multi-parent mapping populations, which use a combination of four or eight parental cultivars, have higher genetic and phenotypic diversity and can provide novel genetic combinations that cannot be achieved using bi-parental populations. This study uses a Multi-parent advanced generation intercross (MAGIC) population from four commercial barley cultivars to identify genes controlling flowering time in different environmental conditions. Genome-wide association studies (GWAS) were performed using 5,112 high-quality markers from Diversity Arrays Technology sequencing (DArT-seq), and Kompetitive allele-specific polymerase chain reaction (KASP) genetic markers were developed. Phenotypic data were collected from fifteen different field trials for three consecutive years. 
Planting was conducted at various sowing times, and plants were grown with/without additional vernalisation and extended photoperiod treatments. This study detected fourteen stable regions associated with flowering time across multiple environments. GWAS combined with pangenome data highlighted the role of CEN gene in flowering and enabled the prediction of different CEN alleles from parental lines. As the founder lines of the multi-parental population are elite germplasm, the favourable alleles identified in this study are directly relevant to breeding, increasing the efficiency of subsequent breeding strategies and offering better grain yield and adaptation to growing conditions.}, } @article {pmid35873727, year = {2022}, author = {Wang, Z and Yang, J and Cheng, F and Li, P and Xin, X and Wang, W and Yu, Y and Zhang, D and Zhao, X and Yu, S and Zhang, F and Dong, Y and Su, T}, title = {Subgenome dominance and its evolutionary implications in crop domestication and breeding.}, journal = {Horticulture research}, volume = {9}, number = {}, pages = {uhac090}, pmid = {35873727}, issn = {2662-6810}, abstract = {Polyploidization or whole-genome duplication (WGD) is a well-known speciation and adaptation mechanism in angiosperms, while subgenome dominance is a crucial phenomenon in allopolyploids, established following polyploidization. The dominant subgenomes contribute more to genome evolution and homoeolog expression bias, both of which confer advantages for short-term phenotypic adaptation and long-term domestication. In this review, we firstly summarize the probable mechanistic basis for subgenome dominance, including the effects of genetic [transposon, genetic incompatibility, and homoeologous exchange (HE)], epigenetic (DNA methylation and histone modification), and developmental and environmental factors on this evolutionary process. We then move to Brassica rapa, a typical allopolyploid with subgenome dominance. Polyploidization provides the B. 
rapa genome not only with the genomic plasticity for adapting to changeable environments, but also an abundant genetic basis for morphological variation, making it a representative species for subgenome dominance studies. According to the 'two-step theory', B. rapa experienced genome fractionation twice during WGD, in which most of the genes responding to the environmental cues and phytohormones were over-retained, enhancing subgenome dominance and consequent adaption. More than this, the pangenome of 18 B. rapa accessions with different morphotypes recently constructed provides further evidence to reveal the impacts of polyploidization and subgenome dominance on intraspecific diversification in B. rapa. Above and beyond the fundamental understanding of WGD and subgenome dominance in B. rapa and other plants, however, it remains elusive why subgenome dominance has tissue- and spatiotemporal-specific features and could shuffle between homoeologous regions of different subgenomes by environments in allopolyploids. We lastly propose acceleration of the combined application of resynthesized allopolyploids, omics technology, and genome editing tools to deepen mechanistic investigations of subgenome dominance, both genetic and epigenetic, in a variety of species and environments. 
We believe that the implications of genomic and genetic basis of a variety of ecologically, evolutionarily, and agriculturally interesting traits coupled with subgenome dominance will be uncovered and aid in making new discoveries and crop breeding.}, } @article {pmid35873140, year = {2022}, author = {Kopf, A and Bunk, B and Coldewey, SM and Gunzer, F and Riedel, T and Schröttner, P}, title = {Comparative Genomic Analysis of the Human Pathogen Wohlfahrtiimonas Chitiniclastica Provides Insight Into the Identification of Antimicrobial Resistance Genotypes and Potential Virulence Traits.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {912427}, pmid = {35873140}, issn = {2235-2988}, mesh = {*Anti-Bacterial Agents/pharmacology/therapeutic use ; *Drug Resistance, Bacterial/genetics ; Gammaproteobacteria ; Genomics ; Genotype ; Humans ; Macrolides ; Virulence/genetics ; }, abstract = {Recent studies suggest that Wohlfahrtiimonas chitiniclastica may be the cause of several diseases in humans including sepsis and bacteremia making the bacterium as a previously underappreciated human pathogen. However, very little is known about the pathogenicity and genetic potential of W. chitiniclastica; therefore, it is necessary to conduct systematic studies to gain a deeper understanding of its virulence characteristics and treatment options. In this study, the entire genetic repertoire of all publicly available W. chitiniclastica genomes was examined including in silico characterization of bacteriophage content, antibiotic resistome, and putative virulence profile. The pan-genome of W. chitiniclastica comprises 3819 genes with 1622 core genes (43%) indicating a putative metabolic conserved species. Furthermore, in silico analysis indicated presumed resistome expansion as defined by the presence of genome-encoded transposons and bacteriophages. 
While macrolide resistance genes macA and macB are located within the core genome, additional antimicrobial resistance genotypes for tetracycline (tetH, tetB, and tetD), aminoglycosides (ant(2'')-Ia, aac(6')-Ia,aph(3'')-Ib, aph(3')-Ia, and aph(6)-Id)), sulfonamide (sul2), streptomycin (strA), chloramphenicol (cat3), and beta-lactamase (blaVEB) are distributed among the accessory genome. Notably, our data indicate that the type strain DSM 18708[T] does not encode any additional clinically relevant antibiotic resistance genes, whereas drug resistance is increasing within the W. chitiniclastica clade. This trend should be monitored with caution. To the best of our knowledge, this is the first comprehensive genome analysis of this species, providing new insights into the genome of this opportunistic human pathogen.}, } @article {pmid35865929, year = {2022}, author = {Li, Y and Wang, Y and Liu, J}, title = {Genomic Insights Into the Interspecific Diversity and Evolution of Mobiluncus, a Pathogen Associated With Bacterial Vaginosis.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {939406}, pmid = {35865929}, issn = {1664-302X}, abstract = {Bacterial vaginosis (BV) is a common vaginal infection and has been associated with increased risk for a wide array of health issues. BV is linked with a variety of heterogeneous pathogenic anaerobic bacteria, among which Mobiluncus is strongly associated with BV diagnosis. However, their genetic features, pathogenicity, interspecific diversity, and evolutionary characters have not been illustrated at genomic level. The current study performed phylogenomic and comparative genomic analyses of Mobiluncus. Phylogenomic analyses revealed remarkable phylogenetic distinctions among different species. Compared with M. curtisii, M. mulieris had a larger genome and pangenome size with more insertion sequences but less CRISPR-Cas systems. 
In addition, these two species were diverse in profile of virulence factors, but harbored similar antibiotic resistance genes. Statistically different functional genome profiles between strains from the two species were determined, as well as correlations of some functional genes/pathways with putative pathogenicity. We also showed that high levels of horizontal gene transfer might be an important strategy for species diversification and pathogenicity. Collectively, this study provides the first genome sequence level description of Mobiluncus, and may shed light on its virulence/pathogenicity, functional diversification, and evolutionary dynamics. Our study could facilitate the further investigations of this important pathogen, and might improve the future treatment of BV.}, } @article {pmid35863717, year = {2022}, author = {Dindhoria, K and Kumar, S and Baliyan, N and Raphel, S and Halami, PM and Kumar, R}, title = {Bacillus licheniformis MCC 2514 genome sequencing and functional annotation for providing genetic evidence for probiotic gut adhesion properties and its applicability as a bio-preservative agent.}, journal = {Gene}, volume = {840}, number = {}, pages = {146744}, doi = {10.1016/j.gene.2022.146744}, pmid = {35863717}, issn = {1879-0038}, mesh = {*Bacillus licheniformis/genetics/metabolism ; Bacteria/genetics ; *Bacteriocins/genetics/metabolism ; Genome, Bacterial ; *Probiotics ; }, abstract = {Bacillus licheniformis is a well-known probiotic that can be found in a variety of foods. The strain Bacillus licheniformis MCC 2514 was previously characterized by our group for its bio-physiological capabilities establishing it as a promising probiotic, but information on the genetic evidence for its attributes was lacking. In the current study, whole genome analysis identified the underlying molecular determinants responsible for its probiotic potential. The circular genome of MCC 2514 was 4,230,480 bp with 46.2% GC content, 24 rRNA, and 83 tRNA genes. 
The pangenome analysis between B. licheniformis MCC 2514 and 12 other B. licheniformis strains revealed a pangenome of 6008 genes and core genome of 3775 genes. Genome mining revealed NRPS and bacteriocins producing gene clusters indicating its biocontrol properties. Several genes encoding carbohydrate degrading enzymes, which aid in proper food degradation in the intestine, were also observed. Stress tolerance, vitamin, and essential amino acids biosynthesis related genes were found, which are important characteristics of a probiotic strain. Additionally, vital genes responsible for gut adhesion and biofilm formation were observed in its genome. The bacterium has been shown to improve the shelf life of idli batter by preventing whey separation, CO2, and odour production while maintaining the pH of 3.96-4.29, especially at cold temperatures. It has significantly reduced coliform contamination at both room and low temperatures, demonstrating its bio-preservative ability, which is also corroborated by the presence of the NRPS and bacteriocin gene clusters in its genome. The present study helped to understand both, the ability of B. 
licheniformis MCC 2514 to adapt the intestinal gut environment and its probiotic functionality for food preservation.}, } @article {pmid35863000, year = {2022}, author = {Wang, Z and Guo, G and Li, Q and Li, P and Li, M and Zhou, L and Tan, Z and Zhang, W}, title = {Combing Immunoinformatics with Pangenome Analysis To Design a Multiepitope Subunit Vaccine against Klebsiella pneumoniae K1, K2, K47, and K64.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0114822}, pmid = {35863000}, issn = {2165-0497}, mesh = {Anti-Bacterial Agents/therapeutic use ; Carbapenems/therapeutic use ; *Cross Infection/drug therapy ; Humans ; *Klebsiella Infections/drug therapy/epidemiology/prevention & control ; Klebsiella pneumoniae/genetics ; Molecular Docking Simulation ; Vaccines, Subunit/genetics/therapeutic use ; }, abstract = {Klebsiella pneumoniae is an opportunistic Gram-negative bacterium that has become a leading causative agent of nosocomial infections, mainly infecting patients with immunosuppressive diseases. Capsular (K) serotypes K1, K2, K47, and K64 are commonly associated with higher virulence (hypervirulent Klebsiella pneumoniae), and more threateningly, isolates belonging to the last two K serotypes are also frequently associated with resistance to carbapenem (hypervirulent carbapenem-resistant Klebsiella pneumoniae). The prevalence of these isolates has posed significant threats to human health, and there are no appropriate therapies available against them. Therefore, in this study, a method combining immunoinformatics and pangenome analysis was applied for contriving a multiepitope subunit vaccine against these four threatening serotypes. To obtain cross-protection, 12 predicted conserved antigens were screened from the core genome of 274 complete Klebsiella pneumoniae genomes (KL1, KL2, KL47, and KL64), from which the epitopes of T and B cells were extracted for vaccine construction. 
In addition, the immunological properties, the interaction with Toll-like receptors, and the stability in a simulative humoral environment were evaluated by immunoinformatics methods, molecular docking, and molecular dynamics simulation. All of these evaluations indicated the potency of this constructed vaccine to be an effective therapeutic agent. Lastly, the cDNA of the designed vaccine was optimized and ligated to pET-28a(+) for expression vector construction. Overall, our research provides a newly cross-protective control strategy against these troublesome pathogens and paves the way for the development of a safe and effective vaccine. IMPORTANCE Klebsiella pneumoniae is an opportunistic Gram-negative bacterium that has become a leading causative agent of nosocomial infections. Among the numerous capsular serotypes, K1, K2, K47, and K64 are commonly associated with higher virulence (hypervirulent K. pneumoniae). More threateningly, the last two serotypes are frequently associated with resistance to carbapenem (hypervirulent carbapenem-resistant K. pneumoniae). However, there is currently no therapeutic agent or vaccine specifically against these isolates. Therefore, development of a vaccine against these pathogens is very essential. In this study, for the first time, a method combining pangenome analysis, reverse vaccinology, and immunoinformatics was applied for contriving a multiepitope subunit vaccine against K. pneumoniae isolates of K1, K2, K47, and K64. Also, the immunological properties of the constructed vaccine were evaluated and its high potency was revealed. Overall, our research will pave the way for the vaccine development against these four threatening capsular serotypes of K. 
pneumoniae.}, } @article {pmid35862809, year = {2022}, author = {Sassi, M and Bronsard, J and Pascreau, G and Emily, M and Donnio, PY and Revest, M and Felden, B and Wirth, T and Augagneur, Y}, title = {Forecasting Staphylococcus aureus Infections Using Genome-Wide Association Studies, Machine Learning, and Transcriptomic Approaches.}, journal = {mSystems}, volume = {7}, number = {4}, pages = {e0037822}, pmid = {35862809}, issn = {2379-5077}, mesh = {Animals ; Humans ; Staphylococcus aureus/genetics ; Genome-Wide Association Study ; Transcriptome ; *Staphylococcal Infections/diagnosis ; RNA ; *Bacteremia/microbiology ; Machine Learning ; }, abstract = {Staphylococcus aureus is a major human and animal pathogen, colonizing diverse ecological niches within its hosts. Predicting whether an isolate will infect a specific host and its subsequent clinical fate remains unknown. In this study, we investigated the S. aureus pangenome using a curated set of 356 strains, spanning a wide range of hosts, origins, and clinical display and antibiotic resistance profiles. We used genome-wide association study (GWAS) and random forest (RF) algorithms to discriminate strains based on their origins and clinical sources. Here, we show that the presence of sak and scn can discriminate strains based on their host specificity, while other genes such as mecA are often associated with virulent outcomes. Both GWAS and RF indicated the importance of intergenic regions (IGRs) and coding DNA sequence (CDS) but not sRNAs in forecasting an outcome. Additional transcriptomic analyses performed on the most prevalent clonal complex 8 (CC8) clonal types, in media mimicking nasal colonization or bacteremia, indicated three RNAs as potential RNA markers to forecast infection, followed by 30 others that could serve as infection severity predictors. 
Our report shows that genetic association and transcriptomics are complementary approaches that will be combined in a single analytical framework to improve our understanding of bacterial pathogenesis and ultimately identify potential predictive molecular markers. IMPORTANCE Predicting the outcome of bacterial colonization and infections, based on extensive genomic and transcriptomic data from a given pathogen, would be of substantial help for clinicians in treating and curing patients. In this report, genome-wide association studies and random forest algorithms have defined gene combinations that differentiate human from animal strains, colonization from diseases, and nonsevere from severe diseases, while it revealed the importance of IGRs and CDS, but not small RNAs (sRNAs), in anticipating an outcome. In addition, transcriptomic analyses performed on the most prevalent clonal types, in media mimicking either nasal colonization or bacteremia, revealed significant differences and therefore potent RNA markers. Overall, the use of both genomic and transcriptomic data in a single analytical framework can enhance our understanding of bacterial pathogenesis.}, } @article {pmid35862683, year = {2022}, author = {Baseggio, L and Rudenko, O and Engelstädter, J and Barnes, AC}, title = {The Evolution of a Specialized, Highly Virulent Fish Pathogen through Gene Loss and Acquisition of Host-Specific Survival Mechanisms.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {14}, pages = {e0022222}, pmid = {35862683}, issn = {1098-5336}, mesh = {Animals ; *Fish Diseases/microbiology ; Fishes/microbiology ; *Gram-Negative Bacterial Infections/microbiology ; Photobacterium/metabolism ; Phylogeny ; }, abstract = {Photobacterium damselae comprises two subspecies, P. damselae subsp. damselae and P. damselae subsp. piscicida, that contrast remarkably despite their taxonomic relationship. 
The former is opportunistic and free-living but can cause disease in compromised individuals from a broad diversity of taxa, while the latter is a highly specialized, primary fish pathogen. Here, we employ new closed curated genome assemblies from Australia to estimate the global phylogenetic structure of the species P. damselae. We identify genes responsible for the shift from an opportunist to a host-adapted fish pathogen, potentially via an arthropod vector as fish-to-fish transmission was not achieved in repeated cohabitation challenges despite high virulence for Seriola lalandi. Acquisition of ShdA adhesin and of thiol peroxidase may have allowed the environmental, generalist ancestor to colonize zooplankton and to occasionally enter in fish host sentinel cells. As dependence on the host has increased, P. damselae has lost nonessential genes, such as those related to nitrite and sulfite reduction, urea degradation, a type 6 secretion system (T6SS) and several toxin-antitoxin (TA) systems. Similar to the evolution of Yersinia pestis, the loss of urease may be the crucial event that allowed the pathogen to stably colonize zooplankton vectors. Acquisition of host-specific genes, such as those required to form a sialic acid capsule, was likely necessary for the emergent P. damselae subsp. piscicida to become a highly specialized, facultative intracellular fish pathogen. Processes that have shaped P. damselae subsp. piscicida from subsp. damselae are similar to those underlying evolution of Yersinia pestis from Y. pseudotuberculosis. IMPORTANCE Photobacterium damselae subsp. damselae is a ubiquitous marine bacterium and opportunistic pathogen of compromised hosts of diverse taxa. In contrast, its sister subspecies P. damselae subsp. piscicida (Pdp) is highly virulent in fish. Pdp has evolved from a single subclade of Pdd through gene loss and acquisition. 
We show that fish-to-fish transmission does not occur in repeated infection models in the primary host, Seriola lalandi, and present genomic evidence for vector-borne transmission, potentially via zooplankton. The broad genomic changes from generalist Pdd to specialist Pdp parallel those of the environmental opportunist Yersinia pseudotuberculosis to vector-borne plague bacterium Y. pestis and demonstrate that evolutionary processes in bacterial pathogens are universal between the terrestrial and marine biosphere.}, } @article {pmid35861394, year = {2022}, author = {Jonkheer, EM and van Workum, DM and Sheikhizadeh Anari, S and Brankovics, B and de Haan, JR and Berke, L and van der Lee, TAJ and de Ridder, D and Smit, S}, title = {PanTools v3: functional annotation, classification and phylogenomics.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {18}, pages = {4403-4405}, pmid = {35861394}, issn = {1367-4811}, support = {TU 16022//Dutch Ministry of Economic Affairs in the Topsector Program 'Horticulture and Starting Materials'/ ; }, mesh = {Humans ; Phylogeny ; *SARS-CoV-2/genetics ; *COVID-19 ; Software ; Genome, Viral ; }, abstract = {SUMMARY: The ever-increasing number of sequenced genomes necessitates the development of pangenomic approaches for comparative genomics. Introduced in 2016, PanTools is a platform that allows pangenome construction, homology grouping and pangenomic read mapping. The use of graph database technology makes PanTools versatile, applicable from small viral genomes like SARS-CoV-2 up to large plant or animal genomes like tomato or human. Here, we present our third major update to PanTools that enables the integration of functional annotations and provides both gene-level analyses and phylogenetics.

AVAILABILITY AND IMPLEMENTATION: PanTools is implemented in Java 8 and released under the GNU GPLv3 license. Software and documentation are available at https://git.wur.nl/bioinformatics/pantools.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35860541, year = {2022}, author = {Hussain, B and Akpınar, BA and Alaux, M and Algharib, AM and Sehgal, D and Ali, Z and Aradottir, GI and Batley, J and Bellec, A and Bentley, AR and Cagirici, HB and Cattivelli, L and Choulet, F and Cockram, J and Desiderio, F and Devaux, P and Dogramaci, M and Dorado, G and Dreisigacker, S and Edwards, D and El-Hassouni, K and Eversole, K and Fahima, T and Figueroa, M and Gálvez, S and Gill, KS and Govta, L and Gul, A and Hensel, G and Hernandez, P and Crespo-Herrera, LA and Ibrahim, A and Kilian, B and Korzun, V and Krugman, T and Li, Y and Liu, S and Mahmoud, AF and Morgounov, A and Muslu, T and Naseer, F and Ordon, F and Paux, E and Perovic, D and Reddy, GVP and Reif, JC and Reynolds, M and Roychowdhury, R and Rudd, J and Sen, TZ and Sukumaran, S and Ozdemir, BS and Tiwari, VK and Ullah, N and Unver, T and Yazar, S and Appels, R and Budak, H}, title = {Capturing Wheat Phenotypes at the Genome Level.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {851079}, pmid = {35860541}, issn = {1664-462X}, abstract = {Recent technological advances in next-generation sequencing (NGS) technologies have dramatically reduced the cost of DNA sequencing, allowing species with large and complex genomes to be sequenced. Although bread wheat (Triticum aestivum L.) is one of the world's most important food crops, efficient exploitation of molecular marker-assisted breeding approaches has lagged behind that achieved in other crop species, due to its large polyploid genome. However, an international public-private effort spanning 9 years reported over 65% draft genome of bread wheat in 2014, and finally, after more than a decade culminated in the release of a gold-standard, fully annotated reference wheat-genome assembly in 2018. 
Shortly thereafter, in 2020, the genome of assemblies of additional 15 global wheat accessions was released. As a result, wheat has now entered into the pan-genomic era, where basic resources can be efficiently exploited. Wheat genotyping with a few hundred markers has been replaced by genotyping arrays, capable of characterizing hundreds of wheat lines, using thousands of markers, providing fast, relatively inexpensive, and reliable data for exploitation in wheat breeding. These advances have opened up new opportunities for marker-assisted selection (MAS) and genomic selection (GS) in wheat. Herein, we review the advances and perspectives in wheat genetics and genomics, with a focus on key traits, including grain yield, yield-related traits, end-use quality, and resistance to biotic and abiotic stresses. We also focus on reported candidate genes cloned and linked to traits of interest. Furthermore, we report on the improvement in the aforementioned quantitative traits, through the use of (i) clustered regularly interspaced short-palindromic repeats/CRISPR-associated protein 9 (CRISPR/Cas9)-mediated gene-editing and (ii) positional cloning methods, and of genomic selection. 
Finally, we examine the utilization of genomics for the next-generation wheat breeding, providing a practical example of using in silico bioinformatics tools that are based on the wheat reference-genome sequence.}, } @article {pmid35856711, year = {2022}, author = {Guitart-Matas, J and Gonzalez-Escalona, N and Maguire, M and Vilaró, A and Martinez-Urtaza, J and Fraile, L and Migura-Garcia, L}, title = {Revealing Genomic Insights of the Unexplored Porcine Pathogen Actinobacillus pleuropneumoniae Using Whole Genome Sequencing.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0118522}, pmid = {35856711}, issn = {2165-0497}, mesh = {*Actinobacillus Infections/microbiology/veterinary ; *Actinobacillus pleuropneumoniae/genetics ; Animals ; Genomics ; *Pleuropneumonia/microbiology/veterinary ; Serotyping ; Swine ; *Swine Diseases/microbiology ; Whole Genome Sequencing ; }, abstract = {Actinobacillus pleuropneumoniae (APP) is the causative agent of pleuropneumonia in pigs, one of the most relevant bacterial respiratory diseases in the swine industry. To date, 19 serotypes have been described based on capsular polysaccharide typing with significant virulence dissimilarities. In this study, 16 APP isolates from Spanish origin were selected to perform antimicrobial susceptibility tests and comparative genomic analysis using whole genome sequencing (WGS). To obtain a more comprehensive worldwide molecular epidemiologic analyses, all APP whole genome assemblies available at the National Center for Biotechnology Information (NCBI) at the time of the study were also included. An in-house in silico PCR approach enabled the correct serotyping of unserotyped or incorrectly serotyped isolates and allowed for the discrimination between serotypes 9 and 11. A pangenome analysis identified the presence or absence of gene clusters to be serotype specific, as well as virulence profile analyses targeting the apx operons. 
Antimicrobial resistance genes were correlated to the presence of specific plasmids. Altogether, this study provides new insights into the genetic variability within APP serotypes, correlates phenotypic tests with bioinformatic analyses and manifests the benefits of populated databases for a better assessment of diversity and variability of relatively unknown pathogens. Overall, genomic comparative analysis enhances the understanding of transmission and epidemiological patterns of this species and suggests vertical transmission of the pathogen, including the resistance genes, within the Spanish integrated systems. IMPORTANCE Pleuropneumonia is one of the most relevant respiratory infections in the swine industry. Despite Actinobacillus pleuropneumoniae (APP) being one of the most important pathogens in the pig production, this is the first comparative study including all available whole genome sequencing data from NCBI. Moreover, this study also includes 16 APP isolates of Spanish origin with known epidemiological relationships through vertical integrated systems. Genomic comparisons provided a deeper understanding of molecular and epidemiological knowledge between different APP serotypes. 
Furthermore, determination of resistance and toxin profiles allowed correlation with the presence of mobile genetic elements and specific serotype, respectively.}, } @article {pmid35856667, year = {2022}, author = {Babiker, A and Bower, C and Lutgring, JD and Petit, RA and Howard-Anderson, J and Ansari, U and McAllister, G and Adamczyk, M and Breaker, E and Satola, SW and Jacob, JT and Woodworth, MH}, title = {Clinical and Genomic Epidemiology of mcr-9-Carrying Carbapenem-Resistant Enterobacterales Isolates in Metropolitan Atlanta, 2012 to 2017.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0252221}, pmid = {35856667}, issn = {2165-0497}, support = {K23 AI144036/AI/NIAID NIH HHS/United States ; U54 CK000485/CK/NCEZID CDC HHS/United States ; U54CK000485/ACL/ACL HHS/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology ; *Carbapenems/pharmacology ; *Colistin/pharmacology ; Drug Resistance, Bacterial/genetics ; Genomics ; Microbial Sensitivity Tests ; Phylogeny ; Plasmids ; }, abstract = {Colistin is a last-resort antibiotic for multidrug-resistant Gram-negative infections. Recently, the ninth allele of the mobile colistin resistance (mcr) gene family, designated mcr-9, was reported. However, its clinical and public health significance remains unclear. We queried genomes of carbapenem-resistant Enterobacterales (CRE) for mcr-9 from a convenience sample of clinical isolates collected between 2012 and 2017 through the Georgia Emerging Infections Program, a population- and laboratory-based surveillance program. Isolates underwent phenotypic characterization and whole-genome sequencing. Phenotypic characteristics, genomic features, and clinical outcomes of mcr-9-positive and -negative CRE cases were then compared. Among 235 sequenced CRE genomes, 13 (6%) were found to harbor mcr-9, all of which were Enterobacter cloacae complex. 
The median MIC and rates of heteroresistance and inducible resistance to colistin were similar between mcr-9-positive and -negative isolates. However, rates of resistance were higher among mcr-9-positive isolates across most antibiotic classes. All cases had significant health care exposures. The 90-day mortality was similarly high in both mcr-9-positive (31%) and -negative (7%) CRE cases. Nucleotide identity and phylogenetic analysis did not reveal geotemporal clustering. mcr-9-positive isolates had a significantly higher number of median [range] antimicrobial resistance (AMR) genes (16 [4 to 22] versus 6 [2 to 15]; P < 0.001) than did mcr-9-negative isolates. Pangenome tests confirmed a significant association of mcr-9 detection with mobile genetic element and heavy metal resistance genes. Overall, the presence of mcr-9 was not associated with significant changes in colistin resistance or clinical outcomes, but continued genomic surveillance to monitor for emergence of AMR genes is warranted. IMPORTANCE Colistin is a last-resort antibiotic for multidrug-resistant Gram-negative infections. A recently described allele of the mobile colistin resistance (mcr) gene family, designated mcr-9, has been widely reported among Enterobacterales species. However, its clinical and public health significance remains unclear. We compared characteristics and outcomes of mcr-9-positive and -negative CRE cases. All cases were acquired in the health care setting and associated with a high rate of mortality. The presence of mcr-9 was not associated with significant changes in colistin resistance, heteroresistance, or inducible resistance but was associated with resistance to other antimicrobials and antimicrobial resistance (AMR), virulence, and heavy metal resistance (HMR) genes. Overall, the presence of mcr-9 was not associated with significant phenotypic changes or clinical outcomes. 
However, given the increase in AMR and HMR gene content and potential clinical impact, continued genomic surveillance of multidrug-resistant organisms to monitor for emergence of AMR genes is warranted.}, } @article {pmid35851621, year = {2022}, author = {Wang, Y and Du, F and Wang, J and Wang, K and Tian, C and Qi, X and Lu, F and Liu, X and Ye, X and Jiao, Y}, title = {Improving bread wheat yield through modulating an unselected AP2/ERF gene.}, journal = {Nature plants}, volume = {8}, number = {8}, pages = {930-939}, pmid = {35851621}, issn = {2055-0278}, mesh = {*Bread ; Edible Grain/genetics ; Ethylenes ; Humans ; Plant Breeding ; Repressor Proteins ; *Triticum/genetics ; }, abstract = {Crop breeding heavily relies on natural genetic variation. However, additional new variations are desired to meet the increasing human demand. Inflorescence architecture determines grain number per spike, a major determinant of bread wheat (Triticum aestivum L.) yield. Here, using Brachypodium distachyon as a wheat proxy, we identified DUO-B1, encoding an APETALA2/ethylene response factor (AP2/ERF) transcription factor, regulating spike inflorescence architecture in bread wheat. Mutations of DUO-B1 lead to mild supernumerary spikelets, increased grain number per spike and, importantly, increased yield under field conditions without affecting other major agronomic traits. DUO-B1 suppresses cell division and promotes the expression of BH[t]/WFZP, whose mutations could lead to branched 'miracle-wheat'. 
Pan-genome analysis indicated that DUO-B1 has not been utilized in breeding, and holds promise to increase wheat yield further.}, } @article {pmid35841149, year = {2022}, author = {Liu, Y and Tian, Z}, title = {Super graph-based pan-genome: Bringing rice functional genomic study into a new dawn.}, journal = {Molecular plant}, volume = {15}, number = {9}, pages = {1409-1411}, doi = {10.1016/j.molp.2022.07.005}, pmid = {35841149}, issn = {1752-9867}, mesh = {Genome ; Genomics ; *Oryza/genetics ; }, } @article {pmid35838756, year = {2022}, author = {Ksiezarek, M and Grosso, F and Ribeiro, TG and Peixe, L}, title = {Genomic diversity of genus Limosilactobacillus.}, journal = {Microbial genomics}, volume = {8}, number = {7}, pages = {}, pmid = {35838756}, issn = {2057-5858}, mesh = {*Genome ; *Genomics ; Lactobacillaceae ; }, abstract = {The genus Limosilactobacillus (formerly Lactobacillus) contains multiple species considered to be adapted to vertebrates, yet their genomic diversity has not been explored. In this study, we performed comparative genomic analysis of Limosilactobacillus (22 species; 332 genomes) isolated from different niches, further focusing on human strains (11 species; 74 genomes) and their adaptation features to specific body sites. Phylogenomic analysis of Limosilactobacillus showed misidentification of some strains deposited in public databases and existence of putative novel Limosilactobacillus species. The pangenome analysis revealed a remarkable genomic diversity (only 1.3 % of gene clusters are shared), and we did not observe a strong association of the accessory genome with different niches. The pangenome of Limosilactobacillus reuteri and Limosilactobacillus fermentum was open, suggesting that acquisition of genes is still occurring. Although most Limosilactobacillus were predicted as antibiotic susceptible (83%), acquired antibiotic-resistance genes were common in L. reuteri from food-producing animals. 
Genes related to lactic acid isoform production (>95 %) and putative bacteriocins (70.2%) were identified in most Limosilactobacillus strains, while prophages (55.4%) and CRISPR-Cas systems (32.0%) were less prevalent. Among strains from human sources, several metabolic pathways were predicted as conserved and completed. Their accessory genome was highly variable and did not cluster according to different human body sites, with some exceptions (urogenital Limosilactobacillus vaginalis, Limosilactobacillus portuensis, Limosilactobacillus urinaemulieris and Limosilactobacillus coleohominis or gastrointestinal Limosilactobacillus mucosae). Moreover, we identified 12 Kyoto Encyclopedia of Genes and Genomes (KEGG) orthologues that were significantly enriched in strains from particular body sites. We concluded that evolution of the highly diverse Limosilactobacillus is complex and not always related to niche or human body site origin.}, } @article {pmid35832805, year = {2022}, author = {Zaidi, SE and Zaheer, R and Barbieri, R and Cook, SR and Hannon, SJ and Booker, CW and Church, D and Van Domselaar, G and Zovoilis, A and McAllister, TA}, title = {Genomic Characterization of Enterococcus hirae From Beef Cattle Feedlots and Associated Environmental Continuum.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {859990}, pmid = {35832805}, issn = {1664-302X}, abstract = {Enterococci are commensal bacteria of the gastrointestinal tract of humans, animals, and insects. They are also found in soil, water, and plant ecosystems. The presence of enterococci in human, animal, and environmental settings makes these bacteria ideal candidates to study antimicrobial resistance in the One-Health continuum. 
This study focused on Enterococcus hirae isolates (n = 4,601) predominantly isolated from beef production systems including bovine feces (n = 4,117, 89.5%), catch-basin water (n = 306, 6.65%), stockpiled bovine manure (n = 24, 0.5%), and natural water sources near feedlots (n = 145, 3.2%), and a few isolates from urban wastewater (n = 9, 0.2%) denoted as human-associated environmental samples. Antimicrobial susceptibility profiling of a subset (n = 1,319) of E. hirae isolates originating from beef production systems (n = 1,308) showed high resistance to tetracycline (65%) and erythromycin (57%) with 50.4% isolates harboring multi-drug resistance, whereas urban wastewater isolates (n = 9) were resistant to nitrofurantoin (44.5%) and tigecycline (44.5%) followed by linezolid (33.3%). Genes for tetracycline (tetL, M, S/M, and O/32/O) and macrolide resistance erm(B) were frequently found in beef production isolates. Antimicrobial resistance profiles of E. hirae isolates recovered from different environmental settings appeared to reflect the kind of antimicrobial usage in beef and human sectors. Comparative genomic analysis of E. hirae isolates showed an open pan-genome that consisted of 1,427 core genes, 358 soft core genes, 1,701 shell genes, and 7,969 cloud genes. Across species comparative genomic analysis conducted on E. hirae, Enterococcus faecalis and Enterococcus faecium genomes revealed that E. hirae had unique genes associated with vitamin production, cellulose, and pectin degradation, traits which may support its adaptation to the bovine digestive tract. E. faecium and E. 
faecalis more frequently harbored virulence genes associated with biofilm formation, iron transport, and cell adhesion, suggesting niche specificity within these species.}, } @article {pmid35821092, year = {2022}, author = {Shang, L and Li, X and He, H and Yuan, Q and Song, Y and Wei, Z and Lin, H and Hu, M and Zhao, F and Zhang, C and Li, Y and Gao, H and Wang, T and Liu, X and Zhang, H and Zhang, Y and Cao, S and Yu, X and Zhang, B and Zhang, Y and Tan, Y and Qin, M and Ai, C and Yang, Y and Zhang, B and Hu, Z and Wang, H and Lv, Y and Wang, Y and Ma, J and Wang, Q and Lu, H and Wu, Z and Liu, S and Sun, Z and Zhang, H and Guo, L and Li, Z and Zhou, Y and Li, J and Zhu, Z and Xiong, G and Ruan, J and Qian, Q}, title = {A super pan-genomic landscape of rice.}, journal = {Cell research}, volume = {32}, number = {10}, pages = {878-896}, pmid = {35821092}, issn = {1748-7838}, mesh = {Domestication ; Genome, Plant ; Genomics ; Leucine/genetics ; Nucleotides ; *Oryza/genetics ; }, abstract = {Pan-genomes from large natural populations can capture genetic diversity and reveal genomic complexity. Using de novo long-read assembly, we generated a graph-based super pan-genome of rice consisting of a 251-accession panel comprising both cultivated and wild species of Asian and African rice. Our pan-genome reveals extensive structural variations (SVs) and gene presence/absence variations. Additionally, our pan-genome enables the accurate identification of nucleotide-binding leucine-rich repeat genes and characterization of their inter- and intraspecific diversity. Moreover, we uncovered grain weight-associated SVs which specify traits by affecting the expression of their nearby genes. We characterized genetic variants associated with submergence tolerance, seed shattering and plant architecture and found independent selection for a common set of genes that drove adaptation and domestication in Asian and African rice. 
This super pan-genome facilitates pinpointing of lineage-specific haplotypes for trait-associated genes and provides insights into the evolutionary events that have shaped the genomic architecture of various rice species.}, } @article {pmid35821091, year = {2022}, author = {Olsen, KM}, title = {The rice pangenome branches out.}, journal = {Cell research}, volume = {32}, number = {10}, pages = {867-868}, pmid = {35821091}, issn = {1748-7838}, mesh = {Genomics ; *Oryza/genetics ; }, } @article {pmid35819407, year = {2022}, author = {Li, LF and Xu, L and Li, WH and Sun, JQ}, title = {Sinomicrobium kalidii sp. nov., an indole-3-acetic acid-producing endophyte from a shoot of halophyte Kalidium cuspidatum.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {72}, number = {7}, pages = {}, doi = {10.1099/ijsem.0.005452}, pmid = {35819407}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; *Chenopodiaceae ; DNA, Bacterial/genetics ; Endophytes/genetics ; Fatty Acids/chemistry ; Indoleacetic Acids ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Salt-Tolerant Plants ; Sequence Analysis, DNA ; Soil Microbiology ; }, abstract = {To better understand the effects of endophytic bacteria on halophytes, a bacterium that produced indole-3-acetic acid and 1-aminocyclopropane-1-carboxylic acid deaminase, designated HD2P242[T], was isolated from a shoot of Kalidium cuspidatum collected in Tumd Right Banner, Inner Mongolia, PR China. The cells of strain HD2P242[T] were Gram-stain-negative, strictly aerobic, motile by gliding, non-spore-forming and rod-shaped. Strain HD2P242[T] grew at pH 6.0-9.0 (optimum, pH 7.0) and 10-45 °C (optimum 37 °C), in the presence of 0-8 % (w/v) NaCl (optimum, 4 %). The strain was positive for oxidase and catalase. 
The phylogenetic trees based on the 16S rRNA gene sequences and the whole genome sequences both showed that strain HD2P242[T] clustered with Sinomicrobium pectinilyticum 5DNS001[T] and S. oceani SCSIO 03483[T], and had 95.6, 94.3 and <94.3 % 16S rRNA gene similarities to S. pectinilyticum 5DNS001[T], S. oceani SCSIO 03483[T] and all the other current type strains. Strain HD2P242[T] contained menaquinone 6 as its sole respiratory quinone. Its major polar lipids were phosphatidylethanolamine, two unidentified aminolipids, two unidentified phospholipids and an unidentified lipid. The major fatty acids were iso-C17 : 0, iso-C16 : 0 3-OH, anteiso-C17 : 0 and summed feature 6 (C19 : 1 ω9c and/or C19 : 1 ω11c). The genome consisted of a 5 364 211 bp circular chromosome, with a G+C content of 45.1 mol%, predicting 4391 coding sequence genes, 47 tRNA genes and two rRNA operons. The average nucleotide identity based on blast and the digital DNA-DNA hybridization values of strain HD2P242[T] with S. oceani SCSIO 03483[T] and S. pectinilyticum 5DNS001[T] were 73.8 and 77.0%, and 22.3 and 22.2%, respectively. The comparative genome analysis showed that the pan-genomes of strain HD2P242[T] and three Sinomicrobium type strains possessed 4236 clusters, whereas the core genome possessed 2162 clusters, which accounted for 52.3 % of all the clusters. The genomic analysis revealed that all four Sinomicrobium members could utilize d-glucose by the glycolysis-gluconeogenesis pathway or the pentose phosphate pathway. The tricarboxylic acid cycle was utilized as a metabolic centre. The phylogenetic, physiological and phenotypic characteristics allowed the discrimination of strain HD2P242[T] from its phylogenetic relatives. Therefore, Sinomicrobium kalidii sp. nov. 
is proposed, and the type strain is HD2P242[T] (=CGMCC 1.19025[T]=KCTC 92136[T]).}, } @article {pmid35818004, year = {2022}, author = {Contreras-Moreira, B and Del Río, ÁR and Cantalapiedra, CP and Sancho, R and Vinuesa, P}, title = {Pangenome Analysis of Plant Transcripts and Coding Sequences.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2512}, number = {}, pages = {121-152}, doi = {10.1007/978-1-0716-2429-6_9}, pmid = {35818004}, issn = {1940-6029}, mesh = {Humans ; Phylogeny ; *Software ; }, abstract = {The pangenome of a species is the sum of the genomes of its individuals. As coding sequences often represent only a small fraction of each genome, analyzing the pangene set can be a cost-effective strategy for plants with large genomes or highly heterozygous species. Here, we describe a step-by-step protocol to analyze plant pangene sets with the software GET_HOMOLOGUES-EST. After a short introduction, where the main concepts are illustrated, the remaining sections cover the installation and typical operations required to analyze and annotate pantranscriptomes and gene sets of plants. The recipes include instructions on how to call core and accessory genes, how to compute a presence-absence pangenome matrix, and how to identify and analyze private genes, present only in some genotypes. 
Downstream phylogenetic analyses are also discussed.}, } @article {pmid35818000, year = {2022}, author = {Tay Fernandez, CG and Marsh, JI and Nestor, BJ and Gill, M and Golicz, AA and Bayer, PE and Edwards, D}, title = {An SGSGeneloss-Based Method for Constructing a Gene Presence-Absence Table Using Mosdepth.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2512}, number = {}, pages = {73-80}, doi = {10.1007/978-1-0716-2429-6_5}, pmid = {35818000}, issn = {1940-6029}, mesh = {*Genome ; *Genomics/methods ; High-Throughput Nucleotide Sequencing/methods ; Humans ; Sequence Analysis, DNA/methods ; }, abstract = {Presence-absence variants (PAV) are genomic regions present in some individuals of a species, but not others. PAVs have been shown to contribute to genomic diversity, especially in bacteria and plants. These structural variations have been linked to traits and can be used to track a species' evolutionary history. PAVs are usually called by aligning short read sequence data from one or more individuals to a reference genome or pangenome assembly, and then comparing coverage. Regions where reads do not align define absence in that individual, and the regions are classified as PAVs. The method below details how to align sequence reads to a reference and how to use the sequencing-coverage calculator Mosdepth to identify PAVs and construct a PAV table for use in downstream comparative genome analysis.}, } @article {pmid35817620, year = {2022}, author = {Garg, S and Balboa, R and Kuja, J}, title = {Chromosome-scale haplotype-resolved pangenomics.}, journal = {Trends in genetics : TIG}, volume = {38}, number = {11}, pages = {1103-1107}, doi = {10.1016/j.tig.2022.06.011}, pmid = {35817620}, issn = {0168-9525}, mesh = {*Chromosomes/genetics ; Haplotypes/genetics ; Humans ; }, abstract = {Complete pangenomics is crucial for understanding genetic diversity and evolution across the tree of life. 
Chromosome-scale, haplotype-resolved pangenomics allows complex structural variations, long-range interactions, and associated functions to be discerned in species populations. We explore the need for high-resolution pangenomes, discuss computational strategies for their development, and describe applications in biodiversity and human health.}, } @article {pmid35815569, year = {2022}, author = {González-Díaz, A and Berbel, D and Ercibengoa, M and Cercenado, E and Larrosa, N and Quesada, MD and Casabella, A and Cubero, M and Marimón, JM and Domínguez, MÁ and Carrera-Salinas, A and Càmara, J and Martín-Galiano, AJ and Yuste, J and Martí, S and Ardanuy, C}, title = {Genomic features of predominant non-PCV13 serotypes responsible for adult invasive pneumococcal disease in Spain.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {77}, number = {9}, pages = {2389-2398}, doi = {10.1093/jac/dkac199}, pmid = {35815569}, issn = {1460-2091}, support = {//Instituto de Salud Carlos III/ ; //European Regional Development Fund/European Social Fund/ ; //Amazon Web Services/ ; //ISCIII/ ; //AWS/ ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Genomics ; Humans ; Penicillins ; *Pneumococcal Infections/epidemiology/prevention & control ; Pneumococcal Vaccines ; Serogroup ; Spain/epidemiology ; }, abstract = {BACKGROUND: Although pneumococcal conjugate vaccines (PCVs) effectively prevent invasive pneumococcal disease (IPD), serotype replacement has occurred.

OBJECTIVES: We studied the pangenome, antibiotic resistance mechanisms and presence of mobile elements in predominant non-PCV13 serotypes causing adult IPD after PCV13 vaccine introduction in Spain.

METHODS: We conducted a multicentre study comparing three periods in six Spanish hospitals and analysed through whole genome sequencing representative strains collected in the pre-PCV13, early-PCV13 and late-PCV13 periods.

RESULTS: Among 2197 cases of adult IPD identified, 110 pneumococci expressing non-PCV13 capsules were sequenced. Seven predominant serotypes accounted for 42.6% of IPD episodes in the late-PCV13 period: serotypes 8 (14.4%), 12F (7.5%), 9N (5.2%), 11A (4.1%), 22F (3.9%), 24F (3.9%) and 16F (3.6%). All predominant non-PCV13 serotypes were highly clonal, comprising one or two clonal complexes (CC). In general, CC538, CC4048, CC3016F, CC43322F and CC669N, related to predominant non-PCV13 serotypes, were antibiotic susceptible. CC15611A was associated with resistance to co-trimoxazole, penicillin and amoxicillin. CC23024F was non-susceptible to penicillin and resistant to erythromycin, clindamycin, and tetracycline. Six composite transposon structures of the Tn5252-family were found in CC23024F, CC98912F and CC3016F carrying different combinations of erm(B), tet(M), and cat. Pangenome analysis revealed differences in accessory genomes among the different CC, with most variety in CC3016F (23.9%) and more conservation in CC15611A (8.5%).

CONCLUSIONS: We identified highly clonal predominant serotypes responsible for IPD in adults. The detection of not only conjugative elements carrying resistance determinants but also clones previously associated with vaccine serotypes (CC15611A and CC23024F) highlights the importance of the accessory genome.}, } @article {pmid35809555, year = {2022}, author = {Yang, T and Gao, F}, title = {High-quality pan-genome of Escherichia coli generated by excluding confounding and highly similar strains reveals an association between unique gene clusters and genomic islands.}, journal = {Briefings in bioinformatics}, volume = {23}, number = {4}, pages = {}, doi = {10.1093/bib/bbac283}, pmid = {35809555}, issn = {1477-4054}, mesh = {*Escherichia coli/genetics ; Genome, Bacterial ; *Genomic Islands ; Multigene Family ; Phylogeny ; }, abstract = {The pan-genome analysis of bacteria provides detailed insight into the diversity and evolution of a bacterial population. However, the genomes involved in the pan-genome analysis should be checked carefully, as the inclusion of confounding strains would have unfavorable effects on the identification of core genes, and the highly similar strains could bias the results of the pan-genome state (open versus closed). In this study, we found that the inclusion of highly similar strains also affects the results of unique genes in pan-genome analysis, which leads to a significant underestimation of the number of unique genes in the pan-genome. Therefore, these strains should be excluded from pan-genome analysis at the early stage of data processing. Currently, tens of thousands of genomes have been sequenced for Escherichia coli, which provides an unprecedented opportunity as well as a challenge for pan-genome analysis of this classical model organism. Using the proposed strategies, a high-quality E. 
coli pan-genome was obtained, and the unique genes were extracted and analyzed, revealing an association between the unique gene clusters and genomic islands from a pan-genome perspective, which may facilitate the identification of genomic islands.}, } @article {pmid35805383, year = {2022}, author = {Alshammari, A and Alharbi, M and Alghamdi, A and Alharbi, SA and Ashfaq, UA and Tahir Ul Qamar, M and Ullah, A and Irfan, M and Khan, A and Ahmad, S}, title = {Computer-Aided Multi-Epitope Vaccine Design against Enterobacter xiangfangensis.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {13}, pages = {}, pmid = {35805383}, issn = {1660-4601}, mesh = {Anti-Bacterial Agents ; Computational Biology ; Computers ; Enterobacter ; Epitopes/chemistry ; Molecular Docking Simulation ; *Proteome ; *Vaccines ; }, abstract = {Antibiotic resistance is a global public health threat and is associated with high mortality due to antibiotics' inability to treat bacterial infections. Enterobacter xiangfangensis is an emerging antibiotic-resistant bacterial pathogen from the Enterobacter genus and has the ability to acquire resistance to multiple antibiotic classes. Currently, there is no effective vaccine against Enterobacter species. In this study, a chimeric vaccine is designed comprising different epitopes screened from E. xiangfangensis proteomes using immunoinformatic and bioinformatic approaches. In the first phase, six fully sequenced proteomes were investigated by bacterial pan-genome analysis, which revealed that the pathogen consists of 21,996 core proteins, 3785 non-redundant proteins and 18,211 redundant proteins. The non-redundant proteins were considered for the vaccine target prioritization phase where different vaccine filters were applied. By doing so, two proteins, ferrichrome porin (FhuA) and peptidoglycan-associated lipoprotein (Pal), were shortlisted for epitope prediction. 
Based on properties of antigenicity, allergenicity, water solubility and DRB*0101 binding ability, three epitopes (GPAPTIAAKR, ATKTDTPIEK and RNNGTTAEI) were used in multi-epitope vaccine designing. The designed vaccine construct was analyzed in a docking study with immune cell receptors, which predicted the vaccine's proper binding with said receptors. Molecular dynamics analysis revealed that the vaccine demonstrated stable binding dynamics, and binding free energy calculations further validated the docking results. In conclusion, these in silico results may help experimentalists in developing a vaccine against E. xiangfangensis in specific and Enterobacter in general.}, } @article {pmid35799353, year = {2022}, author = {Dabbaghie, F and Ebler, J and Marschall, T}, title = {BubbleGun: enumerating bubbles and superbubbles in genome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {17}, pages = {4217-4219}, pmid = {35799353}, issn = {1367-4811}, support = {//German Federal Ministry for Research and Education/ ; 031L0184A//BMBF/ ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Software ; *Algorithms ; Genome ; Genomics/methods ; }, abstract = {MOTIVATION: With the fast development of sequencing technology, accurate de novo genome assembly is now possible even for larger genomes. Graph-based representations of genomes arise both as part of the assembly process, but also in the context of pangenomes representing a population. In both cases, polymorphic loci lead to bubble structures in such graphs. Detecting bubbles is hence an important task when working with genomic variants in the context of genome graphs.

RESULTS: Here, we present a fast general-purpose tool, called BubbleGun, for detecting bubbles and superbubbles in genome graphs. Furthermore, BubbleGun detects and outputs runs of linearly connected bubbles and superbubbles, which we call bubble chains. We showcase its utility on de Bruijn graphs and compare our results to vg's snarl detection. We show that BubbleGun is considerably faster than vg especially in bigger graphs, where it reports all bubbles in less than 30 min on a human sample de Bruijn graph of around 2 million nodes.

BubbleGun is available and documented as a Python3 package at https://github.com/fawaz-dabbaghieh/bubble_gun under MIT license.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35799261, year = {2022}, author = {Vezina, B and Rosa, MN and Canu, A and Tola, S}, title = {Genomic surveillance reveals antibiotic resistance gene transmission via phage recombinases within sheep mastitis-associated Streptococcus uberis.}, journal = {BMC veterinary research}, volume = {18}, number = {1}, pages = {264}, pmid = {35799261}, issn = {1746-6148}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; *Bacteriophages ; Cattle ; *Cattle Diseases ; Drug Resistance, Microbial ; Female ; Genomics ; *Mastitis, Bovine/epidemiology ; Multilocus Sequence Typing/veterinary ; Recombinases ; Sheep ; *Sheep Diseases/epidemiology ; *Streptococcal Infections/epidemiology/veterinary ; Streptococcus ; }, abstract = {BACKGROUND: Streptococcus uberis is one of the main causative agents of ovine mastitis, however little is known about this global, environmental pathogen and its genomic mechanisms of disease. In this study, we performed genomic analysis on 46 S. uberis isolates collected from mastitis-infected sheep in Sardinia (Italy).

RESULTS: Genomes were assigned into lineage clusters using PopPUNK, which found 27 distinct isolate clusters, indicating considerable genetic variability consistent with environmental isolates. Geographic trends were identified including regional linkage of several isolate clusters. Multi-locus Sequence Typing (MLST) performed poorly and provided no new insights. Genomes were then screened for antimicrobial resistance genes, which were compared to phenotypic resistance profiles. Isolates showed consistent phenotypic resistance to aminoglycosides with variable resistance to novobiocin and tetracycline. In general, identification of antimicrobial resistance genes did not correlate with phenotypic resistance profiles, indicating unknown genetic determinants. A multi-antimicrobial resistance cassette (aminoglycoside, lincosamide and streptogramin) was identified in the chromosome of three genomes, flanked by vestigial phage recombinases. This locus appears to have spread horizontally within discrete S. uberis populations within a 40 km radius (Sassari region). Genomes were screened for putative virulence factors, which identified 16 genes conserved between sheep and cow isolates, with no host-specific genes shared uniformly across all host-specific isolates. Pangenomic analysis was then performed to identify core genes which were putatively surface-exposed, for identification of potential vaccine targets. As all genomes encoded sortase, core genes were screened for the sortase cleavage motif. Of the 1445 core S. uberis genes, 64 were putative sortase substrates and were predominantly adhesins, permeases and peptidases, consistent with compounds found within ruminant milk such as xanthine, fibronectin and lactoferrin.

CONCLUSIONS: This study demonstrated the importance of whole genome sequencing for surveillance of S. uberis and tracking horizontal acquisition of antimicrobial resistance genes, as well as providing insight into genetic determinants of disease, which cannot be inferred from the MLST schemes. Future mastitis surveillance should be informed by genomic analysis.}, } @article {pmid35795189, year = {2022}, author = {Saraiva, MMS and Benevides, VP and da Silva, NMV and Varani, AM and de Freitas Neto, OC and Berchieri, Â and Delgado-Suárez, EJ and Rocha, ADL and Eguale, T and Munyalo, JA and Kariuki, S and Gebreyes, WA and de Oliveira, CJB}, title = {Genomic and Evolutionary Analysis of Salmonella enterica Serovar Kentucky Sequence Type 198 Isolated From Livestock In East Africa.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {772829}, pmid = {35795189}, issn = {2235-2988}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Ciprofloxacin ; Drug Resistance, Multiple, Bacterial/genetics ; Genomics ; Kentucky ; Livestock ; Phylogeny ; *Salmonella enterica/genetics ; Serogroup ; Streptomycin ; }, abstract = {Since its emergence in the beginning of the 90's, multidrug-resistant (MDR) Salmonella enterica subsp. enterica serovar Kentucky has become a significant public health problem, especially in East Africa. This study aimed to investigate the antimicrobial resistance profile and the genotypic relatedness of Salmonella Kentucky isolated from animal sources in Ethiopia and Kenya (n=19). We also investigated population evolutionary dynamics through phylogenetic and pangenome analyses with additional publicly available Salmonella Kentucky ST198 genomes (n=229). All the 19 sequenced Salmonella Kentucky isolates were identified as ST198. 
Among these isolates, the predominant genotypic antimicrobial resistance profile observed in ten (59.7%) isolates included the aac(3)-Id, aadA7, strA-strB, blaTEM-1B, sul1, and tet(A) genes, which mediated resistance to gentamicin, streptomycin/spectinomycin, streptomycin, ampicillin, sulfamethoxazole and tetracycline, respectively; and gyrA and parC mutations associated with ciprofloxacin resistance. Four isolates harbored plasmid types Incl1 and/or Col8282; two of them carried both plasmids. Salmonella Pathogenicity islands (SPI-1 to SPI-5) were highly conserved in the 19 sequenced Salmonella Kentucky isolates. Moreover, at least one Pathogenicity Island (SPI 1-4, SPI 9 or C63PI) was identified among the 229 public Salmonella Kentucky genomes. The phylogenetic analysis revealed that almost all Salmonella Kentucky ST198 isolates (17/19) stemmed from a single strain that has accumulated ciprofloxacin resistance-mediating mutations. A total of 8,104 different genes were identified in a heterogenic and still open Salmonella Kentucky ST198 pangenome. 
Considering the virulence factors and antimicrobial resistance genes detected in Salmonella Kentucky, the implications of this pathogen to public health and the epidemiological drivers for its dissemination must be investigated.}, } @article {pmid35779281, year = {2022}, author = {Garg, G and Kamphuis, LG and Bayer, PE and Kaur, P and Dudchenko, O and Taylor, CM and Frick, KM and Foley, RC and Gao, LL and Aiden, EL and Edwards, D and Singh, KB}, title = {A pan-genome and chromosome-length reference genome of narrow-leafed lupin (Lupinus angustifolius) reveals genomic diversity and insights into key industry and biological traits.}, journal = {The Plant journal : for cell and molecular biology}, volume = {111}, number = {5}, pages = {1252-1266}, pmid = {35779281}, issn = {1365-313X}, support = {UM1 HG009375/HG/NHGRI NIH HHS/United States ; RM1 HG011016/HG/NHGRI NIH HHS/United States ; }, mesh = {Australia ; Chromosomes ; Genomics ; Humans ; *Lupinus/genetics ; Plant Breeding ; }, abstract = {Narrow-leafed lupin (NLL; Lupinus angustifolius) is a key rotational crop for sustainable farming systems, whose grain is high in protein content. It is a gluten-free, non-genetically modified, alternative protein source to soybean (Glycine max) and as such has gained interest as a human food ingredient. Here, we present a chromosome-length reference genome for the species and a pan-genome assembly comprising 55 NLL lines, including Australian and European cultivars, breeding lines and wild accessions. We present the core and variable genes for the species and report on the absence of essential mycorrhizal associated genes. The genome and pan-genomes of NLL and its close relative white lupin (Lupinus albus) are compared. Furthermore, we provide additional evidence supporting LaRAP2-7 as the key alkaloid regulatory gene for NLL and demonstrate the NLL genome is underrepresented in classical NLR disease resistance genes compared to other sequenced legume species. 
The NLL genomic resources generated here coupled with previously generated RNA sequencing datasets provide new opportunities to fast-track lupin crop improvement.}, } @article {pmid35763423, year = {2022}, author = {Ceres, KM and Stanhope, MJ and Gröhn, YT}, title = {A critical evaluation of Mycobacterium bovis pangenomics, with reference to its utility in outbreak investigation.}, journal = {Microbial genomics}, volume = {8}, number = {6}, pages = {}, pmid = {35763423}, issn = {2057-5858}, mesh = {Animals ; Cattle ; Disease Outbreaks ; *Mycobacterium bovis/genetics ; *Tuberculosis, Bovine/microbiology ; }, abstract = {The increased accessibility of next generation sequencing has allowed enough genomes from a given bacterial species to be sequenced to describe the distribution of genes in the pangenome, without limiting analyses to genes present in reference strains. Although some taxa have thousands of whole genome sequences available on public databases, most genomes were sequenced with short read technology, resulting in incomplete assemblies. Studying pangenomes could lead to important insights into adaptation, pathogenicity, or molecular epidemiology, however given the known information loss inherent in analyzing contig-level assemblies, these inferences may be biased or inaccurate. In this study we describe the pangenome of a clonally evolving pathogen, Mycobacterium bovis, and examine the utility of gene content variation in M. bovis outbreak investigation. We constructed the M. bovis pangenome using 1463 de novo assembled genomes. We tested the assumption of strict clonal evolution by studying evidence of recombination in core genes and analyzing the distribution of accessory genes among core monophyletic groups. To determine if gene content variation could be utilized in outbreak investigation, we carefully examined accessory genes detected in a well described M. bovis outbreak in Minnesota. We found significant errors in accessory gene classification. 
After accounting for these errors, we show that M. bovis has a much smaller accessory genome than previously described and provide evidence supporting ongoing clonal evolution and a closed pangenome, with little gene content variation generated over outbreaks. We also identified frameshift mutations in multiple genes, including a mutation in glpK, which has recently been associated with antibiotic tolerance in Mycobacterium tuberculosis. A pangenomic approach enables a more comprehensive analysis of genome dynamics than is possible with reference-based approaches; however, without critical evaluation of accessory gene content, inferences of transmission patterns employing these loci could be misguided.}, } @article {pmid35758593, year = {2022}, author = {Sang, J and Zhuang, D and Zhang, T and Wu, Q and Yu, J and Zhang, Z}, title = {Convergent and Divergent Age Patterning of Gut Microbiota Diversity in Humans and Nonhuman Primates.}, journal = {mSystems}, volume = {7}, number = {4}, pages = {e0151221}, pmid = {35758593}, issn = {2379-5077}, mesh = {Animals ; Adult ; Humans ; *Gastrointestinal Microbiome/genetics ; Macaca mulatta/genetics ; RNA, Ribosomal, 16S/genetics ; Hydroxyproline ; Leucine ; }, abstract = {The gut microbiome has significant effects on healthy aging and aging-related diseases, whether in humans or nonhuman primates. However, little is known about the divergence and convergence of gut microbial diversity between humans and nonhuman primates during aging, which limits their applicability for studying the gut microbiome's role in human health and aging. Here, we performed 16S rRNA gene sequencing analysis for captive rhesus macaques (Macaca mulatta) and compared this data set with other freely available gut microbial data sets containing four human populations (Chinese, Japanese, Italian, and British) and two nonhuman primates (wild lemurs [Lemur catta] and wild chimpanzees [Pan troglodytes]). 
Based on the consistent V4 region of the 16S rRNA gene, beta diversity analysis suggested significantly separated gut microbial communities associated with host backgrounds of seven host groups, but within each group, significant gut microbial divergences were observed, and indicator bacterial genera were identified as associated with aging. We further discovered six common anti-inflammatory gut bacteria (Prevotellamassilia, Prevotella, Gemmiger, Coprococcus, Faecalibacterium, and Roseburia) that had butyrate-producing potentials suggested by pangenomic analysis and that showed similar dynamic changes in at least two selected host groups during aging, independent of distinct host backgrounds. Finally, we found striking age-related changes in 66 plasma metabolites in macaques. Two highly changed metabolites, hydroxyproline and leucine, enriched in adult macaques were significantly and positively correlated with Prevotella and Prevotellamassilia. Furthermore, genus-level pangenome analysis suggested that those six common indicator bacteria can synthesize leucine and arginine as hydroxyproline and proline precursors in both humans and macaques. IMPORTANCE This study provides the first comprehensive investigation of age patterning of gut microbiota of four human populations and three nonhuman primates and found that Prevotellamassilia, Prevotella, Gemmiger, Coprococcus, Faecalibacterium, and Roseburia may be common antiaging microbial markers in both humans and nonhuman primates due to their potential metabolic capabilities for host health benefits. 
Our results also provide key support for using macaques as animal models in studies of the gut microbiome's role during human aging.}, } @article {pmid35752938, year = {2022}, author = {Liu, C and Wang, Y and Peng, J and Fan, B and Xu, D and Wu, J and Cao, Z and Gao, Y and Wang, X and Li, S and Su, Q and Zhang, Z and Wang, S and Wu, X and Shang, Q and Shi, H and Shen, Y and Wang, B and Tian, J}, title = {High-quality genome assembly and pan-genome studies facilitate genetic discovery in mung bean and its improvement.}, journal = {Plant communications}, volume = {3}, number = {6}, pages = {100352}, pmid = {35752938}, issn = {2590-3462}, mesh = {*Vigna/genetics ; Genome-Wide Association Study ; Plant Breeding ; *Fabaceae/genetics ; Polymorphism, Single Nucleotide ; }, abstract = {Mung bean is an economically important legume crop species that is used as a food, consumed as a vegetable, and used as an ingredient and even as a medicine. To explore the genomic diversity of mung bean, we assembled a high-quality reference genome (Vrad_JL7) that was ∼479.35 Mb in size, with a contig N50 length of 10.34 Mb. A total of 40,125 protein-coding genes were annotated, representing ∼96.9% of the genetic region. We also sequenced 217 accessions, mainly landraces and cultivars from China, and identified 2,229,343 high-quality single-nucleotide polymorphisms (SNPs). Population structure revealed that the Chinese accessions diverged into two groups and were distinct from non-Chinese lines. Genetic diversity analysis based on genomic data from 750 accessions in 23 countries supported the hypothesis that mung bean was first domesticated in south Asia and introduced to east Asia probably through the Silk Road. We constructed the first pan-genome of mung bean germplasm and assembled 287.73 Mb of non-reference sequences. Among the genes, 83.1% were core genes and 16.9% were variable. 
Presence/absence variation (PAV) events of nine genes involved in the regulation of the photoperiodic flowering pathway were identified as being under selection during the adaptation process to promote early flowering in the spring. Genome-wide association studies (GWASs) revealed 2,912 SNPs and 259 gene PAV events associated with 33 agronomic traits, including a SNP in the coding region of the SWEET10 homolog (jg24043) involved in crude starch content and a PAV event in a large fragment containing 11 genes for color-related traits. This high-quality reference genome and pan-genome will provide insights into mung bean breeding.}, } @article {pmid35752768, year = {2022}, author = {Guo, G and Wang, Z and Li, Q and Yu, Y and Li, Y and Tan, Z and Zhang, W}, title = {Genomic characterization of Streptococcus parasuis, a close relative of Streptococcus suis and also a potential opportunistic zoonotic pathogen.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {469}, pmid = {35752768}, issn = {1471-2164}, support = {KYCX21_0643//Postgraduate Research & Practice Innovation Program of Jiangsu Province/ ; ZD2021037//Key scientific research project of Jiangsu Commission of Health/ ; 31772751//National Natural Science Foundation of China/ ; NAUSY-MS12//Guidance Foundation, the Sanya Institute of Nanjing Agricultural University/ ; }, mesh = {Animals ; Cattle ; Genomics ; *Streptococcal Infections/veterinary ; Streptococcus ; *Streptococcus suis/genetics ; Swine ; *Swine Diseases ; Virulence/genetics ; }, abstract = {Streptococcus parasuis (S. parasuis) is a close relative of Streptococcus suis (S. suis), composed of former members of S. suis serotypes 20, 22 and 26. S. parasuis could infect pigs and cows, and recently, human infection cases have been reported, making S. parasuis a potential opportunistic zoonotic pathogen. In this study, we analysed the genomic characteristics of S. 
parasuis, using pan-genome analysis, and compared some phenotypic determinants such as capsular polysaccharide, integrative conjugative elements, CRISPR-Cas system and pili, and predicted the potential virulence genes by associated analysis of the clinical condition of isolated source animals and genotypes. Furthermore, to discuss the relationship with S. suis, we compared these characteristics of S. parasuis with those of S. suis. We found that the characteristics of S. parasuis are similar to those of S. suis, both of them have an "open" pan-genome, their antimicrobial resistance gene profiles are similar and a srtF pilus cluster of S. suis was identified in S. parasuis genome. But S. parasuis still has its unique characteristics: two novel pilus clusters and three different types of CRISPR-Cas systems were found. Therefore, this study provides novel insights into the interspecific and intraspecific genetic characteristics of S. parasuis, which can be useful for further study of this opportunistic pathogen, such as serotyping, diagnostics, vaccine development, and study of the pathogenesis mechanism.}, } @article {pmid35752693, year = {2022}, author = {Huang, G and Zhu, Y}, title = {Insights of section-wide pan-genome into hybrid potato breeding.}, journal = {Science China. 
Life sciences}, volume = {65}, number = {10}, pages = {2125-2127}, pmid = {35752693}, issn = {1869-1889}, mesh = {Genome ; Plant Breeding ; *Solanum tuberosum/genetics ; Tetraploidy ; }, } @article {pmid35751915, year = {2022}, author = {Menghwar, H and Perez-Casal, J}, title = {Comparative genomic analysis of Canadian Mycoplasma bovis strains isolated from Bison and Cattle.}, journal = {Comparative immunology, microbiology and infectious diseases}, volume = {87}, number = {}, pages = {101835}, doi = {10.1016/j.cimid.2022.101835}, pmid = {35751915}, issn = {1878-1667}, mesh = {Animals ; *Bison ; Canada/epidemiology ; Cattle ; Female ; Genomics ; *Mycoplasma Infections/epidemiology/veterinary ; *Mycoplasma bovis/genetics ; Virulence Factors/genetics ; }, abstract = {Mycoplasma bovis (M. bovis) in cattle causes pneumonia, arthritis, otitis media, and mastitis. In addition, multiple outbreaks have been recorded in North American bison. The genomic data on Canadian M. bovis in bison and cattle to date is limited. Whole-genome sequencing (WGS) was used to assess the degree of genome conservation across four Canadian M. bovis strains recovered from bison and cattle. Whole-genome sequences of four M. bovis isolates (Mb1, Mb160, Mb300, Mb304) and the PG45 reference genome were utilized to identify the M. bovis genomic similarity, whole-genome single nucleotide polymorphism (WGS-SNP), virulence determinants, and genomic islands. The pan-genome analysis showed that M. bovis encodes a minimum of 971 genes, while the core genome contained 637 genes. Comparative genomics revealed limited diversity in gene content between bison and cattle isolates. Whole-genome SNP analysis showed that the four M. bovis isolates differed from each other and to PG45. A total of 40 putative virulence genes associated with adhesion, colonization, and destruction of tissues were found in the bison and cattle isolates using the virulence factors database (VFDB). 
These putative virulence factors were equally distributed among isolates. Genomic Islands (GIs) ranging from 4 to 9 and associated with transposases, restriction-modification, ribosomal hypothetical proteins, variable surface lipoproteins, and unknowns were also identified. Overall, the genomic characterization of these isolates may provide new insights into the mechanisms of pathogenicity in M. bovis.}, } @article {pmid35750675, year = {2022}, author = {Kutyna, DR and Onetto, CA and Williams, TC and Goold, HD and Paulsen, IT and Pretorius, IS and Johnson, DL and Borneman, AR}, title = {Construction of a synthetic Saccharomyces cerevisiae pan-genome neo-chromosome.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {3628}, pmid = {35750675}, issn = {2041-1723}, mesh = {Chromosomes, Artificial, Yeast/genetics ; *Genome, Fungal/genetics ; *Saccharomyces cerevisiae/genetics ; Synthetic Biology ; }, abstract = {The Synthetic Yeast Genome Project (Sc2.0) represents the first foray into eukaryotic genome engineering and a framework for designing and building the next generation of industrial microbes. However, the laboratory strain S288c used lacks many of the genes that provide phenotypic diversity to industrial and environmental isolates. To address this shortcoming, we have designed and constructed a neo-chromosome that contains many of these diverse pan-genomic elements and which is compatible with the Sc2.0 design and test framework. The presence of this neo-chromosome provides phenotypic plasticity to the Sc2.0 parent strain, including expanding the range of utilizable carbon sources. We also demonstrate that the induction of programmable structural variation (SCRaMbLE) provides genetic diversity on which further adaptive gains could be selected. 
The presence of this neo-chromosome within the Sc2.0 backbone may therefore provide the means to adapt synthetic strains to a wider variety of environments, a process which will be vital to transitioning Sc2.0 from the laboratory into industrial applications.}, } @article {pmid35750315, year = {2022}, author = {Li, W and Liu, J and Zhang, H and Liu, Z and Wang, Y and Xing, L and He, Q and Du, H}, title = {Plant pan-genomics: recent advances, new challenges, and roads ahead.}, journal = {Journal of genetics and genomics = Yi chuan xue bao}, volume = {49}, number = {9}, pages = {833-846}, doi = {10.1016/j.jgg.2022.06.004}, pmid = {35750315}, issn = {1673-8527}, mesh = {Domestication ; *Genome, Plant/genetics ; *Genomics/methods ; }, abstract = {Pan-genomics can encompass most of the genetic diversity of a species or population and has proved to be a powerful tool for studying genomic evolution and the origin and domestication of species, and for providing information for plant improvement. Plant genomics has greatly progressed because of improvements in sequencing technologies and the rapid reduction of sequencing costs. Nevertheless, pan-genomics still presents many challenges, including computationally intensive assembly methods, high costs with large numbers of samples, ineffective integration of big data, and difficulty in applying it to downstream multi-omics analysis and breeding research. In this review, we summarize the definition and recent achievements of plant pan-genomics, computational technologies used for pan-genome construction, and the applications of pan-genomes in plant genomics and molecular breeding. We also discuss challenges and perspectives for future pan-genomics studies and provide a detailed pipeline for sample selection, genome assembly and annotation, structural variation identification, and construction and application of graph-based pan-genomes. 
The aim is to provide important guidance for plant pan-genome research and a better understanding of the genetic basis of genome evolution, crop domestication, and phenotypic diversity for future studies.}, } @article {pmid35748708, year = {2022}, author = {Bradbury, PJ and Casstevens, T and Jensen, SE and Johnson, LC and Miller, ZR and Monier, B and Romay, MC and Song, B and Buckler, ES}, title = {The Practical Haplotype Graph, a platform for storing and using pangenomes for imputation.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {15}, pages = {3698-3702}, pmid = {35748708}, issn = {1367-4811}, support = {IOS-1238014//U. S. Department of Agriculture-Agricultural Research Service, National Science Foundation Research-PGR/ ; OPP1159867//Bill and Melinda Gates Foundation/ ; }, mesh = {Haplotypes ; *Plant Breeding ; *Genome ; Genomics/methods ; Software ; }, abstract = {MOTIVATION: Pangenomes provide novel insights for population and quantitative genetics, genomics and breeding not available from studying a single reference genome. Instead, a species is better represented by a pangenome or collection of genomes. Unfortunately, managing and using pangenomes for genomically diverse species is computationally and practically challenging. We developed a trellis graph representation anchored to the reference genome that represents most pangenomes well and can be used to impute complete genomes from low density sequence or variant data.

RESULTS: The Practical Haplotype Graph (PHG) is a pangenome pipeline, database (PostGRES & SQLite), data model (Java, Kotlin or R) and Breeding API (BrAPI) web service. The PHG has already been able to accurately represent diversity in four major crops including maize, one of the most genomically diverse species, with up to 1000-fold data compression. Using simulated data, we show that, at even 0.1× coverage, with appropriate reads and sequence alignment, imputation results in extremely accurate haplotype reconstruction. The PHG is a platform and environment for the understanding and application of genomic diversity.

AVAILABILITY AND IMPLEMENTATION: All resources listed here are freely available. The PHG Docker used to generate the simulation results is https://hub.docker.com/ as maizegenetics/phg:0.0.27. PHG source code is at https://bitbucket.org/bucklerlab/practicalhaplotypegraph/src/master/. The code used for the analysis of simulated data is at https://bitbucket.org/bucklerlab/phg-manuscript/src/master/. The PHG database of NAM parent haplotypes is in the CyVerse data store (https://de.cyverse.org/de/) and named /iplant/home/shared/panzea/panGenome/PHG_db_maize/phg_v5Assemblies_20200608.db.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35746733, year = {2022}, author = {Chandrasekar, SS and Phanse, Y and Riel, M and Hildebrand, RE and Hanafy, M and Osorio, JE and Abdelgayed, SS and Talaat, AM}, title = {Systemic Neutralizing Antibodies and Local Immune Responses Are Critical for the Control of SARS-CoV-2.}, journal = {Viruses}, volume = {14}, number = {6}, pages = {}, pmid = {35746733}, issn = {1999-4915}, mesh = {Animals ; *Antibodies, Neutralizing ; Antibodies, Viral ; Antibody Formation ; *COVID-19/prevention & control ; COVID-19 Vaccines ; Humans ; Mice ; Mice, Inbred BALB C ; SARS-CoV-2 ; Spike Glycoprotein, Coronavirus ; }, abstract = {Antibody measurements are primarily used to evaluate experimental and approved COVID-19 vaccines, which is unilateral considering our immune responses' complex nature. Previously, we showed that nanoparticle plasmid DNA adjuvant system, QAC, and MVA based vaccines were immunogenic against SARS-CoV-2. Here, we report on the protective efficacy of systemic humoral and mucosal cell-mediated immune responses in transgenic mice models against SARS-CoV-2 following nanoparticle immunization. Parenteral, intramuscular administration of QAC-based plasmid DNA vaccine-encoding SARS-CoV-2 S and N led to the induction of significant serum neutralizing humoral responses, which reduced viral burden in the lungs and prevented viral dissemination to the brain. In contrast, the mucosal, intranasal administration of a heterologous vaccine elicited significant mucosal cell-mediated immune responses in the lungs that limited lung viral replication. 
The presented results demonstrate that serum neutralizing humoral and local lung T-cell immune responses are critical for the control of SARS-CoV-2 replication.}, } @article {pmid35746494, year = {2022}, author = {Al-Megrin, WAI and Karkashan, A and Alnuqaydan, AM and Aba Alkhayl, FF and Alrumaihi, F and Almatroudi, A and Allemailem, KS}, title = {Design of a Multi-Epitopes Based Chimeric Vaccine against Enterobacter cloacae Using Pan-Genome and Reverse Vaccinology Approaches.}, journal = {Vaccines}, volume = {10}, number = {6}, pages = {}, pmid = {35746494}, issn = {2076-393X}, abstract = {Enterobacter cloacae (EC) is a significant emerging pathogen that is occasionally associated with lung infection, surgical site infection, urinary infection, sepsis, and outbreaks in neonatal intensive care units. In light of the fact that there is currently no approved vaccine or therapeutic option for the treatment of EC, the current study was developed to concentrate on applications based on modern computational approaches to design a multi-epitope-based E. cloacae peptide vaccine (MEBEPV) expressing the antigenic determinants prioritized from the EC genome. Integrated computational analyses identified two potential protein targets (phosphoporin protein-PhoE and putative outer-membrane porin protein) for further exploration on the basis of pangenome subtractive proteomics and immunoinformatic in-depth examination of the core proteomes. Then, a multi-epitope peptide vaccine was designed, which comprised shortlisted epitopes that were capable of eliciting both innate and adaptive immunity, as well as the cholera toxin's B-subunit, which was used as an adjuvant in the vaccine formulation. To ensure maximum expression, the vaccine's 3D structure was developed and the loop was refined, improving the stability by disulfide engineering, and the physicochemical characteristics of the recombinant vaccine sequence were found to be ideal for both in vitro and in vivo experimentation. 
Blind docking was then used for the prediction of the MEBEPV predominant binding mode with MHCI, MHCII, and TLR3 innate immune receptors, with lowest global energy of -18.64 kJ/mol, -48.25 kJ/mol, and -5.20 kJ/mol for MHC-I, MHC-II, and TLR-4, respectively, with docked complexes considered for simulation. In MD and MMGBSA investigations, the docked models of MEBEPV-TLR3, MEBEPV-MHCI, and MEBEPV-MHCII were found to be stable during the course of the simulation. MM-GBSA analysis calculated -122.17 total net binding free energies for the TLR3-vaccine complex, -125.4 for the MHC I-vaccine complex, and -187.94 for the MHC II-vaccine complex. Next, MM-PBSA analysis calculated -115.63 binding free energy for the TLR3-vaccine complex, -118.19 for the MHC I-vaccine complex, and -184.61 for the MHC II-vaccine complex. When the vaccine was tested in silico, researchers discovered that it was capable of inducing both types of immune responses (cell mediated and humoral) at the same time. Even though the suggested MEBEPV has the potential to be a powerful contender against E. cloacae-associated illnesses, further testing in the laboratory will be required before it can be declared safe and immunogenic.}, } @article {pmid35745530, year = {2022}, author = {Jungkhun, N and Gomes de Farias, AR and Watcharachaiyakup, J and Kositcharoenkul, N and Ham, JH and Patarapuwadol, S}, title = {Phylogenetic Characterization and Genome Sequence Analysis of Burkholderia glumae Strains Isolated in Thailand as the Causal Agent of Rice Bacterial Panicle Blight.}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {6}, pages = {}, pmid = {35745530}, issn = {2076-0817}, support = {1015305//National Institute of Food and Agriculture (NIFA) - USDA/ ; }, abstract = {Burkholderia glumae is one of the most critical rice-pathogenic bacteria, and it causes bacterial panicle blight (BPB) in rice plants. In 2017, BPB symptoms were observed from rice fields in Chiang Rai, Northern Thailand. 
Sixty-one isolates obtained from the symptomatic panicles of rice were initially identified as B. glumae by polymerase chain reaction (PCR) using species-specific primers. Among them, six selected strains isolated from the susceptible japonica rice cultivar DOA2 were characterized in terms of morpho-physiology, pathology, phylogenetics, and genomics. Our genome sequence analysis of the six selected strains revealed the presence of multiple prophages, which may reflect the high level of diversity in this bacterial species through dynamic horizontal gene transfer processes, including phage infection. This notion was supported by the results of phylogenetic and phylogenomic analyses, which showed the formation of several subgroups not related to the years of isolation or the geographical origins. This study reports the isolation of B. glumae as the causal pathogen of BPB disease in japonica rice in Thailand and provides genomic resources to better understand the biology and diversity of this plant pathogenic bacterium. Further studies with a vast collection of B. glumae strains from various rice-growing regions around the world are needed to elucidate the evolution, variability, and lifestyle of the pathogen.}, } @article {pmid35739387, year = {2022}, author = {Edwards, D and Batley, J}, title = {Graph pangenomes find missing heritability.}, journal = {Nature genetics}, volume = {54}, number = {7}, pages = {919-920}, pmid = {35739387}, issn = {1546-1718}, mesh = {*Genome-Wide Association Study ; *Models, Genetic ; }, } @article {pmid35736064, year = {2022}, author = {Guo, Y and Liu, Z and Fu, Y and Li, Y and Dai, Y and Xiao, S}, title = {Pan-Genomes Provide Insights into the Genetic Basis of Auricularia heimuer Domestication.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {6}, pages = {}, pmid = {35736064}, issn = {2309-608X}, support = {grant number 2021YFD1600401//the Key Project on R&D of the Ministry of Science and Technology/ ; No. 
D17014//the Program of Creation and Utilization of Germplasm of Mushroom Crop of "111" Project/ ; No. 2017B01011//the National-level International Joint Research Centre/ ; }, abstract = {In order to reveal the genetic variation signals of Auricularia heimuer that have occurred during their domestication and to find potential functional gene families, we constructed a monokaryotic pan-genome of A. heimuer representing four cultivated strains and four wild strains. The pan-genome contained 14,089 gene families, of which 67.56% were core gene families and 31.88% were dispensable gene families. We screened substrate utilization-related genes such as the chitinase gene ahchi1 of the glycoside hydrolase (GH) 18 family and a carbohydrate-binding module (CBM)-related gene from the dispensable families of cultivated populations. The genomic difference in the ahchi1 gene between the wild and cultivated genomes was caused by a 33 kb presence/absence variation (PAV). The detection rate of the ahchi1 gene was 93.75% in the cultivated population, significantly higher than that in the wild population (17.39%), indicating that it has been selected in cultivated strains. Principal component analysis (PCA) of the polymorphic markers in fragments near the ahchi1 gene was enriched in cultivated strains, and this was caused by multiple independent instances of artificial selection. We revealed for the first time the genetic basis of the ahchi1 gene in domestication, thereby providing a foundation for elucidating the potential function of the ahchi1 gene in the breeding of A. heimuer.}, } @article {pmid35733954, year = {2022}, author = {Cooper, ZS and Rapp, JZ and Shoemaker, AMD and Anderson, RE and Zhong, ZP and Deming, JW}, title = {Evolutionary Divergence of Marinobacter Strains in Cryopeg Brines as Revealed by Pangenomics.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {879116}, pmid = {35733954}, issn = {1664-302X}, abstract = {Marinobacter spp. 
are cosmopolitan in saline environments, displaying a diverse set of metabolisms that allow them to competitively occupy these environments, some of which can be extreme in both salinity and temperature. Here, we introduce a distinct cluster of Marinobacter genomes, composed of novel isolates and in silico assembled genomes obtained from subzero, hypersaline cryopeg brines, relic seawater-derived liquid habitats within permafrost sampled near Utqiaġvik, Alaska. Using these new genomes and 45 representative publicly available genomes of Marinobacter spp. from other settings, we assembled a pangenome to examine how the new extremophile members fit evolutionarily and ecologically, based on genetic potential and environmental source. This first genus-wide genomic analysis revealed that Marinobacter spp. in general encode metabolic pathways that are thermodynamically favored at low temperature, cover a broad range of organic compounds, and optimize protein usage, e.g., the Entner-Doudoroff pathway, the glyoxylate shunt, and amino acid metabolism. The new isolates contributed to a distinct clade of subzero brine-dwelling Marinobacter spp. that diverged genotypically and phylogenetically from all other Marinobacter members. The subzero brine clade displays genomic characteristics that may explain competitive adaptations to the extreme environments they inhabit, including more abundant membrane transport systems (e.g., for organic substrates, compatible solutes, and ions) and stress-induced transcriptional regulatory mechanisms (e.g., for cold and salt stress) than in the other Marinobacter clades. We also identified more abundant signatures of potential horizontal transfer of genes involved in transcription, the mobilome, and a variety of metabolite exchange systems, which led to considering the importance of this evolutionary mechanism in an extreme environment where adaptation via vertical evolution is physiologically rate limited. 
Assessing these new extremophile genomes in a pangenomic context has provided a unique view into the ecological and evolutionary history of the genus Marinobacter, particularly with regard to its remarkable diversity and its opportunism in extremely cold and saline environments.}, } @article {pmid35733110, year = {2022}, author = {Kuzmanović, N and Biondi, E and Overmann, J and Puławska, J and Verbarg, S and Smalla, K and Lassalle, F}, title = {Genomic analysis provides novel insights into diversification and taxonomy of Allorhizobium vitis (i.e. Agrobacterium vitis).}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {462}, pmid = {35733110}, issn = {1471-2164}, mesh = {Agrobacterium/genetics ; Genomics ; Phylogeny ; Plant Tumors ; *Rhizobiaceae/genetics ; *Vitis/genetics/microbiology ; }, abstract = {BACKGROUND: Allorhizobium vitis (formerly named Agrobacterium vitis or Agrobacterium biovar 3) is the primary causative agent of crown gall disease of grapevine worldwide. We obtained and analyzed whole-genome sequences of diverse All. vitis strains to get insights into their diversification and taxonomy.

RESULTS: Pairwise genome comparisons and phylogenomic analysis of various All. vitis strains clearly indicated that All. vitis is not a single species, but represents a species complex composed of several genomic species. Thus, we emended the description of All. vitis, which now refers to a restricted group of strains within the All. vitis species complex (i.e. All. vitis sensu stricto) and proposed a description of a novel species, All. ampelinum sp. nov. The type strain of All. vitis sensu stricto remains the current type strain of All. vitis, K309[T]. The type strain of All. ampelinum sp. nov. is S4[T]. We also identified sets of gene clusters specific to the All. vitis species complex, All. vitis sensu stricto and All. ampelinum, respectively, for which we predicted the biological function and infer the role in ecological diversification of these clades, including some we could experimentally validate. All. vitis species complex-specific genes confer tolerance to different stresses, including exposure to aromatic compounds. Similarly, All. vitis sensu stricto-specific genes confer the ability to degrade 4-hydroxyphenylacetate and a putative compound related to gentisic acid. All. ampelinum-specific genes have putative functions related to polyamine metabolism and nickel assimilation. Congruently with the genome-based classification, All. vitis sensu stricto and All. ampelinum were clearly delineated by MALDI-TOF MS analysis. Moreover, our genome-based analysis indicated that Allorhizobium is clearly separated from other genera of the family Rhizobiaceae.

CONCLUSIONS: Comparative genomics and phylogenomic analysis provided novel insights into the diversification and taxonomy of Allorhizobium vitis species complex, supporting our redefinition of All. vitis sensu stricto and description of All. ampelinum. Our pan-genome analyses suggest that these species have differentiated ecologies, each relying on specialized nutrient consumption or toxic compound degradation to adapt to their respective niche.}, } @article {pmid35731940, year = {2022}, author = {Romero Picazo, D and Werner, A and Dagan, T and Kupczok, A}, title = {Pangenome Evolution in Environmentally Transmitted Symbionts of Deep-Sea Mussels Is Governed by Vertical Inheritance.}, journal = {Genome biology and evolution}, volume = {14}, number = {7}, pages = {}, pmid = {35731940}, issn = {1759-6653}, mesh = {Animals ; Bacteria/genetics ; Gene Transfer, Horizontal ; Genome, Bacterial ; Methane ; *Mytilidae/genetics/microbiology ; Phylogeny ; Sulfur ; Symbiosis/genetics ; }, abstract = {Microbial pangenomes vary across species; their size and structure are determined by genetic diversity within the population and by gene loss and horizontal gene transfer (HGT). Many bacteria are associated with eukaryotic hosts where the host colonization dynamics may impact bacterial genome evolution. Host-associated lifestyle has been recognized as a barrier to HGT in parentally transmitted bacteria. However, pangenome evolution of environmentally acquired symbionts remains understudied, often due to limitations in symbiont cultivation. Using high-resolution metagenomics, here we study pangenome evolution of two co-occurring endosymbionts inhabiting Bathymodiolus brooksi mussels from a single cold seep. The symbionts, sulfur-oxidizing (SOX) and methane-oxidizing (MOX) gamma-proteobacteria, are environmentally acquired at an early developmental stage and individual mussels may harbor multiple strains of each symbiont species. 
We found differences in the accessory gene content of both symbionts across individual mussels, which are reflected by differences in symbiont strain composition. Compared with core genes, accessory genes are enriched in genome plasticity functions. We found no evidence for recent HGT between both symbionts. A comparison between the symbiont pangenomes revealed that the MOX population is less diverged and contains fewer accessory genes, supporting that the MOX association with B. brooksi is more recent in comparison to that of SOX. Our results show that the pangenomes of both symbionts evolved mainly by vertical inheritance. We conclude that genome evolution of environmentally transmitted symbionts that associate with individual hosts over their lifetime is affected by a narrow symbiosis where the frequency of HGT is constrained.}, } @article {pmid35731562, year = {2022}, author = {Moran, RA and Liu, H and Doughty, EL and Hua, X and Cummins, EA and Liveikis, T and McNally, A and Zhou, Z and van Schaik, W and Yu, Y}, title = {GR13-type plasmids in Acinetobacter potentiate the accumulation and horizontal transfer of diverse accessory genes.}, journal = {Microbial genomics}, volume = {8}, number = {6}, pages = {}, pmid = {35731562}, issn = {2057-5858}, support = {MR/S013660/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {*Acinetobacter baumannii/genetics ; Plasmids/genetics ; }, abstract = {Carbapenem and other antibiotic resistance genes (ARGs) can be found in plasmids in Acinetobacter, but many plasmid types in this genus have not been well-characterized. Here we describe the distribution, diversity and evolutionary capacity of rep group 13 (GR13) plasmids that are found in Acinetobacter species from diverse environments. Our investigation was prompted by the discovery of two GR13 plasmids in A. baumannii isolated in an intensive care unit (ICU). 
The plasmids harbour distinct accessory genes: pDETAB5 contains blaNDM-1 and genes that confer resistance to four further antibiotic classes, while pDETAB13 carries putative alcohol tolerance determinants. Both plasmids contain multiple dif modules, which are flanked by pdif sites recognized by XerC/XerD tyrosine recombinases. The ARG-containing dif modules in pDETAB5 are almost identical to those found in pDETAB2, a GR34 plasmid from an unrelated A. baumannii isolated in the same ICU a month prior. Examination of a further 41 complete, publicly available plasmid sequences revealed that the GR13 pangenome consists of just four core but 1186 accessory genes, 123 in the shell and 1063 in the cloud, reflecting substantial capacity for diversification. The GR13 core genome includes genes for replication and partitioning, and for a putative tyrosine recombinase. Accessory segments encode proteins with diverse putative functions, including for metabolism, antibiotic/heavy metal/alcohol tolerance, restriction-modification, an anti-phage system and multiple toxin–antitoxin systems. The movement of dif modules and actions of insertion sequences play an important role in generating diversity in GR13 plasmids. 
Discrete GR13 plasmid lineages are internationally disseminated and found in multiple Acinetobacter species, which suggests they are important platforms for the accumulation, horizontal transmission and persistence of accessory genes in this genus.}, } @article {pmid35731345, year = {2022}, author = {Da Silva, WM and Larzabal, M and Aburjaile, FF and Riviere, N and Martorelli, L and Bono, J and Amadio, A and Cataldi, A}, title = {Whole-genome sequencing analysis of Shiga toxin-producing Escherichia coli O22:H8 isolated from cattle prediction pathogenesis and colonization factors and position in STEC universe phylogeny.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {60}, number = {7}, pages = {689-704}, pmid = {35731345}, issn = {1976-3794}, mesh = {Animals ; Cattle ; *Escherichia coli Infections/microbiology/veterinary ; *Escherichia coli Proteins/genetics ; Phylogeny ; Shiga Toxin/genetics ; *Shiga-Toxigenic Escherichia coli/genetics ; Virulence Factors/genetics/metabolism ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) is a foodborne pathogen capable of causing illness in humans. In a previous study, our group showed that a STEC isolate belonging to O22:H8 serotype (strain 154) can interfere with STEC O157:H7 colonization both in vitro and in vivo. Using whole-genome sequencing and genomic comparative, we predicted a subset of genes acquired by O22:H8 strain 154 through horizontal gene transfer that might be responsible for the phenotype previously described by our group. Among them were identified genes related to the pathogenesis of non-LEE (locus of enterocyte effacement) STEC, specific metabolic processes, antibiotic resistance and genes encoding for the T6SS-1 that is related to inter-bacterial competition. In addition, we showed that this strain carries stx1c and stx2dact, a mucus-inducible variant. 
The results obtained in this study provide insights into STEC genomic plasticity and the importance of genomic islands in the adaptation and pathogenesis of this pathogen.}, } @article {pmid35731037, year = {2022}, author = {Wu, L and Xie, J and Qi, Y and Su, T and Jiang, L and Zhou, W and Jiang, Y and Zhang, C and Zhong, X and Cao, Y and Wang, W}, title = {Mutational landscape of non-functional adrenocortical adenomas.}, journal = {Endocrine-related cancer}, volume = {29}, number = {9}, pages = {521-532}, doi = {10.1530/ERC-21-0410}, pmid = {35731037}, issn = {1479-6821}, mesh = {*Adrenal Cortex Neoplasms/pathology ; *Adrenal Gland Neoplasms ; *Adrenocortical Adenoma/genetics/metabolism ; Carcinogenesis ; G Protein-Coupled Inwardly-Rectifying Potassium Channels/genetics ; Humans ; Mutation ; beta Catenin/genetics/metabolism ; }, abstract = {Adrenal incidentalomas are the most frequent human neoplasms. Recent genomic investigations on functional adrenocortical tumors have demonstrated that somatic mutations in PRKACA and KCNJ5 responsible for the development of adrenocortical adenomas (ACAs) are associated with hypercortisolism and aldosteronism, respectively. Several studies have identified CTNNB1 mutations in ACAs and have been mostly involved in the tumorigenesis of non-functional ACA (NFACA). However, integrated genomic characterization of NFACAs is lacking. In the current study, we utilized pan-genomic methods to comprehensively analyze 60 NFACA samples. A total of 1264 somatic mutations in coding regions among the 60 samples were identified, with a median of 15 non-silent mutations per tumor. Twenty-two NFACAs (36.67%) had genetic alterations in CTNNB1. We also identified several somatic mutations in genes of the cAMP/PKA pathway and KCNJ5. Histone modification genes (KMT2A, KMT2C, and KMT2D) were altered in 10% of cases. Germline mutations of MEN1 and RET were also found. 
Finally, by comparison of our transcriptome data with those available in the TCGA, we illustrated the molecular characterization of NFACA. We revealed the genetic profiling and molecular landscape of NFACA. Wnt/β-catenin pathway activation as shown by nuclear and/or cytoplasmic β-catenin accumulation is frequent, occurring in about one-third of ACA cases. Cytochrome P450 enzymes could be markers to reveal the functional status of adrenocortical tumors. These observations strongly suggest the involvement of the Wnt/β-catenin pathway in benign adrenal tumorigenesis and possibly in the regulation of steroid secretion.}, } @article {pmid35730965, year = {2022}, author = {Bai, X and Ylinen, E and Zhang, J and Salmenlinna, S and Halkilahti, J and Saxen, H and Narayanan, A and Jahnukainen, T and Matussek, A}, title = {Comparative Genomics of Shiga Toxin-Producing Escherichia coli Strains Isolated from Pediatric Patients with and without Hemolytic Uremic Syndrome from 2000 to 2016 in Finland.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0066022}, pmid = {35730965}, issn = {2165-0497}, mesh = {Child ; *Escherichia coli Infections/epidemiology/microbiology ; Finland/epidemiology ; Genomics ; *Hemolytic-Uremic Syndrome/epidemiology/microbiology ; Humans ; Shiga Toxin ; *Shiga-Toxigenic Escherichia coli/genetics ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) infection can cause mild to severe illness, such as nonbloody or bloody diarrhea, and the fatal hemolytic uremic syndrome (HUS). The molecular mechanism underlying the variable pathogenicity of STEC infection is not fully defined so far. Here, we performed a comparative genomics study on a large collection of clinical STEC strains collected from STEC-infected pediatric patients with and without HUS in Finland over a 16-year period, aiming to identify the bacterial genetic factors that can predict the risk to cause HUS and poor renal outcome. 
Of 240 STEC strains included in this study, 52 (21.7%) were from pediatric patients with HUS. Serotype O157:H7 was the main cause of HUS, and Shiga toxin gene subtype stx2a was significantly associated with HUS. Comparative genomics and pangenome-wide association studies identified a number of virulence and accessory genes overrepresented in HUS-associated STEC compared to non-HUS STEC strains, including genes encoding cytolethal distending toxins, type III secretion system effectors, adherence factors, etc. No virulence or accessory gene was significantly associated with risk factors for poor renal outcome among HUS patients assessed in this study, including need for and duration of dialysis, presence and duration of anuria, and leukocyte counts. Whole-genome phylogeny and multiple-correspondence analysis of pangenomes could not separate HUS STEC from non-HUS STEC strains, suggesting that STEC strains with diverse genetic backgrounds may independently acquire genetic elements that determine their varied pathogenicity. Our findings indicate that nonbacterial factors, i.e., characteristics of the host immunity, might affect STEC virulence and clinical outcomes. IMPORTANCE Shiga toxin-producing Escherichia coli (STEC) is a serious public health burden worldwide which causes outbreaks of gastrointestinal diseases and the fatal hemolytic uremic syndrome (HUS) characterized by the triad of mechanical hemolytic anemia, thrombocytopenia, and acute renal failure. Understanding the mechanism underlying the disease severity and patient outcome is of high importance. Using comparative genomics on a large collection of clinical STEC strains from STEC-infected patients with and without HUS, our study provides a reference of STEC genetic factors/variants that can be used as predictors of the development of HUS, which will aid risk assessment at the early stage of STEC infection. 
Additionally, our findings suggest that nonbacterial factors may play a primary role in the renal outcome in STEC-infected patients with HUS; further studies are needed to validate this.}, } @article {pmid35729190, year = {2022}, author = {Rocha, J and Henriques, I and Gomila, M and Manaia, CM}, title = {Common and distinctive genomic features of Klebsiella pneumoniae thriving in the natural environment or in clinical settings.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {10441}, pmid = {35729190}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents ; Genomics ; Humans ; *Klebsiella Infections/microbiology ; *Klebsiella pneumoniae ; Microbial Sensitivity Tests ; Multilocus Sequence Typing ; Phylogeny ; beta-Lactamases/genetics ; }, abstract = {The Klebsiella pneumoniae complex is comprised of ubiquitous bacteria that can be found in soils, plants or water, and as humans' opportunistic pathogens. This study aimed at inferring common and distinctive features in clinical and environmental K. pneumoniae. Whole genome sequences of members of the K. pneumoniae complex (including K. variicola, n = 6; and K. quasipneumoniae, n = 7), of clinical (n = 78) and environmental (n = 61) origin from 21 countries were accessed from the GenBank. These genomes were compared based on phylogeny, pangenome and selected clinically relevant traits. Phylogenetic analysis based on 2704 genes of the core genome showed close relatedness between clinical and environmental strains, in agreement with the multi-locus sequence typing. Eight out of the 62 sequence types (STs) identified, included both clinical and environmental genomes (ST11, ST14, ST15, ST37, ST45, ST147, ST348, ST437). Pangenome-wide association studies did not evidence significant differences between clinical and environmental genomes. 
However, the genomes of clinical isolates presented significantly more exclusive genes related to antibiotic resistance/plasmids, while the environmental isolates yielded significantly higher allelic diversity of genes related with functions such as efflux or oxidative stress. The study suggests that K. pneumoniae can circulate among the natural environment and clinical settings, probably under distinct adaptation pressures.}, } @article {pmid35727540, year = {2022}, author = {Sollitto, M and Kenny, NJ and Greco, S and Tucci, CF and Calcino, AD and Gerdol, M}, title = {Detecting Structural Variants and Associated Gene Presence-Absence Variation Phenomena in the Genomes of Marine Organisms.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2498}, number = {}, pages = {53-76}, pmid = {35727540}, issn = {1940-6029}, mesh = {*Aquatic Organisms/genetics ; Genetic Variation ; Genome ; *Genomic Structural Variation ; Genomics/methods ; Sequence Analysis, DNA ; }, abstract = {As complete genomes become easier to attain, even from previously difficult-to-sequence species, and as genomic resequencing becomes more routine, it is becoming obvious that genomic structural variation is more widespread than originally thought and plays an important role in maintaining genetic variation in populations. Structural variants (SVs) and associated gene presence-absence variation (PAV) can be important players in local adaptation, allowing the maintenance of genetic variation and taking part in other evolutionarily relevant phenomena. While recent studies have highlighted the importance of structural variation in Mollusca, the prevalence of this phenomenon in the broader context of marine organisms remains to be fully investigated. Here, we describe a straightforward and broadly applicable method for the identification of SVs in fully assembled diploid genomes, leveraging the same reads used for assembly. 
We also explain a gene PAV analysis protocol, which could be broadly applied to any species with a fully sequenced reference genome available. Although the strength of these approaches has been tested and proven in marine invertebrates, which tend to have high levels of heterozygosity, possibly due to their lifestyle traits, they are also applicable to other species across the tree of life, providing a ready means to begin investigations into this potentially widespread phenomenon.}, } @article {pmid35727397, year = {2022}, author = {Kumar, S and Bansal, K and Sethi, SK}, title = {Reclassification of Streptococcus ilei as a later heterotypic synonym of Streptococcus koreensis based on whole-genome sequence analysis.}, journal = {Archives of microbiology}, volume = {204}, number = {7}, pages = {408}, pmid = {35727397}, issn = {1432-072X}, mesh = {Bacterial Typing Techniques ; DNA, Bacterial/genetics ; Humans ; Nucleic Acid Hybridization ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; *Streptococcus/genetics ; }, abstract = {The genus Streptococcus, a member of family Streptococcaceae, is known for its wide range of industrial, clinical and human relevance. Among the species of genus Streptococcus two members, namely Streptococcus koreensis and Streptococcus ilei, were isolated from subgingival dental plaque and human small intestinal fluid, respectively. The 16S rRNA gene sequence similarity of the type strains of these members shows a similarity of 99.87%. In this study, we performed a systematic study to clarify the taxonomic assignment of these two species. Genome similarity assessment based on whole-genome sequence information such as average nucleotide identity using orthoANI and fastANI, digital DNA-DNA hybridization value between S. koreensis and S. ilei were 96.31, 96.60, 86.4 and 97.63, respectively. All these genome similarity values clearly exceeded the species delineation cutoffs. 
Phylogenetic assessment using 16S rRNA gene and whole-genome information using PhyloPhlAn, which uses around 400 conserved genes across bacterial phyla, provides additional evidence for these members forming a monophyletic clade in the phylogenetic tree. Pan genome analysis suggests a very large core genome (n = 1374) and the presence of no unique gene between the genomes of S. koreensis and S. ilei. Additionally, we found highly syntenic genomes of type strains of these two species. Based on these evidences, we propose S. ilei should be reclassified as a later heterotypic synonym of S. koreensis.}, } @article {pmid35727037, year = {2022}, author = {Montelongo, C and Mores, CR and Putonti, C and Wolfe, AJ and Abouelfetouh, A}, title = {Whole-Genome Sequencing of Staphylococcus aureus and Staphylococcus haemolyticus Clinical Isolates from Egypt.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0241321}, pmid = {35727037}, issn = {2165-0497}, support = {R01 DK104718/DK/NIDDK NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Egypt/epidemiology ; Humans ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Microbial Sensitivity Tests ; Multilocus Sequence Typing ; *Staphylococcal Infections/epidemiology ; Staphylococcus/genetics ; Staphylococcus aureus/genetics ; Staphylococcus haemolyticus/genetics ; }, abstract = {Infections caused by antibiotic-resistant Staphylococcus are a global concern. This is true in the Middle East, where increasingly resistant Staphylococcus aureus and Staphylococcus haemolyticus strains have been detected. While extensive surveys have revealed the prevalence of infections caused by antibiotic-resistant staphylococci in Europe, Asia, and North America, the population structure of antibiotic-resistant staphylococci recovered from patients and clinical settings in Egypt remains uncharacterized. We performed whole-genome sequencing of 56 S. aureus and 10 S. 
haemolyticus isolates from Alexandria Main University Hospital; 46 of the S. aureus genomes and all 10 of the S. haemolyticus genomes carry mecA, which confers methicillin resistance. Supplemented with additional publicly available genomes from the other parts of the Middle East (34 S. aureus and 6 S. haemolyticus), we present the largest genomic study to date of staphylococcal isolates from the Middle East. These genomes include 20 S. aureus multilocus sequence types (MLST), including 3 new ones. They also include 9 S. haemolyticus MLSTs, including 1 new one. Phylogenomic analyses of each species' core genome largely mirrored those of the MLSTs, irrespective of geographical origin. The hospital-acquired spa t037/ST239-SCCmec III/MLST CC8 clone represented the largest clade, comprising 22% of the S. aureus isolates. Like S. aureus genome surveys of other regions, these isolates from the Middle East have an open pangenome, a strong indicator of gene exchange of virulence factors and antibiotic resistance genes with other reservoirs. Our genome analyses will inform antibiotic stewardship and infection control plans in the Middle East. IMPORTANCE Staphylococci are understudied despite their prevalence within the Middle East. Methicillin-resistant Staphylococcus aureus (MRSA) is endemic to hospitals in Egypt, as are other antibiotic-resistant strains of S. aureus and S. haemolyticus. To provide insight into the strains circulating in Egypt, we performed whole-genome sequencing of 56 S. aureus and 10 S. haemolyticus isolates from Alexandria Main University Hospital. Through analysis of these genomes, as well as all available S. aureus and S. haemolyticus genomes from the Middle East (n = 40), we were able to produce a picture of the diversity in this region more complete than those afforded by traditional molecular typing strategies. For example, we identified 4 new MLSTs. 
Most strains harbored genes associated with multidrug resistance, toxin production, biofilm formation, and immune evasion. These data provide invaluable insight for future antibiotic stewardship and infection control within the Middle East.}, } @article {pmid35722513, year = {2022}, author = {Parakkunnel, R and Bhojaraja Naik, K and Susmita, C and Girimalla, V and Bhaskar, KU and Sripathy, KV and Shantharaja, CS and Aravindan, S and Kumar, S and Lakhanpaul, S and Bhat, KV}, title = {Evolution and co-evolution: insights into the divergence of plant heat shock factor genes.}, journal = {Physiology and molecular biology of plants : an international journal of functional plant biology}, volume = {28}, number = {5}, pages = {1029-1047}, pmid = {35722513}, issn = {0971-5894}, abstract = {UNLABELLED: The Heat Shock Factor (Hsf) genes are widely distributed across the plant kingdom regulating the plant response to various abiotic stresses. In addition to natural selection, breeding and accelerated selection changed the structure and function of Hsf genes. 1076 Hsf genes from 30 genera from primitive algae to the most advanced plant species and major crop plants were used for phylogenetic analysis. The interspecific divergence was studied with 11 members of genus Oryza while intraspecific divergence was studied with sesame pan-genome adapted to diverse ecological niches. B2 genes in eudicots and monocots originated separately while A1 gave rise to the recently evolved Class-C genes and land colonization happened with evolution of A1 genes. An increase in the number of lineages in the Oryza clade with the evolution of AA genome indicated independent domestication and positive selection was observed in > 53% of loci whereas the highly conserved homologues were under purifying selection. The paralogous genes under positive selection exhibited more domain changes for diversified function and increased fitness. 
A significant co-evolving cluster involving amino acids Phenylalanine, Lysine and Valine played a crucial role in maintaining the hydrophobic core along with highly conserved Tryptophan residues. A mutation of Glutamic acid to Glutamine was observed in A8 genes of Lamiales affecting protein solvency. Breeding resulted in accumulation of mutations reducing the hydrophobicity of proteins and a further reduction in protein aggregation. This study identifies genome duplications, non-neutral selection and co-evolving residues as causing drastic changes in the conserved domain of Hsf proteins.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s12298-022-01183-7.}, } @article {pmid35722315, year = {2022}, author = {Podrzaj, L and Burtscher, J and Domig, KJ}, title = {Comparative Genomics Provides Insights Into Genetic Diversity of Clostridium tyrobutyricum and Potential Implications for Late Blowing Defects in Cheese.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {889551}, pmid = {35722315}, issn = {1664-302X}, abstract = {Clostridium tyrobutyricum has been recognized as the main cause of late blowing defects (LBD) in cheese leading to considerable economic losses for the dairy industry. Although differences in spoilage ability among strains of this species have been acknowledged, potential links to the genetic diversity and functional traits remain unknown. In the present study, we aimed to investigate and characterize genomic variation, pan-genomic diversity and key traits of C. tyrobutyricum by comparing the genomes of 28 strains. A comparative genomics analysis revealed an "open" pangenome comprising 9,748 genes and a core genome of 1,179 genes shared by all test strains. Among those core genes, the majority of genes encode proteins related to translation, ribosomal structure and biogenesis, energy production and conversion, and amino acid metabolism. A large part of the accessory genome is composed of sets of unique, strain-specific genes ranging from about 5 to more than 980 genes. Furthermore, functional analysis revealed several strain-specific genes related to replication, recombination and repair, cell wall, membrane and envelope biogenesis, and defense mechanisms that might facilitate survival under stressful environmental conditions. Phylogenomic analysis divided strains into two clades: clade I contained human, mud, and silage isolates, whereas clade II comprised cheese and milk isolates. 
Notably, these two groups of isolates showed differences in certain hypothetical proteins, transcriptional regulators and ABC transporters involved in resistance to oxidative stress. To the best of our knowledge, this is the first study to provide comparative genomics of C. tyrobutyricum strains related to LBD. Importantly, the findings presented in this study highlight the broad genetic diversity of C. tyrobutyricum, which might help us understand the diversity in spoilage potential of C. tyrobutyricum in cheese and provide some clues for further exploring the gene modules responsible for the spoilage ability of this species.}, } @article {pmid35720548, year = {2022}, author = {Wang, Y and Habekuß, A and Jayakodi, M and Mascher, M and Snowdon, RJ and Stahl, A and Fuß, J and Ordon, F and Perovic, D}, title = {High-Resolution Mapping of Barley mild mosaic virus Resistance Gene rym15.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {908170}, pmid = {35720548}, issn = {1664-462X}, abstract = {Barley yellow mosaic virus (BaYMV) and Barley mild mosaic virus (BaMMV), which are transmitted by the soil-borne plasmodiophorid Polymyxa graminis, cause high yield losses in barley. In previous studies, the recessive BaMMV resistance gene rym15, derived from the Japanese landrace Chikurin Ibaraki 1, was mapped on chromosome 6HS of Hordeum vulgare. In this study, 423 F4 segmental recombinant inbred lines (RILs) were developed from crosses of Chikurin Ibaraki 1 with two BaMMV-susceptible cultivars, Igri (139 RILs) and Uschi (284 RILs). A set of 32 competitive allele-specific PCR (KASP) assays, designed using single nucleotide polymorphisms (SNPs) from the barley 50 K Illumina Infinium iSelect SNP chip, genotyping by sequencing (GBS) and whole-genome sequencing (WGS), was used as a backbone for construction of two high-resolution maps. 
Using this approach, the target locus was narrowed down to 0.161 cM and 0.036 cM in the Igri × Chikurin Ibaraki 1 (I × C) and Chikurin Ibaraki 1 × Uschi (C × U) populations, respectively. Corresponding physical intervals of 11.3 Mbp and 0.281 Mbp were calculated for I × C and C × U, respectively, according to the Morex v3 genome sequence. In the 0.281 Mbp target region, six high confidence (HC) and two low confidence (LC) genes were identified. Genome assemblies of BaMMV-susceptible cultivars Igri and Golden Promise from the barley pan-genome, and a HiFi assembly of Chikurin Ibaraki 1 together with re-sequencing data for the six HC and two LC genes in susceptible parental cultivar Uschi revealed functional SNPs between resistant and susceptible genotypes only in two of the HC genes. These SNPs are the most promising candidates for the development of functional markers and the two genes represent promising candidates for functional analysis.}, } @article {pmid35720310, year = {2022}, author = {Tan, YC and Lahiri, C}, title = {Promising Acinetobacter baumannii Vaccine Candidates and Drug Targets in Recent Years.}, journal = {Frontiers in immunology}, volume = {13}, number = {}, pages = {900509}, pmid = {35720310}, issn = {1664-3224}, mesh = {*Acinetobacter baumannii ; Anti-Bacterial Agents/pharmacology ; Bacterial Vaccines ; Computational Biology/methods ; Molecular Docking Simulation ; }, abstract = {In parallel to the uncontrolled use of antibiotics, the emergence of multidrug-resistant bacteria, like Acinetobacter baumannii, has posed a severe threat. A. baumannii predominates in the nosocomial setting due to its ability to persist in hospitals and survive antibiotic treatment, thereby eventually leading to an increasing prevalence and mortality due to its infection. With the increasing spectra of drug resistance and the incessant collapse of newly discovered antibiotics, new therapeutic countermeasures have been in high demand. 
Hence, recent research has shown favouritism towards the long-term solution of designing vaccines. Therefore, being a realistic alternative strategy to combat this pathogen, anti-A. baumannii vaccine research has continued unearthing various antigens with variable results over the last decade. Again, other approaches, including pan-genomics, subtractive proteomics, and reverse vaccination strategies, have shown promise for identifying promiscuous core vaccine candidates that resulted in chimeric vaccine constructs. In addition, the integration of basic knowledge of the pathobiology of this drug-resistant bacterium has also facilitated the development of effective multiantigen vaccines. As opposed to the conventional trial-and-error approach, incorporating the in silico methods in recent studies, particularly network analysis, has manifested a great promise in unearthing novel vaccine candidates from the A. baumannii proteome. Some studies have used multiple A. baumannii data sources to build the co-functional networks and analyze them by k-shell decomposition. Additionally, Whole Genomic Protein Interactome (GPIN) analysis has utilized a rational approach for identifying essential proteins and presenting them as vaccines effective enough to combat the deadly pathogenic threats posed by A. baumannii. Others have identified multiple immune nodes using network-based centrality measurements for synergistic antigen combinations for different vaccination strategies. Protein-protein interactions have also been inferred utilizing structural approaches, such as molecular docking and molecular dynamics simulation. Similar workflows and technologies were employed to unveil novel A. baumannii drug targets, with a similar trend in the increasing influx of in silico techniques. This review integrates the latest knowledge on the development of A. baumannii vaccines while highlighting the in silico methods as the future of such exploratory research. 
In parallel, we also briefly summarize recent advancements in A. baumannii drug target research.}, } @article {pmid35714801, year = {2022}, author = {Nanjani, S and Soni, R and Paul, D and Keharia, H}, title = {Genome analysis uncovers the prolific antagonistic and plant growth-promoting potential of endophyte Bacillus velezensis K1.}, journal = {Gene}, volume = {836}, number = {}, pages = {146671}, doi = {10.1016/j.gene.2022.146671}, pmid = {35714801}, issn = {1879-0038}, mesh = {*Bacillus/genetics ; Bacillus subtilis/physiology ; Biological Control Agents ; *Endophytes/chemistry/genetics ; Genome, Bacterial ; Plant Diseases/microbiology ; Soil ; }, abstract = {Insights into the application of endophytic bacilli in sustainable agricultural practices have opened up new avenues for the inhibition of soil-borne pathogens and the improvement of plant health. Bacillus subtilis K1, an endophytic bacterium originally isolated from aerial roots of Ficus benghalensis, is a potential biocontrol agent secreting a mixture of surfactins, iturins and fengycins. The current study extends the characterization of this bacterium through genomic and comparative genomics approaches. The sequencing of the bacterial genome at Illumina MiSeq platform revealed that it possessed a 4,103,502-bp circular chromosome with 45.98% GC content and 4325 predicted protein-coding sequences. Based on phylogenomics and whole-genome average nucleotide identity, the B. subtilis K1 was taxonomically classified as Bacillus velezensis. The formerly evaluated phenotypic traits viz. C-source utilization and lipopeptide-mediated fungal antagonism were correlated to their molecular determinants. The genome also harbored several genes associated with induced systemic resistance and plant growth promotion i.e., phytohormone production, nitrogen assimilation and reduction, siderophore production, phosphate solubilization, biofilm formation, swarming motility, acetoin and butanediol synthesis. 
The production of antifungal volatile organic compounds and plant growth promotion was experimentally demonstrated by volatile compound assay and seed germination assay on cumin and groundnut. The isolate also holds great prospects for application as a soil inoculant as indicated by enhancement in the growth of groundnut via in planta pot studies. Bacterial pan-genome analysis based on a comparison of whole genomes with eighteen other Bacillus strains was also conducted. Comparative examination of biosynthetic gene clusters across all genomes indicated that the largest number of gene clusters were harbored by the K1 genome. Based on the findings, we propose K1 as a model for scrutinizing non-ribosomally synthesized peptide synthetase and polyketide synthetase derived molecules.}, } @article {pmid35712352, year = {2022}, author = {Posada-Reyes, AB and Balderas-Martínez, YI and Ávila-Ríos, S and Vinuesa, P and Fonseca-Coronado, S}, title = {An Epistatic Network Describes oppA and glgB as Relevant Genes for Mycobacterium tuberculosis.}, journal = {Frontiers in molecular biosciences}, volume = {9}, number = {}, pages = {856212}, pmid = {35712352}, issn = {2296-889X}, abstract = {Mycobacterium tuberculosis is an acid-fast bacterium that causes tuberculosis worldwide. The role of epistatic interactions among different loci of the M. tuberculosis genome under selective pressure may be crucial for understanding the disease and the molecular basis of antibiotic resistance acquisition. Here, we analyzed polymorphic loci interactions by applying a model-free method for epistasis detection, SpydrPick, on a pan-genome-wide alignment created from a set of 254 complete reference genomes. By means of the analysis of an epistatic network created with the detected epistatic interactions, we found that glgB (α-1,4-glucan branching enzyme) and oppA (oligopeptide-binding protein) are putative targets of co-selection in M. tuberculosis as they were associated in the network with M. 
tuberculosis genes related to virulence, pathogenesis, transport system modulators of the immune response, and antibiotic resistance. In addition, our work unveiled potential pharmacological applications for genotypic antibiotic resistance inherent to the mutations of glgB and oppA as they epistatically interact with fprA and embC, two genes recently included as antibiotic-resistant genes in the catalog of the World Health Organization. Our findings showed that this approach allows the identification of relevant epistatic interactions that may lead to a better understanding of M. tuberculosis by deciphering the complex interactions of molecules involved in its metabolism, virulence, and pathogenesis and that may be applied to different bacterial populations.}, } @article {pmid35710371, year = {2022}, author = {Tantoso, E and Eisenhaber, B and Kirsch, M and Shitov, V and Zhao, Z and Eisenhaber, F}, title = {To kill or to be killed: pangenome analysis of Escherichia coli strains reveals a tailocin specific for pandemic ST131.}, journal = {BMC biology}, volume = {20}, number = {1}, pages = {146}, pmid = {35710371}, issn = {1741-7007}, mesh = {Escherichia coli/genetics/metabolism ; *Escherichia coli Infections/epidemiology/microbiology ; *Escherichia coli Proteins/genetics ; Genome, Bacterial ; Humans ; Pandemics ; Phylogeny ; Prophages ; }, abstract = {BACKGROUND: Escherichia coli (E. coli) has been one of the most studied model organisms in the history of life sciences. Initially thought just to be commensal bacteria, E. coli has shown wide phenotypic diversity including pathogenic isolates with great relevance to public health. Though pangenome analysis has been attempted several times, there is no systematic functional characterization of the E. coli subgroups according to the gene profile.

RESULTS: Systematically scanning for optimal parametrization, we have built the E. coli pangenome from 1324 complete genomes. The pangenome size is estimated to be ~25,000 gene families (GFs). Whereas the core genome diminishes as more genomes are added, the softcore genome (≥95% of strains) is stable with ~3000 GFs regardless of the total number of genomes. Apparently, the softcore genome (with a 92% or 95% generation threshold) can define the genome of a bacterial species listing the critically relevant, evolutionarily most conserved or important classes of GFs. Unsupervised clustering of common E. coli sequence types using the presence/absence GF matrix reveals distinct characteristics of E. coli phylogroups B1, B2, and E. We highlight the bi-lineage nature of B1, the variation of the secretion and of the iron acquisition systems in ST11 (E), and the incorporation of a highly conserved prophage into the genome of ST131 (B2). The tail structure of the prophage is evolutionarily related to R2-pyocin (a tailocin) from Pseudomonas aeruginosa PAO1. We hypothesize that this molecular machinery is highly likely to play an important role in protecting its own colonies; thus, contributing towards the rapid rise of pandemic E. coli ST131.

CONCLUSIONS: This study has explored the optimized pangenome development in E. coli. We provide complete GF lists and the pangenome matrix as supplementary data for further studies. We identified biological characteristics of different E. coli subtypes, specifically for phylogroups B1, B2, and E. We found an operon-like genome region coding for a tailocin specific for ST131 strains. The latter is a potential killer weapon providing pandemic E. coli ST131 with an advantage in inter-bacterial competition and, suggestively, explains their dominance as human pathogen among E. coli strains.}, } @article {pmid35708861, year = {2022}, author = {Mohanty, JK and Jha, UC and Dixit, GP and Parida, SK}, title = {Harnessing the hidden allelic diversity of wild Cicer to accelerate genomics-assisted chickpea crop improvement.}, journal = {Molecular biology reports}, volume = {49}, number = {6}, pages = {5697-5715}, pmid = {35708861}, issn = {1573-4978}, mesh = {Alleles ; *Cicer/genetics ; Genome, Plant/genetics ; Genomics ; Plant Breeding ; }, abstract = {Chickpea, commonly called Bengal gram or Garbanzo bean, faces a productivity crisis around the globe due to numerous biotic and abiotic stresses. The eroded genetic base of the cultivated Cicer gene pool is becoming a significant bottleneck in developing stress-resilient chickpea cultivars. In this scenario, the crop wild relatives (CWR) of chickpea, with the useful genomic wealth of their wild adaptation, give a ray of hope to improve the genetic background of the cultivated Cicer gene pool. To extrapolate these unearthed genomic diversities of wild, we require a thorough understanding of the pre-historic domestication episodes that are changing their shape with the expansion of the available scientific evidence. 
Keeping aforesaid in view, the current review article provides a glimpsed overview on several efforts done so far to reveal the mysterious origin and evolution of the Cicer gene pool, along with the constraints in their utilization for chickpea crop improvement. It encapsulates various stress-resilient CWR of chickpea and their use in several pre-breeding programs to develop numerous breeding populations for crop genetic enhancement. Further, this review will recapitulate the significant contributions of structural, functional and comparative genomics, pan-genomics and diverse genomics-assisted breeding strategy in dissecting the untapped trait-specific allelic/gene diversity and domestication pattern behind the CWR of chickpea, along with their potential and promises. We expect the newly explored genetic variations may be used in the breeding programs for re-wilding the cultigens' genomic background to open a new avenue for genetic gain and crop improvement capacity of chickpea.}, } @article {pmid35705841, year = {2022}, author = {Kong, X and Wang, H and Guo, G and Li, P and Tong, P and Liu, M and Ma, X and Dong, C and Li, Y and Zhang, H and Zhang, W}, title = {Duck sewage source coliphage P762 can lyse STEC and APEC.}, journal = {Virus genes}, volume = {58}, number = {5}, pages = {436-447}, pmid = {35705841}, issn = {1572-994X}, support = {U1803109//Innovative Research Group Project of the National Natural Science Foundation of China/ ; BE2017654//Collaborative Innovation Center for Modern Science and Technology and Industrial Development of Jiangxi Traditional Medicine/ ; gxyq2019201//The project of supporting outstanding young talents in universities of anhui province/ ; wzykjtd202002//Wuhu Institute of Technology level science and technology team/ ; 2020jxtd282//Animal epidemic prevention and quarantine teaching team of Anhui quality engineering project/ ; }, mesh = {Agar ; Animals ; Anti-Bacterial Agents ; *Bacteriophages/genetics ; Coliphages/genetics ; 
Ducks ; *Escherichia coli Infections/microbiology/prevention & control ; Sewage ; *Shiga-Toxigenic Escherichia coli/genetics ; }, abstract = {Multiple pathogenic types or serotypes restrict treatment for colibacillosis. In addition, rising antibiotic resistance has heightened public awareness to prevent and control pathogenic Escherichia coli. The bacteriophage is a viable technique to treat colibacillosis as an alternative to antibiotics. P762, a coliphage isolated from duck farm sewage, was demonstrated to lyse Shiga toxin-producing Escherichia coli serotypes O157 and non-O157 (17/39), Avian pathogenic E. coli covered serotype O78, O83, and O9 (5/19), and other pathogenic Escherichia coli (5/17). Additional fundamental biological characteristics analysis revealed that P762 is stable at pH 3 ~ 11 and temperature between 4 °C and 60 °C, and its optimum multiplicity of infection (MOI) is 0.1. The one-step curve of P762 exhibited three bursts of growth stage: two rapid and one slow stage. Furthermore, the first rapid burst size is 80 CFU/PFU, the burst size of the slow stage is 10 CFU/PFU, and the second rapid burst size is about 990 CFU/PFU. In addition, P762 can form a "halo" on a double agar plate, implying that the phage secretes depolymerase. With 95.14% identity and 90% query coverage, genome sequence analysis revealed that P762 is most closely related to Escherichia phage DY1, which belongs to the genus Kayfunavirus. After screening using RAST and VFDB, no virulence factors were discovered in P762. In vitro antibacterial tests revealed that P762 has high bactericidal activity in lettuce leaves contaminated with STEC. 
In conclusion, phage P762 might be employed in the future to prevent and control pathogenic Escherichia coli.}, } @article {pmid35699368, year = {2022}, author = {De Oliveira, AL and Srivastava, A and Espada-Hinojosa, S and Bright, M}, title = {The complete and closed genome of the facultative generalist Candidatus Endoriftia persephone from deep-sea hydrothermal vents.}, journal = {Molecular ecology resources}, volume = {22}, number = {8}, pages = {3106-3123}, pmid = {35699368}, issn = {1755-0998}, support = {31543-B29//Austrian Science Fund/ ; }, mesh = {DNA Restriction-Modification Enzymes/genetics ; Epigenesis, Genetic ; *Hydrothermal Vents ; Sulfur ; Symbiosis/genetics ; Transposases/genetics ; }, abstract = {The mutualistic interactions between Riftia pachyptila and its endosymbiont Candidatus Endoriftia persephone (short Endoriftia) have been extensively researched. However, the closed Endoriftia genome is still lacking. Here, by employing single-molecule real-time sequencing we present the closed chromosomal sequence of Endoriftia. In contrast to theoretical predictions of enlarged and mobile genetic element-rich genomes related to facultative endosymbionts, the closed Endoriftia genome is streamlined with fewer than expected coding sequence regions, insertion-, prophage-sequences and transposase-coding sequences. Automated and manually curated functional analyses indicated that Endoriftia is more versatile regarding sulphur metabolism than previously reported. We identified the presence of two identical rRNA operons and two long CRISPR regions in the closed genome. Additionally, pangenome analyses revealed the presence of three types of secretion systems (II, IV and VI) in the different Endoriftia populations indicating lineage-specific adaptations. 
The in depth mobilome characterization identified the presence of shared genomic islands in the different Endoriftia drafts and in the closed genome, suggesting that the acquisition of foreign DNA predates the geographical dispersal of the different endosymbiont populations. Finally, we found no evidence of epigenetic regulation in Endoriftia, as revealed by gene screenings and absence of methylated modified base motifs in the genome. As a matter of fact, the restriction-modification system seems to be dysfunctional in Endoriftia, pointing to a higher importance of molecular memory-based immunity against phages via spacer incorporation into CRISPR system. The Endoriftia genome is the first closed tubeworm endosymbiont to date and will be valuable for future gene oriented and evolutionary comparative studies.}, } @article {pmid35695507, year = {2022}, author = {Mustapha, MM and Srinivasa, VR and Griffith, MP and Cho, ST and Evans, DR and Waggle, K and Ezeonwuka, C and Snyder, DJ and Marsh, JW and Harrison, LH and Cooper, VS and Van Tyne, D}, title = {Genomic Diversity of Hospital-Acquired Infections Revealed through Prospective Whole-Genome Sequencing-Based Surveillance.}, journal = {mSystems}, volume = {7}, number = {3}, pages = {e0138421}, pmid = {35695507}, issn = {2379-5077}, support = {U01 AI124302/AI/NIAID NIH HHS/United States ; KL2 TR001856/TR/NCATS NIH HHS/United States ; R21Al109459//HHS | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; R01 AI127472/AI/NIAID NIH HHS/United States ; }, mesh = {Humans ; *Genome, Bacterial/genetics ; *Genomics ; Whole Genome Sequencing ; Anti-Bacterial Agents ; Hospitals ; }, abstract = {Healthcare-associated infections (HAIs) cause mortality, morbidity, and waste of health care resources. HAIs are also an important driver of antimicrobial resistance, which is increasing around the world. 
Beginning in November 2016, we instituted an initiative to detect outbreaks of HAIs using prospective whole-genome sequencing-based surveillance of bacterial pathogens collected from hospitalized patients. Here, we describe the diversity of bacteria sampled from hospitalized patients at a single center, as revealed through systematic analysis of bacterial isolate genomes. We sequenced the genomes of 3,004 bacterial isolates from hospitalized patients collected over a 25-month period. We identified bacteria belonging to 97 distinct species, which were distributed among 14 groups of related species. Within these groups, isolates could be distinguished from one another by both average nucleotide identity (ANI) and principal-component analysis of accessory genes (PCA-A). Core genome genetic distances and rates of evolution varied among species, which has practical implications for defining shared ancestry during outbreaks and for our broader understanding of the origins of bacterial strains and species. Finally, antimicrobial resistance genes and putative mobile genetic elements were frequently observed, and our systematic analysis revealed patterns of occurrence across the different species sampled from our hospital. Overall, this study shows how understanding the population structure of diverse pathogens circulating in a single health care setting can improve the discriminatory power of genomic epidemiology studies and can help define the processes leading to strain and species differentiation. IMPORTANCE Hospitalized patients are at increased risk of becoming infected with antibiotic-resistant organisms. We used whole-genome sequencing to survey and compare over 3,000 clinical bacterial isolates collected from hospitalized patients at a large medical center over a 2-year period. We identified nearly 100 different bacterial species, which we divided into 14 different groups of related species. 
When we examined how genetic relatedness differed between species, we found that different species were likely evolving at different rates within our hospital. This is significant because the identification of bacterial outbreaks in the hospital currently relies on genetic similarity cutoffs, which are often applied uniformly across organisms. Finally, we found that antibiotic resistance genes and mobile genetic elements were abundant and were shared among the bacterial isolates we sampled. Overall, this study provides an in-depth view of the genomic diversity and evolutionary processes of bacteria sampled from hospitalized patients, as well as genetic similarity estimates that can inform hospital outbreak detection and prevention efforts.}, } @article {pmid35695431, year = {2022}, author = {Hwang, Y and Girguis, PR}, title = {Differentiated Evolutionary Strategies of Genetic Diversification in Atlantic and Pacific Thaumarchaeal Populations.}, journal = {mSystems}, volume = {7}, number = {3}, pages = {e0147721}, pmid = {35695431}, issn = {2379-5077}, mesh = {*Ammonia ; Phylogeny ; *Ecosystem ; Oceans and Seas ; Archaea/genetics ; }, abstract = {Some marine microbes are seemingly "ubiquitous," thriving across a wide range of environmental conditions. While the increased depth in metagenomic sequencing has led to a growing body of research on within-population heterogeneity in environmental microbial populations, there have been fewer systematic comparisons and characterizations of population-level genetic diversity over broader expanses of time and space. Here, we investigated the factors that govern the diversification of ubiquitous microbial taxa found within and between ocean basins. 
Specifically, we use mapped metagenomic paired reads to examine the genetic diversity of ammonia-oxidizing archaeal ("Candidatus Nitrosopelagicus brevis") populations in the Pacific (Hawaii Ocean Time-series [HOT]) and Atlantic (Bermuda Atlantic Time Series [BATS]) Oceans sampled over 2 years. We observed higher nucleotide diversity in "Ca. N. brevis" at HOT, driven by a higher rate of homologous recombination. In contrast, "Ca. N. brevis" at BATS featured a more open pangenome with a larger set of genes that were specific to BATS, suggesting a history of dynamic gene gain and loss events. Furthermore, we identified highly differentiated genes that were regulatory in function, some of which exhibited evidence of recent selective sweeps. These findings indicate that different modes of genetic diversification likely incur specific adaptive advantages depending on the selective pressures that they are under. Within-population diversity generated by the environment-specific strategies of genetic diversification is likely key to the ecological success of "Ca. N. brevis." IMPORTANCE Ammonia-oxidizing archaea (AOA) are one of the most abundant chemolithoautotrophic microbes in the marine water column and are major contributors to global carbon and nitrogen cycling. Despite their ecological importance and geographical pervasiveness, there have been limited systematic comparisons and characterizations of their population-level genetic diversity over time and space. Here, we use metagenomic time series from two ocean observatories to address the fundamental questions of how abiotic and biotic factors shape the population-level genetic diversity and how natural microbial populations adapt across diverse habitats. We show that the marine AOA "Candidatus Nitrosopelagicus brevis" in different ocean basins exhibits distinct modes of genetic diversification in response to their selective regimes shaped by nutrient availability and patterns of environmental fluctuations. 
Our findings specific to "Ca. N. brevis" have broader implications, particularly in understanding the population-level responses to the changing climate and predicting its impact on biogeochemical cycles.}, } @article {pmid35690455, year = {2022}, author = {Palma, F and Radomski, N and Guérin, A and Sévellec, Y and Félix, B and Bridier, A and Soumet, C and Roussel, S and Guillier, L}, title = {Genomic elements located in the accessory repertoire drive the adaptation to biocides in Listeria monocytogenes strains from different ecological niches.}, journal = {Food microbiology}, volume = {106}, number = {}, pages = {103757}, doi = {10.1016/j.fm.2021.103757}, pmid = {35690455}, issn = {1095-9998}, mesh = {Animals ; Benzalkonium Compounds/pharmacology ; Chlorides ; *Disinfectants/pharmacology ; Drug Resistance, Bacterial/genetics ; Ecosystem ; Genomics ; *Listeria monocytogenes ; }, abstract = {In response to the massive use of biocides for controlling Listeria monocytogenes (hereafter Lm) contaminations along the food chain, strains showing biocide tolerance emerged. Here, accessory genomic elements were associated with biocide tolerance through pangenome-wide associations performed on 197 Lm strains from different lineages, ecological, geographical and temporal origins. Mobile elements, including prophage-related loci, the Tn6188_qacH transposon and pLMST6_emrC plasmid, were widespread across lineage I and II food strains and associated with tolerance to benzalkonium-chloride (BC), a quaternary ammonium compound (QAC) widely used in food processing. The pLMST6_emrC was also associated with tolerance to another QAC, the didecyldimethylammonium-chloride, displaying a pleiotropic effect. While no associations were detected for chemically reactive biocides (alcohols and chlorines), genes encoding for cell-surface proteins were associated with BC or polymeric biguanide tolerance. The latter was restricted to lineage I strains from animal and the environment. 
In conclusion, different genetic markers, with polygenic nature or not, appear to have driven the Lm adaptation to biocide, especially in food strains but also from animal and the environment. These markers could aid to monitor and predict the spread of biocide tolerant Lm genotypes across different ecological niches, finally reducing the risk of such strains in food industrial settings.}, } @article {pmid35685364, year = {2022}, author = {Quan, C and Lu, H and Lu, Y and Zhou, G}, title = {Population-scale genotyping of structural variation in the era of long-read sequencing.}, journal = {Computational and structural biotechnology journal}, volume = {20}, number = {}, pages = {2639-2647}, pmid = {35685364}, issn = {2001-0370}, abstract = {Population-scale studies of structural variation (SV) are growing rapidly worldwide with the development of long-read sequencing technology, yielding a considerable number of novel SVs and complete gap-closed genome assemblies. Herein, we highlight recent studies using a hybrid sequencing strategy and present the challenges toward large-scale genotyping for SVs due to the reference bias. Genotyping SVs at a population scale remains challenging, which severely impacts genotype-based population genetic studies or genome-wide association studies of complex diseases. We summarize academic efforts to improve genotype quality through linear or graph representations of reference and alternative alleles. Graph-based genotypers capable of integrating diverse genetic information are effectively applied to large and diverse cohorts, contributing to unbiased downstream analysis. 
Meanwhile, there is still an urgent need in this field for efficient tools to construct complex graphs and perform sequence-to-graph alignments.}, } @article {pmid35684146, year = {2022}, author = {Lin, G and Liu, Q and Wang, L and Li, H and Zhao, J and Zhang, H and Wang, G and Chen, W}, title = {The Comparative Analysis of Genomic Diversity and Genes Involved in Carbohydrate Metabolism of Eighty-Eight Bifidobacterium pseudocatenulatum Isolates from Different Niches of China.}, journal = {Nutrients}, volume = {14}, number = {11}, pages = {}, pmid = {35684146}, issn = {2072-6643}, support = {31972052//National Natural Science Foundation of China/ ; 32021005//National Natural Science Foundation of China/ ; 31820103010//National Natural Science Foundation of China/ ; JUSRP22006//Fundamental Research Funds for the Central Universities/ ; JUSRP51501//Fundamental Research Funds for the Central Universities/ ; }, mesh = {Animals ; *Bifidobacterium pseudocatenulatum/metabolism ; Carbohydrate Metabolism/genetics ; Carbohydrates ; Cattle ; Female ; *Gastrointestinal Microbiome/genetics ; Genomics ; Glycoside Hydrolases/genetics/metabolism ; Humans ; Mice ; }, abstract = {Eighty-eight Bifidobacterium pseudocatenulatum strains, which were isolated from human, chicken and cow fecal samples from different niches of China, were compared genomically in this study to evaluate their diversity. It was found that B. pseudocatenulatum displayed a closed pan-genome, including abundant glycoside hydrolase families of the carbohydrate active enzyme (CAZy). A total of 30 kinds of glycoside hydrolases (GHs), 14 kinds of glycosyl transferases (GTs), 13 kinds of carbohydrate-binding modules (CBMs), 6 kinds of carbohydrate-esterases (CEs), and 2 kinds of auxiliary activities (AAs) gene families were identified across the genomes of the 88 B. pseudocatenulatum strains. 
Specifically, this showed that significant differences were also present in the number of 10 carbohydrate-active enzyme gene families (GT51, GH13_32, GH26, GH42, GH121, GH3, AA3, CBM46, CE2, and CE6) among the strains derived from the hosts of different age groups, particularly between strains from infants and those from other human age groups. Twelve different individuals of B. pseudocatenulatum from four main clusters were selected for further study to reveal the genetic diversity of carbohydrate metabolism-related genes within the same phylogenetics. The animal experiment showed that 3 weeks of oral administration and 1 week after cessation of administration of these strains did not markedly alter the serum routine inflammatory indicators in mice. Furthermore, the administration of these strains did not significantly cause adverse changes in the gut microbiota, as indicated by the α- and β-diversity indexes, relative to the control group (normal diet). Beyond that, FAHBZ9L5 significantly increased the abundance of B. pseudocatenulatum after 3 weeks and significantly increased the abundance of acetic acid and butyric acid in the host's intestinal tract 3 and 4 weeks after the first administration, respectively, compared with the control group. Corresponding to this, comparative genomic analyses of 12 B. pseudocatenulatum suggest that FAHBZ9L5-specific genes were rich in ABC transporters and carbohydrate esterase. Combining the results of comparative genomics analyses and animal experiment, it is suggested that the strains containing certain gene clusters contribute to another competitive growth advantage of B. 
pseudocatenulatum, which facilitates its intestinal carbohydrate metabolism in a host.}, } @article {pmid35676474, year = {2022}, author = {Zhou, Y and Zhang, Z and Bao, Z and Li, H and Lyu, Y and Zan, Y and Wu, Y and Cheng, L and Fang, Y and Wu, K and Zhang, J and Lyu, H and Lin, T and Gao, Q and Saha, S and Mueller, L and Fei, Z and Städler, T and Xu, S and Zhang, Z and Speed, D and Huang, S}, title = {Graph pangenome captures missing heritability and empowers tomato breeding.}, journal = {Nature}, volume = {606}, number = {7914}, pages = {527--534}, pmid = {35676474}, issn = {1476-4687}, mesh = {Alleles ; Crops, Agricultural/genetics ; *Genetic Variation ; *Genome, Plant/genetics ; *Genome-Wide Association Study ; Linkage Disequilibrium ; *Solanum lycopersicum/genetics/metabolism ; *Plant Breeding ; }, abstract = {Missing heritability in genome-wide association studies defines a major problem in genetic analyses of complex biological traits[1,2]. The solution to this problem is to identify all causal genetic variants and to measure their individual contributions[3,4]. Here we report a graph pangenome of tomato constructed by precisely cataloguing more than 19 million variants from 838 genomes, including 32 new reference-level genome assemblies. This graph pangenome was used for genome-wide association study analyses and heritability estimation of 20,323 gene-expression and metabolite traits. The average estimated trait heritability is 0.41 compared with 0.33 when using the single linear reference genome. This 24% increase in estimated heritability is largely due to resolving incomplete linkage disequilibrium through the inclusion of additional causal structural variants identified using the graph pangenome. 
Moreover, by resolving allelic and locus heterogeneity, structural variants improve the power to identify genetic factors underlying agronomically important traits leading to, for example, the identification of two new genes potentially contributing to soluble solid content. The newly identified structural variants will facilitate genetic improvement of tomato through both marker-assisted selection and genomic selection. Our study advances the understanding of the heritability of complex traits and demonstrates the power of the graph pangenome in crop breeding.}, } @article {pmid35672470, year = {2022}, author = {Kim, E and Yang, SM and Kim, IS and Kim, HY}, title = {Identification of novel molecular targets for Weissella species-specific real-time PCR based on pangenome analysis.}, journal = {Applied microbiology and biotechnology}, volume = {106}, number = {11}, pages = {4157--4168}, pmid = {35672470}, issn = {1432-0614}, support = {PJ01662001//Rural Development Administration/ ; }, mesh = {DNA Primers/genetics ; Humans ; RNA, Ribosomal, 16S/genetics ; Real-Time Polymerase Chain Reaction ; Species Specificity ; *Weissella/genetics ; }, abstract = {Some Weissella species are used in probiotic products because of their beneficial effects in humans, whereas some species are considered as opportunistic pathogens that cause infections in humans. Therefore, an accurate and rapid identification of Weissella species is essential to control pathogenic Weissella species or isolate new functional strains with probiotic effects from their habitat. The objective of our study was to extract novel molecular targets using pangenome analysis for the identification of major Weissella species present in food. With 50 genomes representing 11 Weissella species, novel molecular targets were mined based on their 100% presence in the respective strains of the target species and absence in the strains of non-target bacteria. 
Primers based on molecular targets showed positive results for the corresponding species, whereas 79 non-target strains showed negative results. Standard curves revealed good linearity in the range of $10^{3}$--$10^{8}$ colony-forming units per reaction. Our method was successfully applied to 74 Weissella strains isolated from food samples to demonstrate that the molecular targets provided a viable alternative to the 16S rRNA sequence. Furthermore, it was possible to identify and quantify Weissella communities in fermented foods. These results demonstrate that our method can be used for effective and accurate screening for the presence of Weissella species in foods. KEY POINTS: • This is first study to mine novel targets for differentiating 11 Weissella species. • The novel targets showed higher resolution than the 16S rRNA gene sequence. • The PCR method effectively detected Weissella species with opposing properties.}, } @article {pmid35668795, year = {2022}, author = {Sun, Y and Zhang, PT and Kou, DR and Han, YC and Fang, JC and Ni, JP and Jiang, B and Wang, X and Zhang, YJ and Wang, W and Kong, XD}, title = {Terpene Synthases in Rice Pan-Genome and Their Responses to Chilo suppressalis Larvae Infesting.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {905982}, pmid = {35668795}, issn = {1664-462X}, abstract = {Terpene synthase (TPS) catalyzes the synthesis of terpenes and plays an important role in plant defense. This study identified 45 OsTPS genes (32 core genes and 13 variable genes) based on the high-quality rice gene-based pan-genome. This indicates limitations in OsTPS gene studies based on a single reference genome. In the present study, through collinearity between multiple rice genomes, one OsTPS gene absent in the reference (Nipponbare) genome was found and two TPS genes in the reference genome were found to have atypical structures, which would have been ignored in single genome analysis. 
OsTPS genes were divided into five groups and TPS-b was lost according to the phylogenetic tree. OsTPSs in TPS-c and TPS-g were all core genes indicating these two groups were stable during domestication. In addition, through the analysis of transcriptome data, some structural variations were found to affect the expression of OsTPS genes. Through the Ka/Ks calculation of OsTPS genes, we found that different OsTPS genes were under different selection pressure during domestication; for example, OsTPS22 and OsTPS29 experienced stronger positive selection than the other OsTPS genes. After Chilo suppressalis larvae infesting, 25 differentially expressed OsTPS genes were identified, which are involved in the diterpene phytoalexins precursors biosynthesis and ent-kaurene biosynthesis pathways. Overall, the present study conducted a bioinformatics analysis of OsTPS genes using a high-quality rice pan-genome, which provided a basis for further study of OsTPS genes.}, } @article {pmid35664542, year = {2022}, author = {Orlando, F and Romanel, A and Trujillo, B and Sigouros, M and Wetterskog, D and Quaini, O and Leone, G and Xiang, JZ and Wingate, A and Tagawa, S and Jayaram, A and Linch, M and Jamal-Hanjani, M and Swanton, C and Rubin, MA and Wyatt, AW and Beltran, H and Attard, G and Demichelis, F}, title = {Allele-informed copy number evaluation of plasma DNA samples from metastatic prostate cancer patients: the PCF_SELECT consortium assay.}, journal = {NAR cancer}, volume = {4}, number = {2}, pages = {zcac016}, pmid = {35664542}, issn = {2632-8674}, support = {R37 CA241486/CA/NCI NIH HHS/United States ; }, abstract = {Sequencing of cell-free DNA (cfDNA) in cancer patients' plasma offers a minimally-invasive solution to detect tumor cell genomic alterations to aid real-time clinical decision-making. The reliability of copy number detection decreases at lower cfDNA tumor fractions, limiting utility at earlier stages of the disease. 
To test a novel strategy for detection of allelic imbalance, we developed a prostate cancer bespoke assay, PCF_SELECT, that includes an innovative sequencing panel covering ∼25 000 high minor allele frequency SNPs and tailored analytical solutions to enable allele-informed evaluation. First, we assessed it on plasma samples from 50 advanced prostate cancer patients. We then confirmed improved detection of genomic alterations in samples with <10% tumor fractions when compared against an independent assay. Finally, we applied PCF_SELECT to serial plasma samples intensively collected from three patients previously characterized as harboring alterations involving DNA repair genes and consequently offered PARP inhibition. We identified more extensive pan-genome allelic imbalance than previously recognized in prostate cancer. We confirmed high sensitivity detection of BRCA2 allelic imbalance with decreasing tumor fractions resultant from treatment and identified complex ATM genomic states that may be incongruent with protein losses. Overall, we present a framework for sensitive detection of allele-specific copy number changes in cfDNA.}, } @article {pmid35663888, year = {2022}, author = {Yero, D and Jia, B and Gao, F}, title = {Editorial: Insights in Evolutionary and Genomic Microbiology: 2021.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {915593}, pmid = {35663888}, issn = {1664-302X}, } @article {pmid35663880, year = {2022}, author = {Surachat, K and Kantachote, D and Wonglapsuwan, M and Chukamnerd, A and Deachamag, P and Mittraparp-Arthorn, P and Jeenkeawpiam, K}, title = {Complete Genome Sequence of Weissella cibaria NH9449 and Comprehensive Comparative-Genomic Analysis: Genomic Diversity and Versatility Trait Revealed.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {826683}, pmid = {35663880}, issn = {1664-302X}, abstract = {Lactic acid bacteria (LAB) in the genus Weissella spp. 
contain traits in their genome that confer versatility. In particular, Weissella cibaria encodes several beneficial genes that are useful in biotechnological applications. The complete genome of W. cibaria NH9449 was sequenced and an in silico comparative analysis was performed to gain insight into the genomic diversity among members of the genus Weissella. A total of 219 Weissella genomes were used in a bioinformatics analysis of pan-genomes, phylogenetics, self-defense mechanisms, virulence factors, antimicrobial resistance, and carbohydrate-active enzymes. These investigations showed that the strain NH9449 encodes several restriction-modification-related genes and a CRISPR-Cas region in its genome. The identification of carbohydrate-active enzyme-encoding genes indicated that this strain could be beneficial in biotechnological applications. The comparative genomic analysis reveals the very high genomic diversity in this genus, and some marked differences in genetic variation and genes among Weissella species. The calculated average amino acid identity (AAI) and phylogenetic analysis of core and accessory genes shows the possible existence of three new species in this genus. These new genomic insights into Weissella species and their biological functions could be useful in the food industry and other applications.}, } @article {pmid35663029, year = {2022}, author = {Schulz, T and Wittler, R and Stoye, J}, title = {Sequence-based pangenomic core detection.}, journal = {iScience}, volume = {25}, number = {6}, pages = {104413}, pmid = {35663029}, issn = {2589-0042}, abstract = {One of the most basic kinds of analysis to be performed on a pangenome is the detection of its core, i.e., the information shared among all members. Pangenomic core detection is classically done on the gene level and many tools focus exclusively on core detection in prokaryotes. Here, we present a new method for sequence-based pangenomic core detection. 
Our model generalizes from a strict core definition allowing us to flexibly determine suitable core properties depending on the research question and the dataset under consideration. We propose an algorithm based on a colored de Bruijn graph that runs in linear time with respect to the number of k-mers in the graph. An implementation of our method is called Corer. Because of the usage of a colored de Bruijn graph, it works alignment-free, is provided with a small memory footprint, and accepts as input assembled genomes as well as sequencing reads.}, } @article {pmid35661553, year = {2022}, author = {Carvalho, GG and Calarga, AP and Zorgi, NE and Astudillo-Trujillo, CA and Gontijo, MTP and Brocchi, M and Giorgio, S and Kabuki, DY}, title = {Virulence and DNA sequence analysis of Cronobacter spp. isolated from infant cereals.}, journal = {International journal of food microbiology}, volume = {376}, number = {}, pages = {109745}, doi = {10.1016/j.ijfoodmicro.2022.109745}, pmid = {35661553}, issn = {1879-3460}, mesh = {Aged ; *Cronobacter/genetics ; *Cronobacter sakazakii ; Edible Grain ; Food Microbiology ; Humans ; Infant ; Infant Formula ; Infant, Newborn ; Sequence Analysis, DNA ; Virulence/genetics ; }, abstract = {Cronobacter spp. is an opportunistic pathogen that causes severe infections, affecting newborns and infants, and is also an emerging cause of hospital-acquired infection in elderly populations. These infections are mainly associated with the consumption of infant formulas, even though these bacteria have been isolated from other foods as well. Cronobacter spp. invades epithelial cells and escapes the immune response mechanisms, multiplying inside macrophages. However, the pathogenesis and virulence factors of these bacteria have not been fully elucidated and need to be further studied. Therefore, this study aimed to evaluate the ability of Cronobacter spp. 
strains isolated from infant cereals to invade and survive within macrophages, investigate the virulence phenotype using the Galleria mellonella model, and identify possible genes involved in bacterial pathogenesis through pan-genome analysis. All the isolates were able to invade macrophages and the survival of bacteria decreased over a 72 h period, with bacterial cell counts reaching up to $10^{6}$ CFU/ml. Cronobacter sakazakii isolate 112 exhibited a similar mortality rate (40-70%) to the ATCC BAA 894 strain (Cronobacter sakazakii) in G. mellonella assay. In addition, some unique virulence genes (isolate 7, ada_2, tcmA_1, acrB_3; isolate 78, ampC_2, rihC_1 and isolate 112, fimH, ylpA, gtrA) were identified within isolates with the invasive profile in the in vivo and in vitro assays. Furthermore, isolates from different species were grouped into seven distinct clusters in the pan-genome analysis. The most virulent isolates (7, 78, and 112) were grouped in distinct subclusters in the cladogram. This work revealed potential Cronobacter spp. 
pathogenic strains recovered from infant cereals.}, } @article {pmid35657601, year = {2022}, author = {Labarge, B and Hennessy, M and Zhang, L and Goldrich, D and Chartrand, S and Purnell, C and Wright, S and Goldenberg, D and Broach, JR}, title = {Human Papillomavirus Integration Strictly Correlates with Global Genome Instability in Head and Neck Cancer.}, journal = {Molecular cancer research : MCR}, volume = {20}, number = {9}, pages = {1420--1428}, pmid = {35657601}, issn = {1557-3125}, mesh = {*Alphapapillomavirus/genetics ; *Carcinoma, Squamous Cell/genetics ; DNA, Viral/genetics ; Genomic Instability ; *Head and Neck Neoplasms/genetics ; Humans ; *Oropharyngeal Neoplasms/genetics ; Papillomaviridae/genetics ; *Papillomavirus Infections/genetics ; Virus Integration/genetics ; }, abstract = {UNLABELLED: Human papillomavirus (HPV)-positive head and neck cancers, predominantly oropharyngeal squamous cell carcinoma (OPSCC), exhibit epidemiologic, clinical, and molecular characteristics distinct from those OPSCCs lacking HPV. We applied a combination of whole-genome sequencing and optical genome mapping to interrogate the genome structure of HPV-positive OPSCCs. We found that the virus had integrated in the host genome in two thirds of the tumors examined but resided solely extrachromosomally in the other third. Integration of the virus occurred at essentially random sites within the genome. Focal amplification of the virus and the genomic sequences surrounding it often occurred subsequent to integration, with the number of tandem repeats in the chromosome accounting for the increased copy number of the genome sequences flanking the site of integration. In all cases, viral integration correlated with pervasive genome-wide somatic alterations at sites distinct from that of viral integration and comprised multiple insertions, deletions, translocations, inversions, and point mutations. Few or no somatic mutations were present in tumors with only episomal HPV. 
Our data could be interpreted by positing that episomal HPV is captured in the host genome following an episode of global genome instability during tumor development. Viral integration correlated with higher grade tumors, which may be explained by the associated extensive mutation of the genome and suggests that HPV integration status may inform prognosis.

IMPLICATIONS: Our results indicate that HPV integration in head and neck cancer correlates with extensive pangenomic structural variation, which may have prognostic implications.}, } @article {pmid35653240, year = {2022}, author = {Boatwright, JL and Sapkota, S and Jin, H and Schnable, JC and Brenton, Z and Boyles, R and Kresovich, S}, title = {Sorghum Association Panel whole-genome sequencing establishes cornerstone resource for dissecting genomic diversity.}, journal = {The Plant journal : for cell and molecular biology}, volume = {111}, number = {3}, pages = {888--904}, pmid = {35653240}, issn = {1365-313X}, mesh = {Edible Grain/genetics ; Genome ; Genome-Wide Association Study ; Genomics/methods ; Plant Breeding/methods ; Polymorphism, Single Nucleotide/genetics ; *Sorghum/genetics ; }, abstract = {Association mapping panels represent foundational resources for understanding the genetic basis of phenotypic diversity and serve to advance plant breeding by exploring genetic variation across diverse accessions. We report the whole-genome sequencing (WGS) of 400 sorghum (Sorghum bicolor (L.) Moench) accessions from the Sorghum Association Panel (SAP) at an average coverage of 38× (25-72×), enabling the development of a high-density genomic marker set of 43 983 694 variants including single-nucleotide polymorphisms (approximately 38 million), insertions/deletions (indels) (approximately 5 million), and copy number variants (CNVs) (approximately 170 000). We observe slightly more deletions among indels and a much higher prevalence of deletions among CNVs compared to insertions. This new marker set enabled the identification of several novel putative genomic associations for plant height and tannin content, which were not identified when using previous lower-density marker sets. WGS identified and scored variants in 5-kb bins where available genotyping-by-sequencing (GBS) data captured no variants, with half of all bins in the genome falling into this category. 
The predictive ability of genomic best unbiased linear predictor (GBLUP) models was increased by an average of 30% by using WGS markers rather than GBS markers. We identified 18 selection peaks across subpopulations that formed due to evolutionary divergence during domestication, and we found six Fst peaks resulting from comparisons between converted lines and breeding lines within the SAP that were distinct from the peaks associated with historic selection. This population has served and continues to serve as a significant public resource for sorghum research and demonstrates the value of improving upon existing genomic resources.}, } @article {pmid35647330, year = {2022}, author = {Mohite, OS and Lloyd, CJ and Monk, JM and Weber, T and Palsson, BO}, title = {Pangenome analysis of Enterobacteria reveals richness of secondary metabolite gene clusters and their associated gene sets.}, journal = {Synthetic and systems biotechnology}, volume = {7}, number = {3}, pages = {900--910}, pmid = {35647330}, issn = {2405-805X}, abstract = {In silico genome mining provides easy access to secondary metabolite biosynthetic gene clusters (BGCs) encoding the biosynthesis of many bioactive compounds, which are the basis for many important drugs used in human medicine. However, the association between BGCs and other functions encoded in the genomes of producers have remained elusive. Here, we present a systems biology workflow that integrates genome mining with a detailed pangenome analysis for detecting genes associated with a particular BGC. We analyzed 3,889 enterobacterial genomes and found 13,266 BGCs, represented by 252 distinct BGC families and 347 additional singletons. A pangenome analysis revealed 88 genes putatively associated with a specific BGC coding for the colon cancer-related colibactin that code for diverse metabolic and regulatory functions. 
The presented workflow opens up the possibility to discover novel secondary metabolites, better understand their physiological roles, and provides a guide to identify and analyze BGC associated gene sets.}, } @article {pmid35644986, year = {2022}, author = {Bayer, PE and Petereit, J and Durant, É and Monat, C and Rouard, M and Hu, H and Chapman, B and Li, C and Cheng, S and Batley, J and Edwards, D}, title = {Wheat Panache: A pangenome graph database representing presence-absence variation across sixteen bread wheat genomes.}, journal = {The plant genome}, volume = {15}, number = {3}, pages = {e20221}, doi = {10.1002/tpg2.20221}, pmid = {35644986}, issn = {1940-3372}, mesh = {*Bread ; Genome, Plant ; Plant Breeding ; Sequence Analysis, DNA ; *Triticum/genetics ; }, abstract = {Bread wheat (Triticum aestivum L.) is one of humanity's most important staple crops, characterized by a large and complex genome with a high level of gene presence-absence variation (PAV) between cultivars, hampering genomic approaches for crop improvement. With the growing global population and the increasing impact of climate change on crop yield, there is an urgent need to apply genomic approaches to accelerate wheat breeding. With recent advances in DNA sequencing technology, a growing number of high-quality reference genomes are becoming available, reflecting the genetic content of a diverse range of cultivars. However, information on the presence or absence of genomic regions has been hard to visualize and interrogate because of the size of these genomes and the lack of suitable bioinformatics tools. To address this limitation, we have produced a wheat pangenome graph maintained within an online database to facilitate interrogation and comparison of wheat cultivar genomes. 
The database allows users to visualize regions of the pangenome to assess PAV between bread wheat genomes.}, } @article {pmid35641504, year = {2022}, author = {Leonard, AS and Crysnanto, D and Fang, ZH and Heaton, MP and Vander Ley, BL and Herrera, C and Bollwein, H and Bickhart, DM and Kuhn, KL and Smith, TPL and Rosen, BD and Pausch, H}, title = {Structural variant-based pangenome construction has low sensitivity to variability of haplotype-resolved bovine assemblies.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {3012}, pmid = {35641504}, issn = {2041-1723}, mesh = {Animals ; Cattle ; Diploidy ; *Genome/genetics ; Haplotypes ; *High-Throughput Nucleotide Sequencing ; Sequence Analysis, DNA ; }, abstract = {Advantages of pangenomes over linear reference assemblies for genome research have recently been established. However, potential effects of sequence platform and assembly approach, or of combining assemblies created by different approaches, on pangenome construction have not been investigated. Here we generate haplotype-resolved assemblies from the offspring of three bovine trios representing increasing levels of heterozygosity that each demonstrate a substantial improvement in contiguity, completeness, and accuracy over the current Bos taurus reference genome. Diploid coverage as low as 20x for HiFi or 60x for ONT is sufficient to produce two haplotype-resolved assemblies meeting standards set by the Vertebrate Genomes Project. Structural variant-based pangenomes created from the haplotype-resolved assemblies demonstrate significant consensus regardless of sequence platform, assembler algorithm, or coverage. 
Inspecting pangenome topologies identifies 90 thousand structural variants including 931 overlapping with coding sequences; this approach reveals variants affecting QRICH2, PRDM9, HSPA1A, TAS2R46, and GC that have potential to affect phenotype.}, } @article {pmid35639788, year = {2022}, author = {Garrison, E and Kronenberg, ZN and Dawson, ET and Pedersen, BS and Prins, P}, title = {A spectrum of free software tools for processing the VCF variant call format: vcflib, bio-vcf, cyvcf2, hts-nim and slivar.}, journal = {PLoS computational biology}, volume = {18}, number = {5}, pages = {e1009123}, pmid = {35639788}, issn = {1553-7358}, support = {R01 GM123489/GM/NIGMS NIH HHS/United States ; }, mesh = {Computational Biology ; *Ecosystem ; *Genetic Variation/genetics ; Nucleotides ; Software ; }, abstract = {Since its introduction in 2011 the variant call format (VCF) has been widely adopted for processing DNA and RNA variants in practically all population studies-as well as in somatic and germline mutation studies. The VCF format can represent single nucleotide variants, multi-nucleotide variants, insertions and deletions, and simple structural variants called and anchored against a reference genome. Here we present a spectrum of over 125 useful, complimentary free and open source software tools and libraries, we wrote and made available through the multiple vcflib, bio-vcf, cyvcf2, hts-nim and slivar projects. These tools are applied for comparison, filtering, normalisation, smoothing and annotation of VCF, as well as output of statistics, visualisation, and transformations of files variants. These tools run everyday in critical biomedical pipelines and countless shell scripts. Our tools are part of the wider bioinformatics ecosystem and we highlight best practices. 
We shortly discuss the design of VCF, lessons learnt, and how we can address more complex variation through pangenome graph formats, variation that can not easily be represented by the VCF format.}, } @article {pmid35639001, year = {2022}, author = {Koide, S and Nagano, Y and Takizawa, S and Sakaguchi, K and Soga, E and Hayashi, W and Tanabe, M and Denda, T and Kimura, K and Arakawa, Y and Nagano, N}, title = {Genomic Traits Associated with Virulence and Antimicrobial Resistance of Invasive Group B Streptococcus Isolates with Reduced Penicillin Susceptibility from Elderly Adults.}, journal = {Microbiology spectrum}, volume = {10}, number = {3}, pages = {e0056822}, pmid = {35639001}, issn = {2165-0497}, mesh = {Adult ; Aged ; Anti-Bacterial Agents/pharmacology ; *Bacteremia ; Drug Resistance, Bacterial/genetics ; Genomics ; Humans ; Infant, Newborn ; Microbial Sensitivity Tests ; Penicillins/pharmacology ; Phylogeny ; *Streptococcal Infections/epidemiology ; Streptococcus agalactiae/genetics ; Virulence/genetics ; }, abstract = {This study aimed to investigate genomic traits underlying the antimicrobial resistance and virulence of multidrug-resistant (MDR) group B streptococci with reduced penicillin susceptibility (PRGBS) recovered from elderly patients with bloodstream infections, which remain poorly characterized. The pangenome was found to be open, with the predicted pan- and core genome sizes being 3,531 and 1,694 genes, respectively. Accessory and unique genes were enriched for the Clusters of Orthologous Groups (COG) categories L, Replication, recombination, and repair, and K, Transcription. All MDR PRGBS isolates retained a core virulence gene repertoire (bibA, fbsA/-B/-C, cspA, cfb, hylB, scpB, lmb, and the cyl operon), supporting an invasive ability similar to that of the other invasive GBS, penicillin-susceptible GBS (PSGBS), and noninvasive PRGBS isolates. 
The putative sequence type 1 (ST1)-specific AlpST-1 virulence gene was also retained among the serotype Ia/ST1 PRGBS isolates. In addition to tet(M) and erm(B), mef(A)-msr(D) elements or the high-level gentamicin resistance gene aac(6')-aph(2″), which are both rare in PSGBS, were detected among those MDR PRGBS isolates. In the core single-nucleotide polymorphism (SNP) phylogenetic tree, all invasive ST1 PRGBS isolates with serotypes Ia and III were placed together in a clade with a recombination rate of 3.97, which was 36 times higher than the value found for a clade formed by serotype V/ST1 PSGBS isolates derived mostly from human blood. ST1 has been the predominant sequence type among the PRGBS isolates in Japan, and serotypes Ia and III have been very rare among the ST1 PSGBS isolates. Thus, these lineages that mostly consisted of serotypes Ia/ST1 and III/ST1 PRGBS could possibly emerge through recombination within the ST1 populations. IMPORTANCE Streptococcus agalactiae, or group B Streptococcus (GBS), is recognized as the leading cause of neonatal invasive infections. However, an increasing incidence of invasive GBS infections among nonpregnant adults, particularly the elderly and those with underlying diseases, has been observed. There is a trend toward the increasing occurrence of penicillin nonsusceptibility among GBS clinical isolates, from 4.8% in 2008 to 5.8% in 2020 in Japan. Also, in the United States, the frequency of adult invasive GBS isolates suggestive of β-lactam nonsusceptibility increased from 0.7% in 2015 to 1.0% in 2016. In adults, mortality has been significantly higher among patients with bacteremia than among those without bacteremia. 
Our study revealed that invasive GBS with reduced penicillin susceptibility (PRGBS) isolates harbor major virulence and resistance genes known among GBS, highlighting the need for large population-based genomic surveillance studies to better understand the clinical relevance of invasive PRGBS isolates.}, } @article {pmid35638828, year = {2022}, author = {Shambhu, S and Cella, E and Jubair, M and Azarian, T}, title = {Complete Genome Sequences of Nine Streptococcus pneumoniae Serotype 3 Clonal Complex 180 Strains.}, journal = {Microbiology resource announcements}, volume = {11}, number = {7}, pages = {e0027522}, pmid = {35638828}, issn = {2576-098X}, abstract = {We announce the complete genomes of nine Streptococcus pneumoniae strains belonging to serotype 3 clonal complex 180 (CC180). The genomes consist of a single circularized contig with an average length of 2.033 Mbp. Pangenome analysis identified 1,762 core genes and 412 accessory genes. These genomes are the basis for future population genomic studies.}, } @article {pmid35632394, year = {2022}, author = {Attar, R and Alatawi, EA and Aba Alkhayl, FF and Alharbi, KN and Allemailem, KS and Almatroudi, A}, title = {Immunoinformatics and Biophysics Approaches to Design a Novel Multi-Epitopes Vaccine Design against Staphylococcus auricularis.}, journal = {Vaccines}, volume = {10}, number = {5}, pages = {}, pmid = {35632394}, issn = {2076-393X}, abstract = {Due to the misuse of antibiotics in our daily lives, antimicrobial resistance (AMR) has become a major health problem. Penicillin, the first antibiotic, was used in the 1930s and led to the emergence of AMR. Due to alterations in the microbe's genome and the evolution of new resistance mechanisms, antibiotics are losing efficacy against microbes. There are high rates of mortality and morbidity due to antibiotic resistance, so addressing this major health issue requires new approaches. 
Staphylococcus auricularis is a Gram-positive cocci and is capable of causing opportunistic infections and sepsis. S. auricularis is resistant to several antibiotics and does not currently have a licensed vaccine. In this study, we used bacterial pan-genome analysis (BPGA) to study S. auricularis pan-genome and applied a reverse immunology approach to prioritize vaccine targets against S. auricularis. A total of 15,444 core proteins were identified by BPGA analysis, which were then used to identify good vaccine candidates considering potential vaccine filters. Two vaccine candidates were evaluated for epitope prediction including the superoxide dismutase and gamma-glutamyl transferase protein. The epitope prediction phase involved the prediction of a variety of B-Cell and T-cell epitopes, and the epitopes that met certain criteria, such as antigenicity, immunogenicity, non-allergenicity, and non-toxicity were chosen. A multi-epitopes vaccine construct was then constructed from all the predicted epitopes, and a cholera toxin B-subunit adjuvant was also added to increase vaccine antigenicity. Three-dimensional models of the vaccine were used for downward analyses. Using the best-modeled structure, binding potency was tested with MHC-I, MHC-II and TLR-4 immune cells receptors, proving that the vaccine binds strongly with the receptors. Further, molecular dynamics simulations interpreted strong intermolecular binding between the vaccine and receptors and confirmed the vaccine epitopes exposed to the host immune system. The results support that the vaccine candidate may be capable of eliciting a protective immune response against S. 
auricularis and may be a promising candidate for experimental in vitro and in vivo studies.}, } @article {pmid35630466, year = {2022}, author = {Pédron, J and van der Wolf, JM and Portier, P and Caullireau, E and Van Gijsegem, F}, title = {The Broad Host Range Plant Pathogen Dickeya dianthicola Shows a High Genetic Diversity.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630466}, issn = {2076-2607}, support = {SPREE (ANR-17-CE32-0004-04)//Agence Nationale de la Recherche/ ; }, abstract = {The wide host range phytopathogen D. dianthicola, first described in ornamentals in the 1950s, rapidly became a threat for potato production in Europe and, more recently, worldwide. Previous genomic analyses, mainly of strains isolated from potato, revealed little sequence diversity. To further analyse D. dianthicola genomic diversity, we used a larger genome panel of 41 isolates encompassing more strains isolated from potato over a wide time scale and more strains isolated from other hosts. The phylogenetic and pan-genomic trees revealed a large cluster of highly related genomes but also the divergence of two more distant strains, IPO 256 and 67.19, isolated from potato and impatiens, respectively, and the clustering of the three strains isolated from Kalanchoe with one more distinct potato strain. An SNP-based minimal spanning tree highlighted both diverse clusters of (nearly) clonal strains and several strains scattered in the MST, irrespective of country or date of isolation, that differ by several thousand SNPs. This study reveals a higher diversity in D. dianthicola than previously described. It indicates the clonal spread of this pathogen over long distances, as suspected from worldwide seed trading, and possible multiple introductions of D. 
dianthicola from alternative sources of contaminations.}, } @article {pmid35630423, year = {2022}, author = {Hwang, CY and Cho, ES and Yoon, DJ and Cha, IT and Jung, DH and Nam, YD and Park, SL and Lim, SI and Seo, MJ}, title = {Genomic and Physiological Characterization of Metabacillus flavus sp. nov., a Novel Carotenoid-Producing Bacilli Isolated from Korean Marine Mud.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630423}, issn = {2076-2607}, support = {Collaborate Research Program//Korea Food Research Institute (KFRI) and the Korean Institute of Geoscience and Mineral Resources (KIGAM)/ ; Research Assistance Program (2020)//Incheon National University/ ; }, abstract = {The newly isolated strain KIGAM252[T] was found to be facultatively anaerobic, Gram-stain-positive, spore-forming, and rod-shaped. They grew at 10-45 °C, pH 6.0-10.0, and were able to tolerate up to 6% NaCl in the growth medium. Phylogenetic analysis indicated that the KIGAM252[T] strain was related to the genus Metabacillus. The cell membrane fatty acid composition of strain KIGAM252[T] included C15:0 anteiso and C15:0 iso (25.6%) as the major fatty acids, and menaquinone 7 was the predominant isoprenoid quinone. The major polar lipids were diphosphatidylglycerol and phosphatidylglycerol. The size of the whole genome was 4.30 Mbp, and the G + C content of the DNA was 43.8%. Average nucleotide and amino acid identity and in silico DNA-DNA hybridization values were below the species delineation threshold. Pan-genomic analysis revealed that 15.8% of all genes present in strain KIGAM252[T] was unique to the strain. The analysis of the secondary biosynthetic pathway predicted the carotenoid synthetic gene cluster in the strain KIGAM252[T]. Based on these current polyphasic taxonomic data, strain KIGAM252[T] represents a novel species of the genus Metabacillus that produces carotenoids, for which we propose the name Metabacillus flavus sp. nov. 
The type of strain was KIGAM252[T] (=KCTC 43261[T] = JCM 34406[T]).}, } @article {pmid35630358, year = {2022}, author = {Uceda-Campos, G and Feitosa-Junior, OR and Santiago, CRN and Pierry, PM and Zaini, PA and de Santana, WO and Martins-Junior, J and Barbosa, D and Digiampietri, LA and Setubal, JC and da Silva, AM}, title = {Comparative Genomics of Xylella fastidiosa Explores Candidate Host-Specificity Determinants and Expands the Known Repertoire of Mobile Genetic Elements and Immunity Systems.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630358}, issn = {2076-2607}, support = {08/11703-4//São Paulo Research Foundation/ ; 3385/2013//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; 11/09409-3//São Paulo Research Foundation/ ; 09/13527-1//São Paulo Research Foundation/ ; 11/01217-8//São Paulo Research Foundation/ ; }, abstract = {Xylella fastidiosa causes diseases in many plant species. Originally confined to the Americas, infecting mainly grapevine, citrus, and coffee, X. fastidiosa has spread to several plant species in Europe causing devastating diseases. Many pathogenicity and virulence factors have been identified, which enable the various X. fastidiosa strains to successfully colonize the xylem tissue and cause disease in specific plant hosts, but the mechanisms by which this happens have not been fully elucidated. Here we present thorough comparative analyses of 94 whole-genome sequences of X. fastidiosa strains from diverse plant hosts and geographic regions. Core-genome phylogeny revealed clades with members sharing mostly a geographic region rather than a host plant of origin. Phylogenetic trees for 1605 orthologous CDSs were explored for potential candidates related to host specificity using a score of mapping metrics. However, no candidate host-specificity determinants were strongly supported using this approach. We also show that X. 
fastidiosa accessory genome is represented by an abundant and heterogeneous mobilome, including a diversity of prophage regions. Our findings provide a better understanding of the diversity of phylogenetically close genomes and expand the knowledge of X. fastidiosa mobile genetic elements and immunity systems.}, } @article {pmid35630311, year = {2022}, author = {Carter, MQ and Laniohan, N and Lo, CC and Chain, PSG}, title = {Comparative Genomics Applied to Systematically Assess Pathogenicity Potential in Shiga Toxin-Producing Escherichia coli O145:H28.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630311}, issn = {2076-2607}, support = {5325-42000-052-00D//Agricultural Research Service/ ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) O145:H28 can cause severe disease in humans and is a predominant serotype in STEC O145 environmental isolates. Here, comparative genomics was applied to a set of clinical and environmental strains to systematically evaluate the pathogenicity potential in environmental strains. While the core genes-based tree separated all O145:H28 strains from the non O145:H28 reference strains, it failed to segregate environmental strains from the clinical. In contrast, the accessory genes-based tree placed all clinical strains in the same clade regardless of their genotypes or serotypes, apart from the environmental strains. Loss-of-function mutations were common in the virulence genes examined, with a high frequency in genes related to adherence, autotransporters, and the type three secretion system. Distinct differences in pathogenicity islands LEE, OI-122, and OI-57, the acid fitness island, and the tellurite resistance island were detected between the O145:H28 and reference strains. A great amount of genetic variation was detected in O145:H28, which was mainly attributed to deletions, insertions, and gene acquisition at several chromosomal "hot spots". 
Our study demonstrated a distinct virulence gene repertoire among the STEC O145:H28 strains originating from the same geographical region and revealed unforeseen contributions of loss-of-function mutations to virulence evolution and genetic diversification in STEC.}, } @article {pmid35628779, year = {2022}, author = {Liebal, UW and Ullmann, L and Lieven, C and Kohl, P and Wibberg, D and Zambanini, T and Blank, LM}, title = {Ustilago maydis Metabolic Characterization and Growth Quantification with a Genome-Scale Metabolic Model.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {5}, pages = {}, pmid = {35628779}, issn = {2309-608X}, support = {FSC-2186//Deutsche Forschungsgemeinschaft/ ; }, abstract = {Ustilago maydis is an important plant pathogen that causes corn smut disease and serves as an effective biotechnological production host. The lack of a comprehensive metabolic overview hinders a full understanding of the organism's environmental adaptation and a full use of its metabolic potential. Here, we report the first genome-scale metabolic model (GSMM) of Ustilago maydis (iUma22) for the simulation of metabolic activities. iUma22 was reconstructed from sequencing and annotation using PathwayTools, and the biomass equation was derived from literature values and from the codon composition. The final model contains over 25% annotated genes (6909) in the sequenced genome. Substrate utilization was corrected by BIOLOG phenotype arrays, and exponential batch cultivations were used to test growth predictions. The growth data revealed a decrease in glucose uptake rate with rising glucose concentration. A pangenome of four different U. maydis strains highlighted missing metabolic pathways in iUma22. 
The new model allows for studies of metabolic adaptations to different environmental niches as well as for biotechnological applications.}, } @article {pmid35628419, year = {2022}, author = {Edwards, S and León-Zayas, R and Ditter, R and Laster, H and Sheehan, G and Anderson, O and Beattie, T and Mellies, JL}, title = {Microbial Consortia and Mixed Plastic Waste: Pangenomic Analysis Reveals Potential for Degradation of Multiple Plastic Types via Previously Identified PET Degrading Bacteria.}, journal = {International journal of molecular sciences}, volume = {23}, number = {10}, pages = {}, pmid = {35628419}, issn = {1422-0067}, support = {1931150//National Science Foundation/ ; }, mesh = {Bacteria/genetics/metabolism ; *Microbial Consortia ; Plasticizers ; Plastics/metabolism ; *Polyethylene Terephthalates ; }, abstract = {The global utilization of single-use, non-biodegradable plastics, such as bottles made of polyethylene terephthalate (PET), has contributed to catastrophic levels of plastic pollution. Fortunately, microbial communities are adapting to assimilate plastic waste. Previously, our work showed a full consortium of five bacteria capable of synergistically degrading PET. Using omics approaches, we identified the key genes implicated in PET degradation within the consortium's pangenome and transcriptome. This analysis led to the discovery of a novel PETase, EstB, which has been observed to hydrolyze the oligomer BHET and the polymer PET. Besides the genes implicated in PET degradation, many other biodegradation genes were discovered. Over 200 plastic and plasticizer degradation-related genes were discovered through the Plastic Microbial Biodegradation Database (PMBD). Diverse carbon source utilization was observed by a microbial community-based assay, which, paired with an abundant number of plastic- and plasticizer-degrading enzymes, indicates a promising possibility for mixed plastic degradation. 
Using RNAseq differential analysis, several genes were predicted to be involved in PET degradation, including aldehyde dehydrogenases and several classes of hydrolases. Active transcription of PET monomer metabolism was also observed, including the generation of polyhydroxyalkanoate (PHA)/polyhydroxybutyrate (PHB) biopolymers. These results present an exciting opportunity for the bio-recycling of mixed plastic waste with upcycling potential.}, } @article {pmid35627219, year = {2022}, author = {Wu, XT and Xiong, ZP and Chen, KX and Zhao, GR and Feng, KR and Li, XH and Li, XR and Tian, Z and Huo, FL and Wang, MX and Song, W}, title = {Genome-Wide Identification and Transcriptional Expression Profiles of PP2C in the Barley (Hordeum vulgare L.) Pan-Genome.}, journal = {Genes}, volume = {13}, number = {5}, pages = {}, pmid = {35627219}, issn = {2073-4425}, mesh = {Domestication ; Genes, Plant ; Genome, Plant ; *Hordeum/enzymology/genetics ; *Multigene Family ; Phylogeny ; *Protein Phosphatase 2C/genetics ; }, abstract = {The gene family protein phosphatase 2C (PP2C) is related to developmental processes and stress responses in plants. Barley (Hordeum vulgare L.) is a popular cereal crop that is primarily utilized for human consumption and nutrition. However, there is little knowledge regarding the PP2C gene family in barley. In this study, a total of 1635 PP2C genes were identified in 20 barley pan-genome accessions. Then, chromosome localization, physical and chemical feature predictions and subcellular localization were systematically analyzed. One wild barley accession (B1K-04-12) and one cultivated barley (Morex) were chosen as representatives to further analyze and compare the differences in HvPP2Cs between wild and cultivated barley. Phylogenetic analysis showed that these HvPP2Cs were divided into 12 subgroups. 
Additionally, gene structure, conserved domain and motif, gene duplication event detection, interaction networks and gene expression profiles were analyzed in accessions Morex and B1K-04-12. In addition, qRT-PCR experiments in Morex indicated that seven HvMorexPP2C genes were involved in the response to aluminum and low pH stresses. Finally, a series of positively selected homologous genes were identified between wild accession B1K-04-12 and another 14 cultivated materials, indicating that these genes are important during barley domestication. This work provides a global overview of the putative physiological and biological functions of PP2C genes in barley. We provide a broad framework for understanding the domestication- and evolutionary-induced changes in PP2C genes between wild and cultivated barley.}, } @article {pmid35625323, year = {2022}, author = {Khan, K and Basharat, Z and Jalal, K and Mashraqi, MM and Alzamami, A and Alshamrani, S and Uddin, R}, title = {Identification of Therapeutic Targets in an Emerging Gastrointestinal Pathogen Campylobacter ureolyticus and Possible Intervention through Natural Products.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {11}, number = {5}, pages = {}, pmid = {35625323}, issn = {2079-6382}, abstract = {Campylobacter ureolyticus is a Gram-negative, anaerobic, non-spore-forming bacteria that causes gastrointestinal infections. Being the most prevalent cause of bacterial enteritis globally, infection by this bacterium is linked with significant morbidity and mortality in children and immunocompromised patients. No information on pan-therapeutic drug targets for this species is available yet. In the current study, a pan-genome analysis was performed on 13 strains of C. ureolyticus to prioritize potent drug targets from the identified core genome. In total, 26 druggable proteins were identified using subtractive genomics. 
To the best of the authors' knowledge, this is the first report on the mining of drug targets in C. ureolyticus. UDP-3-O-acyl-N-acetylglucosamine deacetylase (LpxC) was selected as a promiscuous pharmacological target for virtual screening of two bacterial-derived natural product libraries, i.e., postbiotics (n = 78) and streptomycin (n = 737) compounds. LpxC inhibitors from the ZINC database (n = 142 compounds) were also studied with reference to LpxC of C. ureolyticus. The top three docked compounds from each library (including ZINC26844580, ZINC13474902, ZINC13474878, Notoginsenoside St-4, Asiaticoside F, Paraherquamide E, Phytoene, Lycopene, and Sparsomycin) were selected based on their binding energies and validated using molecular dynamics simulations. To help identify potential risks associated with the selected compounds, ADMET profiling was also performed and most of the compounds were considered safe. Our findings may serve as baseline information for laboratory studies leading to the discovery of drugs for use against C. 
ureolyticus infections.}, } @article {pmid35616118, year = {2022}, author = {Yao, E and Blake, VC and Cooper, L and Wight, CP and Michel, S and Cagirici, HB and Lazo, GR and Birkett, CL and Waring, DJ and Jannink, JL and Holmes, I and Waters, AJ and Eickholt, DP and Sen, TZ}, title = {GrainGenes: a data-rich repository for small grains genetics and genomics.}, journal = {Database : the journal of biological databases and curation}, volume = {2022}, number = {}, pages = {}, pmid = {35616118}, issn = {1758-0463}, mesh = {Avena/genetics ; Chromosome Mapping ; Databases, Genetic ; *Genome, Plant/genetics ; Genomics ; *Hordeum/genetics ; Quantitative Trait Loci ; Triticum/genetics ; }, abstract = {As one of the US Department of Agriculture-Agricultural Research Service flagship databases, GrainGenes (https://wheat.pw.usda.gov) serves the data and community needs of globally distributed small grains researchers for the genetic improvement of the Triticeae family and Avena species that include wheat, barley, rye and oat. GrainGenes accomplishes its mission by continually enriching its cross-linked data content following the findable, accessible, interoperable and reusable principles, enhancing and maintaining an intuitive web interface, creating tools to enable easy data access and establishing data connections within and between GrainGenes and other biological databases to facilitate knowledge discovery. GrainGenes operates within the biological database community, collaborates with curators and genome sequencing groups and contributes to the AgBioData Consortium and the International Wheat Initiative through the Wheat Information System (WheatIS). Interactive and linked content is paramount for successful biological databases and GrainGenes now has 2917 manually curated gene records, including 289 genes and 254 alleles from the Wheat Gene Catalogue (WGC). 
There are >4.8 million gene models in 51 genome browser assemblies, 6273 quantitative trait loci and >1.4 million genetic loci on 4756 genetic and physical maps contained within 443 mapping sets, complete with standardized metadata. Most notably, 50 new genome browsers that include outputs from the Wheat and Barley PanGenome projects have been created. We provide an example of an expression quantitative trait loci track on the International Wheat Genome Sequencing Consortium Chinese Spring wheat browser to demonstrate how genome browser tracks can be adapted for different data types. To help users benefit more from its data, GrainGenes created four tutorials available on YouTube. GrainGenes is executing its vision of service by continuously responding to the needs of the global small grains community by creating a centralized, long-term, interconnected data repository. Database URL: https://wheat.pw.usda.gov.}, } @article {pmid35615513, year = {2022}, author = {Neuzil-Bunesova, V and Ramirez Garcia, A and Modrackova, N and Makovska, M and Sabolova, M and Spröer, C and Bunk, B and Blom, J and Schwab, C}, title = {Feed Insects as a Reservoir of Granadaene-Producing Lactococci.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {848490}, pmid = {35615513}, issn = {1664-302X}, abstract = {Insects are a component of the diet of different animal species and have been suggested as the major source of human dietary protein for the future. However, insects are also carriers of potentially pathogenic microbes that constitute a risk to food and feed safety. In this study, we reported the occurrence of a hemolytic orange pigmented producing phenotype of Lactococcus garvieae/petauri/formosensis in the fecal microbiota of golden lion tamarins (Leontopithecus rosalia) and feed larvae (Zophobas atratus). Feed insects were identified as a regular source of L. garvieae/petauri/formosensis based on a reanalysis of available 16S rRNA gene libraries. 
Pan-genome analysis suggested the existence of four clusters within the L. garvieae/petauri/formosensis group. The presence of cyl cluster indicated that some strains of the L. garvieae/petauri/formosensis group produced a pigment similar to granadaene, an orange cytotoxic lipid produced by group B streptococci, including Streptococcus agalactiae. Pigment production by L. garvieae/petauri/formosensis strains was dependent on the presence of the fermentable sugars, with no pigment being observed at pH <4.7. The addition of buffering compounds or arginine, which can be metabolized to ammonium, restored pigment formation. In addition, pigment formation might be related to the source of peptone. These data suggest that edible insects are a possible source of granadaene-producing lactococci, which can be considered a pathogenic risk with zoonotic potential.}, } @article {pmid35612623, year = {2022}, author = {Bach, E and Rangel, CP and Ribeiro, IDA and Passaglia, LMP}, title = {Pangenome analyses of Bacillus pumilus, Bacillus safensis, and Priestia megaterium exploring the plant-associated features of bacilli strains isolated from canola.}, journal = {Molecular genetics and genomics : MGG}, volume = {297}, number = {4}, pages = {1063--1079}, pmid = {35612623}, issn = {1617-4623}, support = {155771/2018-3//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; }, mesh = {*Bacillus/genetics ; *Bacillus pumilus/genetics ; DNA ; Phylogeny ; }, abstract = {Previous genome mining of the strains Bacillus pumilus 7PB, Bacillus safensis 1TAz, 8Taz, and 32PB, and Priestia megaterium 16PB isolated from canola revealed differences in the profile of antimicrobial biosynthetic genes when compared to the species type strains. To evaluate not only the similarities among B. pumilus, B. safensis, and P. megaterium genomes but also the specificities found in the canola bacilli, we performed comparative genomic analyses through the pangenome evaluation of each species. 
Besides that, other genome features were explored, especially focusing on plant-associated and biotechnological characteristics. The combination of the genome metrics Average Nucleotide Identity and digital DNA-DNA hybridization formulas 1 and 3 adopting the universal thresholds of 95 and 70%, respectively, was suitable to verify the identification of strains from these groups. On average, core genes corresponded to 45%, 52%, and 34% of B. pumilus, B. safensis, and P. megaterium open pangenomes, respectively. Many genes related to adaptations to plant-associated lifestyles were predicted, especially in the Bacillus genomes. These included genes for acetoin production, polyamines utilization, root exudate chemoreceptors, biofilm formation, and plant cell-wall degrading enzymes. Overall, we could observe that strains of these species exhibit many features in common, whereas most of their variable genome portions have features yet to be uncovered. The observed antifungal activity of canola bacilli might be a result of the synergistic action of secondary metabolites, siderophores, and chitinases. Genome analysis confirmed that these species and strains have biotechnological potential to be used both as agricultural inoculants or hydrolases producers. Up to our knowledge, this is the first work that evaluates the pangenome features of P. 
megaterium.}, } @article {pmid35604683, year = {2022}, author = {Saldarriaga-Córdoba, M and Avendaño-Herrera, R}, title = {Comparative pan-genomic analysis of 51 Renibacterium salmoninarum indicates heterogeneity in the principal virulence factor, the 57 kDa protein.}, journal = {Journal of fish diseases}, volume = {45}, number = {8}, pages = {1173--1188}, doi = {10.1111/jfd.13653}, pmid = {35604683}, issn = {1365-2761}, support = {grant FONDAP 15110027//Agencia Nacional de Investigación y Desarrollo (ANID, Chile)/ ; }, mesh = {Animals ; *Fish Diseases/microbiology ; Genomics ; *Kidney Diseases/microbiology ; *Micrococcaceae/genetics ; Phylogeny ; Renibacterium ; Salmon ; Virulence Factors/genetics/metabolism ; }, abstract = {Renibacterium salmoninarum, a Gram-positive intracellular pathogen, is the causative agent of bacterial kidney disease (BKD), the impacts of which are high mortalities and economic losses for the salmon industry. This study provides novel analyses for the whole-genome sequences of 50 R. salmoninarum isolates and the reference strain ATCC 33209 using a pan-genomic approach to elucidate phylogenomic relationships and identify unique and shared genes associated with pathogenicity and infection mechanisms. Genome size varied from 3,061,638 to 3,155,332 bp; gene count from 3452 to 3580; and predicted coding sequences from 3402 to 3527. Comparative analyses revealed an open, but approaching closed, pan-genome. The pan-genome analysis recovered 4064 genes, with a core genome containing 3306 genes. Phylogenetic analysis of R. salmoninarum showed high genomic homogeneity, apart from one isolate obtained from Salmo trutta in Norway. All genomes presented the 57-kDa protein (p57). Strain ATCC 33209 and the Chilean isolates H-2 and DJ2R presented two copies of the msa gene, while the remaining isolates had one copy. 
The pan-genome analysis further identified differences in the number of copies and length of the signalling peptide for p57, the principal virulence factor reported for this bacterium. This heterogeneity could be associated with the secretion levels of p57, potentially influencing virulence. Additionally identified were numerous common genes related to iron uptake, the stress response and regulation, and cell signalling-all of which constitute the pathogenic repertoire of R. salmoninarum. This investigation provides information that is applicable in future studies for identifying therapeutic targets and/or for designing new strategies (e.g., vaccines) to prevent BKD infections in salmon farming.}, } @article {pmid35604129, year = {2022}, author = {de Korne-Elenbaas, J and Bruisten, SM and van Dam, AP and Maiden, MCJ and Harrison, OB}, title = {The Neisseria gonorrhoeae Accessory Genome and Its Association with the Core Genome and Antimicrobial Resistance.}, journal = {Microbiology spectrum}, volume = {10}, number = {3}, pages = {e0265421}, pmid = {35604129}, issn = {2165-0497}, support = {/WT_/Wellcome Trust/United Kingdom ; 218205/Z/19/Z/WT_/Wellcome Trust/United Kingdom ; 214374/Z/18/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/genetics ; *Gonorrhea ; Humans ; Microbial Sensitivity Tests ; *Neisseria gonorrhoeae/genetics ; }, abstract = {The bacterial accessory genome provides the genetic flexibility needed to facilitate environment and host adaptation. In Neisseria gonorrhoeae, known accessory elements include plasmids which can transfer and mediate antimicrobial resistance (AMR); however, chromosomal accessory genes could also play a role in AMR. Here, the gonococcal accessory genome was characterized using gene-by-gene approaches and its association with the core genome and AMR were assessed. 
The gonococcal accessory gene pool consisted of 247 genes, which were mainly genes located on large mobile genetic elements, phage associated genes, or genes encoding putative secretion systems. Accessory elements showed similar synteny across genomes, indicating either a predisposition for particular genomic locations or ancestral inheritance that are conserved during strain expansion. Significant associations were found between the prevalence of accessory elements and core genome multi-locus sequence types (cgMLST), consistent with a structured gonococcal population despite frequent horizontal gene transfer (HGT). Increased prevalence of putative DNA exchange regulators was significantly associated with AMR, which included a putative secretion system, methyltransferases and a toxin-antitoxin system. Although frequent HGT results in high genetic diversity in the gonococcus, we found that this is mediated by a small gene pool. In fact, a highly organized genome composition was identified with a strong association between the accessory and core genome. Increased prevalence of DNA exchange regulators in antimicrobial resistant isolates suggests that genetic material exchange plays a role in the development or maintenance of AMR. These findings enhance our understanding of gonococcal genome architecture and have important implications for gonococcal population biology. IMPORTANCE The emergence of antimicrobial resistance (AMR) against third generation cephalosporins in Neisseria gonorrhoeae is a major public health concern, as these are antibiotics of last resort for the effective treatment of gonorrhea. Although the resistance mechanisms against this class of antibiotics have not been entirely resolved, resistance against other classes of antibiotics, such as tetracyclines, is known to be mediated through plasmids, which are known gonococcal extra-chromosomal accessory elements. 
A complete assessment of the chromosomal accessory genome content and its role in AMR has not yet been undertaken. Here, we comprehensively characterize the gonococcal accessory genome to better understand genome architecture as well as the evolution and mechanisms of AMR in this species.}, } @article {pmid35602063, year = {2022}, author = {Wang, C and Ye, Q and Jiang, A and Zhang, J and Shang, Y and Li, F and Zhou, B and Xiang, X and Gu, Q and Pang, R and Ding, Y and Wu, S and Chen, M and Wu, Q and Wang, J}, title = {Pseudomonas aeruginosa Detection Using Conventional PCR and Quantitative Real-Time PCR Based on Species-Specific Novel Gene Targets Identified by Pangenome Analysis.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {820431}, pmid = {35602063}, issn = {1664-302X}, abstract = {Mining novel specific molecular targets and establishing efficient identification methods are significant for detecting Pseudomonas aeruginosa, which can enable P. aeruginosa tracing in food and water. Pangenome analysis was used to analyze the whole genomic sequences of 2017 strains (including 1,000 P. aeruginosa strains and 1,017 other common foodborne pathogen strains) downloaded from gene databases to obtain novel species-specific genes, yielding a total of 11 such genes. Four novel target genes, UCBPP-PA14_00095, UCBPP-PA14_03237, UCBPP-PA14_04976, and UCBPP-PA14_03627, were selected for use, which had 100% coverage in the target strain and were not present in nontarget bacteria. PCR primers (PA1, PA2, PA3, and PA4) and qPCR primers (PA12, PA13, PA14, and PA15) were designed based on these target genes to establish detection methods. For the PCR primer set, the minimum detection limit for DNA was 65.4 fg/μl, which was observed for primer set PA2 of the UCBPP-PA14_03237 gene. 
The detection limit in pure culture without pre-enrichment was 10[5] colony-forming units (CFU)/ml for primer set PA1, 10[3] CFU/ml for primer set PA2, and 10[4] CFU/ml for primer set PA3 and primer set PA4. Then, qPCR standard curves were established based on the novel species-specific targets. The standard curves showed perfect linear correlations, with R [2] values of 0.9901 for primer set PA12, 0.9915 for primer set PA13, 0.9924 for primer set PA14, and 0.9935 for primer set PA15. The minimum detection limit of the real-time PCR (qPCR) assay was 10[2] CFU/ml for pure cultures of P. aeruginosa. Compared with the endpoint PCR and traditional culture methods, the qPCR assay was more sensitive by one or two orders of magnitude. The feasibility of these methods was satisfactory in terms of sensitivity, specificity, and efficiency after evaluating 29 ready-to-eat vegetable samples and was almost consistent with that of the national standard detection method. The developed assays can be applied for rapid screening and detection of pathogenic P. aeruginosa, providing accurate results to inform effective monitoring measures in order to improve microbiological safety.}, } @article {pmid35602040, year = {2022}, author = {Geng, R and Cheng, L and Cao, C and Liu, Z and Liu, D and Xiao, Z and Wu, X and Huang, Z and Feng, Q and Luo, C and Chen, Z and Zhang, Z and Jiang, C and Ren, M and Yang, A}, title = {Comprehensive Analysis Reveals the Genetic and Pathogenic Diversity of Ralstonia solanacearum Species Complex and Benefits Its Taxonomic Classification.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {854792}, pmid = {35602040}, issn = {1664-302X}, abstract = {Ralstonia solanacearum species complex (RSSC) is a diverse group of plant pathogens that attack a wide range of hosts and cause devastating losses worldwide. 
In this study, we conducted a comprehensive analysis of 131 RSSC strains to detect their genetic diversity, pathogenicity, and evolution dynamics. Average nucleotide identity analysis was performed to explore the genomic relatedness among these strains, and finally obtained an open pangenome with 32,961 gene families. To better understand the diverse evolution and pathogenicity, we also conducted a series of analyses of virulence factors (VFs) and horizontal gene transfer (HGT) in the pangenome and at the single genome level. The distribution of VFs and mobile genetic elements (MGEs) showed significant differences among different groups and strains, which were consistent with the new nomenclatures of the RSSC with three distinct species. Further functional analysis showed that most HGT events conferred from Burkholderiales and played a great role in shaping the genomic plasticity and genetic diversity of RSSC genomes. Our work provides insights into the genetic polymorphism, evolution dynamics, and pathogenetic variety of RSSC and provides strong supports for the new taxonomic classification, as well as abundant resources for studying host specificity and pathogen emergence.}, } @article {pmid35602010, year = {2022}, author = {Mizzi, R and Plain, KM and Whittington, R and Timms, VJ}, title = {Global Phylogeny of Mycobacterium avium and Identification of Mutation Hotspots During Niche Adaptation.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {892333}, pmid = {35602010}, issn = {1664-302X}, abstract = {Mycobacterium avium is separated into four subspecies: M. avium subspecies avium (MAA), M. avium subspecies silvaticum (MAS), M. avium subspecies hominissuis (MAH), and M. avium subspecies paratuberculosis (MAP). Understanding the mechanisms of host and tissue adaptation leading to their clinical significance is vital to reduce the economic, welfare, and public health concerns associated with diseases they may cause in humans and animals. 
Despite substantial phenotypic diversity, the subspecies nomenclature is controversial due to high genetic similarity. Consequently, a set of 1,230 M. avium genomes was used to generate a phylogeny, investigate SNP hotspots, and identify subspecies-specific genes. Phylogeny reiterated the findings from previous work and established that Mycobacterium avium is a species made up of one highly diverse subspecies, known as MAH, and at least two clonal pathogens, named MAA and MAP. Pan-genomes identified coding sequences unique to each subspecies, and in conjunction with a mapping approach, mutation hotspot regions were revealed compared to the reference genomes for MAA, MAH, and MAP. These subspecies-specific genes may serve as valuable biomarkers, providing a deeper understanding of genetic differences between M. avium subspecies and the virulence mechanisms of mycobacteria. Furthermore, SNP analysis demonstrated common regions between subspecies that have undergone extensive mutations during niche adaptation. 
The findings provide insights into host and tissue specificity of this genetically conserved but phenotypically diverse species, with the potential to provide new diagnostic targets and epidemiological and therapeutic advances.}, } @article {pmid35590072, year = {2022}, author = {Verdez, S and Thomas, Q and Garret, P and Verstuyft, C and Tisserant, E and Vitobello, A and Mau-Them, FT and Philippe, C and Bardou, M and Luu, M and Bourredjem, A and Callier, P and Thauvin-Robinet, C and Picard, N and Faivre, L and Duffourd, Y}, title = {Exome sequencing allows detection of relevant pharmacogenetic variants in epileptic patients.}, journal = {The pharmacogenomics journal}, volume = {22}, number = {5-6}, pages = {258--263}, pmid = {35590072}, issn = {1473-1150}, mesh = {Humans ; *Pharmacogenomic Variants ; Phenytoin ; Exome/genetics ; Retrospective Studies ; *Epilepsy/diagnosis/drug therapy/genetics ; }, abstract = {Beyond the identification of causal genetic variants in the diagnosis of Mendelian disorders, exome sequencing can detect numerous variants with potential relevance for clinical care. Clinical interventions can thus be conducted to improve future health outcomes for patients and their at-risk relatives, such as predicting late-onset genetic disorders accessible to prevention, treatment or identifying differential drug efficacy and safety. To evaluate the interest of such pharmacogenetic information, we designed an "in house" pipeline to determine the status of 122 PharmGKB (Pharmacogenomics Knowledgebase) variant-drug combinations in 31 genes. This pipeline was applied to a cohort of 90 epileptic patients who had previously an exome sequencing (ES) analysis, to determine the frequency of pharmacogenetic variants. We performed a retrospective analysis of drug plasma concentrations and treatment efficacy in patients bearing at least one relevant PharmGKB variant. 
For PharmGKB level 1A variants, CYP2C9 status for phenytoin prescription was the only relevant information. Nineteen patients were treated with phenytoin, among phenytoin-treated patients, none were poor metabolizers and four were intermediate metabolizers. While being treated with a standard protocol (10-23 mg/kg/30 min loading dose followed by 5 mg/kg/8 h maintenance dose), all identified intermediate metabolizers had toxic plasma concentrations (20 mg/L). In epileptic patients, pangenomic sequencing can provide information about common pharmacogenetic variants likely to be useful to guide therapeutic drug monitoring, and in the case of phenytoin, to prevent clinical toxicity caused by high plasma levels.}, } @article {pmid35588244, year = {2022}, author = {Gluck-Thaler, E and Ralston, T and Konkel, Z and Ocampos, CG and Ganeshan, VD and Dorrance, AE and Niblack, TL and Wood, CW and Slot, JC and Lopez-Nicora, HD and Vogan, AA}, title = {Giant Starship Elements Mobilize Accessory Genes in Fungal Genomes.}, journal = {Molecular biology and evolution}, volume = {39}, number = {5}, pages = {}, pmid = {35588244}, issn = {1537-1719}, mesh = {DNA Transposable Elements ; Eukaryotic Cells ; *Genome, Fungal ; Humans ; *Virulence Factors ; }, abstract = {Accessory genes are variably present among members of a species and are a reservoir of adaptive functions. In bacteria, differences in gene distributions among individuals largely result from mobile elements that acquire and disperse accessory genes as cargo. In contrast, the impact of cargo-carrying elements on eukaryotic evolution remains largely unknown. Here, we show that variation in genome content within multiple fungal species is facilitated by Starships, a newly discovered group of massive mobile elements that are 110 kb long on average, share conserved components, and carry diverse arrays of accessory genes. 
We identified hundreds of Starship-like regions across every major class of filamentous Ascomycetes, including 28 distinct Starships that range from 27 to 393 kb and last shared a common ancestor ca. 400 Ma. Using new long-read assemblies of the plant pathogen Macrophomina phaseolina, we characterize four additional Starships whose activities contribute to standing variation in genome structure and content. One of these elements, Voyager, inserts into 5S rDNA and contains a candidate virulence factor whose increasing copy number has contrasting associations with pathogenic and saprophytic growth, suggesting Voyager's activity underlies an ecological trade-off. We propose that Starships are eukaryotic analogs of bacterial integrative and conjugative elements based on parallels between their conserved components and may therefore represent the first dedicated agents of active gene transfer in eukaryotes. Our results suggest that Starships have shaped the content and structure of fungal genomes for millions of years and reveal a new concerted route for evolution throughout an entire eukaryotic phylum.}, } @article {pmid35585492, year = {2022}, author = {Ghimire, N and Kim, B and Lee, CM and Oh, TJ}, title = {Comparative genome analysis among Variovorax species and genome guided aromatic compound degradation analysis emphasizing 4-hydroxybenzoate degradation in Variovorax sp. PAMC26660.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {375}, pmid = {35585492}, issn = {1471-2164}, support = {PM21030//the Ministry of Oceans and Fisheries, Korea/ ; }, mesh = {Carbon ; *Parabens ; Phylogeny ; *Xenobiotics ; }, abstract = {BACKGROUND: While the genus Variovorax is known for its aromatic compound metabolism, no detailed study of the peripheral and central pathways of aromatic compound degradation has yet been reported. Variovorax sp. PAMC26660 is a lichen-associated bacterium isolated from Antarctica. 
The work presents the genome-based elucidation of peripheral and central catabolic pathways of aromatic compound degradation genes in Variovorax sp. PAMC26660. Additionally, the accessory, core and unique genes were identified among Variovorax species using the pan genome analysis tool. A detailed analysis of the genes related to xenobiotic metabolism revealed the potential roles of Variovorax sp. PAMC26660 and other species in bioremediation.

RESULTS: TYGS analysis, dDDH, phylogenetic placement and average nucleotide identity (ANI) analysis identified the strain as Variovorax sp. Cell morphology was assessed using scanning electron microscopy (SEM). On analysis of the core, accessory, and unique genes, xenobiotic metabolism accounted only for the accessory and unique genes. On detailed analysis of the aromatic compound catabolic genes, peripheral pathway related to 4-hydroxybenzoate (4-HB) degradation was found among all species while phenylacetate and tyrosine degradation pathways were present in most of the species including PAMC26660. Likewise, central catabolic pathways, like protocatechuate, gentisate, homogentisate, and phenylacetyl-CoA, were also present. The peripheral pathway for 4-HB degradation was functionally tested using PAMC26660, which resulted in the growth using it as a sole source of carbon.

CONCLUSIONS: Computational tools for genome and pan genome analysis are important to understand the behavior of an organism. Xenobiotic metabolism-related genes, that only account for the accessory and unique genes infer evolution through events like lateral gene transfer, mutation and gene rearrangement. 4-HB, an aromatic compound present among lichen species is utilized by lichen-associated Variovorax sp. PAMC26660 as the sole source of carbon. The strain holds genes and pathways for its utilization. Overall, this study outlines the importance of Variovorax in bioremediation and presents the genomic information of the species.}, } @article {pmid35579358, year = {2022}, author = {Nanni, AV and Morse, AM and Newman, JRB and Choquette, NE and Wedow, JM and Liu, Z and Leakey, ADB and Conesa, A and Ainsworth, EA and McIntyre, LM}, title = {Variation in leaf transcriptome responses to elevated ozone corresponds with physiological sensitivity to ozone across maize inbred lines.}, journal = {Genetics}, volume = {221}, number = {4}, pages = {}, pmid = {35579358}, issn = {1943-2631}, support = {R01 GM128193/GM/NIGMS NIH HHS/United States ; R03 CA222444/CA/NCI NIH HHS/United States ; }, mesh = {Gene Expression Regulation, Plant ; Genotype ; *Ozone/metabolism/toxicity ; Plant Leaves/genetics/metabolism ; Transcriptome ; *Zea mays/genetics/metabolism ; }, abstract = {We examine the impact of sustained elevated ozone concentration on the leaf transcriptome of 5 diverse maize inbred genotypes, which vary in physiological sensitivity to ozone (B73, Mo17, Hp301, C123, and NC338), using long reads to assemble transcripts and short reads to quantify expression of these transcripts. More than 99% of the long reads, 99% of the assembled transcripts, and 97% of the short reads map to both B73 and Mo17 reference genomes. 
Approximately 95% of the genes with assembled transcripts belong to known B73-Mo17 syntenic loci and 94% of genes with assembled transcripts are present in all temperate lines in the nested association mapping pan-genome. While there is limited evidence for alternative splicing in response to ozone stress, there is a difference in the magnitude of differential expression among the 5 genotypes. The transcriptional response to sustained ozone stress in the ozone resistant B73 genotype (151 genes) was modest, while more than 3,300 genes were significantly differentially expressed in the more sensitive NC338 genotype. There is the potential for tandem duplication in 30% of genes with assembled transcripts, but there is no obvious association between potential tandem duplication and differential expression. Genes with a common response across the 5 genotypes (83 genes) were associated with photosynthesis, in particular photosystem I. The functional annotation of genes not differentially expressed in B73 but responsive in the other 4 genotypes (789) identifies reactive oxygen species. This suggests that B73 has a different response to long-term ozone exposure than the other 4 genotypes. The relative magnitude of the genotypic response to ozone, and the enrichment analyses are consistent regardless of whether aligning short reads to: long read assembled transcripts; the B73 reference; the Mo17 reference. 
We find that prolonged ozone exposure directly impacts the photosynthetic machinery of the leaf.}, } @article {pmid35578144, year = {2022}, author = {Alvarez, F and Simonetti, E and Draghi, WO and Vinacour, M and Palumbo, MC and Do Porto, DF and Montecchia, MS and Roberts, IN and Ruiz, JA}, title = {Genome mining of Burkholderia ambifaria strain T16, a rhizobacterium able to produce antimicrobial compounds and degrade the mycotoxin fusaric acid.}, journal = {World journal of microbiology \& biotechnology}, volume = {38}, number = {7}, pages = {114}, pmid = {35578144}, issn = {1573-0972}, support = {PICT2017-3193//Agencia Nacional de Promoción Científica y Tecnológica/ ; PICT2017-1500//Agencia Nacional de Promoción Científica y Tecnológica/ ; PICT2017-1500//Agencia Nacional de Promoción Científica y Tecnológica/ ; PICT2017-1500//Agencia Nacional de Promoción Científica y Tecnológica/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; }, mesh = {*Anti-Infective Agents/metabolism ; *Burkholderia/metabolism ; *Burkholderia cepacia complex/genetics ; Fusaric Acid/metabolism ; Genome, Bacterial ; *Mycotoxins/metabolism ; }, abstract = {Burkholderia ambifaria T16 is a bacterium isolated from the rhizosphere of barley plants that showed a remarkable antifungal activity. 
This strain was also able to degrade fusaric acid (5-Butylpyridine-2-carboxylic acid) and detoxify this mycotoxin in inoculated barley seedlings. Genes and enzymes responsible for fusaric acid degradation have an important biotechnological potential in the control of fungal diseases caused by fusaric acid producers, or in the biodegradation/bio catalysis processes of pyridine derivatives. In this study, the complete genome of B. ambifaria T16 was sequenced and analyzed to identify genes involved in survival and competition in the rhizosphere, plant growth promotion, fungal growth inhibition, and degradation of aromatic compounds. The genomic analysis revealed the presence of several operons for the biosynthesis of antimicrobial compounds, such as pyrrolnitrin, ornibactin, occidiofungin and the membrane-associated AFC-BC11. These compounds were also detected in bacterial culture supernatants by mass spectrometry analysis. In addition, this strain has multiple genes contributing to its plant growth-promoting profile, including those for acetoin, 2,3-butanediol and indole-3-acetic acid production, siderophores biosynthesis, and solubilisation of organic and inorganic phosphate. A pan-genomic analysis demonstrated that the genome of strain T16 possesses large gene clusters that are absent in the genomes of B. ambifaria reference strains. According to predictions, most of these clusters would be involved in aromatic compounds degradation. 
One genomic region, encoding flavin-dependent monooxygenases of unknown function, is proposed as a candidate responsible for fusaric acid degradation.}, } @article {pmid35575437, year = {2022}, author = {Abdullah, IT and Ulijasz, AT and Girija, UV and Tam, S and Andrew, P and Hiller, NL and Wallis, R and Yesilkaya, H}, title = {Structure-function analysis for the development of peptide inhibitors for a Gram-positive quorum sensing system.}, journal = {Molecular microbiology}, volume = {117}, number = {6}, pages = {1464--1478}, pmid = {35575437}, issn = {1365-2958}, support = {R01 AI135060/AI/NIAID NIH HHS/United States ; R01 AI139077/AI/NIAID NIH HHS/United States ; }, mesh = {Bacterial Proteins/metabolism ; *Gene Expression Regulation, Bacterial ; Peptides/metabolism ; *Quorum Sensing/genetics ; Streptococcus pneumoniae/metabolism ; }, abstract = {The Streptococcus pneumoniae Rgg144/SHP144 regulator-peptide quorum sensing (QS) system is critical for nutrient utilization, oxidative stress response, and virulence. Here, we characterized this system by assessing the importance of each residue within the active short hydrophobic peptide (SHP) by alanine-scanning mutagenesis and testing the resulting peptides for receptor binding and activation of the receptor. Interestingly, several of the mutations had little effect on binding to Rgg144 but reduced transcriptional activation appreciably. In particular, a proline substitution (P21A) reduced transcriptional activation by 29-fold but bound with a 3-fold higher affinity than the wild-type SHP. Consistent with the function of Rgg144, the mutant peptide led to decreased utilization of mannose and increased susceptibility to superoxide generator paraquat. Pangenome comparison showed full conservation of P21 across SHP144 allelic variants. Crystallization of Rgg144 in the absence of peptide revealed a comparable structure to the DNA bound and free forms of its homologs suggesting similar mechanisms of activation. 
Together, these analyses identify key interactions in a critical pneumococcal QS system. Further manipulation of the SHP has the potential to facilitate the development of inhibitors that are functional across strains. The approach described here is likely to be effective across QS systems in multiple species.}, } @article {pmid35573770, year = {2022}, author = {Chen, H and Li, Y and Xie, X and Chen, M and Xue, L and Wang, J and Ye, Q and Wu, S and Yang, R and Zhao, H and Zhang, J and Ding, Y and Wu, Q}, title = {Exploration of the Molecular Mechanisms Underlying the Anti-Photoaging Effect of Limosilactobacillus fermentum XJC60.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {838060}, pmid = {35573770}, issn = {2235-2988}, mesh = {Animals ; Antioxidants/metabolism ; Guinea Pigs ; Reactive Oxygen Species/metabolism ; Skin ; *Skin Aging ; Ultraviolet Rays ; }, abstract = {Although lactic acid bacteria (LAB) were shown to be effective for preventing photoaging, the underlying molecular mechanisms have not been fully elucidated. Accordingly, we examined the anti-photoaging potential of 206 LAB isolates and discovered 32 strains with protective activities against UV-induced injury. All of these 32 LABs exhibited high levels of 2,2-diphenyl-picrylhydrazyl, as well as hydroxyl free radical scavenging ability (46.89-85.13% and 44.29-95.97%, respectively). Genome mining and metabonomic verification of the most effective strain, Limosilactobacillus fermentum XJC60, revealed that the anti-photoaging metabolite of LAB was nicotinamide (NAM; 18.50 mg/L in the cell-free serum of XJC60). Further analysis revealed that LAB-derived NAM could reduce reactive oxygen species levels by 70%, stabilize the mitochondrial membrane potential, and increase the NAD[+]/NADH ratio in UV-injured skin cells. 
Furthermore, LAB-derived NAM downregulated the transcript levels of matrix metalloproteinase (MMP)-1, MMP-3, interleukin (IL)-1β, IL-6, and IL-8 in skin cells. In vivo, XJC60 relieved inflammation and protected skin collagen fiber integrity in UV-injured Guinea pigs. Overall, our findings elucidate that LAB-derived NAM might protect skin from photoaging by stabilizing mitochondrial function, establishing a theoretical foundation for the use of probiotics in the maintenance of skin health.}, } @article {pmid35567182, year = {2022}, author = {Petereit, J and Marsh, JI and Bayer, PE and Danilevicz, MF and Thomas, WJW and Batley, J and Edwards, D}, title = {Genetic and Genomic Resources for Soybean Breeding Research.}, journal = {Plants (Basel, Switzerland)}, volume = {11}, number = {9}, pages = {}, pmid = {35567182}, issn = {2223-7747}, support = {DP210100296; DP200100762, and DE210100398//Australian Research Council/ ; 9177539 and 9177591//Grains Research and Development Corporation/ ; }, abstract = {Soybean (Glycine max) is a legume species of significant economic and nutritional value. The yield of soybean continues to increase with the breeding of improved varieties, and this is likely to continue with the application of advanced genetic and genomic approaches for breeding. Genome technologies continue to advance rapidly, with an increasing number of high-quality genome assemblies becoming available. With accumulating data from marker arrays and whole-genome resequencing, studying variations between individuals and populations is becoming increasingly accessible. Furthermore, the recent development of soybean pangenomes has highlighted the significant structural variation between individuals, together with knowledge of what has been selected for or lost during domestication and breeding, information that can be applied for the breeding of improved cultivars. 
Because of this, resources such as genome assemblies, SNP datasets, pangenomes and associated databases are becoming increasingly important for research underlying soybean crop improvement.}, } @article {pmid35563985, year = {2022}, author = {Yi, Z and Xie, J}, title = {Genomic Analysis of Two Representative Strains of Shewanella putrefaciens Isolated from Bigeye Tuna: Biofilm and Spoilage-Associated Behavior.}, journal = {Foods (Basel, Switzerland)}, volume = {11}, number = {9}, pages = {}, pmid = {35563985}, issn = {2304-8158}, support = {31972142//National Natural Science Foundation of China/ ; 19DZ1207503//key project of Science and Technology Commission of Shanghai Municipality/ ; CARS-47//China Agriculture Research System of MOF and MARA/ ; 19DZ2284000//Shanghai Municipal Science and Technology Project to enhance the capabilities of the platform/ ; }, abstract = {Shewanella putrefaciens can cause the spoilage of seafood and shorten its shelf life. In this study, both strains of S. putrefaciens (YZ08 and YZ-J) isolated from spoiled bigeye tuna were subjected to in-depth phenotypic and genotypic characterization to better understand their roles in seafood spoilage. The complete genome sequences of strains YZ08 and YZ-J were reported. Unique genes of the two S. putrefaciens strains were identified by pan-genomic analysis. In vitro experiments revealed that YZ08 and YZ-J could adapt to various environmental stresses, including cold-shock temperature, pH, NaCl, and nutrient stresses. YZ08 was better at adapting to NaCl stress, and its genome possessed more NaCl stress-related genes compared with the YZ-J strain. YZ-J was a higher biofilm and exopolysaccharide producer than YZ08 at 4 and 30 °C, while YZ08 showed greater motility and enhanced capacity for biogenic amine metabolism, trimethylamine metabolism, and sulfur metabolism compared with YZ-J at both temperatures. 
That YZ08 produced low biofilm and exopolysaccharide contents and displayed high motility may be associated with the presence of a greater number of genes encoding chemotaxis-related proteins (cheX) and low expression of the bpfA operon. This study provided novel molecular targets for the development of new antiseptic antisepsis strategies.}, } @article {pmid35562911, year = {2022}, author = {Du, Y and Jin, Y and Li, B and Yue, J and Yin, Z}, title = {Comparative Genomic Analysis of Vibrio cincinnatiensis Provides Insights into Genetic Diversity, Evolutionary Dynamics, and Pathogenic Traits of the Species.}, journal = {International journal of molecular sciences}, volume = {23}, number = {9}, pages = {}, pmid = {35562911}, issn = {1422-0067}, support = {ZR2021QC208//Shandong Provincial Natural Science Foundation/ ; 010/721000//Scientific Research Foundation of Shandong Agricultural University/ ; 2018ZX10101-003-001-008//the National Science and Technology Major Project of Infectious Diseases/ ; }, mesh = {Gene Transfer, Horizontal ; Genetic Variation ; *Genome, Bacterial ; Genomics/methods ; Phylogeny ; *Vibrio/genetics ; }, abstract = {Vibrio cincinnatiensis is a poorly understood pathogenic Vibrio species, and the underlying mechanisms of its genetic diversity, genomic plasticity, evolutionary dynamics, and pathogenicity have not yet been comprehensively investigated. Here, a comparative genomic analysis of V. cincinnatiensis was constructed. The open pan-genome with a flexible gene repertoire exhibited genetic diversity. The genomic plasticity and stability were characterized by the determinations of diverse mobile genetic elements (MGEs) and barriers to horizontal gene transfer (HGT), respectively. Evolutionary divergences were exhibited by the difference in functional enrichment and selective pressure between the different components of the pan-genome. The evolution on the Chr I and Chr II core genomes was mainly driven by purifying selection. 
Predicted essential genes in V. cincinnatiensis were mainly found in the core gene families on Chr I and were subject to stronger evolutionary constraints. We identified diverse virulence-related elements, including the gene clusters involved in encoding flagella, secretion systems, several pili, and scattered virulence genes. Our results indicated the pathogenic potential of V. cincinnatiensis and highlighted that HGT events from other Vibrio species promoted pathogenicity. This pan-genome study provides comprehensive insights into this poorly understood species from the genomic perspective.}, } @article {pmid35560205, year = {2022}, author = {Song, JM and Zhang, Y and Zhou, ZW and Lu, S and Ma, W and Lu, C and Chen, LL and Guo, L}, title = {Oil plant genomes: current state of the science.}, journal = {Journal of experimental botany}, volume = {73}, number = {9}, pages = {2859--2874}, doi = {10.1093/jxb/erab472}, pmid = {35560205}, issn = {1460-2431}, support = {31871658//National Natural Science Foundation of China/ ; 2019CFA014//Hubei Provincial Natural Science Foundation of China/ ; 2016YFD0101000//National Key Research and Development Plan of China/ ; B20051//Higher Education Discipline Innovation Project/ ; }, mesh = {Crops, Agricultural/genetics ; *Genome, Plant ; Genomics ; *Polyploidy ; }, abstract = {Vegetable oils are an indispensable nutritional component of the human diet as well as important raw materials for a variety of industrial applications such as pharmaceuticals, cosmetics, oleochemicals, and biofuels. Oil plant genomes are highly diverse, and their genetic variation leads to a diversity in oil biosynthesis and accumulation along with agronomic traits. This review discusses plant oil biosynthetic pathways, current state of genome assembly, polyploidy and asymmetric evolution of genomes of oil plants and their wild relatives, and research progress of pan-genomics in oil plants. 
The availability of complete high-resolution genomes and pan-genomes has enabled the identification of structural variations in the genomes that are associated with the diversity of agronomic and environment fitness traits. These and future genomes also provide powerful tools to understand crop evolution and to harvest the rich natural variations to improve oil crops for enhanced productivity, oil quality, and adaptability to changing environments.}, } @article {pmid35557713, year = {2022}, author = {Zhou, J and Hu, M and Hu, A and Li, C and Ren, X and Tao, M and Xue, Y and Chen, S and Tang, C and Xu, Y and Zhang, L and Zhou, X}, title = {Isolation and Genome Analysis of Pectobacterium colocasium sp. nov. and Pectobacterium aroidearum, Two New Pathogens of Taro.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {852750}, pmid = {35557713}, issn = {1664-462X}, abstract = {Bacterial soft rot is one of the most destructive diseases of taro (Colocasia esculenta) worldwide. In recent years, frequent outbreaks of soft rot disease have seriously affected taro production and became a major constraint to the development of taro planting in China. However, little is known about the causal agents of this disease, and the only reported pathogens are two Dickeya species and P. carotovorum. In this study, we report taro soft rot caused by two novel Pectobacterium strains, LJ1 and LJ2, isolated from taro corms in Ruyuan County, Shaoguan City, Guangdong Province, China. We showed that LJ1 and LJ2 fulfill Koch's postulates for taro soft rot. The two pathogens can infect taro both individually and simultaneously, and neither synergistic nor antagonistic interaction was observed between the two pathogens. Genome sequencing of the two strains indicated that LJ1 represents a novel species of the genus Pectobacterium, for which the name "Pectobacterium colocasium sp. nov." is proposed, while LJ2 belongs to Pectobacterium aroidearum. 
Pan-genome analysis revealed multiple pathogenicity-related differences between LJ1, LJ2, and other Pectobacterium species, including unique virulence factors, variation in the copy number and organization of Type III, IV, and VI secretion systems, and differential production of plant cell wall degrading enzymes. This study identifies two new soft rot Pectobacteriaceae (SRP) pathogens causing taro soft rot in China, reports a new case of co-infection of plant pathogens, and provides valuable resources for further investigation of the pathogenic mechanisms of SRP.}, } @article {pmid35552372, year = {2022}, author = {Guarracino, A and Heumos, S and Nahnsen, S and Prins, P and Garrison, E}, title = {ODGI: understanding pangenome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {13}, pages = {3319--3326}, pmid = {35552372}, issn = {1367-4811}, support = {R01 GM123489/GM/NIGMS NIH HHS/United States ; U01 DA047638/DA/NIDA NIH HHS/United States ; #2118709//NSF PPoSS/ ; //Federal Ministry for Economic Affairs and Energy of Germany/ ; //BMBF/ ; 031A537B//German Network for Bioinformatics Infrastructure/ ; }, mesh = {*Software ; *Genome ; Genomics ; Algorithms ; Documentation ; }, abstract = {MOTIVATION: Pangenome graphs provide a complete representation of the mutual alignment of collections of genomes. These models offer the opportunity to study the entire genomic diversity of a population, including structurally complex regions. Nevertheless, analyzing hundreds of gigabase-scale genomes using pangenome graphs is difficult as it is not well-supported by existing tools. Hence, fast and versatile software is required to ask advanced questions to such data in an efficient way.

RESULTS: We wrote Optimized Dynamic Genome/Graph Implementation (ODGI), a novel suite of tools that implements scalable algorithms and has an efficient in-memory representation of DNA pangenome graphs in the form of variation graphs. ODGI supports pre-built graphs in the Graphical Fragment Assembly format. ODGI includes tools for detecting complex regions, extracting pangenomic loci, removing artifacts, exploratory analysis, manipulation, validation and visualization. Its fast parallel execution facilitates routine pangenomic tasks, as well as pipelines that can quickly answer complex biological questions of gigabase-scale pangenome graphs.

AVAILABILITY AND IMPLEMENTATION: ODGI is published as free software under the MIT open source license. Source code can be downloaded from https://github.com/pangenome/odgi and documentation is available at https://odgi.readthedocs.io. ODGI can be installed via Bioconda https://bioconda.github.io/recipes/odgi/README.html or GNU Guix https://github.com/pangenome/odgi/blob/master/guix.scm.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35551692, year = {2022}, author = {Cella, E and Sutcliffe, CG and Tso, C and Paul, E and Ritchie, N and Colelay, J and Denny, E and Grant, LR and Weatherholtz, RC and Hammitt, LL and Azarian, T}, title = {Carriage prevalence and genomic epidemiology of Staphylococcus aureus among Native American children and adults in the Southwestern USA.}, journal = {Microbial genomics}, volume = {8}, number = {5}, pages = {}, pmid = {35551692}, issn = {2057-5858}, support = {K22 AI141582/AI/NIAID NIH HHS/United States ; }, mesh = {Adult ; Child ; Cross-Sectional Studies ; Genomics ; Humans ; Prevalence ; *Staphylococcal Infections/epidemiology/microbiology ; *Staphylococcus aureus ; American Indian or Alaska Native ; }, abstract = {Native American individuals in the Southwestern USA experience a higher burden of invasive Staphylococcus aureus disease than the general population. However, little is known about S. aureus carriage in these communities. A cross-sectional study was conducted to determine the carriage prevalence, risk factors and genomic epidemiology of S. aureus among Native American children (<5 years, n=121) and adults (≥18 years, n=167) in the Southwestern USA. Short- and long-read sequencing data were generated using Illumina and Oxford Nanopore Technology platforms to produce high-quality hybrid assemblies, and antibiotic-resistance, virulence and pangenome analyses were performed. S. aureus carriage prevalence was 20.7 % among children, 30.2 % among adults 18-64 years and 16.7 % among adults ≥65 years. Risk factors among adults included recent surgery, prior S. aureus infection among household members, and recent use of gyms or locker rooms by household members. No risk factors were identified among children. The bacterial population structure was dominated by clonal complex 1 (CC1) (21.1 %), CC5 (22.2 %) and CC8 (22.2 %). 
Isolates from children and adults were intermixed throughout the phylogeny. While the S. aureus population was diverse, the carriage prevalence was comparable to that in the general USA population. Genomic and risk-factor data suggest household, community and healthcare transmission are important components of the local epidemiology.}, } @article {pmid35550024, year = {2022}, author = {Mesa, V and Monot, M and Ferraris, L and Popoff, M and Mazuet, C and Barbut, F and Delannoy, J and Dupuy, B and Butel, MJ and Aires, J}, title = {Core-, pan- and accessory genome analyses of Clostridium neonatale: insights into genetic diversity.}, journal = {Microbial genomics}, volume = {8}, number = {5}, pages = {}, pmid = {35550024}, issn = {2057-5858}, mesh = {*Clostridium/genetics ; Genetic Variation ; *Genome, Bacterial ; Humans ; Infant, Newborn ; Phylogeny ; }, abstract = {Clostridium neonatale is a potential opportunistic pathogen recovered from faecal samples in cases of necrotizing enterocolitis (NEC), a gastrointestinal disease affecting preterm neonates. Although the C. neonatale species description and name validation were published in 2018, comparative genomics are lacking. In the present study, we provide the closed genome assembly of the C. neonatale ATCC BAA-265[T] (=250.09) reference strain with a manually curated functional annotation of the coding sequences. Pan-, core- and accessory genome analyses were performed using the complete 250.09 genome (4.7 Mb), three new assemblies (4.6-5.6 Mb), and five publicly available draft genome assemblies (4.6-4.7 Mb). The C. neonatale pan-genome contains 6840 genes, while the core-genome has 3387 genes. Pan-genome analysis revealed an 'open' state and genomic diversity. The strain-specific gene families ranged from five to 742 genes. 
Multiple mobile genetic elements were predicted, including a total of 201 genomic islands, 13 insertion sequence families, one CRISPR-Cas type I-B system and 15 predicted intact prophage signatures. Primary virulence classes including offensive, defensive, regulation of virulence-associated genes and non-specific virulence factors were identified. The presence of a tet(W/N/W) gene encoding a tetracycline resistance ribosomal protection protein and a 23S rRNA methyltransferase ermQ gene were identified in two different strains. Together, our results revealed a genetic diversity and plasticity of C. neonatale genomes and provide a comprehensive view of this species genomic features, paving the way for the characterization of its biological capabilities.}, } @article {pmid35529944, year = {2022}, author = {Zhou, X and Liu, Z}, title = {Unlocking plant metabolic diversity: A (pan)-genomic view.}, journal = {Plant communications}, volume = {3}, number = {2}, pages = {100300}, pmid = {35529944}, issn = {2590-3462}, mesh = {*Genome, Plant/genetics ; *Genomics ; Multigene Family ; Plants/genetics ; Repetitive Sequences, Nucleic Acid ; }, abstract = {Plants produce a remarkable diversity of structurally and functionally diverse natural chemicals that serve as adaptive compounds throughout their life cycles. However, unlocking this metabolic diversity is significantly impeded by the size, complexity, and abundant repetitive elements of typical plant genomes. As genome sequencing becomes routine, we anticipate that links between metabolic diversity and genetic variation will be strengthened. In addition, an ever-increasing number of plant genomes have revealed that biosynthetic gene clusters are not only a hallmark of microbes and fungi; gene clusters for various classes of compounds have also been found in plants, and many are associated with important agronomic traits. 
We present recent examples of plant metabolic diversification that have been discovered through the exploration and exploitation of various genomic and pan-genomic data. We also draw attention to the fundamental genomic and pan-genomic basis of plant chemodiversity and discuss challenges and future perspectives for investigating metabolic diversity in the coming pan-genomics era.}, } @article {pmid35510788, year = {2022}, author = {White, H and Vos, M and Sheppard, SK and Pascoe, B and Raymond, B}, title = {Signatures of selection in core and accessory genomes indicate different ecological drivers of diversification among Bacillus cereus clades.}, journal = {Molecular ecology}, volume = {31}, number = {13}, pages = {3584-3597}, pmid = {35510788}, issn = {1365-294X}, support = {BB/M009122/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bacillus cereus/genetics ; Gene Transfer, Horizontal/genetics ; *Genome, Bacterial/genetics ; Phenotype ; Phylogeny ; }, abstract = {Bacterial clades are often ecologically distinct, despite extensive horizontal gene transfer (HGT). How selection works on different parts of bacterial pan-genomes to drive and maintain the emergence of clades is unclear. Focusing on the three largest clades in the diverse and well-studied Bacillus cereus sensu lato group, we identified clade-specific core genes (present in all clade members) and then used clade-specific allelic diversity to identify genes under purifying and diversifying selection. Clade-specific accessory genes (present in a subset of strains within a clade) were characterized as being under selection using presence/absence in specific clades. Gene ontology analyses of genes under selection revealed that different gene functions were enriched in different clades. Furthermore, some gene functions were enriched only amongst clade-specific core or accessory genomes. 
Genes under purifying selection were often clade-specific, while genes under diversifying selection showed signs of frequent HGT. These patterns are consistent with different selection pressures acting on both the core and the accessory genomes of different clades and can lead to ecological divergence in both cases. Examining variation in allelic diversity allows us to uncover genes under clade-specific selection, allowing ready identification of strains and their ecological niche.}, } @article {pmid35507269, year = {2022}, author = {Tantoso, E and Eisenhaber, B and Eisenhaber, F}, title = {Optimizing the Parametrization of Homologue Classification in the Pan-Genome Computation for a Bacterial Species: Case Study Streptococcus pyogenes.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2449}, number = {}, pages = {299-324}, pmid = {35507269}, issn = {1940-6029}, mesh = {Cluster Analysis ; *Genome, Bacterial ; Genomics/methods ; Multigene Family ; Phylogeny ; *Streptococcus pyogenes/genetics ; }, abstract = {The paradigm shift associated with the introduction of the pan-genome concept has drawn the attention from singular reference genomes toward the actual sequence diversity within organism populations, strain collections, clades, etc. A single genome is no longer sufficient to describe bacteria of interest, but instead, the genomic repertoire of all existing strains is the key to the metabolic, evolutionary, or pathogenic potential of a species. The classification of orthologous genes derived from a collection of taxonomically related genome sequences is central to bacterial pan-genome computational analysis. In this work, we present a review of methods for computing pan-genome gene clusters including their comparative analysis for the case of Streptococcus pyogenes strain genomes. 
We exhaustively scanned the parametrization space of the homologue searching procedures and find optimal parameters (sequence identity (60%) and coverage (50-60%) in the pairwise alignment) for the orthologous clustering of gene sequences. We find that the sequence identity threshold influences the number of gene families ~3 times stronger than the sequence coverage threshold.}, } @article {pmid35501686, year = {2022}, author = {Liu, H and Zhao, W and Hua, W and Liu, J}, title = {A large-scale population based organelle pan-genomes construction and phylogeny analysis reveal the genetic diversity and the evolutionary origins of chloroplast and mitochondrion in Brassica napus L.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {339}, pmid = {35501686}, issn = {1471-2164}, support = {31871664//National Natural Science Foundation of China/ ; CAAS-ZDRW2019003//Agricultural Science and Technology Innovation Program/ ; }, mesh = {Brassica/genetics ; *Brassica napus/genetics ; Brassica rapa/genetics ; Chloroplasts/genetics ; Genetic Variation ; *Genome, Chloroplast ; *Genome, Mitochondrial ; Genome, Plant ; Mitochondria/genetics ; Phylogeny ; }, abstract = {BACKGROUND: Allotetraploid oilseed rape (Brassica napus L.) is an important worldwide oil-producing crop. The origin of rapeseed is still undetermined due to the lack of wild resources. Despite certain genetic architecture and phylogenetic studies have been done focus on large group of Brassica nuclear genomes, the organelle genomes information under global pattern is largely unknown, which provide unique material for phylogenetic studies of B. napus. Here, based on de novo assemblies of 1,579 B. napus accessions collected globally, we constructed the chloroplast and mitochondrial pan-genomes of B. napus, and investigated the genetic diversity, phylogenetic relationships of B. napus, B. rapa and B. oleracea.

RESULTS: Based on mitotype-specific markers and mitotype-variant ORFs, four main cytoplasmic haplotypes were identified in our groups corresponding the nap, pol, ole, and cam mitotypes, among which the structure of chloroplast genomes was more conserved without any rearrangement than mitochondrial genomes. A total of 2,092 variants were detected in chloroplast genomes, whereas only 326 in mitochondrial genomes, indicating that chloroplast genomes exhibited a higher level of single-base polymorphism than mitochondrial genomes. Based on whole-genome variants diversity analysis, eleven genetic difference regions among different cytoplasmic haplotypes were identified on chloroplast genomes. The phylogenetic tree incorporating accessions of the B. rapa, B. oleracea, natural and synthetic populations of B. napus revealed multiple origins of B. napus cytoplasm. The cam-type and pol-type were both derived from B. rapa, while the ole-type was originated from B. oleracea. Notably, the nap-type cytoplasm was identified in both the B. rapa population and the synthetic B. napus, suggesting that B. rapa might be the maternal ancestor of nap-type B. napus.

CONCLUSIONS: The phylogenetic results provide novel insights into the organelle genomic evolution of Brassica species. The natural rapeseeds contained at least four cytoplastic haplotypes, of which the predominant nap-type might be originated from B. rapa. Besides, the organelle pan-genomes and the overall variation data offered useful resources for analysis of cytoplasmic inheritance related agronomical important traits of rapeseed, which can substantially facilitate the cultivation and improvement of rapeseed varieties.}, } @article {pmid35498663, year = {2022}, author = {Burridge, AJ and Winfield, MO and Wilkinson, PA and Przewieslik-Allen, AM and Edwards, KJ and Barker, GLA}, title = {The Use and Limitations of Exome Capture to Detect Novel Variation in the Hexaploid Wheat Genome.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {841855}, pmid = {35498663}, issn = {1664-462X}, abstract = {The bread wheat (Triticum aestivum) pangenome is a patchwork of variable regions, including translocations and introgressions from progenitors and wild relatives. Although a large number of these have been documented, it is likely that many more remain unknown. To map these variable regions and make them more traceable in breeding programs, wheat accessions need to be genotyped or sequenced. The wheat genome is large and complex and consequently, sequencing efforts are often targeted through exome capture. In this study, we employed exome capture prior to sequencing 12 wheat varieties; 10 elite T. aestivum cultivars and two T. aestivum landrace accessions. Sequence coverage across chromosomes was greater toward distal regions of chromosome arms and lower in centromeric regions, reflecting the capture probe distribution which itself is determined by the known telomere to centromere gene gradient. Superimposed on this general pattern, numerous drops in sequence coverage were observed. Several of these corresponded with reported introgressions. 
Other drops in coverage could not be readily explained and may point to introgressions that have not, to date, been documented.}, } @article {pmid35496987, year = {2022}, author = {Nwaiwu, O}, title = {Comparative genome analysis of the first Listeria monocytogenes core genome multi-locus sequence types CT2050 AND CT2051 strains with their close relatives.}, journal = {AIMS microbiology}, volume = {8}, number = {1}, pages = {61-72}, pmid = {35496987}, issn = {2471-1888}, abstract = {Genome sequences of the three strains of L. monocytogenes, which are the first core genome multi-locus sequence types (cgMLST) 2050 and 2051 were reviewed and compared with 21 close relatives and reference genomes. Using a pan-genomic approach to analyse whole genome sequences, it was found that the strains consisted of approximately 2200 shared genes and a much greater pool of genes present as an accessory genome. An unknown transmissible sequence of approximately 91 kb harbouring bacitracin resistance genes found in strain LmNG2 (1/2b) was revealed to be an Inc18 plasmid. The CT2051, strain LmNG3 (1/2a) harboured more unique genes (252 vs 230) than the well-known reference strain LmEGD-e (1/2a). 
More studies to monitor new strains can help reduce food-borne outbreaks.}, } @article {pmid35493726, year = {2022}, author = {Song, Y and Xu, X and Huang, Z and Xiao, Y and Yu, K and Jiang, M and Yin, S and Zheng, M and Meng, H and Han, Y and Wang, Y and Wang, D and Wei, Q}, title = {Corrigendum: Genomic Characteristics and Pan-Genome Analysis of Rhodococcus equi.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {884441}, doi = {10.3389/fcimb.2022.884441}, pmid = {35493726}, issn = {2235-2988}, abstract = {[This corrects the article DOI: 10.3389/fcimb.2022.807610.].}, } @article {pmid35489163, year = {2022}, author = {Mohd Saad, NS and Neik, TX and Thomas, WJW and Amas, JC and Cantila, AY and Craig, RJ and Edwards, D and Batley, J}, title = {Advancing designer crops for climate resilience through an integrated genomics approach.}, journal = {Current opinion in plant biology}, volume = {67}, number = {}, pages = {102220}, doi = {10.1016/j.pbi.2022.102220}, pmid = {35489163}, issn = {1879-0356}, mesh = {Climate Change ; *Crops, Agricultural/genetics ; Domestication ; Genomics ; *Plant Breeding/methods ; }, abstract = {Climate change and exponential population growth are exposing an immediate need for developing future crops that are highly resilient and adaptable to changing environments to maintain global food security in the next decade. Rigorous selection from long domestication history has rendered cultivated crops genetically disadvantaged, raising concerns in their ability to adapt to these new challenges and limiting their usefulness in breeding programmes. 
As a result, future crop improvement efforts must rely on integrating various genomic strategies ranging from high-throughput sequencing to machine learning, in order to exploit germplasm diversity and overcome bottlenecks created by domestication, expansive multi-dimensional phenotypes, arduous breeding processes, complex traits and big data.}, } @article {pmid35488861, year = {2022}, author = {Wang, Z and Rouard, M and Biswas, MK and Droc, G and Cui, D and Roux, N and Baurens, FC and Ge, XJ and Schwarzacher, T and Heslop-Harrison, PJS and Liu, Q}, title = {A chromosome-level reference genome of Ensete glaucum gives insight into diversity and chromosomal and repetitive sequence evolution in the Musaceae.}, journal = {GigaScience}, volume = {11}, number = {}, pages = {}, pmid = {35488861}, issn = {2047-217X}, mesh = {Chromosomes ; DNA Copy Number Variations ; DNA Transposable Elements ; *Musa/genetics ; *Musaceae/genetics ; Plant Breeding ; Retroelements ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Ensete glaucum (2n = 2x = 18) is a giant herbaceous monocotyledonous plant in the small Musaceae family along with banana (Musa). A high-quality reference genome sequence assembly of E. glaucum is a resource for functional and evolutionary studies of Ensete, Musaceae, and the Zingiberales.

FINDINGS: Using Oxford Nanopore Technologies, chromosome conformation capture (Hi-C), Illumina and RNA survey sequence, supported by molecular cytogenetics, we report a high-quality 481.5 Mb genome assembly with 9 pseudo-chromosomes and 36,836 genes. A total of 55% of the genome is composed of repetitive sequences with predominantly LTR-retroelements (37%) and DNA transposons (7%). The single 5S ribosomal DNA locus had an exceptionally long monomer length of 1,056 bp, more than twice that of the monomers at multiple loci in Musa. A tandemly repeated satellite (1.1% of the genome, with no similar sequence in Musa) was present around all centromeres, together with a few copies of a long interspersed nuclear element (LINE) retroelement. The assembly enabled us to characterize in detail the chromosomal rearrangements occurring between E. glaucum and the x = 11 species of Musa. One E. glaucum chromosome has the same gene content as Musa acuminata, while others show multiple, complex, but clearly defined evolutionary rearrangements in the change between x = 9 and 11.

CONCLUSIONS: The advance towards a Musaceae pangenome including E. glaucum, tolerant of extreme environments, makes a complete set of gene alleles, copy number variation, and a reference for structural variation available for crop breeding and understanding environmental responses. The chromosome-scale genome assembly shows the nature of chromosomal fusion and translocation events during speciation, and features of rapid repetitive DNA change in terms of copy number, sequence, and genomic location, critical to understanding its role in diversity and evolution.}, } @article {pmid35483961, year = {2022}, author = {Markello, C and Huang, C and Rodriguez, A and Carroll, A and Chang, PC and Eizenga, J and Markello, T and Haussler, D and Paten, B}, title = {A complete pedigree-based graph workflow for rare candidate variant analysis.}, journal = {Genome research}, volume = {32}, number = {5}, pages = {893-903}, pmid = {35483961}, issn = {1549-5469}, mesh = {*Genome ; High-Throughput Nucleotide Sequencing ; INDEL Mutation ; Pedigree ; *Polymorphism, Single Nucleotide ; Software ; Workflow ; }, abstract = {Methods that use a linear genome reference for genome sequencing data analysis are reference-biased. In the field of clinical genetics for rare diseases, a resulting reduction in genotyping accuracy in some regions has likely prevented the resolution of some cases. Pangenome graphs embed population variation into a reference structure. Although pangenome graphs have helped to reduce reference mapping bias, further performance improvements are possible. We introduce VG-Pedigree, a pedigree-aware workflow based on the pangenome-mapping tool of Giraffe and the variant calling tool DeepTrio using a specially trained model for Giraffe-based alignments. 
We demonstrate mapping and variant calling improvements in both single-nucleotide variants (SNVs) and insertion and deletion (indel) variants over those produced by alignments created using BWA-MEM to a linear-reference and Giraffe mapping to a pangenome graph containing data from the 1000 Genomes Project. We have also adapted and upgraded deleterious-variant (DV) detecting methods and programs into a streamlined workflow. We used these workflows in combination to detect small lists of candidate DVs among 15 family quartets and quintets of the Undiagnosed Diseases Program (UDP). All candidate DVs that were previously diagnosed using the Mendelian models covered by the previously published methods were recapitulated by these workflows. The results of these experiments indicate that a slightly greater absolute count of DVs are detected in the proband population than in their matched unaffected siblings.}, } @article {pmid35483110, year = {2022}, author = {Alotaibi, G and Khan, K and Al Mouslem, AK and Ahmad Khan, S and Naseer Abbas, M and Abbas, M and Ali Shah, S and Jalal, K}, title = {Pan genome based reverse vaccinology approach to explore Enterococcus faecium (VRE) strains for identification of novel multi-epitopes vaccine candidate.}, journal = {Immunobiology}, volume = {227}, number = {3}, pages = {152221}, doi = {10.1016/j.imbio.2022.152221}, pmid = {35483110}, issn = {1878-3279}, mesh = {Aged ; Child ; Computational Biology ; *Enterococcus faecium/genetics ; Epitopes, T-Lymphocyte/genetics ; Humans ; Molecular Docking Simulation ; Vaccines, Subunit ; *Vaccinology ; Vancomycin Resistance ; }, abstract = {Enterococcus faecium is regarded as fourth most emerging common pathogen causing hospital acquired infections (HAIs), with high mortality rate, especially in children, elderly and immunocompromised patients. Recently, due to the emergence of E. 
faecium resistant strains especially vancomycin resistance (VRE) and their continuously growing resistivity to antibiotics, design of safe vaccine remains a choice for its control. Alternative control through vaccination has received much attention, but there is no clinically approved vaccine against this pathogen. Therefore, in current study we have applied a triple helix approach i.e., Pan-genome, subtractive genome and reverse vaccinology to identify and design potential vaccine candidates and multiepitope-based vaccine (MEV) construct against E. faecium (via core genome analysis from 216 strains). In this study, only 2 outer membrane proteins were identified through genome subtraction of resistant strains genes against human and essential proteins. Subsequently, phosphate ABC transporter substrate binding protein (Psts) was selected as a promiscuous vaccine candidate to develop a potent vaccine model. A final of four epitopes from CD8+ T-cell, CD4+ T-cell epitopes, and B-cell were shortlisted from outer membrane protein with highly antigenic, IFN-γ inducer, and overlapping characteristics for the construction of twelve vaccine models. The V3 construct was found to be highly immunogenic, non-toxic, non-allergenic, highly antigenic and most stable in terms of molecular docking and simulation studies against six HLAs, TLR2, and TLR4 complex. So far, this protein and multiepitope have never been characterized as vaccine targets against E. faecium. The current study proposed V3 as a significant vaccine candidate that could help the scientific community to treat E. 
faecium infections.}, } @article {pmid35481758, year = {2022}, author = {Wu, J and NicAogáin, K and McAuliffe, O and Jordan, K and O'Byrne, C}, title = {Phylogenetic and Phenotypic Analyses of a Collection of Food and Clinical Listeria monocytogenes Isolates Reveal Loss of Function of Sigma B from Several Clonal Complexes.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {10}, pages = {e0005122}, pmid = {35481758}, issn = {1098-5336}, mesh = {*Bacterial Proteins/genetics ; Food Microbiology ; *Listeria monocytogenes/genetics ; Phenotype ; Phylogeny ; *Sigma Factor/genetics ; }, abstract = {To understand the molecular mechanisms that contribute to the stress responses of the important foodborne pathogen Listeria monocytogenes, we collected 139 strains (meat, n = 25; dairy, n = 10; vegetable, n = 8; seafood, n = 14; mixed food, n = 4; and food processing environments, n = 78), mostly isolated in Ireland, and subjected them to whole-genome sequencing. These strains were compared to 25 Irish clinical isolates and 4 well-studied reference strains. Core genome and pan-genome analysis confirmed a highly clonal and deeply branched population structure. Multilocus sequence typing showed that this collection contained a diverse range of strains from L. monocytogenes lineages I and II. Several groups of isolates with highly similar genome content were traced to single or multiple food business operators, providing evidence of strain persistence or prevalence, respectively. Phenotypic screening assays for tolerance to salt stress and resistance to acid stress revealed variants within several clonal complexes that were phenotypically distinct. Five of these phenotypic outliers were found to carry mutations in the sigB operon, which encodes the stress-inducible sigma factor sigma B. Transcriptional analysis confirmed that three of the strains that carried mutations in sigB, rsbV, or rsbU had reduced SigB activity, as predicted. 
These strains exhibited increased tolerance to salt stress and displayed decreased resistance to low pH stress. Overall, this study shows that loss-of-function mutations in the sigB operon are comparatively common in field isolates, probably reflecting the cost of the general stress response to reproductive fitness in this pathogen. IMPORTANCE The bacterial foodborne pathogen Listeria monocytogenes frequently contaminates various categories of food products and is able to cause life-threatening infections when ingested by humans. Thus, it is important to control the growth of this bacterium in food by understanding the mechanisms that allow its proliferation under suboptimal conditions. In this study, intraspecies heterogeneity in stress response was observed across a collection consisting of mainly Irish L. monocytogenes isolates. Through comparisons of genome sequence and phenotypes observed, we identified three strains with impairment of the general stress response regulator SigB. Two of these strains are used widely in food challenge studies for evaluating the growth potential of L. monocytogenes. Given that loss of SigB function is associated with atypical phenotypic properties, the use of these strains in food challenge studies should be re-evaluated.}, } @article {pmid35479110, year = {2022}, author = {de Sá, PHCG and Castro Alves, JT and Veras, AAO}, title = {Protocol to analyze the bacterial pangenome using PAN2HGENE software.}, journal = {STAR protocols}, volume = {3}, number = {2}, pages = {101327}, pmid = {35479110}, issn = {2666-1667}, mesh = {*Bacteria ; Genome ; Prokaryotic Cells ; *Software ; }, abstract = {The PAN2HGENE is a computational tool that enables two main analyses. First, the tool can identify gene products absent from the original prokaryotic genome sequence. Second, it enables automated comparative analysis for both complete and draft genomes. 
All analyses are performed through a simple and intuitive graphical user interface without the need for extensive and complex command lines. For complete details on the use and execution of this protocol, please refer to Silva de Oliveira (2021).}, } @article {pmid35478716, year = {2022}, author = {Ode, H and Nakata, Y and Nagashima, M and Hayashi, M and Yamazaki, T and Asakura, H and Suzuki, J and Kubota, M and Matsuoka, K and Matsuda, M and Mori, M and Sugimoto, A and Imahashi, M and Yokomaku, Y and Sadamasu, K and Iwatani, Y}, title = {Molecular epidemiological features of SARS-CoV-2 in Japan, 2020-1.}, journal = {Virus evolution}, volume = {8}, number = {1}, pages = {veac034}, pmid = {35478716}, issn = {2057-1577}, abstract = {There were five epidemic waves of coronavirus disease 2019 in Japan between 2020 and 2021. It remains unclear how the domestic waves arose and abated. To better understand this, we analyzed the pangenomic sequences of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) and characterized the molecular epidemiological features of the five epidemic waves in Japan. In this study, we performed deep sequencing to determine the pangenomic SARS-CoV-2 sequences of 1,286 samples collected in two cities far from each other, Tokyo Metropolis and Nagoya. Then, the spatiotemporal genetic changes of the obtained sequences were compared with the sequences available in the Global Initiative on Sharing All Influenza Data (GISAID) database. A total of 873 genotypes carrying different sets of mutations were identified in the five epidemic waves. Phylogenetic analysis demonstrated that sharp displacements of lineages and genotypes occurred between consecutive waves over the 2 years. In addition, a wide variety of genotypes were observed in the early half of each wave, whereas a few genotypes were detected across Japan during an entire wave. 
Phylogenetically, putative descendant genotypes observed late in each wave displayed regional clustering and evolution in Japan. The genetic diversity of SARS-CoV-2 displayed uneven dynamics during each epidemic wave in Japan. Our findings provide an important molecular epidemiological basis to aid in controlling future SARS-CoV-2 epidemics.}, } @article {pmid35476524, year = {2022}, author = {Norsigian, CJ and Danhof, HA and Brand, CK and Midani, FS and Broddrick, JT and Savidge, TC and Britton, RA and Palsson, BO and Spinler, JK and Monk, JM}, title = {Systems biology approach to functionally assess the Clostridioides difficile pangenome reveals genetic diversity with discriminatory power.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {119}, number = {18}, pages = {e2119396119}, pmid = {35476524}, issn = {1091-6490}, support = {U01 AI124290/AI/NIAID NIH HHS/United States ; U01 AI124316/AI/NIAID NIH HHS/United States ; F32 AI136404/AI/NIAID NIH HHS/United States ; R01 AI123278/AI/NIAID NIH HHS/United States ; P30 DK056338/DK/NIDDK NIH HHS/United States ; 1U01AI12429//HHS | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; T32 DK007664/DK/NIDDK NIH HHS/United States ; }, mesh = {Clostridioides ; *Clostridioides difficile/genetics ; *Cross Infection ; Genetic Variation ; Humans ; Systems Biology ; }, abstract = {Combatting Clostridioides difficile infections, a dominant cause of hospital-associated infections with incidence and resulting deaths increasing worldwide, is complicated by the frequent emergence of new virulent strains. Here, we employ whole-genome sequencing, high-throughput phenotypic screenings, and genome-scale models of metabolism to evaluate the genetic diversity of 451 strains of C. difficile. Constructing the C. 
difficile pangenome based on this set revealed 9,924 distinct gene clusters, of which 2,899 (29%) are defined as core, 2,968 (30%) are defined as unique, and the remaining 4,057 (41%) are defined as accessory. We develop a strain typing method, sequence typing by accessory genome (STAG), that identifies 176 genetically distinct groups of strains and allows for explicit interrogation of accessory gene content. Thirty-five strains representative of the overall set were experimentally profiled on 95 different nutrient sources, revealing 26 distinct growth profiles and unique nutrient preferences; 451 strain-specific genome scale models of metabolism were constructed, allowing us to computationally probe phenotypic diversity in 28,864 unique conditions. The models create a mechanistic link between the observed phenotypes and strain-specific genetic differences and exhibit an ability to correctly predict growth in 76% of measured cases. The typing and model predictions are used to identify and contextualize discriminating genetic features and phenotypes that may contribute to the emergence of new problematic strains.}, }
@article{pmid35475644,
  year     = {2022},
  author   = {Adomako, M and Ernst, D and Simkovsky, R and Chao, YY and Wang, J and Fang, M and Bouchier, C and Lopez-Igual, R and Mazel, D and Gugger, M and Golden, SS},
  title    = {Comparative Genomics of {Synechococcus} elongatus Explains the Phenotypic Diversity of the Strains.},
  journal  = {mBio},
  volume   = {13},
  number   = {3},
  pages    = {e0086222},
  pmid     = {35475644},
  issn     = {2150-7511},
  support  = {F32 GM130070/GM/NIGMS NIH HHS/United States ; R35 GM118290/GM/NIGMS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; },
  mesh     = {Bacterial Proteins/genetics/metabolism ; Genomics ; Phenotype ; Photosynthesis ; *Synechococcus/metabolism ; },
  abstract = {Strains of the freshwater cyanobacterium Synechococcus elongatus were first isolated approximately 60 years ago, and PCC 7942 is well established as a
model for photosynthesis, circadian biology, and biotechnology research. The recent isolation of UTEX 3055 and subsequent discoveries in biofilm and phototaxis phenotypes suggest that lab strains of S. elongatus are highly domesticated. We performed a comprehensive genome comparison among the available genomes of S. elongatus and sequenced two additional laboratory strains to trace the loss of native phenotypes from the standard lab strains and determine the genetic basis of useful phenotypes. The genome comparison analysis provides a pangenome description of S. elongatus, as well as correction of extensive errors in the published sequence for the type strain PCC 6301. The comparison of gene sets and single nucleotide polymorphisms (SNPs) among strains clarifies strain isolation histories and, together with large-scale genome differences, supports a hypothesis of laboratory domestication. Prophage genes in laboratory strains, but not UTEX 3055, affect pigmentation, while unique genes in UTEX 3055 are necessary for phototaxis. The genomic differences identified in this study include previously reported SNPs that are, in reality, sequencing errors, as well as SNPs and genome differences that have phenotypic consequences. One SNP in the circadian response regulator rpaA that has caused confusion is clarified here as belonging to an aberrant clone of PCC 7942, used for the published genome sequence, that has confounded the interpretation of circadian fitness research. IMPORTANCE Synechococcus elongatus is a versatile and robust model cyanobacterium for photosynthetic metabolism and circadian biology research, with utility as a biological production platform. We compared the genomes of closely related S. elongatus strains to create a pangenome annotation to aid gene discovery for novel phenotypes. The comparative genomic analysis revealed the need for a new sequence of the species type strain PCC 6301 and includes two new sequences for S. 
elongatus strains PCC 6311 and PCC 7943. The genomic comparison revealed a pattern of early laboratory domestication of strains, clarifies the relationship between the strains PCC 6301 and UTEX 2973, and showed that differences in large prophage regions, operons, and even single nucleotides have effects on phenotypes as wide-ranging as pigmentation, phototaxis, and circadian gene expression.}, }
@article{pmid35474671,
  year     = {2021},
  author   = {Ferrés, I and Iraola, G},
  title    = {An object-oriented framework for evolutionary pangenome analysis.},
  journal  = {Cell reports methods},
  volume   = {1},
  number   = {5},
  pages    = {100085},
  pmid     = {35474671},
  issn     = {2667-2375},
  mesh     = {*Genomics ; *Ecosystem ; Genome, Bacterial/genetics ; Biological Evolution ; Escherichia coli/genetics ; },
  abstract = {Pangenome analysis is fundamental to explore molecular evolution occurring in bacterial populations. Here, we introduce Pagoo, an R framework that enables straightforward handling of pangenome data. The encapsulated nature of Pagoo allows the storage of complex molecular and phenotypic information using an object-oriented approach. This facilitates to go back and forward to the data using a single programming environment and saving any stage of analysis (including the raw data) in a single file, making it sharable and reproducible. Pagoo provides tools to query, subset, compare, visualize, and perform statistical analyses, in concert with other microbial genomics packages available in the R ecosystem.
As working examples, we used 1,000 Escherichia coli genomes to show that Pagoo is scalable, and a global dataset of Campylobacter fetus genomes to identify evolutionary patterns and genomic markers of host-adaptation in this pathogen.}, }
@article{pmid35469019,
  year     = {2022},
  author   = {Rhodes, J and Abdolrasouli, A and Dunne, K and Sewell, TR and Zhang, Y and Ballard, E and Brackin, AP and van Rhijn, N and Chown, H and Tsitsopoulou, A and Posso, RB and Chotirmall, SH and McElvaney, NG and Murphy, PG and Talento, AF and Renwick, J and Dyer, PS and Szekely, A and Bowyer, P and Bromley, MJ and Johnson, EM and Lewis White, P and Warris, A and Barton, RC and Schelenz, S and Rogers, TR and Armstrong-James, D and Fisher, MC},
  title    = {Population genomics confirms acquisition of drug-resistant {Aspergillus} fumigatus infection by humans from the environment.},
  journal  = {Nature microbiology},
  volume   = {7},
  number   = {5},
  pages    = {663--674},
  pmid     = {35469019},
  issn     = {2058-5276},
  support  = {219551/Z/19/Z/WT_/Wellcome Trust/United Kingdom ; MR/N006364/2/MRC_/Medical Research Council/United Kingdom ; 097377/WT_/Wellcome Trust/United Kingdom ; /DH_/Department of Health/United Kingdom ; MR/V033417/1/MRC_/Medical Research Council/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; MR/V037315/1/MRC_/Medical Research Council/United Kingdom ; BB/M010996/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; },
  mesh     = {*Anti-Infective Agents ; *Aspergillus fumigatus/genetics ; Azoles/pharmacology ; Drug Resistance, Fungal/genetics ; Humans ; Metagenomics ; Microbial Sensitivity Tests ; },
  abstract = {Infections caused by the fungal pathogen Aspergillus fumigatus are increasingly resistant to first-line azole antifungal drugs. However, despite its clinical importance, little is known about how susceptible patients acquire infection from drug-resistant genotypes in the environment.
Here, we present a population genomic analysis of 218 A. fumigatus isolates from across the UK and Ireland (comprising 153 clinical isolates from 143 patients and 65 environmental isolates). First, phylogenomic analysis shows strong genetic structuring into two clades (A and B) with little interclade recombination and the majority of environmental azole resistance found within clade A. Second, we show occurrences where azole-resistant isolates of near-identical genotypes were obtained from both environmental and clinical sources, indicating with high confidence the infection of patients with resistant isolates transmitted from the environment. Third, genome-wide scans identified selective sweeps across multiple regions indicating a polygenic basis to the trait in some genetic backgrounds. These signatures of positive selection are seen for loci containing the canonical genes encoding fungicide resistance in the ergosterol biosynthetic pathway, while other regions under selection have no defined function. Lastly, pan-genome analysis identified genes linked to azole resistance and previously unknown resistance mechanisms. 
Understanding the environmental drivers and genetic basis of evolving fungal drug resistance needs urgent attention, especially in light of increasing numbers of patients with severe viral respiratory tract infections who are susceptible to opportunistic fungal superinfections.}, }
@article{pmid35469007,
  year     = {2022},
  author   = {Shi, YM and Hirschmann, M and Shi, YN and Ahmed, S and Abebew, D and Tobias, NJ and Grün, P and Crames, JJ and Pöschel, L and Kuttenlochner, W and Richter, C and Herrmann, J and Müller, R and Thanwisai, A and Pidot, SJ and Stinear, TP and Groll, M and Kim, Y and Bode, HB},
  title    = {Global analysis of biosynthetic gene clusters reveals conserved and unique natural products in entomopathogenic nematode-symbiotic bacteria.},
  journal  = {Nature chemistry},
  volume   = {14},
  number   = {6},
  pages    = {701--712},
  pmid     = {35469007},
  issn     = {1755-4349},
  mesh     = {Animals ; *Biological Products ; Humans ; Insecta/genetics/microbiology ; Multigene Family ; *Nematoda/genetics/microbiology ; *Photorhabdus/genetics ; Symbiosis/genetics ; *Xenorhabdus/genetics ; },
  abstract = {Microorganisms contribute to the biology and physiology of eukaryotic hosts and affect other organisms through natural products. Xenorhabdus and Photorhabdus (XP) living in mutualistic symbiosis with entomopathogenic nematodes generate natural products to mediate bacteria-nematode-insect interactions. However, a lack of systematic analysis of the XP biosynthetic gene clusters (BGCs) has limited the understanding of how natural products affect interactions between the organisms. Here we combine pangenome and sequence similarity networks to analyse BGCs from 45 XP strains that cover all sequenced strains in our collection and represent almost all XP taxonomy. The identified 1,000 BGCs belong to 176 families. The most conserved families are denoted by 11 BGC classes. We homologously (over)express the ubiquitous and unique BGCs and identify compounds featuring unusual architectures.
The bioactivity evaluation demonstrates that the prevalent compounds are eukaryotic proteasome inhibitors, virulence factors against insects, metallophores and insect immunosuppressants. These findings explain the functional basis of bacterial natural products in this tripartite relationship.}, }
@article{pmid35464848,
  year     = {2022},
  author   = {Jha, UC and Nayyar, H and Parida, SK and Bakır, M and von Wettberg, EJB and Siddique, KHM},
  title    = {Progress of Genomics-Driven Approaches for Sustaining Underutilized Legume Crops in the Post-Genomic Era.},
  journal  = {Frontiers in genetics},
  volume   = {13},
  pages    = {831656},
  pmid     = {35464848},
  issn     = {1664-8021},
  abstract = {Legume crops, belonging to the Fabaceae family, are of immense importance for sustaining global food security. Many legumes are profitable crops for smallholder farmers due to their unique ability to fix atmospheric nitrogen and their intrinsic ability to thrive on marginal land with minimum inputs and low cultivation costs. Recent progress in genomics shows promise for future genetic gains in major grain legumes. Still it remains limited in minor legumes/underutilized legumes, including adzuki bean, cluster bean, horse gram, lathyrus, red clover, urd bean, and winged bean. In the last decade, unprecedented progress in completing genome assemblies of various legume crops and resequencing efforts of large germplasm collections has helped to identify the underlying gene(s) for various traits of breeding importance for enhancing genetic gain and contributing to developing climate-resilient cultivars. This review discusses the progress of genomic resource development, including genome-wide molecular markers, key breakthroughs in genome sequencing, genetic linkage maps, and trait mapping for facilitating yield improvement in underutilized legumes.
We focus on 1) the progress in genomic-assisted breeding, 2) the role of whole-genome resequencing, pangenomes for underpinning the novel genomic variants underlying trait gene(s), 3) how adaptive traits of wild underutilized legumes could be harnessed to develop climate-resilient cultivars, 4) the progress and status of functional genomics resources, deciphering the underlying trait candidate genes with putative function in underutilized legumes 5) and prospects of novel breeding technologies, such as speed breeding, genomic selection, and genome editing. We conclude the review by discussing the scope for genomic resources developed in underutilized legumes to enhance their production and play a critical role in achieving the "zero hunger" sustainable development goal by 2030 set by the United Nations.}, }
@article{pmid35456775,
  year     = {2022},
  author   = {Lau Vetter, MCY and Huang, B and Fenske, L and Blom, J},
  title    = {Metabolism of the Genus {Guyparkeria} Revealed by Pangenome Analysis.},
  journal  = {Microorganisms},
  volume   = {10},
  number   = {4},
  pmid     = {35456775},
  issn     = {2076-2607},
  support  = {2018YFC0309904-02//National Key R&D Program of China/ ; },
  abstract = {Halophilic sulfur-oxidizing bacteria belonging to the genus Guyparkeria occur at both marine and terrestrial habitats. Common physiological characteristics displayed by Guyparkeria isolates have not yet been linked to the metabolic potential encoded in their genetic inventory. To provide a genetic basis for understanding the metabolism of Guyparkeria, nine genomes were compared to reveal the metabolic capabilities and adaptations.
A detailed account is given on Guyparkeria's ability to assimilate carbon by fixation, to oxidize reduced sulfur, to oxidize thiocyanate, and to cope with salinity stress.}, }
@article{pmid35456751,
  year     = {2022},
  author   = {Néron, B and Littner, E and Haudiquet, M and Perrin, A and Cury, J and Rocha, EPC},
  title    = {{IntegronFinder} 2.0: Identification and Analysis of Integrons across Bacteria, with a Focus on Antibiotic Resistance in {Klebsiella}.},
  journal  = {Microorganisms},
  volume   = {10},
  number   = {4},
  pmid     = {35456751},
  issn     = {2076-2607},
  support  = {ANR-16-CONV-0005//Agence Nationale de la Recherche/ ; ANR-10-LABX-62-IBEID//Agence Nationale de la Recherche/ ; EQU201903007835//Fondation pour la Recherche Médicale/ ; },
  abstract = {Integrons are flexible gene-exchanging platforms that contain multiple cassettes encoding accessory genes whose order is shuffled by a specific integrase. Integrons embedded within mobile genetic elements often contain multiple antibiotic resistance genes that they spread among nosocomial pathogens and contribute to the current antibiotic resistance crisis. However, most integrons are presumably sedentary and encode a much broader diversity of functions. IntegronFinder is a widely used software to identify novel integrons in bacterial genomes, but has aged and lacks some useful functionalities to handle very large datasets of draft genomes or metagenomes. Here, we present IntegronFinder version 2. We have updated the code, improved its efficiency and usability, adapted the output to incomplete genome data, and added a few novel functions. We describe these changes and illustrate the relevance of the program by analyzing the distribution of integrons across more than 20,000 fully sequenced genomes. We also take full advantage of its novel capabilities to analyze close to 4000 Klebsiella pneumoniae genomes for the presence of integrons and antibiotic resistance genes within them. Our data show that K.
pneumoniae has a large diversity of integrons and the largest mobile integron in our database of plasmids. The pangenome of these integrons contains a total of 165 different gene families with most of the largest families being related with resistance to numerous types of antibiotics. IntegronFinder is a free and open-source software available on multiple public platforms.}, }
@article{pmid35456404,
  year     = {2022},
  author   = {Aggarwal, SK and Singh, A and Choudhary, M and Kumar, A and Rakshit, S and Kumar, P and Bohra, A and Varshney, RK},
  title    = {Pangenomics in Microbial and Crop Research: Progress, Applications, and Perspectives.},
  journal  = {Genes},
  volume   = {13},
  number   = {4},
  pmid     = {35456404},
  issn     = {2073-4425},
  mesh     = {Chromosome Mapping ; Humans ; *Plant Breeding ; *Plants/genetics ; },
  abstract = {Advances in sequencing technologies and bioinformatics tools have fueled a renewed interest in whole genome sequencing efforts in many organisms. The growing availability of multiple genome sequences has advanced our understanding of the within-species diversity, in the form of a pangenome. Pangenomics has opened new avenues for future research such as allowing dissection of complex molecular mechanisms and increased confidence in genome mapping. To comprehensively capture the genetic diversity for improving plant performance, the pangenome concept is further extended from species to genus level by the inclusion of wild species, constituting a super-pangenome. Characterization of pangenome has implications for both basic and applied research. The concept of pangenome has transformed the way biological questions are addressed. From understanding evolution and adaptation to elucidating host-pathogen interactions, finding novel genes or breeding targets to aid crop improvement to design effective vaccines for human prophylaxis, the increasing availability of the pangenome has revolutionized several aspects of biological research.
The future availability of high-resolution pangenomes based on reference-level near-complete genome assemblies would greatly improve our ability to address complex biological problems.}, }
@article{pmid35451954,
  year     = {2022},
  author   = {Yu, J and Xu, X and Wang, Y and Zhai, X and Pan, Z and Jiao, X and Zhang, Y},
  title    = {Prophage-mediated genome differentiation of the {Salmonella} {Derby} {ST71} population.},
  journal  = {Microbial genomics},
  volume   = {8},
  number   = {4},
  pmid     = {35451954},
  issn     = {2057-5858},
  mesh     = {Animals ; *Genome, Bacterial/genetics ; Niacinamide/analogs & derivatives ; Poultry/genetics ; *Prophages/genetics ; Salmonella/genetics ; Swine ; },
  abstract = {Although Salmonella Derby ST71 strains have been recognized as poultry-specific by previous studies, multiple swine-associated S. Derby ST71 strains were identified in this long-term, multi-site epidemic study. Here, 15 representative swine-associated S. Derby ST71 strains were sequenced and compared with 65 (one swine-associated and 64 poultry-associated) S. Derby ST71 strains available in the NCBI database at a pangenomic level through comparative genomics analysis to identify genomic features related to the differentiation of swine-associated strains and previously reported poultry-associated strains. The distribution patterns of known Salmonella pathogenicity islands (SPIs) and virulence factor (VF) encoding genes were not capable of differentiating between the two strain groups. The results demonstrated that the S. Derby ST71 population harbours an open pan-genome, and swine-associated ST71 strains contain many more genes than the poultry-associated strains, mainly attributed to the prophage sequence contents in the genomes. The numbers of prophage sequences identified in the swine-associated strains were higher than those in the poultry-associated strains.
Prophages specifically harboured by the swine-associated strains were found to contain genes that facilitate niche adaptation for the bacterial hosts. Gene deletion experiments revealed that the dam gene specifically present in the prophage of the swine-associated strains is important for S. Derby to adhere onto the host cells. This study provides novel insights into the roles of prophages during the genome differentiation of Salmonella.}, }
@article{pmid35447921,
  year     = {2022},
  author   = {Jv, Y and Xi, C and Zhao, Y and Wang, W and Zhang, Y and Liu, K and Liu, W and Shan, K and Wang, C and Cao, R and Dai, C and Jv, Y and Zhu, W and Wang, H and He, Q and Hao, L},
  title    = {Pan-Genomic and Transcriptomic Analyses of Marine {Pseudoalteromonas} agarivorans {Hao} 2018 Revealed Its Genomic and Metabolic Features.},
  journal  = {Marine drugs},
  volume   = {20},
  number   = {4},
  pmid     = {35447921},
  issn     = {1660-3397},
  support  = {2020KJC-ZD08//Science, Education and Industry Integration Innovation Pilot Project of Qilu University of Tech-nology (Shandong Academy of Sciences)/ ; 2020KJC-ZD10//Science, Education and Industry Integration Innovation Pilot Project of Qilu University of Tech-nology (Shandong Academy of Sciences)/ ; No. 202002//Foundation of Qilu University of Technology of ESI Cultivating Subject for Biology and Biochemistry/ ; No. 2019JZZY021020//Foundation of Key R&D Program of Shandong Province/ ; ZR2021ZD29//Major Basic Research Project of Natural Science Foundation of Shandong Province/ ; No.
ZZ20190302//Foundation of State Key Laboratory of Biobased Material and Green Papermaking/ ; SWCG 2018-01//Foundation of Shandong Provincial Key Laboratory of Biosensors/ ; ZR2012CM019//Natural Science Foundation of Shandong Province/ ; }, mesh = {Carbohydrates ; Genome, Bacterial/genetics ; Genomics ; Phylogeny ; *Pseudoalteromonas/genetics/metabolism ; *Transcriptome ; }, abstract = {The genomic and carbohydrate metabolic features of Pseudoalteromonas agarivorans Hao 2018 (P. agarivorans Hao 2018) were investigated through pan-genomic and transcriptomic analyses, and key enzyme genes that may encode the process involved in its extracellular polysaccharide synthesis were screened. The pan-genome of the P. agarivorans strains consists of a core-genome containing 2331 genes, an accessory-genome containing 956 genes, and a unique-genome containing 1519 genes. Clusters of Orthologous Groups analyses showed that P. agarivorans harbors strain-specifically diverse metabolisms, probably representing high evolutionary genome changes. The Kyoto Encyclopedia of Genes and Genomes and reconstructed carbohydrate metabolic pathways displayed that P. agarivorans strains can utilize a variety of carbohydrates, such as d-glucose, d-fructose, and d-lactose. Analyses of differentially expressed genes showed that compared with the stationary phase (24 h), strain P. agarivorans Hao 2018 had upregulated expression of genes related to the synthesis of extracellular polysaccharides in the logarithmic growth phase (2 h), and that the expression of these genes affected extracellular polysaccharide transport, nucleotide sugar synthesis, and glycosyltransferase synthesis. This is the first investigation of the genomic and metabolic features of P. 
agarivorans through pan-genomic and transcriptomic analyses, and these intriguing discoveries provide the possibility to produce novel marine drug lead compounds with high biological activity.}, }
@article{pmid35446150,
  year     = {2022},
  author   = {Liu, Y and Pei, T and Du, J and Yao, Q and Deng, MR and Zhu, H},
  title    = {Comparative Genomics Reveals Genetic Diversity and Metabolic Potentials of the Genus {Qipengyuania} and Suggests Fifteen Novel Species.},
  journal  = {Microbiology spectrum},
  volume   = {10},
  number   = {3},
  pages    = {e0126421},
  pmid     = {35446150},
  issn     = {2165-0497},
  mesh     = {Carotenoids ; DNA ; Genetic Variation ; *Genome, Bacterial/genetics ; *Genomics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; },
  abstract = {Members of the genus Qipengyuania are heterotrophic bacteria frequently isolated from marine environments with great application potential in areas such as carotenoid production. However, the genomic diversity, metabolic function, and adaption of this genus remain largely unclear. Here, 16 isolates related to the genus Qipengyuania were recovered from coastal samples and their genomes were sequenced. The phylogenetic inference of these isolates and reference type strains of this genus indicated that the 16S rRNA gene was insufficient to distinguish them at the species level; instead, the phylogenomic reconstruction could provide the reliable phylogenetic relationships and confirm 15 new well-supported branches, representing 15 putative novel genospecies corroborated by the digital DNA-DNA hybridization and average nucleotide identity analyses. Comparative genomics revealed that the genus Qipengyuania had an open pangenome and possessed multiple conserved genes and pathways related to metabolic functions and environmental adaptation, despite the presence of divergent genomic features and specific metabolic potential.
Genetic analysis and pigment detection showed that the members of this genus were identified as carotenoid producers, while some proved to be potentially aerobic anoxygenic photoheterotrophs. Collectively, the first insight into the genetic diversity and metabolic potentials of the genus Qipengyuania will contribute to better understanding of the speciation and adaptive evolution in natural environments. IMPORTANCE The deciphering of the phylogenetic diversity and metabolic features of the abundant bacterial taxa is critical for exploring their ecological importance and application potential. Qipengyuania is a genus of frequently isolated heterotrophic microorganisms with great industrial application potential. Numerous strains related to the genus Qipengyuania have been isolated from diverse environments, but their genomic diversity and metabolic functions remain unclear. Our study revealed a high degree of genetic diversity, metabolic versatility, and environmental adaptation of the genus Qipengyuania using comparative genomics. Fifteen novel species of this genus have been established using a polyphasic taxonomic approach, expanding the number of described species to almost double. 
This study provided an overall view of the genus Qipengyuania at the genomic level and will enable us to better uncover its ecological roles and evolutionary history.}, }
@article{pmid35444317,
  year     = {2022},
  author   = {Wang, T and Antonacci-Fulton, L and Howe, K and Lawson, HA and Lucas, JK and Phillippy, AM and Popejoy, AB and Asri, M and Carson, C and Chaisson, MJP and Chang, X and Cook-Deegan, R and Felsenfeld, AL and Fulton, RS and Garrison, EP and Garrison, NA and Graves-Lindsay, TA and Ji, H and Kenny, EE and Koenig, BA and Li, D and Marschall, T and McMichael, JF and Novak, AM and Purushotham, D and Schneider, VA and Schultz, BI and Smith, MW and Sofia, HJ and Weissman, T and Flicek, P and Li, H and Miga, KH and Paten, B and Jarvis, ED and Hall, IM and Eichler, EE and Haussler, D and others},
  title    = {The {Human Pangenome Project}: a global resource to map genomic diversity.},
  journal  = {Nature},
  volume   = {604},
  number   = {7906},
  pages    = {437--446},
  pmid     = {35444317},
  issn     = {1476-4687},
  support  = {U01 HG010961/HG/NHGRI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; U01 HG010963/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; },
  mesh     = {*Genome, Human/genetics ; *Genomics ; Haplotypes/genetics ; High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; },
  abstract = {The human reference genome is the most widely used resource in human genetics and is due for a major update. Its current structure is a linear composite of merged haplotypes from more than 20 people, with a single individual comprising most of the sequence. It contains biases and errors within a framework that does not represent global human genomic variation. A high-quality reference with global representation of common variants, including single-nucleotide variants, structural variants and functional elements, is needed.
The Human Pangenome Reference Consortium aims to create a more sophisticated and complete human reference genome with a graph-based, telomere-to-telomere representation of global genomic diversity. Here we leverage innovations in technology, study design and global partnerships with the goal of constructing the highest-possible quality human pangenome reference. Our goal is to improve data representation and streamline analyses to enable routine assembly of complete diploid genomes. With attention to ethical frameworks, the human pangenome reference will contain a more accurate and diverse representation of global genomic variation, improve gene-disease association studies across populations, expand the scope of genomics research to the most repetitive and polymorphic regions of the genome, and serve as the ultimate genetic resource for future biomedical research and precision medicine.}, }
@article{pmid35440059,
  year     = {2022},
  author   = {Ferrero-Serrano, Á and Sylvia, MM and Forstmeier, PC and Olson, AJ and Ware, D and Bevilacqua, PC and Assmann, SM},
  title    = {Experimental demonstration and pan-structurome prediction of climate-associated {riboSNitches} in {Arabidopsis}.},
  journal  = {Genome biology},
  volume   = {23},
  number   = {1},
  pages    = {101},
  pmid     = {35440059},
  issn     = {1474-760X},
  mesh     = {*Arabidopsis/genetics ; Climate ; Genome, Plant ; Genome-Wide Association Study ; Polymorphism, Single Nucleotide ; RNA, Messenger ; },
  abstract = {BACKGROUND: Genome-wide association studies (GWAS) aim to correlate phenotypic changes with genotypic variation. Upon transcription, single nucleotide variants (SNVs) may alter mRNA structure, with potential impacts on transcript stability, macromolecular interactions, and translation. However, plant genomes have not been assessed for the presence of these structure-altering polymorphisms or "riboSNitches."

RESULTS: We experimentally demonstrate the presence of riboSNitches in transcripts of two Arabidopsis genes, ZINC RIBBON 3 (ZR3) and COTTON GOLGI-RELATED 3 (CGR3), which are associated with continentality and temperature variation in the natural environment. These riboSNitches are also associated with differences in the abundance of their respective transcripts, implying a role in regulating the gene's expression in adaptation to local climate conditions. We then computationally predict riboSNitches transcriptome-wide in mRNAs of 879 naturally inbred Arabidopsis accessions. We characterize correlations between SNPs/riboSNitches in these accessions and 434 climate descriptors of their local environments, suggesting a role of these variants in local adaptation. We integrate this information in CLIMtools V2.0 and provide a new web resource, T-CLIM, that reveals associations between transcript abundance variation and local environmental variation.

CONCLUSION: We functionally validate two plant riboSNitches and, for the first time, demonstrate riboSNitch conditionality dependent on temperature, coining the term "conditional riboSNitch." We provide the first pan-genome-wide prediction of riboSNitches in plants. We expand our previous CLIMtools web resource with riboSNitch information and with 1868 additional Arabidopsis genomes and 269 additional climate conditions, which will greatly facilitate in silico studies of natural genetic variation, its phenotypic consequences, and its role in local adaptation.}, }
@article{pmid35435457,
  year     = {2022},
  author   = {Belaouni, HA and Compant, S and Antonielli, L and Nikolic, B and Zitouni, A and Sessitsch, A},
  title    = {In-depth genome analysis of {Bacillus} sp. {BH32}, a salt stress-tolerant endophyte obtained from a halophyte in a semiarid region.},
  journal  = {Applied microbiology and biotechnology},
  volume   = {106},
  number   = {8},
  pages    = {3113--3137},
  pmid     = {35435457},
  issn     = {1432-0614},
  mesh     = {*Bacillus/genetics ; DNA ; Endophytes/genetics ; *Solanum lycopersicum/microbiology ; Salt Stress ; Salt-Tolerant Plants ; Triticum/microbiology ; },
  abstract = {Endophytic strains belonging to the Bacillus cereus group were isolated from the halophytes Atriplex halimus L. (Amaranthaceae) and Tamarix aphylla L. (Tamaricaceae) from costal and continental regions in Algeria. Based on their salt tolerance (up to 5%), the strains were tested for their ability to alleviate salt stress in tomato and wheat. Bacillus sp. strain BH32 showed the highest potential to reduce salinity stress (up to + 50% and + 58% of dry weight improvement, in tomato and wheat, respectively, compared to the control). To determine putative mechanisms involved in salt tolerance and plant growth promotion, the whole genome of Bacillus sp. BH32 was sequenced, annotated, and used for comparative genomics against the genomes of closely related strains. The pangenome of Bacillus sp.
BH32 and its closest relative was further analyzed. The phylogenomic analyses confirmed its taxonomic position, a member of the Bacillus cereus group, with intergenomic distances (GBDP analysis) pointing to a new taxon (digital DNA-DNA hybridization, dDDH < 70%). Genome mining unveiled several genes involved in stress tolerance, production of anti-oxidants and genes involved in plant growth promotion as well as in the production of secondary metabolites. KEY POINTS: • Bacillus sp. BH32 and other bacterial endophytes were isolated from halophytes, to be tested on tomato and wheat and to limit salt stress adverse effects. • The strain with the highest potential was then studied at the genomic level to highlight numerous genes linked to plant growth promotion and stress tolerance. • Pangenome approaches suggest that the strain belongs to a new taxon within the Bacillus cereus group.}, } @article {pmid35433080, year = {2022}, author = {Li, Z and Li, Z and Peng, Y and Lu, X and Kan, B}, title = {Trans-Regional and Cross-Host Spread of mcr-Carrying Plasmids Revealed by Complete Plasmid Sequences - 44 Countries, 1998-2020.}, journal = {China CDC weekly}, volume = {4}, number = {12}, pages = {242-248}, pmid = {35433080}, issn = {2096-7071}, abstract = {BACKGROUND: The surveillance of antimicrobial resistance genes (ARGs) and bacteria is one critical approach to prevent and control antimicrobial resistance (AMR). Next-generation sequencing (NGS) is a powerful tool in monitoring the emergence and spread of ARGs and resistant bacteria. The horizontal transfer of ARGs across host bacteria mediated by plasmids is a challenge in NGS surveillance for resistance because short-read sequencing can hardly generate the complete plasmid genome sequence, and the correlation between ARGs and plasmids is difficult to determine.

METHODS: The complete genome sequences of 455 mcr-carrying plasmids (pMCRs), and the data of their host bacteria and isolation regions were collected from the NCBI database. Genes of Inc types and ARGs were searched for each plasmid. The genome similarity of these plasmids was analyzed by pangenome clustering and genome alignment.

RESULTS: A total of 52 Inc types, including a variety of fusion plasmids containing 2 or more Inc types were identified in these pMCRs and carried by complex host bacteria. The cooccurrence of ARGs in pMCRs was generally observed, with an average of 3.9 ARGs per plasmid. Twenty-two clusters with consistent or highly similar sequences and gene compositions were identified by the pangenome clustering, which were characterized with distributions in different countries/regions, years or host bacteria in each cluster.

DISCUSSION: Based on the complete plasmid sequences, distribution of mcr genes in different Inc type plasmids, their co-existence with other AMRs, and transmission of one pMCR across regions and host bacteria can be revealed definitively. Complete plasmid genomes and comparisons in the laboratory network are necessary for spread tracing of ARG-carrying plasmids and risk assessment in AMR surveillance.}, } @article {pmid35432229, year = {2022}, author = {Yuan, PB and Zhan, Y and Zhu, JH and Ling, JH and Chen, EZ and Liu, WT and Wang, LJ and Zhong, YX and Chen, DQ}, title = {Pan-Genome Analysis of Laribacter hongkongensis: Virulence Gene Profiles, Carbohydrate-Active Enzyme Prediction, and Antimicrobial Resistance Characterization.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {862776}, pmid = {35432229}, issn = {1664-302X}, abstract = {Laribacter hongkongensis is a new emerging foodborne pathogen that causes community-acquired gastroenteritis and traveler's diarrhea. However, the genetic features of L. hongkongensis have not yet been properly understood. A total of 45 aquatic animal-associated L. hongkongensis strains isolated from intestinal specimens of frogs and grass carps were subjected to whole-genome sequencing (WGS); along with the genome data of 4 reported human clinical strains, the analysis of virulence genes, carbohydrate-active enzymes, and antimicrobial resistance (AMR) determinants was carried out for a comprehensive understanding of this new foodborne pathogen. Human clinical strains were genetically more related to some strains from frogs, as inferred from phylogenetic trees. The distribution of virulence genes and carbohydrate-active enzymes exhibited different patterns among strains of different sources, reflecting their adaption to different host environments and indicating different potentials to infect humans. 
Thirty-two AMR genes were detected, and susceptibility to 18 clinically used antibiotics including aminoglycoside, chloramphenicol, trimethoprim, and sulfa was checked to evaluate the availability of clinical medicines. Resistance to rifampicin, cefazolin, ceftazidime, ampicillin, and ceftriaxone is prevalent in most strains, whereas resistance to tetracycline, trimethoprim-sulfamethoxazole, ciprofloxacin, and levofloxacin is aggregated in nearly half of frog-derived strains, suggesting that drug resistance of frog-derived strains is more serious, and clinical treatment for L. hongkongensis infection should be more cautious.}, } @article {pmid35430877, year = {2022}, author = {Xu, S and Wei, M and Li, G and Li, Z and Che, Y and Han, L and Jia, W and Li, F and Li, D and Li, Z}, title = {Comprehensive Analysis of the Nocardia cyriacigeorgica Complex Reveals Five Species-Level Clades with Different Evolutionary and Pathogenicity Characteristics.}, journal = {mSystems}, volume = {7}, number = {3}, pages = {e0140621}, pmid = {35430877}, issn = {2379-5077}, mesh = {Humans ; Virulence/genetics ; Phylogeny ; *Nocardia/genetics ; Virulence Factors/genetics ; }, abstract = {Nocardia cyriacigeorgica is a common etiological agent of nocardiosis that has increasingly been implicated in serious pulmonary infections, especially in immunocompromised individuals. However, the evolution, diversity, and pathogenesis of N. cyriacigeorgica have remained unclear. Here, we performed a comparative genomic analysis using 91 N. cyriacigeorgica strains, 45 of which were newly sequenced in this study. Phylogenetic and average nucleotide identity (ANI) analyses revealed that N. cyriacigeorgica contained five species-level clades (8.6 to 14.6% interclade genetic divergence), namely, the N. cyriacigeorgica complex (NCC). 
Further pan-genome analysis revealed extensive differences among the five clades in nine functional categories, such as energy production, lipid metabolism, secondary metabolites, and signal transduction mechanisms. All 2,935 single-copy core genes undergoing purifying selection were highly conserved across NCC. However, clades D and E exhibited reduced selective constraints, compared to clades A to C. Horizontal gene transfer (HGT) and mobile genetic elements contributed to genomic plasticity, and clades A and B had experienced a higher level of HGT events than other clades. A total of 129 virulence factors were ubiquitous across NCC, such as the mce operon, hemolysin, and type VII secretion system (T7SS). However, different distributions of three toxin-coding genes and two new types of mce operons were detected, which might contribute to pathogenicity differences among the members of the NCC. Overall, our study provides comprehensive insights into the evolution, genetic diversity, and pathogenicity of NCC, facilitating the prevention of infections. IMPORTANCE Nocardia species are opportunistic bacterial pathogens that can affect all organ systems, primarily the skin, lungs, and brain. N. cyriacigeorgica is the most prevalent species within the genus, exhibits clinical significance, and can cause severe infections when disseminated throughout the body. However, the evolution, diversity, and pathogenicity of N. cyriacigeorgica remain unclear. Here, we have conducted a comparative genomic analysis of 91 N. cyriacigeorgica strains and revealed that N. cyriacigeorgica is not a single species but is composed of five closely related species. In addition, we discovered that these five species differ in many ways, involving selection pressure, horizontal gene transfer, functional capacity, pathogenicity, and antibiotic resistance. 
Overall, our work provides important clues in dissecting the evolution, genetic diversity, and pathogenicity of NCC, thereby advancing prevention measures against these infections.}, } @article {pmid35428201, year = {2022}, author = {Yang, MR and Wu, YW}, title = {Enhancing predictions of antimicrobial resistance of pathogens by expanding the potential resistance gene repertoire using a pan-genome-based feature selection approach.}, journal = {BMC bioinformatics}, volume = {23}, number = {Suppl 4}, pages = {131}, pmid = {35428201}, issn = {1471-2105}, support = {MOST108-2628-E-038-002-MY3//Ministry of Science and Technology, Taiwan/ ; MOST110-2221-E-038-019-MY3//Ministry of Science and Technology, Taiwan/ ; }, mesh = {*Anti-Bacterial Agents/pharmacology ; *Drug Resistance, Bacterial/genetics ; Genome, Bacterial ; Machine Learning ; Whole Genome Sequencing/methods ; }, abstract = {BACKGROUND: Predicting which pathogens might exhibit antimicrobial resistance (AMR) based on genomics data is one of the promising ways to swiftly and precisely identify AMR pathogens. Currently, the most widely used genomics approach is through identifying known AMR genes from genomic information in order to predict whether a pathogen might be resistant to certain antibiotic drugs. The list of known AMR genes, however, is still far from comprehensive and may result in inaccurate AMR pathogen predictions. We thus felt the need to expand the AMR gene set and proposed a pan-genome-based feature selection method to identify potential gene sets for AMR prediction purposes.

RESULTS: By building pan-genome datasets and extracting gene presence/absence patterns from four bacterial species, each with more than 2000 strains, we showed that machine learning models built from pan-genome data can be very promising for predicting AMR pathogens. The gene set selected by the eXtreme Gradient Boosting (XGBoost) feature selection approach further improved prediction outcomes, and an incremental approach selecting subsets of XGBoost-selected features brought the machine learning model performance to the next level. Investigating selected gene sets revealed that on average about 50% of genes had no known function and very few of them were known AMR genes, indicating the potential of the selected gene sets to expand resistance gene repertoires.

CONCLUSIONS: We demonstrated that a pan-genome-based feature selection approach is suitable for building machine learning models for predicting AMR pathogens. The extracted gene sets may provide future clues to expand our knowledge of known AMR genes and provide novel hypotheses for inferring bacterial AMR mechanisms.}, } @article {pmid35419298, year = {2022}, author = {Akwani, WC and van Vliet, AHM and Joel, JO and Andres, S and Diricks, M and Maurer, FP and Chambers, MA and Hingley-Wilson, SM}, title = {The Use of Comparative Genomic Analysis for the Development of Subspecies-Specific PCR Assays for Mycobacterium abscessus.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {816615}, pmid = {35419298}, issn = {2235-2988}, support = {MC_PC_19052/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Anti-Bacterial Agents ; Genomics ; Humans ; Multiplex Polymerase Chain Reaction ; *Mycobacterium/genetics ; *Mycobacterium Infections, Nontuberculous/diagnosis/microbiology ; *Mycobacterium abscessus/genetics ; }, abstract = {Mycobacterium abscessus complex (MABC) is an important pathogen of immunocompromised patients. Accurate and rapid determination of MABC at the subspecies level is vital for optimal antibiotic therapy. Here we have used comparative genomics to design MABC subspecies-specific PCR assays. Analysis of single nucleotide polymorphisms and core genome multilocus sequence typing showed clustering of genomes into three distinct clusters representing the MABC subspecies M. abscessus, M. bolletii and M. massiliense. Pangenome analysis of 318 MABC genomes from the three subspecies allowed for the identification of 15 MABC subspecies-specific genes. In silico testing of primer sets against 1,663 publicly available MABC genomes and 66 other closely related Mycobacterium genomes showed that all assays had >97% sensitivity and >98% specificity. 
Subsequent experimental validation of two subspecies-specific genes each showed the PCR assays worked well in individual and multiplex format with no false-positivity with 5 other mycobacteria of clinical importance. In conclusion, we have developed a rapid, accurate, multiplex PCR-assay for discriminating MABC subspecies that could improve their detection, diagnosis and inform correct treatment choice.}, } @article {pmid35418954, year = {2022}, author = {Wambui, J and Stevens, MJA and Cernela, N and Stephan, R}, title = {Unraveling the Genotypic and Phenotypic Diversity of the Psychrophilic Clostridium estertheticum Complex, a Meat Spoilage Agent.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {856810}, pmid = {35418954}, issn = {1664-302X}, abstract = {The spoilage of vacuum-packed meat by Clostridium estertheticum complex (CEC), which is accompanied by or without production of copious amounts of gas, has been linked to the acetone-butyrate-ethanol fermentation, but the mechanism behind the variable gas production has not been fully elucidated. The reconstruction and comparison of intra- and interspecies metabolic pathways linked to meat spoilage at the genomic level can unravel the genetic basis for the variable phenotype. However, this is hindered by unavailability of CEC genomes, which in addition, has hampered the determination of genetic diversity and its drivers within CEC. Therefore, the current study aimed at determining the diversity of CEC through comprehensive comparative genomics. Fifty CEC genomes from 11 CEC species were compared. Recombination and gene gain/loss events were identified as important sources of natural variation within CEC, with the latter being pronounced in genomospecies2 that has lost genes related to flagellar assembly and signaling. Pan-genome analysis revealed variations in carbohydrate metabolic and hydrogenases genes within the complex. Variable inter- and intraspecies gas production in meat by C. 
estertheticum and Clostridium tagluense was associated with the distribution of the [NiFe]-hydrogenase hyp gene cluster whose absence or presence was associated with occurrence or lack of pack distention, respectively. Through comparative genomics, we have shown that CEC species exhibit high genetic diversity that can be partly attributed to recombination and gene gain/loss events. We have also shown that the genetic basis for variable gas production in meat can be attributed to the presence/absence of the hyp gene cluster.}, } @article {pmid35416699, year = {2022}, author = {Weisberg, AJ and Rahman, A and Backus, D and Tyavanagimatt, P and Chang, JH and Sachs, JL}, title = {Pangenome Evolution Reconciles Robustness and Instability of Rhizobial Symbiosis.}, journal = {mBio}, volume = {13}, number = {3}, pages = {e0007422}, pmid = {35416699}, issn = {2150-7511}, mesh = {Bacteria/metabolism ; *Bradyrhizobium/genetics/metabolism ; Ecosystem ; *Fabaceae/microbiology ; Nitrogen/metabolism ; Nitrogen Fixation ; *Rhizobium/genetics/metabolism ; Symbiosis/genetics ; }, abstract = {Root nodulating rhizobia are nearly ubiquitous in soils and provide the critical service of nitrogen fixation to thousands of legume species, including staple crops. However, the magnitude of fixed nitrogen provided to hosts varies markedly among rhizobia strains, despite host legumes having mechanisms to selectively reward beneficial strains and to punish ones that do not fix sufficient nitrogen. Variation in the services of microbial mutualists is considered paradoxical given host mechanisms to select beneficial genotypes. Moreover, the recurrent evolution of non-fixing symbiont genotypes is predicted to destabilize symbiosis, but breakdown has rarely been observed. Here, we deconstructed hundreds of genome sequences from genotypically and phenotypically diverse Bradyrhizobium strains and revealed mechanisms that generate variation in symbiotic nitrogen fixation. 
We show that this trait is conferred by a modular system consisting of many extremely large integrative conjugative elements and few conjugative plasmids. Their transmissibility and propensity to reshuffle genes generate new combinations that lead to uncooperative genotypes and make individual partnerships unstable. We also demonstrate that these same properties extend beneficial associations to diverse host species and transfer symbiotic capacity among diverse strains. Hence, symbiotic nitrogen fixation is underpinned by modularity, which engenders flexibility, a feature that reconciles evolutionary robustness and instability. These results provide new insights into mechanisms driving the evolution of mobile genetic elements. Moreover, they yield a new predictive model on the evolution of rhizobial symbioses, one that informs on the health of organisms and ecosystems that are hosts to symbionts and that helps resolve the long-standing paradox. IMPORTANCE Genetic variation is fundamental to evolution yet is paradoxical in symbiosis. Symbionts exhibit extensive variation in the magnitude of services they provide despite hosts having mechanisms to select and increase the abundance of beneficial genotypes. Additionally, evolution of uncooperative symbiont genotypes is predicted to destabilize symbiosis, but breakdown has rarely been observed. We analyzed genome sequences of Bradyrhizobium, bacteria that in symbioses with legume hosts, fix nitrogen, a nutrient essential for ecosystems. We show that genes for symbiotic nitrogen fixation are within elements that can move between bacteria and reshuffle gene combinations that change host range and quality of symbiosis services. Consequently, nitrogen fixation is evolutionarily unstable for individual partnerships, but is evolutionarily stable for legume-Bradyrhizobium symbioses in general. 
We developed a holistic model of symbiosis evolution that reconciles robustness and instability of symbiosis and informs on applications of rhizobia in agricultural settings.}, } @article {pmid35410384, year = {2022}, author = {Ebler, J and Ebert, P and Clarke, WE and Rausch, T and Audano, PA and Houwaart, T and Mao, Y and Korbel, JO and Eichler, EE and Zody, MC and Dilthey, AT and Marschall, T}, title = {Pangenome-based genome inference allows efficient and accurate genotyping across a wide spectrum of variant classes.}, journal = {Nature genetics}, volume = {54}, number = {4}, pages = {518-525}, pmid = {35410384}, issn = {1546-1718}, support = {U01 HG010973/HG/NHGRI NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; U24 HG007497/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Algorithms ; *Genetic Variation ; *Genome, Human/genetics ; Genome-Wide Association Study ; *Genomics/methods ; Genotype ; High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; }, abstract = {Typical genotyping workflows map reads to a reference genome before identifying genetic variants. Generating such alignments introduces reference biases and comes with substantial computational burden. Furthermore, short-read lengths limit the ability to characterize repetitive genomic regions, which are particularly challenging for fast k-mer-based genotypers. In the present study, we propose a new algorithm, PanGenie, that leverages a haplotype-resolved pangenome reference together with k-mer counts from short-read sequencing data to genotype a wide spectrum of genetic variation—a process we refer to as genome inference. Compared with mapping-based approaches, PanGenie is more than 4 times faster at 30-fold coverage and achieves better genotype concordances for almost all variant types and coverages tested. 
Improvements are especially pronounced for large insertions (≥50 bp) and variants in repetitive regions, enabling the inclusion of these classes of variants in genome-wide association studies. PanGenie efficiently leverages the increasing amount of haplotype-resolved assemblies to unravel the functional impact of previously inaccessible variants while being faster compared with alignment-based workflows.}, } @article {pmid35404220, year = {2022}, author = {Patrick, S}, title = {A tale of two habitats: Bacteroides fragilis, a lethal pathogen and resident in the human gastrointestinal microbiome.}, journal = {Microbiology (Reading, England)}, volume = {168}, number = {4}, pages = {}, doi = {10.1099/mic.0.001156}, pmid = {35404220}, issn = {1465-2080}, mesh = {*Bacterial Infections ; Bacteroides fragilis/genetics/metabolism ; *Gastrointestinal Microbiome ; Gastrointestinal Tract/microbiology ; Humans ; *Microbiota/genetics ; Polysaccharides/metabolism ; }, abstract = {Bacteroides fragilis is an obligately anaerobic Gram-negative bacterium and a major colonizer of the human large colon where Bacteroides is a predominant genus. During the growth of an individual clonal population, an astonishing number of reversible DNA inversion events occur, driving within-strain diversity. Additionally, the B. fragilis pan-genome contains a large pool of diverse polysaccharide biosynthesis loci, DNA restriction/modification systems and polysaccharide utilization loci, which generates remarkable between-strain diversity. Diversity clearly contributes to the success of B. fragilis within its normal habitat of the gastrointestinal (GI) tract and during infection in the extra-intestinal host environment. Within the GI tract, B. fragilis is usually symbiotic, for example providing localized nutrients for the gut epithelium, but B. fragilis within the GI tract may not always be benign. Metalloprotease toxin production is strongly associated with colorectal cancer. B. 
fragilis is unique amongst bacteria; some strains export a protein >99 % structurally similar to human ubiquitin and antigenically cross-reactive, which suggests a link to autoimmune diseases. B. fragilis is not a primary invasive enteric pathogen; however, if colonic contents contaminate the extra-intestinal host environment, it successfully adapts to this new habitat and causes infection; classically peritoneal infection arising from rupture of an inflamed appendix or GI surgery, which if untreated, can progress to bacteraemia and death. In this review selected aspects of B. fragilis adaptation to the different habitats of the GI tract and the extra-intestinal host environment are considered, along with the considerable challenges faced when studying this highly variable bacterium.}, } @article {pmid35404110, year = {2022}, author = {Baker, JL and Tang, X and LaBonte, S and Uranga, C and Edlund, A}, title = {mucG, mucH, and mucI Modulate Production of Mutanocyclin and Reutericyclins in Streptococcus mutans B04Sm5.}, journal = {Journal of bacteriology}, volume = {204}, number = {5}, pages = {e0004222}, pmid = {35404110}, issn = {1098-5530}, support = {F32 DE026947/DE/NIDCR NIH HHS/United States ; K99 DE029228/DE/NIDCR NIH HHS/United States ; R21 DE028609/DE/NIDCR NIH HHS/United States ; R00 DE024543/DE/NIDCR NIH HHS/United States ; }, mesh = {Biofilms ; *Dental Caries ; Humans ; Phylogeny ; *Streptococcus mutans/metabolism ; Tenuazonic Acid/analogs & derivatives/metabolism ; }, abstract = {Streptococcus mutans is considered a primary etiologic agent of dental caries, which is the most common chronic infectious disease worldwide. S. mutans B04Sm5 was recently shown to produce reutericyclins and mutanocyclin through the muc biosynthetic gene cluster and to utilize reutericyclins to inhibit the growth of neighboring commensal streptococci. In this study, examination of S. mutans and muc phylogeny suggested evolution of an ancestral S. 
mutans muc into three lineages within one S. mutans clade and then horizontal transfer of muc to other S. mutans clades. The roles of the mucG and mucH transcriptional regulators and the mucI transporter were also examined. mucH was demonstrated to encode a transcriptional activator of muc. mucH deletion reduced production of mutanocyclin and reutericyclins and eliminated the impaired growth and inhibition of neighboring streptococci phenotypes, which are associated with reutericyclin production. ΔmucG had increased mutanocyclin and reutericyclin production, which impaired growth and increased the ability to inhibit neighboring streptococci. However, deletion of mucG also caused reduced expression of mucD, mucE, and mucI. Deletion of mucI reduced mutanocyclin and reutericyclin production but enhanced growth, suggesting that mucI may not transport reutericyclin as its homolog does in Limosilactobacillus reuteri. Further research is needed to determine the roles of mucG and mucI and to identify any cofactors affecting the activity of the mucG and mucH regulators. Overall, this study provided pangenome and phylogenetic analyses that serve as a resource for S. mutans research and began elucidation of the regulation of reutericyclins and mutanocyclin production in S. mutans. IMPORTANCE S. mutans must be able to outcompete neighboring organisms in its ecological niche in order to cause dental caries. S. mutans B04Sm5 inhibited the growth of neighboring commensal streptococci through production of reutericyclins via the muc biosynthetic gene cluster. In this study, an S. mutans pangenome database and updated phylogenetic tree were generated that will serve as valuable resources for the S. mutans research community and that provide insights into the carriage and evolution of S. mutans muc. The MucG and MucH regulators, and the MucI transporter, were shown to modulate production of reutericyclins and mutanocyclin. These genes also affected the ability of S. 
mutans to inhibit neighboring commensals, suggesting that they may play a role in S. mutans virulence.}, } @article {pmid35403388, year = {2022}, author = {Pan, W and Cheng, Z and Han, Z and Yang, H and Zhang, W and Zhang, H}, title = {Efficient genetic transformation and CRISPR/Cas9-mediated genome editing of watermelon assisted by genes encoding developmental regulators.}, journal = {Journal of Zhejiang University. Science. B}, volume = {23}, number = {4}, pages = {339-344}, pmid = {35403388}, issn = {1862-1783}, support = {ZR202103010168//the Excellent Youth Foundation of Shandong Scientific Committee/ ; 2021T140017//the Shandong Science and Technology Innovation Funds, and the China Postdoctoral Science Foundation/ ; }, mesh = {CRISPR-Cas Systems ; *Citrullus/genetics ; *Cucurbitaceae/genetics ; Gene Editing ; Plant Breeding ; Transformation, Genetic ; }, abstract = {Cucurbitaceae is an important family of flowering plants containing multiple species of important food plants, such as melons, cucumbers, squashes, and pumpkins. However, a highly efficient genetic transformation system has not been established for most of these species (Nanasato and Tabei, 2020). Watermelon (Citrullus lanatus), an economically important and globally cultivated fruit crop, is a model species for fruit quality research due to its rich diversity of fruit size, shape, flavor, aroma, texture, peel and flesh color, and nutritional composition (Guo et al., 2019). Through pan-genome sequencing, many candidate loci associated with fruit quality traits have been identified (Guo et al., 2019). However, few of these loci have been validated. The major barrier is the low transformation efficiency of the species, with only few successful cases of genetic transformation reported so far (Tian et al., 2017; Feng et al., 2021; Wang JF et al., 2021; Wang YP et al., 2021). For example, Tian et al. 
(2017) obtained only 16 transgenic lines from about 960 cotyledon fragments, yielding a transformation efficiency of 1.67%. Therefore, efficient genetic transformation could not only facilitate the functional genomic studies in watermelon as well as other horticultural species, but also speed up the transgenic and genome-editing breeding.}, } @article {pmid35401600, year = {2022}, author = {Sun, Y and Wang, J and Li, Y and Jiang, B and Wang, X and Xu, WH and Wang, YQ and Zhang, PT and Zhang, YJ and Kong, XD}, title = {Pan-Genome Analysis Reveals the Abundant Gene Presence/Absence Variations Among Different Varieties of Melon and Their Influence on Traits.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {835496}, pmid = {35401600}, issn = {1664-462X}, abstract = {Melon (Cucumis melo L.) is an important vegetable crop that has been subjected to domestication and improvement. Several varieties of melons with diverse phenotypes have been produced. In this study, we constructed a melon pan-genome based on 297 accessions comprising 168 Mb novel sequences and 4,325 novel genes. Based on the results, there were abundant genetic variations among different melon groups, including 364 unfavorable genes in the IMP_A vs. LDR_A group, 46 favorable genes, and 295 unfavorable genes in the IMP_M vs. LDR_M group. The distribution of 709 resistance gene analogs (RGAs) was also characterized across 297 melon lines, of which 603 were core genes. Further, 106 genes were found to be variable, 55 of which were absent in the reference melon genome. 
Using gene presence/absence variation (PAV)-based genome-wide association analysis (GWAS), 13 gene PAVs associated with fruit length, fruit shape, and fruit width were identified, four of which were located in pan-genome additional contigs.}, } @article {pmid35401459, year = {2022}, author = {Kaushik, A and Roberts, DP and Ramaprasad, A and Mfarrej, S and Nair, M and Lakshman, DK and Pain, A}, title = {Pangenome Analysis of the Soilborne Fungal Phytopathogen Rhizoctonia solani and Development of a Comprehensive Web Resource: RsolaniDB.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {839524}, pmid = {35401459}, issn = {1664-302X}, abstract = {Rhizoctonia solani is a collective group of genetically and pathologically diverse basidiomycetous fungi that damage economically important crops. Its isolates are classified into 13 Anastomosis Groups (AGs) and subgroups having distinctive morphology and host ranges. The genetic factors driving the unique features of R. solani pathology are not well characterized due to the limited availability of its annotated genomes. Therefore, we performed genome sequencing, assembly, annotation and functional analysis of 12 R. solani isolates covering 7 AGs and select subgroups (AG1-IA; AG1-IB; AG1-IC; AG2-2IIIB; AG3-PT, isolates Rhs 1AP and the hypovirulent Rhs1A1; AG3-TB; AG4-HG-I, isolates Rs23 and R118-11; AG5; AG6; and AG8), in which six genomes are reported for the first time. Using a pangenome comparative analysis of 12 R. solani isolates and 15 other Basidiomycetes, we defined the unique and shared secretomes, CAZymes, and effectors across the AGs. We have also elucidated the R. solani-derived factors potentially involved in determining AG-specific host preference, and the attributes distinguishing them from other Basidiomycetes. Finally, we present the largest repertoire of R. solani genomes and their annotated components as a comprehensive database, viz. 
RsolaniDB, with tools for large-scale data mining, functional enrichment and sequence analysis not available with other state-of-the-art platforms.}, } @article {pmid35396275, year = {2022}, author = {Zhang, F and Xue, H and Dong, X and Li, M and Zheng, X and Li, Z and Xu, J and Wang, W and Wei, C}, title = {Long-read sequencing of 111 rice genomes reveals significantly larger pan-genomes.}, journal = {Genome research}, volume = {32}, number = {5}, pages = {853-863}, pmid = {35396275}, issn = {1549-5469}, mesh = {Genome ; Genomics/methods ; High-Throughput Nucleotide Sequencing ; *Oryza/genetics ; Sequence Analysis, DNA ; }, abstract = {The concept of pan-genome, which is the collection of all genomes from a population, has shown a great potential in genomics study, especially for crop sciences. The rice pan-genome constructed from the second-generation sequencing (SGS) data is about 270 Mb larger than Nipponbare, the rice reference genome (NipRG), but it is still disadvantaged by incompleteness and loss of genomic contexts. The third-generation sequencing (TGS) with long reads can help to construct better pan-genomes. In this paper, we report a high-quality rice pan-genome construction method by introducing a series of new steps to deal with the long-read data, including unmapped sequence block filtering, redundancy removing, and sequence block elongating. Compared to NipRG, the long-read sequencing-based pan-genome constructed from 105 rice accessions, which contains 604 Mb novel sequences, is much more comprehensive than the one constructed from ∼3000 rice genomes sequenced with short reads. The repetitive sequences are the main components of novel sequences, which partially explain the differences between the pan-genomes based on TGS and SGS. Adding six wild rice accessions, there are about 879 Mb novel sequences and 19,000 novel genes in the rice pan-genome in total. 
In addition, we have created high-quality reference genomes for all representative rice populations, including five gapless reference genomes. This study has made significant progress in our understanding of the rice pan-genome, and this pan-genome construction method for long-read data can be applied to accelerate a broad range of genomics studies.}, } @article {pmid35395125, year = {2022}, author = {Kaashyap, M and Kaur, S and Ford, R and Edwards, D and Siddique, KHM and Varshney, RK and Mantri, N}, title = {Comprehensive transcriptomic analysis of two RIL parents with contrasting salt responsiveness identifies polyadenylated and non-polyadenylated flower lncRNAs in chickpea.}, journal = {Plant biotechnology journal}, volume = {20}, number = {7}, pages = {1402-1416}, pmid = {35395125}, issn = {1467-7652}, mesh = {*Cicer/genetics/metabolism ; Flowers/genetics/metabolism ; Gene Expression Profiling ; Gene Expression Regulation, Plant/genetics ; *RNA, Long Noncoding/genetics/metabolism ; Transcriptome/genetics ; }, abstract = {Salinity severely affects the yield of chickpea. Understanding the role of lncRNAs can shed light on chickpea salt tolerance mechanisms. However, because lncRNAs are encoded by multiple sites within the genome, their classification to reveal functional versatility at the transcriptional and the post-transcriptional levels is challenging. To address this, we deep sequenced 24 salt-challenged flower transcriptomes from two parental genotypes of a RIL population that significantly differ in salt tolerance ability. The transcriptomes for the first time included 12 polyadenylated and 12 non-polyadenylated RNA libraries to a sequencing depth of ~50 million reads. The ab initio transcriptome assembly comprised ~34 082 transcripts from three biological replicates of salt-tolerant (JG11) and salt-sensitive (ICCV2) flowers. A total of 9419 lncRNAs responding to salt stress were identified, 2345 of which were novel lncRNAs specific to chickpea. 
The expression of poly(A+) lncRNAs and naturally antisense transcribed RNAs suggest their role in post-transcriptional modification and gene silencing. Notably, 178 differentially expressed lncRNAs were induced in the tolerant genotype but repressed in the sensitive genotype. Co-expression network analysis revealed that the induced lncRNAs interacted with the FLOWERING LOCUS (FLC), chromatin remodelling and DNA methylation genes, thus inducing flowering during salt stress. Furthermore, 26 lncRNAs showed homology with reported lncRNAs such as COOLAIR, IPS1 and AT4, thus confirming the role of chickpea lncRNAs in controlling flowering time as a crucial salt tolerance mechanism in tolerant chickpea genotype. These robust set of differentially expressed lncRNAs provide a deeper insight into the regulatory mechanisms controlled by lncRNAs under salt stress.}, } @article {pmid35385921, year = {2022}, author = {Jung, H and Kim, HS and Han, G and Park, J and Seo, YS}, title = {Comparative Analyses of Four Complete Genomes in Pseudomonas amygdali Revealed Differential Adaptation to Hostile Environments and Secretion Systems.}, journal = {The plant pathology journal}, volume = {38}, number = {2}, pages = {167-174}, pmid = {35385921}, issn = {1598-2254}, support = {//National Research Foundation of Korea/ ; 2019R1A2C2006779//Ministry of Education/ ; NNIBR202202108//Nakdonggang National Institute of Biological Resources/ ; //Ministry of Environment/ ; }, abstract = {Pseudomonas amygdali is a hemibiotrophic phytopathogen that causes disease in woody and herbaceous plants. Complete genomes of four P. amygdali pathovars were comparatively analyzed to decipher the impact of genomic diversity on host colonization. The pan-genome indicated that 3,928 core genes are conserved among pathovars, while 504-1,009 are unique to specific pathovars. The unique genome contained many mobile elements and exhibited a functional distribution different from the core genome. 
Genes involved in O-antigen biosynthesis and antimicrobial peptide resistance were significantly enriched for adaptation to hostile environments. While the type III secretion system was distributed in the core genome, unique genomes revealed a different organization of secretion systems as follows: type I in pv. tabaci, type II in pv. japonicus, type IV in pv. morsprunorum, and type VI in pv. lachrymans. These findings provide genetic insight into the dynamic interactions of the bacteria with plant hosts.}, } @article {pmid35382730, year = {2022}, author = {Beier, S and Thomson, NR}, title = {Panakeia - a universal tool for bacterial pangenome analysis.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {265}, pmid = {35382730}, issn = {1471-2164}, mesh = {*Bacteria/genetics ; *Genome, Bacterial ; }, abstract = {BACKGROUND: Development of new pan-genome analysis tools is important, as the pangenome of a microbial species has become an important method to define the diversity of a selected taxon, most commonly a species, in the last years. This enables comparison of strains from different ecological niches and can be used to define the functional potential in a bacterial population. It gives us a much better view of microbial genomics than can be gained from singular genomes which after all are just single representatives of a much more varied population.

RESULTS: We present Panakeia, a tool which strives to be easy to use and providing a detailed view of the pangenome structure which can efficiently be utilised for discovery, or further in-depth analysis, of features of interest. It analyses synteny and multiple structural patterns of the pangenome, giving insights into the biological diversity and evolution of the studied taxon. Panakeia hence provides both broad and detailed information on the structure of a pangenome, for diverse and highly clonal populations of bacteria.

CONCLUSIONS: Previously published pangenome tools often reduce the information to a presence/absence matrix of unconnected genes or generate massive hard to interpret output graphs. However, Panakeia includes synteny and structural information and presents it in a way that can readily be used for further analysis. Panakeia can be downloaded at https://github.com/BioSina/Panakeia together with a detailed User Guide.}, } @article {pmid35380461, year = {2022}, author = {Sivertsen, A and Dyrhovden, R and Tellevik, MG and Bruvold, TS and Nybakken, E and Skutlaberg, DH and Skarstein, I and Kommedal, Ø}, title = {Escherichia marmotae—a Human Pathogen Easily Misidentified as Escherichia coli.}, journal = {Microbiology spectrum}, volume = {10}, number = {2}, pages = {e0203521}, pmid = {35380461}, issn = {2165-0497}, mesh = {*Anti-Infective Agents ; Escherichia ; Escherichia coli/genetics ; *Escherichia coli Infections/diagnosis/microbiology ; Humans ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Sepsis ; }, abstract = {We hereby present the first descriptions of human-invasive infections caused by Escherichia marmotae, a recently described species that encompasses the former "Escherichia cryptic clade V." We describe four cases, one acute sepsis of unknown origin, one postoperative sepsis after cholecystectomy, one spondylodiscitis, and one upper urinary tract infection. Cases were identified through unsystematic queries in a single clinical lab over 6 months. Through genome sequencing of the causative strains combined with available genomes from elsewhere, we demonstrate Es. marmotae to be a likely ubiquitous species containing genotypic virulence traits associated with Escherichia pathogenicity. The invasive isolates were scattered among isolates from a range of nonhuman sources in the phylogenetic analyses, thus indicating inherent virulence in multiple lineages. Pan genome analyses indicate that Es. 
marmotae has a large accessory genome and is likely to obtain ecologically advantageous traits, such as genes encoding antimicrobial resistance. Reliable identification might be possible by matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS), but relevant spectra are missing in commercial databases. It can be identified through 16S rRNA gene sequencing. Escherichia marmotae could represent a relatively common human pathogen, and improved diagnostics will provide a better understanding of its clinical importance. IMPORTANCE Escherichia coli is the most common pathogen found in blood cultures and urine and among the most important pathogenic species in the realm of human health. The notion that some of these isolates are not Es. coli but rather another species within the same genus may have implications for what Es. coli constitutes. We only recently have obtained methods to separate the two species, which means that possible differences in important clinical aspects, such as antimicrobial resistance rates, virulence, and phylogenetic structure, may exist. We believe that Es. marmotae as a common pathogen is new merely because we have not looked or bothered to distinguish between the thousands of invasive Escherichia passing through microbiological laboratories each day.}, } @article {pmid35371168, year = {2022}, author = {Zhang, Z and Guo, J and Cai, X and Li, Y and Xi, X and Lin, R and Liang, J and Wang, X and Wu, J}, title = {Improved Reference Genome Annotation of Brassica rapa by Pacific Biosciences RNA Sequencing.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {841618}, pmid = {35371168}, issn = {1664-462X}, abstract = {The species Brassica rapa includes several important vegetable crops. The draft reference genome of B. rapa ssp. pekinensis was completed in 2011, and it has since been updated twice. The pangenome with structural variations of 18 B. rapa accessions was published in 2021. 
Although extensive genomic analysis has been conducted on B. rapa, a comprehensive genome annotation including gene structure, alternative splicing (AS) events, and non-coding genes is still lacking. Therefore, we used the Pacific Biosciences (PacBio) single-molecular long-read technology to improve gene models and produced the annotated genome version 3.5. In total, we obtained 753,041 full-length non-chimeric (FLNC) reads and collapsed these into 92,810 non-redundant consensus isoforms, capturing 48% of the genes annotated in the B. rapa reference genome annotation v3.1. Based on the isoform data, we identified 830 novel protein-coding genes that were missed in previous genome annotations, defined the untranslated regions (UTRs) of 20,340 annotated genes and corrected 886 wrongly spliced genes. We also identified 28,564 AS events and 1,480 long non-coding RNAs (lncRNAs). We produced a relatively complete and high-quality reference transcriptome for B. rapa that can facilitate further functional genomic research.}, } @article {pmid35369469, year = {2022}, author = {Sanz, MB and De Belder, D and de Mendieta, JM and Faccone, D and Poklepovich, T and Lucero, C and Rapoport, M and Campos, J and Tuduri, E and Saavedra, MO and Van der Ploeg, C and Rogé, A and Pasteran, F and Corso, A and Rosato, AE and Gomez, SA}, title = {Carbapenemase-Producing Extraintestinal Pathogenic Escherichia coli From Argentina: Clonal Diversity and Predominance of Hyperepidemic Clones CC10 and CC131.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {830209}, pmid = {35369469}, issn = {1664-302X}, abstract = {Extraintestinal pathogenic Escherichia coli (ExPEC) causes infections outside the intestine. 
Particular ExPEC clones, such as clonal complex (CC)/sequence type (ST)131, have been known to sequentially accumulate antimicrobial resistance that starts with chromosomal mutations against fluoroquinolones, followed with the acquisition of bla CTX-M-15 and, more recently, carbapenemases. Here we aimed to investigate the distribution of global epidemic clones of carbapenemase-producing ExPEC from Argentina in representative clinical isolates recovered between July 2008 and March 2017. Carbapenemase-producing ExPEC (n = 160) were referred to the Argentinean reference laboratory. Of these, 71 were selected for genome sequencing. Phenotypic and microbiological studies confirmed the presence of carbapenemases confirmed as KPC-2 (n = 52), NDM-1 (n = 16), IMP-8 (n = 2), and VIM-1 (n = 1) producers. The isolates had been recovered mainly from urine, blood, and abdominal fluids among others, and some were from screening samples. After analyzing the virulence gene content, 76% of the isolates were considered ExPEC, although non-ExPEC isolates were also obtained from extraintestinal sites. Pan-genome phylogeny and clonal analysis showed great clonal diversity, although the first phylogroup in abundance was phylogroup A, harboring CC10 isolates, followed by phylogroup B2 with CC/ST131, mostly H30Rx, the subclone co-producing CTX-M-15. Phylogroups D, B1, C, F, and E were also detected with fewer strains. CC10 and CC/ST131 were found throughout the country. In addition, CC10 nucleated most metalloenzymes, such as NDM-1. Other relevant international clones were identified, such as CC/ST38, CC155, CC14/ST1193, and CC23. Two isolates co-produced KPC-2 and OXA-163 or OXA-439, a point mutation variant of OXA-163, and three isolates co-produced MCR-1 among other resistance genes. To conclude, in this work, we described the molecular epidemiology of carbapenemase-producing ExPEC in Argentina. 
Further studies are necessary to determine the plasmid families disseminating carbapenemases in ExPEC in this region.}, } @article {pmid35365914, year = {2022}, author = {Johnson, LA and Hug, LA}, title = {Cloacimonadota metabolisms include adaptations in engineered environments that are reflected in the evolutionary history of the phylum.}, journal = {Environmental microbiology reports}, volume = {14}, number = {4}, pages = {520-529}, doi = {10.1111/1758-2229.13061}, pmid = {35365914}, issn = {1758-2229}, mesh = {Bacteria/genetics ; Biological Evolution ; *Ecosystem ; *Metagenome ; Metagenomics ; Phylogeny ; }, abstract = {Phylum Cloacimonadota (previously Cloacimonetes, WWE1) is an understudied bacterial lineage frequently associated with engineered and wastewater systems. Cloacimonadota members were abundant and diverse in metagenomic datasets from a municipal landfill, prompting an examination of phylogenetic relationships, metabolic diversity, and pangenomic dynamics across the phylum, based on the 30 publicly available genomes and 24 new metagenome-assembled genomes (MAGs) from landfill samples. We found that Cloacimonadota have distinct evolutionary histories associated with engineered versus natural environments and identified genomic features and metabolic strategies that correlate to habitat of origin. Metabolic reconstructions for MAGs predict an anaerobic, acetogenic, and mixed fermentative and flavin-bifurcation-based anaerobic respiratory lifestyle for the majority of Cloacimonadota surveyed. Genomes from engineered ecosystems encode a suite of genes not typically found in genomes from natural environments including acetate kinase, genes for cysteine degradation to pyruvate, increased diversity of carbon utilization enzymes, and different mechanisms for generating membrane potential and ATP synthesis. 
This phylum-level examination also clarifies the distribution of functions previously observed for members of the phylum, where propionate oxidation and reverse TCA cycles are not common components of Cloacimonadota metabolism.}, } @article {pmid35357213, year = {2022}, author = {Shropshire, WC and Dinh, AQ and Earley, M and Komarow, L and Panesso, D and Rydell, K and Gómez-Villegas, SI and Miao, H and Hill, C and Chen, L and Patel, R and Fries, BC and Abbo, L and Cober, E and Revolinski, S and Luterbach, CL and Chambers, H and Fowler, VG and Bonomo, RA and Shelburne, SA and Kreiswirth, BN and van Duin, D and Hanson, BM and Arias, CA}, title = {Accessory Genomes Drive Independent Spread of Carbapenem-Resistant Klebsiella pneumoniae Clonal Groups 258 and 307 in Houston, TX.}, journal = {mBio}, volume = {13}, number = {2}, pages = {e0049722}, pmid = {35357213}, issn = {2150-7511}, support = {P01 AI152999/AI/NIAID NIH HHS/United States ; UM1 AI104681/AI/NIAID NIH HHS/United States ; R01 AI143910/AI/NIAID NIH HHS/United States ; K24 AI121296/AI/NIAID NIH HHS/United States ; K01 AI148593/AI/NIAID NIH HHS/United States ; }, mesh = {*Carbapenem-Resistant Enterobacteriaceae/genetics ; Carbapenems/pharmacology ; Humans ; *Klebsiella Infections/epidemiology ; Klebsiella pneumoniae ; Prospective Studies ; }, abstract = {Carbapenem-resistant Klebsiella pneumoniae (CRKp) is an urgent public health threat. Worldwide dissemination of CRKp has been largely attributed to clonal group (CG) 258. However, recent evidence indicates the global emergence of a CRKp CG307 lineage. Houston, TX, is the first large city in the United States with detected cocirculation of both CRKp CG307 and CG258. We sought to characterize the genomic and clinical factors contributing to the parallel endemic spread of CG258 and CG307. CRKp isolates were collected as part of the prospective, Consortium on Resistance against Carbapenems in Klebsiella and other Enterobacterales 2 (CRACKLE-2) study. 
Hybrid short-read and long-read genome assemblies were generated from 119 CRKp isolates (95 originated from Houston hospitals). A comprehensive characterization of phylogenies, gene transfer, and plasmid content with pan-genome analysis was performed on all CRKp isolates. Plasmid mating experiments were performed with CG307 and CG258 isolates of interest. Dissection of the accessory genomes suggested independent evolution and limited horizontal gene transfer between CG307 and CG258 lineages. CG307 contained a diverse repertoire of mobile genetic elements, which were shared with other non-CG258 K. pneumoniae isolates. Three unique clades of Houston CG307 isolates clustered distinctly from other global CG307 isolates, indicating potential selective adaptation of particular CG307 lineages to their respective geographical niches. CG307 strains were often isolated from the urine of hospitalized patients, likely serving as important reservoirs for genes encoding carbapenemases and extended-spectrum β-lactamases. Our findings suggest parallel cocirculation of high-risk lineages with potentially divergent evolution. IMPORTANCE The prevalence of carbapenem-resistant Klebsiella pneumoniae (CRKp) infections in nosocomial settings remains a public health challenge. High-risk clones such as clonal group 258 (CG258) are particularly concerning due to their association with blaKPC carriage, which can severely complicate antimicrobial treatments. There is a recent emergence of clonal group 307 (CG307) worldwide with little understanding of how this successful clone has been able to adapt while cocirculating with CG258. We provide the first evidence of potentially divergent evolution between CG258 and CG307 with limited sharing of adaptive genes. Houston, TX, is home to the largest medical center in the world, with a large influx of domestic and international patients. 
Thus, our unique geographical setting, where two pandemic strains of CRKp are circulating, provides an indication of how differential accessory genome content can drive stable, endemic populations of CRKp. Pan-genomic analyses such as these can reveal unique signatures of successful CRKp dissemination, such as the CG307-associated plasmid (pCG307_HTX), and provide invaluable insights into the surveillance of local carbapenem-resistant Enterobacterales (CRE) epidemiology.}, } @article {pmid35352958, year = {2022}, author = {Gan, L and Yan, C and Cui, J and Xue, G and Fu, H and Du, B and Zhao, H and Feng, J and Feng, Y and Fan, Z and Mao, P and Fu, T and Xu, Z and Du, S and Liu, S and Zhang, R and Zhang, Q and Li, N and Cui, X and Li, X and Zhou, Y and Huang, L and Yuan, J}, title = {Genetic Diversity and Pathogenic Features in Klebsiella pneumoniae Isolates from Patients with Pyogenic Liver Abscess and Pneumonia.}, journal = {Microbiology spectrum}, volume = {10}, number = {2}, pages = {e0264621}, pmid = {35352958}, issn = {2165-0497}, mesh = {Animals ; *Community-Acquired Infections ; *Cross Infection ; Genetic Variation ; Humans ; *Klebsiella Infections/epidemiology/microbiology ; Klebsiella pneumoniae/genetics ; *Liver Abscess, Pyogenic/epidemiology/microbiology ; Mice ; *Pneumonia ; Virulence Factors/genetics ; }, abstract = {While Klebsiella pneumoniae is a common cause of nosocomial and community-acquired infections, including pneumonia and pyogenic liver abscess, little is known about the population structure of this bacterium. In this study, we investigated the prevalence and molecular characteristics of K. pneumoniae isolates from carriers, pyogenic liver abscess patients, and pneumonia patients, and genomic and phenotypic assays were used to determine the differences among the isolates. A total of 232 K. pneumoniae isolates were subtyped into 74 sequence types (STs). 
The isolates from different sources had their own STs, and the predominant subtypes in liver abscess and pneumonia patients were ST23 and ST11, respectively. Pangenome analysis also distinguished three phylogroups that were consistent with the isolate sources. The isolates collected from liver abscess patients carried significantly more virulence factors, and those from pneumonia patients harbored significantly more resistance genes and replicons. Almost all isolate STs (93/97 [95.88%]) from liver abscesses strongly correlated with the virulence factor salmochelin, while most pneumonia isolate STs (52/53 [98.11%]) from pneumonia did not correlate with salmochelin. The isolates collected from liver abscesses showed higher virulence in the cytotoxicity and mouse models. These data provide genomic support for the proposal that isolates collected from carriers, liver abscess patients, and pneumonia patients have distinct genomic features. Isolates from the different sources are largely nonoverlapping, suggesting that different patients may be infected via different sources. Further studies on the pathogenic mechanisms of salmochelin and other virulence factors will be required. IMPORTANCE While Klebsiella pneumoniae is a common cause of nosocomial and community-acquired infections, including pneumonia and pyogenic liver abscess, little is known about the population structure of this bacterium. We collected 232 isolates from carriers, pyogenic liver abscess patients, and pneumonia patients, and the isolates from different sources had their own sequence types. Pangenome analysis also distinguished three phylogroups that were consistent with the isolate sources. The isolates collected from liver abscess patients carried significantly more virulence factors, and those from pneumonia patients harbored significantly more resistance genes and replicons. Besides, there was a strong link between salmochelin and liver abscess. 
The isolates collected from liver abscesses also showed higher virulence in the cytotoxicity and mouse models. Isolates collected from different sources have distinct genomic features, suggesting that different patients may be infected via different sources.}, } @article {pmid35338232, year = {2022}, author = {Coll, F and Gouliouris, T and Bruchmann, S and Phelan, J and Raven, KE and Clark, TG and Parkhill, J and Peacock, SJ}, title = {PowerBacGWAS: a computational pipeline to perform power calculations for bacterial genome-wide association studies.}, journal = {Communications biology}, volume = {5}, number = {1}, pages = {266}, pmid = {35338232}, issn = {2399-3642}, support = {201344/Z/16/Z//Wellcome Trust (Wellcome)/ ; MR/M01360X/1/MRC_/Medical Research Council/United Kingdom ; WT098600/WT_/Wellcome Trust/United Kingdom ; BB/R013063/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; //Department of Health (UK)/ ; /WT_/Wellcome Trust/United Kingdom ; MR/N010469/1/MRC_/Medical Research Council/United Kingdom ; MR/R025576/1/MRC_/Medical Research Council/United Kingdom ; MR/V032836/1/MRC_/Medical Research Council/United Kingdom ; 201344/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; MR/R020973/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Computer Simulation ; *Genome, Bacterial ; *Genome-Wide Association Study ; Phenotype ; Sample Size ; }, abstract = {Genome-wide association studies (GWAS) are increasingly being applied to investigate the genetic basis of bacterial traits. However, approaches to perform power calculations for bacterial GWAS are limited. Here we implemented two alternative approaches to conduct power calculations using existing collections of bacterial genomes. First, a sub-sampling approach was undertaken to reduce the allele frequency and effect size of a known and detectable genotype-phenotype relationship by modifying phenotype labels. 
Second, a phenotype-simulation approach was conducted to simulate phenotypes from existing genetic variants. We implemented both approaches into a computational pipeline (PowerBacGWAS) that supports power calculations for burden testing, pan-genome and variant GWAS; and applied it to collections of Enterococcus faecium, Klebsiella pneumoniae and Mycobacterium tuberculosis. We used this pipeline to determine sample sizes required to detect causal variants of different minor allele frequencies (MAF), effect sizes and phenotype heritability, and studied the effect of homoplasy and population diversity on the power to detect causal variants. Our pipeline and user documentation are made available and can be applied to other bacterial populations. PowerBacGWAS can be used to determine sample sizes required to find statistically significant associations, or the associations detectable with a given sample size. We recommend to perform power calculations using existing genomes of the bacterial species and population of study.}, } @article {pmid35337569, year = {2022}, author = {Kim, E and Kim, D and Yang, SM and Kim, HY}, title = {Validation of probiotic species or subspecies identity in commercial probiotic products using high-resolution PCR method based on large-scale genomic analysis.}, journal = {Food research international (Ottawa, Ont.)}, volume = {154}, number = {}, pages = {111011}, doi = {10.1016/j.foodres.2022.111011}, pmid = {35337569}, issn = {1873-7145}, mesh = {Bacteria ; Genomics ; *Probiotics/analysis ; Real-Time Polymerase Chain Reaction ; Species Specificity ; }, abstract = {The health-promoting effects of probiotics are species-specific, and hence it is important to declare the correct information in products. However, some studies have identified issues related to the accuracy of labeling commercial probiotic products. 
In this study, we developed a high-resolution real-time PCR method based on pangenome analysis for a more affordable, rapid, and accurate identification of commercial probiotic products than sequencing methods. We selected 25 species or subspecies primarily used for probiotic strains and are closely related to them as targets. To extract molecular markers, 354 whole-genome sequences present in the target genomes but not in the pangenome of other genomes were compared, which resulted in the identification of molecular marker genes. The marker genes exhibited 100% specificity for 100 strains as assessed by the real-time PCR method. Fifty probiotic and dairy products were investigated to verify the information claimed on the label. Real-time PCR results showed that most products reflected the bacterial species declared in the label claim, whereas 12 products showed the presence of undeclared species or missing species. Our method for accurately verifying the labeling of probiotic products would be useful for quality control and safety.}, } @article {pmid35336062, year = {2022}, author = {Rivera-Ramírez, A and Salgado-Morales, R and Jiménez-Pérez, A and Pérez-Martínez, R and García-Gómez, BI and Dantán-González, E}, title = {Comparative Genomics and Pathogenicity Analysis of Two Bacterial Symbionts of Entomopathogenic Nematodes: The Role of the GroEL Protein in Virulence.}, journal = {Microorganisms}, volume = {10}, number = {3}, pages = {}, pmid = {35336062}, issn = {2076-2607}, support = {A1-S-22034//Consejo Nacional de Ciencia y Tecnología/ ; }, abstract = {Bacteria of the genera Xenorhabdus and Photorhabdus are symbionts of entomopathogenic nematodes. Despite their close phylogenetic relationship, they show differences in their pathogenicity and virulence mechanisms in target insects. These differences were explored by the analysis of the pangenome, as it provides a framework for characterizing and defining the gene repertoire. 
We performed the first pangenome analysis of 91 strains of Xenorhabdus and Photorhabdus; the analysis showed that the Photorhabdus genus has a higher number of genes associated with pathogenicity. However, biological tests showed that whole cells of X. nematophila SC 0516 were more virulent than those of P. luminescens HIM3 when both were injected into G. mellonella larvae. In addition, we cloned and expressed the GroEL proteins of both bacteria, as this protein has been previously indicated to show insecticidal activity in the genus Xenorhabdus. Among these proteins, Cpn60-Xn was found to be the most toxic at all concentrations tested, with an LC50 value of 102.34 ng/larva. Sequence analysis suggested that the Cpn60-Xn toxin was homologous to Cpn60-Pl; however, Cpn60-Xn contained thirty-five differentially substituted amino acid residues that could be responsible for its insecticidal activity.}, } @article {pmid35335701, year = {2022}, author = {Moreno, E and Blasco, JM and Letesson, JJ and Gorvel, JP and Moriyón, I}, title = {Pathogenicity and Its Implications in Taxonomy: The Brucella and Ochrobactrum Case.}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {3}, pages = {}, pmid = {35335701}, issn = {2076-0817}, abstract = {The intracellular pathogens of the genus Brucella are phylogenetically close to Ochrobactrum, a diverse group of free-living bacteria with a few species occasionally infecting medically compromised patients. A group of taxonomists recently included all Ochrobactrum organisms in the genus Brucella based on global genome analyses and alleged equivalences with genera such as Mycobacterium. Here, we demonstrate that such equivalencies are incorrect because they overlook the complexities of pathogenicity. 
By summarizing Brucella and Ochrobactrum divergences in lifestyle, structure, physiology, population, closed versus open pangenomes, genomic traits, and pathogenicity, we show that when they are adequately understood, they are highly relevant in taxonomy and not unidimensional quantitative characters. Thus, the Ochrobactrum and Brucella differences are not limited to their assignments to different "risk-groups", a biologically (and hence, taxonomically) oversimplified description that, moreover, does not support ignoring the nomen periculosum rule, as proposed. Since the epidemiology, prophylaxis, diagnosis, and treatment are thoroughly unrelated, merging free-living Ochrobactrum organisms with highly pathogenic Brucella organisms brings evident risks for veterinarians, medical doctors, and public health authorities who confront brucellosis, a significant zoonosis worldwide. Therefore, from taxonomical and practical standpoints, the Brucella and Ochrobactrum genera must be maintained apart. Consequently, we urge researchers, culture collections, and databases to keep their canonical nomenclature.}, } @article {pmid35333302, year = {2022}, author = {Qi, W and Lim, YW and Patrignani, A and Schläpfer, P and Bratus-Neuenschwander, A and Grüter, S and Chanez, C and Rodde, N and Prat, E and Vautrin, S and Fustier, MA and Pratas, D and Schlapbach, R and Gruissem, W}, title = {The haplotype-resolved chromosome pairs of a heterozygous diploid African cassava cultivar reveal novel pan-genome and allele-specific transcriptome features.}, journal = {GigaScience}, volume = {11}, number = {}, pages = {}, pmid = {35333302}, issn = {2047-217X}, mesh = {Alleles ; Chromosomes ; Diploidy ; Haplotypes ; *Manihot/genetics ; Plant Breeding ; Sequence Analysis, DNA ; Transcriptome ; }, abstract = {BACKGROUND: Cassava (Manihot esculenta) is an important clonally propagated food crop in tropical and subtropical regions worldwide. 
Genetic gain by molecular breeding has been limited, partially because cassava is a highly heterozygous crop with a repetitive and difficult-to-assemble genome.

FINDINGS: Here we demonstrate that Pacific Biosciences high-fidelity (HiFi) sequencing reads, in combination with the assembler hifiasm, produced genome assemblies at near complete haplotype resolution with higher continuity and accuracy compared to conventional long sequencing reads. We present 2 chromosome-scale haploid genomes phased with Hi-C technology for the diploid African cassava variety TME204. With consensus accuracy >QV46, contig N50 >18 Mb, BUSCO completeness of 99%, and 35k phased gene loci, it is the most accurate, continuous, complete, and haplotype-resolved cassava genome assembly so far. Ab initio gene prediction with RNA-seq data and Iso-Seq transcripts identified abundant novel gene loci, with enriched functionality related to chromatin organization, meristem development, and cell responses. During tissue development, differentially expressed transcripts of different haplotype origins were enriched for different functionality. In each tissue, 20-30% of transcripts showed allele-specific expression (ASE) differences. ASE bias was often tissue specific and inconsistent across different tissues. Direction-shifting was observed in <2% of the ASE transcripts. Despite high gene synteny, the HiFi genome assembly revealed extensive chromosome rearrangements and abundant intra-genomic and inter-genomic divergent sequences, with large structural variations mostly related to LTR retrotransposons. We use the reference-quality assemblies to build a cassava pan-genome and demonstrate its importance in representing the genetic diversity of cassava for downstream reference-guided omics analysis and breeding.

CONCLUSIONS: The phased and annotated chromosome pairs allow a systematic view of the heterozygous diploid genome organization in cassava with improved accuracy, completeness, and haplotype resolution. They will be a valuable resource for cassava breeding and research. Our study may also provide insights into developing cost-effective and efficient strategies for resolving complex genomes with high resolution, accuracy, and continuity.}, } @article {pmid35332834, year = {2022}, author = {Chen, Y and Ji, S and Sun, L and Wang, H and Zhu, F and Chen, M and Zhuang, H and Wang, Z and Jiang, S and Yu, Y and Chen, Y}, title = {The novel fosfomycin resistance gene fosY is present on a genomic island in CC1 methicillin-resistant Staphylococcus aureus.}, journal = {Emerging microbes \& infections}, volume = {11}, number = {1}, pages = {1166--1173}, pmid = {35332834}, issn = {2222-1751}, mesh = {Anti-Bacterial Agents/pharmacology/therapeutic use ; *Fosfomycin/pharmacology ; Genomic Islands ; Humans ; *Methicillin-Resistant Staphylococcus aureus ; Microbial Sensitivity Tests ; Phylogeny ; *Staphylococcal Infections/epidemiology ; Staphylococcus aureus ; }, abstract = {Fosfomycin has gained attention as a combination therapy for methicillin-resistant Staphylococcus aureus infections. Hence, the detection of novel fosfomycin-resistance mechanisms in S. aureus is important. Here, the minimal inhibitory concentrations (MICs) of fosfomycin in CC1 methicillin-resistant S. aureus were determined. The pangenome analysis and comparative genomics were used to analyse CC1 MRSA. The gene function was confirmed by cloning the gene into pTXΔ. A phylogenetic tree was constructed to determine the clustering of the CC1 strains of S. aureus. We identified a novel gene, designated fosY, that confers fosfomycin resistance in S. aureus. The FosY protein is a putative bacillithiol transferase enzyme sharing 65.9-77.5% amino acid identity with FosB and FosD, respectively. 
The function of fosY in decreasing fosfomycin susceptibility was confirmed by cloning it into pTXΔ. The pTX-fosY transformant exhibited a 16-fold increase in fosfomycin MIC. The bioinformatic analysis showed that fosY is in a novel genomic island designated RIfosY (for "resistance island carrying fosY") that originated from other species. The global phylogenetic tree of ST1 MRSA displayed this fosY-positive ST1 clone, originating from different regions, in the same clade. The novel resistance gene in the fos family, fosY, and a genomic island, RIfosY, can promote cross-species gene transfer and confer resistance to CC1 MRSA causing the failure of clinical treatment. This emphasises the importance of genetic surveillance of resistance genes among MRSA isolates.}, } @article {pmid35332638, year = {2022}, author = {da Costa, AR and Chideroli, RT and Lanes, GC and Ferrari, NA and Chicoski, LM and Batista, CE and Pandolfi, VCF and Ware, C and Griffin, MJ and Dos Santos, AR and de Carvalho Azevedo, VA and da Costa, MM and de Pádua Pereira, U}, title = {Multiplex PCR assay for correct identification of the fish pathogenic species of Edwardsiella genus reveals the presence of E. anguillarum in South America in strains previously characterized as E. 
tarda.}, journal = {Journal of applied microbiology}, volume = {132}, number = {6}, pages = {4225--4235}, doi = {10.1111/jam.15538}, pmid = {35332638}, issn = {1365-2672}, support = {130477/2020-6//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; 306857/2021-9//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; //National Council for Scientific and Technological Development/ ; }, mesh = {Animals ; Brazil ; *Edwardsiella/genetics ; Edwardsiella tarda/genetics ; *Enterobacteriaceae Infections/microbiology/veterinary ; *Fish Diseases/diagnosis/microbiology ; Fishes/microbiology ; Multiplex Polymerase Chain Reaction/methods ; }, abstract = {AIMS: Develop a species-specific multiplex PCR to correctly identify Edwardsiella species in routine diagnostic for fish bacterial diseases.

METHODS AND RESULTS: The genomes of 62 Edwardsiella spp. isolates available from the National Center for Biotechnology Information (NCBI) database were subjected to taxonomic and pan-genomic analyses to identify unique regions that could be exploited by species-specific PCR. The designed primers were tested against isolated Edwardsiella spp. strains, revealing errors in commercial biochemical tests for bacterial classification regarding Edwardsiella species.

CONCLUSION: Some of the genomes of Edwardsiella spp. in the NCBI platform were incorrectly classified, which can lead to errors in some research. A functional mPCR was developed to differentiate between phenotypically and genetically ambiguous Edwardsiella, with which, we detected the presence of Edwardsiella anguillarum affecting fish in Brazil.

This study shows that the misclassification of Edwardsiella spp in Brazil concealed the presence of E. anguillarum in South America. Also, this review of the taxonomic classification of the Edwardsiella genus is a contribution to the field to help researchers with their sequencing and identification of genomes, showing some misclassifications in online databases that must be corrected, as well as developing an easy assay to characterize Edwardsiella species in an end-point mPCR.}, } @article {pmid35332354, year = {2022}, author = {Shang, Y and Ye, Q and Wu, Q and Xiang, X and Zha, F and Du, M and Zhang, J}, title = {Novel multiplex PCR assays for rapid identification of Salmonella serogroups B, C1, C2, D, E, S. enteritidis, and S. typhimurium.}, journal = {Analytical methods : advancing methods and applications}, volume = {14}, number = {14}, pages = {1445--1453}, doi = {10.1039/d1ay02163j}, pmid = {35332354}, issn = {1759-9679}, mesh = {*Multiplex Polymerase Chain Reaction ; *Salmonella enteritidis/genetics ; Serogroup ; }, abstract = {Foodborne illnesses caused by Salmonella represent a significant public health problem worldwide. The aim of this study was to establish multiplex PCR (mPCR) for the rapid identification of Salmonella serogroups B, C1, C2, D, and E as well as for the serovars enteritidis and typhimurium. Employing pan-genome analysis and PCR verification, B-rfbJ, C1-9679, C2-pimB, D-rfbJ, E-rfbC, and four genes (SE18636, SE16574, SE2599, and SE13329) were identified as specific target genes for Salmonella serogroups B, C1, C2, D, E, and S. enteritidis, respectively. Thereafter, three novel mPCR assays (one of 3-mPCR and two of 2-mPCR) were successfully developed to identify these bacteria based on the target genes and another S. typhimurium-specific STM4495 gene. 
The primers targeting C1-9679, C2-pimB, and E-rfbC genes specific to the serogroups C1, C2, and E, respectively, constituted a 3-mPCR, while the other two 2-mPCRs, respectively, consisting primers specific to serogroup D and S. enteritidis (D-rfbJ and SE16574), and serogroup B and S. typhimurium-specific primers (B-rfbJ and STM4495), were also designed. The specificity of each mPCR was further evaluated by using non-target strains. The detection limits of mPCRs were approximately $10^{3}$--$10^{4}$ CFU mL$^{-1}$ in pure culture and $10^{4}$--$10^{5}$ CFU g$^{-1}$ in spiked chicken meat. In addition, mPCR assays could correctly detect target Salmonella in food samples. These results suggest that specific targets could be mined efficiently through a pan-genome analysis tool, and the novel mPCR assays developed in this study offer a promising technique for rapid and accurate detection of five serogroups of Salmonella (B, C1, C2, D, and E) and two serovars (S. enteritidis and S. typhimurium).}, } @article {pmid35330288, year = {2022}, author = {Liu, W and Yu, SH and Zhang, HP and Fu, ZY and An, JQ and Zhang, JY and Yang, P}, title = {Two Cladosporium Fungi with Opposite Functions to the Chinese White Wax Scale Insect Have Different Genome Characters.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {3}, pages = {}, pmid = {35330288}, issn = {2309-608X}, abstract = {Insects encounter infection of microorganisms, and they also harbor endosymbiosis to participate in nutrition providing and act as a defender against pathogens. We previously found the Chinese white wax scale insect, Ericerus pela, was infected and killed by Cladosporium sp. (pathogen). We also found it harbored Cladosporium sp. (endogensis). In this study, we cultured these two Cladosporium fungi and sequenced their genome. The results showed Cladosporium sp. (endogensis) has a larger genome size and more genes than Cladosporium sp. (pathogen). Pan-genome analysis showed Cladosporium sp. 
(endogensis)-specific genes enriched in pathways related to nutrition production, such as amino acid metabolism, carbohydrate metabolism, and energy metabolism. These pathways were absent in that of Cladosporium sp. (pathogen). Gene Ontology analysis showed Cladosporium sp. (pathogen)-specific genes enriched in the biosynthesis of asperfuranone, emericellamide, and fumagillin. These terms were not found in that of Cladosporium sp. (endogensis). Pathogen Host Interactions analysis found Cladosporium sp. (endogensis) had more genes related to loss of pathogenicity and reduced virulence than Cladosporium sp. (pathogen). Cytotoxicity assay indicated Cladosporium sp. (pathogen) had cytotoxicity, while Cladosporium sp. (endogensis) had no cytotoxicity. These characters reflect the adaptation of endosymbiosis to host-restricted lifestyle and the invader of the entomopathogen to the host.}, } @article {pmid35327997, year = {2022}, author = {Tenea, GN}, title = {Decoding the Gene Variants of Two Native Probiotic Lactiplantibacillus plantarum Strains through Whole-Genome Resequencing: Insights into Bacterial Adaptability to Stressors and Antimicrobial Strength.}, journal = {Genes}, volume = {13}, number = {3}, pages = {}, pmid = {35327997}, issn = {2073-4425}, mesh = {Anti-Bacterial Agents/pharmacology ; *Anti-Infective Agents/metabolism ; Bacteria ; *Bacteriocins/genetics ; Lactobacillaceae ; *Lactobacillus plantarum/genetics/metabolism ; *Probiotics ; }, abstract = {In this study, whole-genome resequencing of two native probiotic Lactiplantibacillus plantarum strains-UTNGt21A and UTNGt2-was assessed in order to identify variants and perform annotation of genes involved in bacterial adaptability to different stressors, as well as their antimicrobial strength. A total of 21,906 single-nucleotide polymorphisms (SNPs) were detected in UTNGt21A, while 17,610 were disclosed in the UTNGt2 genome. 
The comparative genomic analysis revealed a greater number of deletions, transversions, and transitions within the UTNGt21A genome, while a small difference in the number of insertions was detected between the strains. A divergent number of types of variant annotations were detected in both strains, and categorized in terms of low, moderate, and high modifier impact on the protein effectiveness. Although both native strains shared common specific genes involved in the stress response to the gastrointestinal environment, which may qualify as a putative probiotic (bile salt, acid, temperature, osmotic stress), they were different in their antimicrobial gene cluster organization, with UTNGt21A displaying a complex bacteriocin gene arrangement and dissimilar gene variants that might alter their defense mechanisms and overall inhibitory capacity. The genome comparison revealed 34 and 9 genomic islands (GIs) in the UTNGt21A and UTNGt2 genomes, respectively, with the overrepresentation of genes involved in defense mechanisms and carbohydrate utilization. In addition, pan-genome analysis disclosed the presence of various strain-specific genes (shell genes), suggesting a high genome variation between strains. This genome analysis illustrates that the bacteriocin signature and gene variants reflect a niche-inherent pattern. 
These extensive genomic datasets will guide us to understand the potential benefits of the native strains and their utility in the food or pharmaceutical sectors.}, } @article {pmid35327964, year = {2022}, author = {Pudova, DS and Toymentseva, AA and Gogoleva, NE and Shagimardanova, EI and Mardanova, AM and Sharipova, MR}, title = {Comparative Genome Analysis of Two Bacillus pumilus Strains Producing High Level of Extracellular Hydrolases.}, journal = {Genes}, volume = {13}, number = {3}, pages = {}, pmid = {35327964}, issn = {2073-4425}, mesh = {*Bacillus pumilus/genetics ; Nucleotides ; Peptide Hydrolases ; Prophages/genetics ; Streptomycin ; }, abstract = {Whole-genome sequencing of a soil isolate Bacillus pumilus, strain 7P, and its streptomycin-resistant derivative, B. pumilus 3-19, showed genome sizes of 3,609,117 bp and 3,609,444 bp, respectively. Annotation of the genome showed 3794 CDS (3204 with predicted function) and 3746 CDS (3173 with predicted function) in the genome of strains 7P and 3-19, respectively. In the genomes of both strains, the prophage regions Bp1 and Bp2 were identified. These include 52 ORF of prophage proteins in the Bp1 region and 38 prophages ORF in the Bp2 region. Interestingly, more than 50% of Bp1 prophage proteins are similar to the proteins of the phi105 in B. subtilis. The DNA region of Bp2 has 15% similarity to the DNA of the Brevibacillus Jimmer phage. Degradome analysis of the genome of both strains revealed 148 proteases of various classes. These include 60 serine proteases, 48 metalloproteases, 26 cysteine proteases, 4 aspartate proteases, 2 asparagine proteases, 3 threonine proteases, and 2 unclassified proteases. Likewise, three inhibitors of proteolytic enzymes were found. Comparative analysis of variants in the genomes of strains 7P and 3-19 showed the presence of 81 nucleotide variants in the genome 3-19. 
Among them, the missense mutations in the rpsL, comA, spo0F genes and in the upstream region of the srlR gene were revealed. These nucleotide polymorphisms may have affected the streptomycin resistance and overproduction of extracellular hydrolases of the 3-19 strain. Finally, a plasmid DNA was found in strain 7P, which is lost in its derivative, strain 3-19. This plasmid contains five coding DNA sequencing (CDS), two regulatory proteins and three hypothetical proteins.}, } @article {pmid35325704, year = {2022}, author = {Kim, E and Kim, D and Yang, SM and Kim, HY}, title = {Multiplex SYBR Green real-time PCR for Lactobacillus acidophilus group species targeting biomarker genes revealed by a pangenome approach.}, journal = {Microbiological research}, volume = {259}, number = {}, pages = {127013}, doi = {10.1016/j.micres.2022.127013}, pmid = {35325704}, issn = {1618-0623}, mesh = {Benzothiazoles ; Biomarkers ; Diamines ; *Lactobacillus/genetics ; Lactobacillus acidophilus/genetics ; *Probiotics ; Quinolines ; RNA, Ribosomal, 16S/genetics ; Real-Time Polymerase Chain Reaction ; }, abstract = {The Lactobacillus acidophilus group consists of seven closely related species. Among these, Lb. acidophilus, Lb. gallinarum, and Lb. helveticus help maintain gut health and are used as a starter for fermented food. However, these species are difficult to differentiate using conventional methods due to the high similarity between the 16S rRNA and housekeeping genes. Thus, in this study, we selected biomarker genes to identify and discriminate the three species via pangenome analysis, and a multiplex SYBR Green real-time PCR that can be detected simultaneously in a single tube was developed. Pangenome analysis revealed three specific target genes: mucus-binding protein precursor to detect Lb. acidophilus, an amino acid ABC superfamily ATP binding cassette transporter carrier protein to detect Lb. gallinarum, and selenocysteine lyase to detect Lb. helveticus. 
The specificity was robustly verified using 26 Lb. acidophilus group strains and 62 other strains. The detection limits were $10^{1}$ colony-forming units (CFU)/ml in pure culture. As per our findings, the developed method satisfactorily monitored Lb. acidophilus group species in probiotic and dairy products. This result suggests that real-time PCR based on specific targets provides a promising approach for the rapid, accurate, and sensitive identification of these three species.}, } @article {pmid35325213, year = {2022}, author = {Li, M and Sun, C and Xu, N and Bian, P and Tian, X and Wang, X and Wang, Y and Jia, X and Heller, R and Wang, M and Wang, F and Dai, X and Luo, R and Guo, Y and Wang, X and Yang, P and Hu, D and Liu, Z and Fu, W and Zhang, S and Li, X and Wen, C and Lan, F and Siddiki, AZ and Suwannapoom, C and Zhao, X and Nie, Q and Hu, X and Jiang, Y and Yang, N}, title = {De Novo Assembly of 20 Chicken Genomes Reveals the Undetectable Phenomenon for Thousands of Core Genes on Microchromosomes and Subtelomeric Regions.}, journal = {Molecular biology and evolution}, volume = {39}, number = {4}, pages = {}, pmid = {35325213}, issn = {1537-1719}, mesh = {Animals ; *Chickens/genetics ; *Genome ; Genomics ; Mammals/genetics ; Sequence Analysis, DNA ; }, abstract = {The gene numbers and evolutionary rates of birds were assumed to be much lower than those of mammals, which is in sharp contrast to the huge species number and morphological diversity of birds. It is, therefore, necessary to construct a complete avian genome and analyze its evolution. We constructed a chicken pan-genome from 20 de novo assembled genomes with high sequencing depth, and identified 1,335 protein-coding genes and 3,011 long noncoding RNAs not found in GRCg6a. The majority of these novel genes were detected across most individuals of the examined transcriptomes but were seldomly measured in each of the DNA sequencing data regardless of Illumina or PacBio technology. 
Furthermore, different from previous pan-genome models, most of these novel genes were overrepresented on chromosomal subtelomeric regions and microchromosomes, surrounded by extremely high proportions of tandem repeats, which strongly blocks DNA sequencing. These hidden genes were proved to be shared by all chicken genomes, included many housekeeping genes, and enriched in immune pathways. Comparative genomics revealed the novel genes had 3-fold elevated substitution rates than known ones, updating the knowledge about evolutionary rates in birds. Our study provides a framework for constructing a better chicken genome, which will contribute toward the understanding of avian evolution and the improvement of poultry breeding.}, } @article {pmid35323968, year = {2022}, author = {Khedkar, S and Smyshlyaev, G and Letunic, I and Maistrenko, OM and Coelho, LP and Orakov, A and Forslund, SK and Hildebrand, F and Luetge, M and Schmidt, TSB and Barabas, O and Bork, P}, title = {Landscape of mobile genetic elements and their antibiotic resistance cargo in prokaryotic genomes.}, journal = {Nucleic acids research}, volume = {50}, number = {6}, pages = {3155--3168}, pmid = {35323968}, issn = {1362-4962}, support = {BBS/E/F/000PR10353/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bacteria/genetics ; *Bacteriophages/genetics ; DNA Transposable Elements/genetics ; Drug Resistance, Microbial/genetics ; Gene Transfer, Horizontal ; Phylogeny ; Recombinases/genetics ; }, abstract = {Prokaryotic Mobile Genetic Elements (MGEs) such as transposons, integrons, phages and plasmids, play important roles in prokaryotic evolution and in the dispersal of cargo functions like antibiotic resistance. However, each of these MGE types is usually annotated and analysed individually, hampering a global understanding of phylogenetic and environmental patterns of MGE dispersal. 
We thus developed a computational framework that captures diverse MGE types, their cargos and MGE-mediated horizontal transfer events, using recombinases as ubiquitous MGE marker genes and pangenome information for MGE boundary estimation. Applied to ∼84k genomes with habitat annotation, we mapped 2.8 million MGE-specific recombinases to six operational MGE types, which together contain on average 13% of all the genes in a genome. Transposable elements (TEs) dominated across all taxa (∼1.7 million occurrences), outnumbering phages and phage-like elements (<0.4 million). We recorded numerous MGE-mediated horizontal transfer events across diverse phyla and habitats involving all MGE types, disentangled and quantified the extent of hitchhiking of TEs (17%) and integrons (63%) with other MGE categories, and established TEs as dominant carriers of antibiotic resistance genes. We integrated all these findings into a resource (proMGE.embl.de), which should facilitate future studies on the large mobile part of genomes and its horizontal dispersal.}, } @article {pmid35319275, year = {2022}, author = {Liu, J and Xu, Z and Li, H and Chen, F and Han, K and Hu, X and Fang, Y and Chen, D}, title = {Metagenomic Approaches Reveal Strain Profiling and Genotyping of Klebsiella pneumoniae from Hospitalized Patients in China.}, journal = {Microbiology spectrum}, volume = {10}, number = {2}, pages = {e0219021}, pmid = {35319275}, issn = {2165-0497}, mesh = {Anti-Bacterial Agents/pharmacology/therapeutic use ; Bacterial Proteins/genetics ; Child ; Female ; Genotype ; Humans ; *Klebsiella Infections/drug therapy/epidemiology/genetics ; *Klebsiella pneumoniae/genetics ; Metagenome ; Metagenomics ; Microbial Sensitivity Tests ; Phylogeny ; beta-Lactamases/genetics ; }, abstract = {Klebsiella pneumoniae is a leading cause of highly drug-resistant infections in hospitals worldwide. 
Strain-level bacterial identification on the genetic determinants of multidrug resistance and high pathogenicity is critical for the surveillance and treatment of this clinically relevant pathogen. In this study, metagenomic next-generation sequencing was performed for specimens collected from August 2020 to May 2021 in Ruijin Hospital, Ningbo Women and Children's Hospital, and the Second Affiliated Hospital of Harbin Medical University. Genome biology of K. pneumoniae prevalent in China was characterized based on metagenomic data. Thirty K. pneumoniae strains derived from 14 sequence types were identified by multilocus sequence typing. The hypervirulent ST11 K. pneumoniae strains carrying the KL64 capsular locus were the most prevalent in the hospital population. The phylogenomic analyses revealed that the metagenome-reconstructed strains and public isolate genomes belonging to the same STs were closely related in the phylogenetic tree. Furthermore, the pangenome structure of the detected K. pneumoniae strains was analyzed, particularly focusing on the distribution of antimicrobial resistance genes and virulence genes across the strains. The genes encoding carbapenemases and extended-spectrum beta-lactamases were frequently detected in the strains of ST11 and ST15. The highest numbers of virulence genes were identified in the well-known hypervirulent strains affiliated to ST23 bearing the K1 capsule. In comparison to traditional cultivation and identification, strain-level metagenomics is advantageous to understand the mechanisms underlying resistance and virulence of K. pneumoniae directly from clinical specimens. Our findings should provide novel clues for future research into culture-independent metagenomic surveillance for bacterial pathogens. 
IMPORTANCE Routine culture and PCR-based molecular testing in the clinical microbiology laboratory are unable to recognize pathogens at the strain level and to detect strain-specific genetic determinants involved in virulence and resistance. To address this issue, we explored the strain-level profiling of K. pneumoniae prevalent in China based on metagenome-sequenced patient materials. Genome biology of the targeted bacterium can be well characterized through decoding sequence signatures and functional gene profiles at the single-strain resolution. The in-depth metagenomic analysis on strain profiling presented here shall provide a promising perspective for culture-free pathogen surveillance and molecular epidemiology of nosocomial infections.}, } @article {pmid35318683, year = {2022}, author = {Chapman, MA and He, Y and Zhou, M}, title = {Beyond a reference genome: pangenomes and population genomics of underutilized and orphan crops for future food and nutrition security.}, journal = {The New phytologist}, volume = {234}, number = {5}, pages = {1583--1597}, pmid = {35318683}, issn = {1469-8137}, support = {/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Climate Change ; Crops, Agricultural/genetics ; Genomics ; *Metagenomics ; *Plant Breeding ; }, abstract = {Underutilized crops are, by definition, under-researched compared to staple crops yet come with traits that may be especially important given climate change and the need to feed a globally increasing population. These crops are often stress-tolerant, and this combined with unique and beneficial nutritional profiles. Whilst progress is being made by generating reference genome sequences, in this Tansley Review, we show how this is only the very first step. We advocate that going 'beyond a reference genome' should be a priority, as it is only at this stage one can identify the specific genes and the adaptive alleles that underpin the valuable traits. 
We sum up how population genomic and pangenomic approaches have led to the identification of stress- and disease-tolerant alleles in staple crops and compare this to the small number of examples from underutilized crops. We also demonstrate how previously underutilized crops have benefitted from genomic advances and that many breeding targets in underutilized crops are often well studied in staple crops. This cross-crop population-level resequencing could lead to an understanding of the genetic basis of adaptive traits in underutilized crops. This level of investment may be crucial for fully understanding the value of these crops before they are lost.}, } @article {pmid35315699, year = {2022}, author = {Monir, MM and Hossain, T and Morita, M and Ohnishi, M and Johura, FT and Sultana, M and Monira, S and Ahmed, T and Thomson, N and Watanabe, H and Huq, A and Colwell, RR and Seed, K and Alam, M}, title = {Genomic Characteristics of Recently Recognized Vibrio cholerae El Tor Lineages Associated with Cholera in Bangladesh, 1991 to 2017.}, journal = {Microbiology spectrum}, volume = {10}, number = {2}, pages = {e0039122}, pmid = {35315699}, issn = {2165-0497}, mesh = {Bangladesh/epidemiology ; Bayes Theorem ; *Cholera/epidemiology/microbiology ; Cholera Toxin/genetics/metabolism ; Genome-Wide Association Study ; Genomics/methods ; Humans ; *Vibrio cholerae O1/genetics ; }, abstract = {Comparative genomic analysis of Vibrio cholerae El Tor associated with endemic cholera in Asia revealed two distinct lineages, one dominant in Bangladesh and the other in India. An in-depth whole-genome study of V. cholerae El Tor strains isolated during endemic cholera in Bangladesh (1991 to 2017) included reference genome sequence data obtained online. Core genome phylogeny established using single nucleotide polymorphisms (SNPs) showed V. 
cholerae El Tor strains comprised two lineages, BD-1 and BD-2, which, according to Bayesian phylodynamic analysis, originated from paraphyletic group BD-0 around 1981. BD-1 and BD-2 lineages overlapped temporally but were negatively associated as causative agents of cholera during 2004 to 2017. Genome-wide association study (GWAS) revealed 140 SNPs and 31 indels, resulting in gene alleles unique to BD-1 and BD-2. Regression analysis of root to tip distance and year of isolation indicated early BD-0 strains at the base, whereas BD-1 and BD-2 subsequently emerged and progressed by accumulating SNPs. Pangenome analysis provided evidence of gene acquisition by both BD-1 and BD-2, of which six crucial proteins of known function were predominant in BD-2. BD-1 and BD-2 diverged and have distinctively different genomic traits, namely, heterogeneity in VSP-2, VPI-1, mobile elements, toxin encoding elements, and total gene abundance. In addition, the observed phage-inducible chromosomal island-like element (PLE1), and SXT ICE elements (ICE\textsuperscript{TET}) in BD-2 presumably provided a fitness advantage for the lineage to outcompete BD-1 as the etiological agent of endemic cholera in Bangladesh, with implications for global cholera epidemiology. IMPORTANCE Cholera is a global disease with specific reference to the Bay of Bengal Ganges Delta where Vibrio cholerae O1 El Tor, the causative agent of the disease showed two circulating lineages, one dominant in Bangladesh and the other in India. Results of an in-depth genomic study of V. cholerae associated with endemic cholera during the past 27 years (1991 to 2017) indicate emergence and succession of the two lineages, BD-1 and BD-2, arising from a common ancestral paraphyletic group, BD-0, comprising the early strains and short-term evolution of the bacterium in Bangladesh. Among the two V. cholerae lineages, BD-2 supersedes BD-1 and is predominant in the most recent endemic cholera in Bangladesh. 
The BD-2 lineage contained significantly more SNPs and indels, and showed richness in gene abundance, including antimicrobial resistance genes, gene cassettes, and PLE to fight against bacteriophage infection, acquired over time. These findings have important epidemic implications on a global scale.}, } @article {pmid35311514, year = {2022}, author = {Kahn, AK and Almeida, RPP}, title = {Phylogenetics of Historical Host Switches in a Bacterial Plant Pathogen.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {7}, pages = {e0235621}, pmid = {35311514}, issn = {1098-5336}, mesh = {Humans ; Multilocus Sequence Typing ; Phylogeny ; *Plant Diseases/microbiology ; Plants/microbiology ; *Xylella/genetics ; }, abstract = {Xylella fastidiosa is an insect-transmitted bacterial plant pathogen found across the Americas and, more recently, worldwide. X. fastidiosa infects plants of at least 563 species belonging to 82 botanical families. While the species X. fastidiosa infects many plants, particular strains have increased plant specificity. Understanding the molecular underpinnings of plant host specificity in X. fastidiosa is vital for predicting host shifts and epidemics. While there may exist multiple genetic determinants of host range in X. fastidiosa, the drivers of the unique relationships between X. fastidiosa and its hosts should be elucidated. Our objective with this study was to predict the ancestral plant hosts of this pathogen using phylogenetic and genomic methods based on a large data set of pathogen whole-genome data from agricultural hosts. We used genomic data to construct maximum-likelihood (ML) phylogenetic trees of subsets of the core and pan-genomes. With those trees, we ran ML ancestral state reconstructions of plant host at two taxonomic scales (genus and multiorder clades). Both the core and pan-genomes were informative in terms of predicting ancestral host state, giving new insight into the history of the plant hosts of X. 
fastidiosa. Subsequently, gene gain and loss in the pan-genome were found to be significantly correlated with plant host through genes that had statistically significant associations with particular hosts. IMPORTANCE Xylella fastidiosa is a globally important bacterial plant pathogen with many hosts; however, the underpinnings of host specificity are not known. This paper contains important findings about the usage of phylogenetics to understand the history of host specificity in this bacterial species, as well as convergent evolution in the pan-genome. There are strong signals of historical host range that give us insights into the history of this pathogen and its various invasions. The data from this paper are relevant in making decisions for quarantine and eradication, as they show the historical trends of host switching, which can help us predict likely future host shifts. We also demonstrate that using multilocus sequence type (MLST) genes in this system, which is still a commonly used process for policymaking, does not reconstruct the same phylogenetic topology as whole-genome data.}, } @article {pmid35309181, year = {2022}, author = {Green, V and Roytman, M}, title = {Treatment-Resistant Hepatitis C Viral Infection: A Case Report and Literature Review.}, journal = {Case reports in hepatology}, volume = {2022}, number = {}, pages = {3556780}, pmid = {35309181}, issn = {2090-6587}, abstract = {Hepatitis C virus (HCV) is an ongoing global public health threat affecting millions worldwide. Increasing recognition of its impact and recent advances towards HCV prevention and cure have provided incentive for the World Health Organization to call for global elimination by 2030. The goal of therapy is to achieve a sustained virologic response (SVR-12), defined as undetectable HCV-RNA within 12 weeks after treatment completion. In 2011, approval was given for the first direct-acting antiviral agents (DAAs). 
More recently, in 2013, more effective DAAs, with pan-genomic properties, have been introduced, and these regimens boast increasing rates of SVR. The ultimate goal is that the history of HCV ends with the pan-genotypic efficacy of multiple, easy-to-use and tolerate, combination regimens. These regimens have already demonstrated the ability to cure previously challenging patient groups. However, limitations exist in the current portfolio of agents, with suboptimal outcomes for patients with HCV genotype 3. In addition to this, access to DAAs remains an obstacle for many patients. We present this case of a 61-year-old male with HCV genotype 3 who has had several treatment failures with standard HCV therapy who was eventually approved for compassionate use of a 16-week course of glecaprevir (GLE)/pibrentasvir (PIB), sofosbuvir (SOF), and ribavirin (RBV) which ultimately led to SVR-12.}, } @article {pmid35303917, year = {2022}, author = {Estrada, AA and Gottschalk, M and Gebhart, CJ and Marthaler, DG}, title = {Comparative analysis of Streptococcus suis genomes identifies novel candidate virulence-associated genes in North American isolates.}, journal = {Veterinary research}, volume = {53}, number = {1}, pages = {23}, pmid = {35303917}, issn = {1297-9716}, support = {project #00057268//Rapid Agricultural Response Fund, Minnesota Agricultural Experiment Station (US)/ ; }, mesh = {Animals ; Genome, Bacterial ; Genotype ; *Streptococcus suis/genetics ; Swine ; *Swine Diseases/microbiology ; Virulence/genetics ; }, abstract = {Streptococcus suis is a significant economic and welfare concern in the swine industry. Pan-genome analysis provides an in-silico approach for the discovery of genes involved in pathogenesis in bacterial pathogens. In this study, we performed pan-genome analysis of 208 S. suis isolates classified into the pathogenic, possibly opportunistic, and commensal pathotypes to identify novel candidate virulence-associated genes (VAGs) of S. suis. 
Using chi-square tests and LASSO regression models, three accessory pan-genes corresponding to S. suis strain P1/7 markers SSU_RS09525, SSU_RS09155, and SSU_RS03100 (>95% identity) were identified as having a significant association with the pathogenic pathotype. The proposed novel SSU_RS09525+/SSU_RS09155+/SSU_RS03100+ genotype identified 96% of the pathogenic pathotype strains, suggesting a novel genotyping scheme for predicting the pathogenicity of S. suis isolates in North America. In addition, mobile genetic elements carrying antimicrobial resistance genes (ARGs) and VAGs were identified but did not appear to play a major role in the spread of ARGs and VAGs.}, } @article {pmid35298044, year = {2023}, author = {Silva, M and Pontes, A and Franco-Duarte, R and Soares, P and Sampaio, JP and Sousa, MJ and Brito, PH}, title = {A glimpse at an early stage of microbe domestication revealed in the variable genome of Torulaspora delbrueckii, an emergent industrial yeast.}, journal = {Molecular ecology}, volume = {32}, number = {10}, pages = {2396-2412}, doi = {10.1111/mec.16428}, pmid = {35298044}, issn = {1365-294X}, mesh = {Saccharomyces cerevisiae/genetics ; *Torulaspora/genetics ; Domestication ; Fermentation ; *Wine/analysis ; }, abstract = {Microbe domestication has a major applied relevance but is still poorly understood from an evolutionary perspective. The yeast Torulaspora delbrueckii is gaining importance for biotechnology but little is known about its population structure, variation in gene content or possible domestication routes. Here, we show that T. delbrueckii is composed of five major clades. Among the three European clades, a lineage associated with the wild arboreal niche is sister to the two other lineages that are linked to anthropic environments, one to wine fermentations and the other to diverse sources including dairy products and bread dough (Mix-Anthropic clade). Using 64 genomes we assembled the pangenome and the variable genome of T. 
delbrueckii. A comparison with Saccharomyces cerevisiae indicated that the weight of the variable genome in the pangenome of T. delbrueckii is considerably smaller. An association of gene content and ecology supported the hypothesis that the Mix-Anthropic clade has the most specialized genome and indicated that some of the exclusive genes were implicated in galactose and maltose utilization. More detailed analyses traced the acquisition of a cluster of GAL genes in strains associated with dairy products and the expansion and functional diversification of MAL genes in strains isolated from bread dough. In contrast to S. cerevisiae, domestication in T. delbrueckii is not primarily driven by alcoholic fermentation but rather by adaptation to dairy and bread-production niches. This study expands our views on the processes of microbe domestication and on the trajectories leading to adaptation to anthropic niches.}, } @article {pmid35297758, year = {2022}, author = {Sharma, P and Johnson, MA and Mazloom, R and Allen, C and Heath, LS and Lowe-Power, TM and Vinatzer, BA}, title = {Meta-analysis of the Ralstonia solanacearum species complex (RSSC) based on comparative evolutionary genomics and reverse ecology.}, journal = {Microbial genomics}, volume = {8}, number = {3}, pages = {}, pmid = {35297758}, issn = {2057-5858}, mesh = {Biological Evolution ; Genome, Bacterial ; Genomics ; *Ralstonia solanacearum/genetics ; }, abstract = {Ralstonia solanacearum species complex (RSSC) strains are bacteria that colonize plant xylem tissue and cause vascular wilt diseases. However, individual strains vary in host range, optimal disease temperatures and physiological traits. To increase our understanding of the evolution, diversity and biology of the RSSC, we performed a meta-analysis of 100 representative RSSC genomes. 
These 100 RSSC genomes contain 4940 genes on average, and a pangenome analysis found that there are 3262 genes in the core genome (~60% of the mean RSSC genome) with 13 128 genes in the extensive flexible genome. A core genome phylogenetic tree and a whole-genome similarity matrix aligned with the previously named species (R. solanacearum, R. pseudosolanacearum, R. syzygii) and phylotypes (I–IV). These analyses also highlighted a third unrecognized sub-clade of phylotype II. Additionally, we identified differences between phylotypes with respect to gene content and recombination rate, and we delineated population clusters based on the extent of horizontal gene transfer. Multiple analyses indicate that phylotype II is the most diverse phylotype, and it may thus represent the ancestral group of the RSSC. We also used our genome-based framework to test whether the RSSC sequence variant (sequevar) taxonomy is a robust method to define within-species relationships of strains. The sequevar taxonomy is based on alignments of a single conserved gene (egl). Although sequevars in phylotype II describe monophyletic groups, the sequevar system breaks down in the highly recombinogenic phylotype I, which highlights the need for an improved, cost-effective method for genotyping strains in phylotype I. 
Finally, we enabled quick and precise genome-based identification of newly sequenced RSSC strains by assigning Life Identification Numbers (LINs) to the 100 strains and by circumscribing the RSSC and its sub-groups in the LINbase Web service.}, } @article {pmid35291951, year = {2022}, author = {Roux, E and Nicolas, A and Valence, F and Siekaniec, G and Chuat, V and Nicolas, J and Le Loir, Y and Guédon, E}, title = {The genomic basis of the Streptococcus thermophilus health-promoting properties.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {210}, pmid = {35291951}, issn = {1471-2164}, mesh = {*Genomics ; Humans ; Lactose/metabolism ; Phenotype ; *Streptococcus thermophilus/genetics/metabolism ; }, abstract = {BACKGROUND: Streptococcus thermophilus is a Gram-positive bacterium widely used as starter in the dairy industry as well as in many traditional fermented products. In addition to its technological importance, it has also gained interest in recent years as beneficial bacterium due to human health-promoting functionalities. The objective of this study was to inventory the main health-promoting properties of S. thermophilus and to study their intra-species diversity at the genomic and genetic level within a collection of representative strains.

RESULTS: In this study various health-related functions were analyzed at the genome level from 79 genome sequences of strains isolated over a long time period from diverse products and different geographic locations. While some functions are widely conserved among isolates (e.g., degradation of lactose, folate production) suggesting their central physiological and ecological role for the species, others including the tagatose-6-phosphate pathway involved in the catabolism of galactose, and the production of bioactive peptides and gamma-aminobutyric acid are strain-specific. Most of these strain-specific health-promoting properties seem to have been acquired via horizontal gene transfer events. The genetic basis for the phenotypic diversity between strains for some health-related traits has also been investigated. For instance, substitutions in the galK promoter region correlate with the ability of some strains to catabolize galactose via the Leloir pathway. Finally, the low occurrence in S. thermophilus genomes of genes coding for biogenic amine production and antibiotic resistance is also a contributing factor to its safety status.

CONCLUSIONS: The natural intra-species diversity of S. thermophilus, therefore, represents an interesting source for innovation in the field of fermented products enriched for healthy components that can be exploited to improve human health. A better knowledge of the health-promoting properties and their genomic and genetic diversity within the species may facilitate the selection and application of strains for specific biotechnological and human health-promoting purpose. Moreover, by pointing out that a substantial part of its functional potential still defies us, our work opens the way to uncover additional health-related functions through the intra-species diversity exploration of S. thermophilus by comparative genomics approaches.}, } @article {pmid35289871, year = {2022}, author = {Bonnici, V and Giugno, R}, title = {PANPROVA: pangenomic prokaryotic evolution of full assemblies.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {9}, pages = {2631-2632}, doi = {10.1093/bioinformatics/btac158}, pmid = {35289871}, issn = {1367-4811}, mesh = {*Software ; *Genome ; Sequence Analysis, DNA ; High-Throughput Nucleotide Sequencing ; Benchmarking ; }, abstract = {MOTIVATION: Computational tools for pangenomic analysis have gained increasing interest over the past two decades in various applications such as evolutionary studies and vaccine development. Synthetic benchmarks are essential for the systematic evaluation of their performance. Currently, benchmarking tools represent a genome as a set of genetic sequences and fail to simulate the complete information of the genomes, which is essential for evaluating pangenomic detection between fragmented genomes.

RESULTS: We present PANPROVA, a benchmark tool to simulate prokaryotic pangenomic evolution by evolving the complete genomic sequence of an ancestral isolate. In this way, the possibility of operating in the preassembly phase is enabled. Gene set variations, sequence variation and horizontal acquisition from a pool of external genomes are the evolutionary features of the tool.

PANPROVA is publicly available at https://github.com/InfOmics/PANPROVA. The manuscript explicitly refers to the GitHub repository.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35288665, year = {2022}, author = {Zhuang, Y and Wang, X and Li, X and Hu, J and Fan, L and Landis, JB and Cannon, SB and Grimwood, J and Schmutz, J and Jackson, SA and Doyle, JJ and Zhang, XS and Zhang, D and Ma, J}, title = {Phylogenomics of the genus Glycine sheds light on polyploid evolution and life-strategy transition.}, journal = {Nature plants}, volume = {8}, number = {3}, pages = {233-244}, pmid = {35288665}, issn = {2055-0278}, mesh = {Diploidy ; *Glycine ; Phylogeny ; *Polyploidy ; Soybeans/genetics ; }, abstract = {Polyploidy and life-strategy transitions between annuality and perenniality often occur in flowering plants. However, the evolutionary propensities of polyploids and the genetic bases of such transitions remain elusive. We assembled chromosome-level genomes of representative perennial species across the genus Glycine including five diploids and a young allopolyploid, and constructed a Glycine super-pangenome framework by integrating 26 annual soybean genomes. These perennial diploids exhibit greater genome stability and possess fewer centromere repeats than the annuals. Biased subgenomic fractionation occurred in the allopolyploid, primarily by accumulation of small deletions in gene clusters through illegitimate recombination, which was associated with pre-existing local subgenomic differentiation. Two genes annotated to modulate vegetative-reproductive phase transition and lateral shoot outgrowth were postulated as candidates underlying the perenniality-annuality transition. 
Our study provides insights into polyploid genome evolution and lays a foundation for unleashing genetic potential from the perennial gene pool for soybean improvement.}, } @article {pmid35287927, year = {2022}, author = {Cao, H and Xu, D and Zhang, T and Ren, Q and Xiang, L and Ning, C and Zhang, Y and Gao, R}, title = {Comprehensive and functional analyses reveal the genomic diversity and potential toxicity of Microcystis.}, journal = {Harmful algae}, volume = {113}, number = {}, pages = {102186}, doi = {10.1016/j.hal.2022.102186}, pmid = {35287927}, issn = {1878-1470}, mesh = {Biological Evolution ; *Cyanobacteria ; Genomics ; *Microcystis ; Phylogeny ; }, abstract = {Microcystis is a cyanobacterium that is widely distributed across the world. It has attracted great attention because it produces the hepatotoxin microcystin (MC) that can inhibit eukaryotic protein phosphatases and pose a great risk to animal and human health. Due to the high diversity of morphospecies and genomes, it is still difficult to classify Microcystis species. In this study, we investigated the pangenome of 23 Microcystis strains to detect the genetic diversity and evolutionary dynamics. Microcystis revealed an open pangenome containing 22,009 gene families and exhibited different functional constraints. The core-genome phylogenetic analysis accurately differentiated the toxic and nontoxic strains and could be used as a taxonomic standard at the genetic level. We also investigated the functions of horizontal gene transfer (HGT) events, which were mostly conferred from cyanobacteria and closely related species. In order to detect the potential toxicity of Microcystis, we searched and characterized MC biosynthetic gene clusters and other secondary metabolite gene clusters. 
Our work provides insights into the genetic diversity, evolutionary dynamics, and potential toxicity of Microcystis, which could benefit the species classification and development of new methods for drinking water quality control and management of bloom formation in the future.}, } @article {pmid35285694, year = {2022}, author = {Yan, W and Feng, X and Lin, TH and Huang, X and Xie, L and Wei, S and Zhou, K and Chen, YL and Luo, W and Xu, W and Zhang, W and Nawaz, MZ and Luo, YW and Zeng, Q and Zhang, R and Jiao, N}, title = {Diverse Subclade Differentiation Attributed to the Ubiquity of Prochlorococcus High-Light-Adapted Clade II.}, journal = {mBio}, volume = {13}, number = {2}, pages = {e0302721}, pmid = {35285694}, issn = {2150-7511}, mesh = {Ecosystem ; Genome, Bacterial ; Oceans and Seas ; Phylogeny ; *Prochlorococcus/genetics/metabolism ; }, abstract = {Prochlorococcus is the key primary producer in marine ecosystems, and the high-light-adapted clade II (HLII) is the most abundant ecotype. However, the genomic and ecological basis of Prochlorococcus HLII in the marine environment has remained elusive. Here, we show that the ecologically coherent subclade differentiation of HLII corresponds to genomic and ecological characteristics on the basis of analyses of 31 different strains of HLII, including 12 novel isolates. Different subclades of HLII with different core and accessory genes were identified, and their distribution in the marine environment was explored using the TARA Oceans metagenome database. Three major subclade groups were identified, viz., the surface group (HLII-SG), the transition group (HLII-TG), and the deep group (HLII-DG). These subclade groups showed different temperature ranges and optima for distribution. In regression analyses, temperature and nutrient availability were identified as key factors affecting the distribution of HLII subclades. 
A 35% increase in the relative abundance of HLII-SG by the end of the 21st century was predicted under the Representative Concentration Pathway 8.5 scenario. Our results show that the ubiquity and distribution of Prochlorococcus HLII in the marine environment are associated with the differentiation of diverse subclades. These findings provide insights into the large-scale shifts in the Prochlorococcus community in response to future climate change. IMPORTANCE Prochlorococcus is the most abundant oxygenic photosynthetic microorganism on Earth, and high-light-adapted clade II (HLII) is the dominant ecotype. However, the factors behind the dominance of HLII in the vast oligotrophic oceans are still unknown. Here, we identified three distinct groups of HLII subclades, viz., the surface group (HLII-SG), the transition group (HLII-TG), and the deep group (HLII-DG). We further demonstrated that the ecologically coherent subclade differentiation of HLII corresponds to genomic and ecological characteristics. Our study suggests that the differentiation of diverse subclades underlies the ubiquity and distribution of Prochlorococcus HLII in the marine environment and provides insights into the shifts in the Prochlorococcus community in response to future climate change.}, } @article {pmid35278263, year = {2022}, author = {Cai, X and Lin, R and Liang, J and King, GJ and Wu, J and Wang, X}, title = {Transposable element insertion: a hidden major source of domesticated phenotypic variation in Brassica rapa.}, journal = {Plant biotechnology journal}, volume = {20}, number = {7}, pages = {1298-1310}, pmid = {35278263}, issn = {1467-7652}, mesh = {Biological Variation, Population ; *Brassica rapa/genetics ; DNA Transposable Elements/genetics ; Genome, Plant/genetics ; Sequence Analysis, DNA ; }, abstract = {Transposable elements (TEs) are prevalent in plant genomes. 
However, studies on their impact on phenotypic evolution in crop plants are relatively rare, because systematically identifying TE insertions within a species has been a challenge. Here, we present a novel approach for uncovering TE insertion polymorphisms (TIPs) using pan-genome analysis combined with population-scale resequencing, and we adopt this pipeline to retrieve TIPs in a Brassica rapa germplasm collection. We found that 23% of genes within the reference Chiifu-401-42 genome harbored TIPs. TIPs tended to have large transcriptional effects, including modifying gene expression levels and altering gene structure by introducing new introns. Among 524 diverse accessions, TIPs broadly influenced genes related to traits and played a crucial role in the domestication of B. rapa morphotypes. As examples, four specific TIP-containing genes were found to be candidates that are potentially involved in various climatic conditions, promoting the formation of diverse vegetable crops in B. rapa. Our work reveals the hitherto hidden TIPs implicated in agronomic traits and highlights their widespread utility in studies of crop domestication.}, } @article {pmid37077982, year = {2022}, author = {Pucker, B and Irisarri, I and de Vries, J and Xu, B}, title = {Plant genome sequence assembly in the era of long reads: Progress, challenges and future directions.}, journal = {Quantitative plant biology}, volume = {3}, number = {}, pages = {e5}, pmid = {37077982}, issn = {2632-8828}, abstract = {Third-generation long-read sequencing is transforming plant genomics. Oxford Nanopore Technologies and Pacific Biosciences are offering competing long-read sequencing technologies and enable plant scientists to investigate even large and complex plant genomes. Sequencing projects can be conducted by single research groups and sequences of smaller plant genomes can be completed within days. 
This also resulted in an increased investigation of genomes from multiple species in large scale to address fundamental questions associated with the origin and evolution of land plants. Increased accessibility of sequencing devices and user-friendly software allows more researchers to get involved in genomics. Current challenges are accurately resolving diploid or polyploid genome sequences and better accounting for the intra-specific diversity by switching from the use of single reference genome sequences to a pangenome graph.}, } @article {pmid35271656, year = {2022}, author = {Dwiyanto, J and Hor, JW and Reidpath, D and Su, TT and Lee, SWH and Ayub, Q and Mustapha, FB and Lee, SM and Foo, SC and Chong, CW and Rahman, S}, title = {Pan-genome and resistome analysis of extended-spectrum ß-lactamase-producing Escherichia coli: A multi-setting epidemiological surveillance study from Malaysia.}, journal = {PloS one}, volume = {17}, number = {3}, pages = {e0265142}, pmid = {35271656}, issn = {1932-6203}, mesh = {Anti-Bacterial Agents/pharmacology/therapeutic use ; *Escherichia coli/genetics ; *Escherichia coli Infections/drug therapy/epidemiology ; Humans ; Malaysia/epidemiology ; Microbial Sensitivity Tests ; Plasmids/genetics ; Virulence Factors ; beta-Lactamases/genetics ; }, abstract = {OBJECTIVES: This study profiled the prevalence of extended-spectrum ß-lactamase-producing Escherichia coli (ESBL-EC) in the community and compared their resistome and genomic profiles with isolates from clinical patients through whole-genome sequencing.

METHODS: Fecal samples from 233 community dwellers from Segamat, a town in southern Malaysia, were obtained from May through August 2018. Putative ESBL strains were screened and tested using antibiotic susceptibility tests. Additionally, eight clinical ESBL-EC were obtained from a hospital in the same district from June through October 2020. Whole-genome sequencing was then conducted on selected ESBL-EC from both settings (n = 40) for pan-genome comparison, cluster analysis, and resistome profiling.

RESULTS: A mean ESBL-EC carriage rate of 17.82% (95% CI: 10.48%-24.11%) was observed in the community and was consistent across demographic factors. Whole-genome sequences of the ESBL-EC (n = 40) enabled the detection of multiple plasmid replicon groups (n = 28), resistance genes (n = 34) and virulence factors (n = 335), with no significant difference in the number of genes carried between the community and clinical isolates (plasmid replicon groups, p = 0.13; resistance genes, p = 0.47; virulence factors, p = 0.94). Virulence gene marker analysis detected the presence of extraintestinal pathogenic E. coli (ExPEC), uropathogenic E. coli (UPEC), and enteroaggregative E. coli (EAEC) in both the community and clinical isolates. Multiple blaCTX-M variants were observed, dominated by blaCTX-M-27 (n = 12), blaCTX-M-65 (n = 10), and blaCTX-M-15 (n = 9). The clinical and community isolates did not cluster together based on the pan-genome comparison, suggesting isolates from the two settings were clonally unrelated. However, cluster analysis based on carried plasmids, resistance genes and phenotypic susceptibility profiles identified four distinct clusters, with similar patterns between the community and clinical isolates.

CONCLUSION: ESBL-EC from the clinical and community settings shared similar resistome profiles, suggesting the frequent exchange of genetic materials through horizontal gene transfer.}, } @article {pmid35269907, year = {2022}, author = {Mancebo, FJ and Parras-Moltó, M and García-Ríos, E and Pérez-Romero, P}, title = {Deciphering the Potential Coding of Human Cytomegalovirus: New Predicted Transmembrane Proteome.}, journal = {International journal of molecular sciences}, volume = {23}, number = {5}, pages = {}, pmid = {35269907}, issn = {1422-0067}, support = {MPY110/18//Instituto de Salud Carlos III/ ; MPY127/19//Instituto de Salud Carlos III/ ; MPY303/20//Instituto de Salud Carlos III/ ; CD18CIII/00007//Instituto de Salud Carlos III/ ; F18III/00013//Instituto de Salud Carlos III/ ; 2019-03482//Swedish Research Council/ ; }, mesh = {Antibodies, Viral ; Cytomegalovirus ; *Cytomegalovirus Infections ; *Cytomegalovirus Vaccines ; Humans ; Proteome/genetics ; Viral Envelope Proteins/genetics ; }, abstract = {CMV is a major cause of morbidity and mortality in immunocompromised individuals that will benefit from the availability of a vaccine. Despite the efforts made during the last decade, no CMV vaccine is available. An ideal CMV vaccine should elicit a broad immune response against multiple viral antigens including proteins involved in virus-cell interaction and entry. However, the therapeutic use of neutralizing antibodies targeting glycoproteins involved in viral entry achieved only partial protection against infection. In this scenario, a better understanding of the CMV proteome potentially involved in viral entry may provide novel candidates to include in new potential vaccine design. In this study, we aimed to explore the CMV genome to identify proteins with putative transmembrane domains to identify new potential viral envelope proteins. 
We have performed in silico analysis using the genome sequences of nine different CMV strains to predict the transmembrane domains of the encoded proteins. We have identified 77 proteins with transmembrane domains, 39 of which were present in all the strains and were highly conserved. Among the core proteins, 17 of them such as UL10, UL139 or US33A have no ascribed function and may be good candidates for further mechanistic studies.}, } @article {pmid35269811, year = {2022}, author = {Tay Fernandez, CG and Nestor, BJ and Danilevicz, MF and Gill, M and Petereit, J and Bayer, PE and Finnegan, PM and Batley, J and Edwards, D}, title = {Pangenomes as a Resource to Accelerate Breeding of Under-Utilised Crop Species.}, journal = {International journal of molecular sciences}, volume = {23}, number = {5}, pages = {}, pmid = {35269811}, issn = {1422-0067}, support = {DP210100296//Australian Research Council/ ; DP200100762//Australian Research Council/ ; DE210100398//Australian Research Council/ ; 9177539 and 9177591//Grains Research and Development Corporation/ ; }, mesh = {Chromosome Mapping ; Crops, Agricultural/genetics ; Genome, Plant ; *Genome-Wide Association Study ; *Oryza/genetics ; Plant Breeding ; Soybeans/genetics ; Zea mays/genetics ; }, abstract = {Pangenomes are a rich resource to examine the genomic variation observed within a species or genera, supporting population genetics studies, with applications for the improvement of crop traits. Major crop species such as maize (Zea mays), rice (Oryza sativa), Brassica (Brassica spp.), and soybean (Glycine max) have had pangenomes constructed and released, and this has led to the discovery of valuable genes associated with disease resistance and yield components. However, pangenome data are not available for many less prominent crop species that are currently under-utilised. 
Despite many under-utilised species being important food sources in regional populations, the scarcity of genomic data for these species hinders their improvement. Here, we assess several under-utilised crops and review the pangenome approaches that could be used to build resources for their improvement. Many of these under-utilised crops are cultivated in arid or semi-arid environments, suggesting that novel genes related to drought tolerance may be identified and used for introgression into related major crop species. In addition, we discuss how previously collected data could be used to enrich pangenome functional analysis in genome-wide association studies (GWAS) based on studies in major crops. Considering the technological advances in genome sequencing, pangenome references for under-utilised species are becoming more obtainable, offering the opportunity to identify novel genes related to agro-morphological traits in these species.}, } @article {pmid35269627, year = {2022}, author = {Syrokou, MK and Paramithiotis, S and Drosinos, EH and Bosnea, L and Mataragas, M}, title = {A Comparative Genomic and Safety Assessment of Six Lactiplantibacillus plantarum subsp. argentoratensis Strains Isolated from Spontaneously Fermented Greek Wheat Sourdoughs for Potential Biotechnological Application.}, journal = {International journal of molecular sciences}, volume = {23}, number = {5}, pages = {}, pmid = {35269627}, issn = {1422-0067}, support = {T1EDK-05339//European Union and Greek national funds/ ; }, mesh = {Fermentation ; Fructose ; *Genomics ; Greece ; Lactobacillus ; *Triticum/genetics ; }, abstract = {The comparative genome analysis of six Lactiplantibacillus plantarum subsp. argentoratensis strains previously isolated from spontaneously fermented Greek wheat sourdoughs is presented. 
Genomic attributes related to food safety have been studied according to the European Food Safety Authority (EFSA) suggestions for the use of lactic acid bacteria (LAB) in the production of foods. Bioinformatic analysis revealed a complete set of genes for maltose, sucrose, glucose, and fructose fermentation; conversion of fructose to mannitol; folate and riboflavin biosynthesis; acetoin production; conversion of citrate to oxaloacetate; and the ability to produce antimicrobial compounds (plantaricins). Pathogenic factors were absent but some antibiotic resistance genes were detected. CRISPR and cas genes were present as well as various mobile genetic elements (MGEs) such as plasmids, prophages, and insertion sequences. The production of biogenic amines by these strains was not possible due to the absence of key genes in their genome except lysine decarboxylase associated with cadaverine; however, potential degradation of these substances was identified due to the presence of a blue copper oxidase precursor and a multicopper oxidase protein family. Finally, comparative genomics and pan-genome analysis showed genetic differences between the strains (e.g., variable pln locus), and it facilitated the identification of various phenotypic and probiotic-related properties.}, } @article {pmid35264613, year = {2022}, author = {El Karkouri, K and Ghigo, E and Raoult, D and Fournier, PE}, title = {Genomic evolution and adaptation of arthropod-associated Rickettsia.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {3807}, pmid = {35264613}, issn = {2045-2322}, mesh = {Animals ; *Arthropods/genetics ; Evolution, Molecular ; *Gammaproteobacteria ; Genomics ; Phylogeny ; *Rickettsia/genetics ; *Spotted Fever Group Rickettsiosis ; }, abstract = {Rickettsia species are endosymbionts hosted by arthropods and are known to cause mild to fatal diseases in humans. 
Here, we analyse the evolution and diversity of 34 Rickettsia species using a pangenomic meta-analysis (80 genomes/41 plasmids). Phylogenomic trees showed that Rickettsia spp. diverged into two Spotted Fever groups, a Typhus group, a Canadensis group and a Bellii group, and may have inherited their plasmids from an ancestral plasmid that persisted in some strains or may have been lost by others. The results suggested that the ancestors of Rickettsia spp. might have infected Acari and/or Insecta and probably diverged by persisting inside and/or switching hosts. Pangenomic analysis revealed that the Rickettsia genus evolved through a strong interplay between genome degradation/reduction and/or expansion leading to possible distinct adaptive trajectories. The genus mainly shared evolutionary relationships with α-proteobacteria, and also with γ/β/δ-proteobacteria, cytophagia, actinobacteria, cyanobacteria, chlamydiia and viruses, suggesting lateral exchanges of several critical genes. These evolutionary processes have probably been orchestrated by an abundance of mobile genetic elements, especially in the Spotted Fever and Bellii groups. In this study, we provided a global evolutionary genomic view of the intracellular Rickettsia that may help our understanding of their diversity, adaptation and fitness.}, } @article {pmid35263215, year = {2022}, author = {Kim, JS and Kang, SW and Lee, JH and Park, SH and Lee, JS}, title = {The evolution and competitive strategies of Akkermansia muciniphila in gut.}, journal = {Gut microbes}, volume = {14}, number = {1}, pages = {2025017}, pmid = {35263215}, issn = {1949-0984}, mesh = {Akkermansia ; Animals ; *Gastrointestinal Microbiome/genetics ; Humans ; Mice ; Mucins/genetics/metabolism ; Phylogeny ; Sulfatases/genetics ; Verrucomicrobia/genetics/metabolism ; }, abstract = {Akkermansia muciniphila is a commensal bacterium using mucin as its sole carbon and nitrogen source. A. 
muciniphila is a promising candidate for next-generation probiotics to prevent inflammatory and metabolic disorders, including diabetes and obesity, and to increase the response to cancer immunotherapy. In this study, a comparative pan-genome analysis was conducted to investigate the genomic diversity and evolutionary relationships between complete genomes of 27 A. muciniphila strains, including KGMB strains isolated from healthy Koreans. The analysis showed that A. muciniphila strains formed two clades of group A and B in a phylogenetic tree constructed using 1,219 orthologous single-copy core genes. Interestingly, group A comprised of strains from human feces in Korea, whereas most of group B comprised strains from human feces in Europe and China, and from mouse feces. As group A and B branched, mucin hydrolysis played an important role in the stability of the core genome and drove evolution in the direction of defense against invading pathogens, survival in, and colonization in the mucus layer. In addition, WapA and anSME, which function in competition and post-translational modification of sulfatase, respectively, have been a particularly important selective pressure in the evolution of group A. KGMB strains in group A with anSME gene showed sulfatase activity, but KCTC 15667\textsuperscript{T} in group B without anSME did not. 
Our findings revealed that KGMB strains evolved to gain an edge in the competition with other gut bacteria by increasing the utilization of sulfated mucin, which will allow it to become highly colonized in the gut environment.}, } @article {pmid35252029, year = {2022}, author = {Song, Y and Xu, X and Huang, Z and Xiao, Y and Yu, K and Jiang, M and Yin, S and Zheng, M and Meng, H and Han, Y and Wang, Y and Wang, D and Wei, Q}, title = {Genomic Characteristics Revealed Plasmid-Mediated Pathogenicity and Ubiquitous Rifamycin Resistance of Rhodococcus equi.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {807610}, pmid = {35252029}, issn = {2235-2988}, mesh = {Humans ; Phylogeny ; Plasmids/genetics ; *Rhodococcus equi/genetics ; *Rifamycins ; Virulence/genetics ; }, abstract = {Rhodococcus equi is a zoonotic pathogen that can cause fatal disease in patients who are immunocompromised. At present, the epidemiology and pathogenic mechanisms of R. equi infection are not clear. This study characterized the genomes of 53 R. equi strains from different sources. Pan-genome analysis showed that all R. equi strains contained 11481 pan genes, including 3690 core genes and 602 ~ 1079 accessory genes. Functional annotation of pan genome focused on the genes related to basic lifestyle, such as the storage and expression of metabolic and genetic information. Phylogenetic analysis based on pan-genome showed that the R. equi strains were clustered into six clades, which was not directly related to the isolation location and host source. Also, a total of 84 virulence genes were predicted in 53 R. equi strains. These virulence factors can be divided into 20 categories related to substance metabolism, secreted protein and immune escape. Meanwhile, six antibiotic resistance genes (RbpA, tetA (33), erm (46), sul1, qacEdelta 1 and aadA9) were detected, and all strains carried RbpA related to rifamycin resistance. 
In addition, 28 plasmids were found in the 53 R. equi strains, belonging to Type-A (n = 14), Type-B (n = 8) and Type-N (n = 6), respectively. The genetic structures of the same type of plasmid were highly similar. In conclusion, R. equi strains show different genomic characteristics, virulence-related genes, potential drug resistance and virulence plasmid structures, which may be conducive to the evolution of its pathogenesis.}, } @article {pmid35250944, year = {2022}, author = {Ma, L and Yang, W and Huang, S and Liu, R and Li, H and Huang, X and Xiong, J and Liu, X}, title = {Integrative Assessments on Molecular Taxonomy of Acidiferrobacter thiooxydans ZJ and Its Environmental Adaptation Based on Mobile Genetic Elements.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {826829}, pmid = {35250944}, issn = {1664-302X}, abstract = {Acidiferrobacter spp. are facultatively anaerobic acidophiles that belong to a distinctive Acidiferrobacteraceae family, which are similar to Ectothiorhodospiraceae phylogenetically, and are closely related to Acidithiobacillia class/subdivision physiologically. The limited genome information has kept them from being studied on molecular taxonomy and environmental adaptation in depth. Herein, Af. thiooxydans ZJ was isolated from acid mine drainage (AMD), and the complete genome sequence was reported to scan its genetic constitution for taxonomic and adaptative feature exploration. The genome has a single chromosome of 3,302,271 base pairs (bp), with a GC content of 63.61%. The phylogenetic tree based on OrthoANI highlighted the unique position of Af. thiooxydans ZJ, which harbored more unique genes among the strains from Ectothiorhodospiraceae and Acidithiobacillaceae by pan-genome analysis. The diverse mobile genetic elements (MGEs), such as insertion sequence (IS), clustered regularly interspaced short palindromic repeat (CRISPR), prophage, and genomic island (GI), have been identified and characterized in Af. 
thiooxydans ZJ. The results showed that Af. thiooxydans ZJ may effectively resist the infection of foreign viruses and gain functional gene fragments or clusters to shape its own genome advantageously. This study will offer more evidence of the genomic plasticity and improve our understanding of evolutionary adaptation mechanisms to extreme AMD environment, which could expand the potential utilization of Af. thiooxydans ZJ as an iron and sulfur oxidizer in industrial bioleaching.}, } @article {pmid35240255, year = {2022}, author = {Yang, R and Zhang, B and Xu, Y and Zhang, G and Liu, Y and Zhang, D and Zhang, W and Chen, T and Liu, G}, title = {Genomic insights revealed the environmental adaptability of Planococcus halotolerans Y50 isolated from petroleum-contaminated soil on the Qinghai-Tibet Plateau.}, journal = {Gene}, volume = {823}, number = {}, pages = {146368}, doi = {10.1016/j.gene.2022.146368}, pmid = {35240255}, issn = {1879-0038}, mesh = {Base Composition ; Biodegradation, Environmental ; Genome Size ; *Genome, Bacterial ; Petroleum/analysis/*microbiology ; Phylogeny ; Planococcaceae/classification/genetics/isolation & purification/*physiology ; Soil Microbiology ; Tibet ; Whole Genome Sequencing ; }, abstract = {The Tibetan Plateau niche provides unprecedented opportunities to find microbes that are functional and commercial significance. The present study investigated the physiological and genomic characteristics of Planococcus halotolerans Y50 that was isolated from a petroleum-contaminated soil sample from the Qinghai-Tibet Plateau, and it displayed psychrotolerant, antiradiation, and oil-degraded characteristics. Whole genome sequencing indicated that strain Y50 has a 3.52 Mb genome and 44.7% G + C content, and it possesses 3377 CDSs. 
The presence of a wide range of UV damage repair genes uvrX and uvsE, DNA repair genes radA and recN, superoxide dismutase, peroxiredoxin and dioxygenase genes provided the genomic basis for the adaptation of the plateau environment polluted by petroleum. Related experiments also verified that the Y50 strain could degrade n-alkanes from C11-C23, and approximately 30% of the total petroleum at 25 °C within 7 days. Meanwhile, strain Y50 could withstand 5 × 10\textsuperscript{3} J/m\textsuperscript{2} UVC and 10 KGy gamma ray radiation, and it had strong antioxidant and high radical scavengers for superoxide anion, hydroxyl radical and DPPH. In addition, pan-genome analysis and horizontal gene transfers revealed that strains with different niches have obtained various genes through horizontal gene transfer in the process of evolution, and the more similar their geographical locations, the more similar their members are genetically and ecologically. In conclusion, P. halotolerans Y50 possesses high potential of applications in the bioremediation of alpine hydrocarbons contaminated environment.}, } @article {pmid35236759, year = {2022}, author = {Ricci, ML and Fillo, S and Ciammaruconi, A and Lista, F and Ginevra, C and Jarraud, S and Girolamo, A and Barbanti, F and Rota, MC and Lindsay, D and Gorzynski, J and Uldum, SA and Baig, S and Foti, M and Petralito, G and Torri, S and Faccini, M and Bonini, M and Gentili, G and Senatore, S and Lamberti, A and Carrico, JA and Scaturro, M}, title = {Genome analysis of Legionella pneumophila ST23 from various countries reveals highly similar strains.}, journal = {Life science alliance}, volume = {5}, number = {6}, pages = {}, pmid = {35236759}, issn = {2575-1077}, mesh = {Disease Outbreaks ; Humans ; *Legionella pneumophila/genetics ; *Legionnaires' Disease/epidemiology ; Multilocus Sequence Typing ; Serogroup ; }, abstract = {Legionella pneumophila serogroup 1 (Lp1) sequence type (ST) 23 is one of the most commonly detected STs in Italy where it currently 
causes all investigated outbreaks. ST23 has caused both epidemic and sporadic cases between 1995 and 2018 and was analysed at genomic level and compared with ST23 isolated in other countries to determine possible similarities and differences. A core genome multi-locus sequence typing (cgMLST), based on a previously described set of 1,521 core genes, and single-nucleotide polymorphisms (SNPs) approaches were applied to an ST23 collection including genomes from Italy, France, Denmark and Scotland. DNAs were automatically extracted, libraries prepared using NextEra library kit and MiSeq sequencing performed. Overall, 63 among clinical and environmental Italian Lp1 isolates and a further seven and 11 ST23 from Denmark and Scotland, respectively, were sequenced, and pangenome analysed. Both cgMLST and SNPs analyses showed very few loci and SNP variations in ST23 genomes. All the ST23 causing outbreaks and sporadic cases in Italy and elsewhere, were phylogenetically related independent of year, town or country of isolation. Distances among the ST23s were further shortened when SNPs due to horizontal gene transfers were removed. The Lp1 ST23 isolated in Italy have kept their monophyletic origin, but they are phylogenetically close also to ST23 from other countries. 
The ST23 are quite widespread in Italy, and a thorough epidemiological investigation is compelled to determine sources of infection when this ST is identified in both LD sporadic cases and outbreaks.}, } @article {pmid35230132, year = {2022}, author = {Yin, Z and Liu, X and Qian, C and Sun, L and Pang, S and Liu, J and Li, W and Huang, W and Cui, S and Zhang, C and Song, W and Wang, D and Xie, Z}, title = {Pan-Genome Analysis of Delftia tsuruhatensis Reveals Important Traits Concerning the Genetic Diversity, Pathogenicity, and Biotechnological Properties of the Species.}, journal = {Microbiology spectrum}, volume = {10}, number = {2}, pages = {e0207221}, pmid = {35230132}, issn = {2165-0497}, mesh = {*Anti-Infective Agents ; Delftia ; Genetic Variation ; *Genome, Bacterial ; Humans ; Phosphate Transport Proteins/genetics ; Phylogeny ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Delftia tsuruhatensis strains have long been known to promote plant growth and biological control. Recently, it has become an emerging opportunistic pathogen in humans. However, the genomic characteristics of the genetic diversity, pathogenicity, and biotechnological properties have not yet been comprehensively investigated. Here, a comparative pan-genome analysis was constructed. The open pan-genome with a large and flexible gene repertoire exhibited a high degree of genetic diversity. The purifying selection was the main force to drive pan-genome evolution. Significant differences were observed in the evolutionary relationship, functional enrichment, and degree of selective pressure between the different components of the pan-genome. A high degree of genetic plasticity was characterized by the determinations of diverse mobile genetic elements (MGEs), massive genomic rearrangement, and horizontal genes. Horizontal gene transfer (HGT) plays an important role in the genetic diversity of this bacterium and the formation of genomic traits. 
Our results revealed the occurrence of diverse virulence-related elements associated with macromolecular secretion systems, virulence factors associated with multiple nosocomial infections, and antimicrobial resistance, indicating the pathogenic potential. Lateral flagellum, T1SS, T2SS, T6SS, Tad pilus, type IV pilus, and a part of virulence-related genes exhibited general properties, whereas polar flagellum, T4SS, a part of virulence-related genes, and resistance genes presented heterogeneous properties. The pan-genome also harbors abundant genetic traits related to secondary metabolism, carbohydrate active enzymes (CAZymes), and phosphate transporter, indicating rhizosphere adaptation, plant growth promotion, and great potential uses in agriculture and biological control. This study provides comprehensive insights into this uncommon species from the genomic perspective. IMPORTANCE D. tsuruhatensis is considered a plant growth-promoting rhizobacterium (PGPR), an organic pollutant degradation strain, and an emerging opportunistic pathogen to the human. However, the genetic diversity, the evolutionary dynamics, and the genetic basis of these remarkable traits are still little known. We constructed a pan-genome analysis for D. tsuruhatensis and revealed extensive genetic diversity and genetic plasticity exhibited by open pan-genome, diverse mobile genetic elements (MGEs), genomic rearrangement, and horizontal genes. Our results highlight that horizontal gene transfer (HGT) and purifying selection are important forces in D. tsuruhatensis genetic evolution. The abundant virulence-related elements associated with macromolecular secretion systems, virulence factors, and antimicrobial resistance could contribute to the pathogenicity of this bacterium. Therefore, clinical microbiologists need to be aware of D. tsuruhatensis as an opportunistic pathogen. 
The genetic profiles of secondary metabolism, carbohydrate active enzymes (CAZymes), and phosphate transporter could provide insight into the genetic armory of potential applications for agriculture and biological control of D. tsuruhatensis in general.}, } @article {pmid35220969, year = {2022}, author = {Marwaha, S and Knowles, JW and Ashley, EA}, title = {A guide for the diagnosis of rare and undiagnosed disease: beyond the exome.}, journal = {Genome medicine}, volume = {14}, number = {1}, pages = {23}, pmid = {35220969}, issn = {1756-994X}, support = {R01 DK120565/DK/NIDDK NIH HHS/United States ; U41 HG009649/HG/NHGRI NIH HHS/United States ; R01 DK116750/DK/NIDDK NIH HHS/United States ; P30 DK116074/DK/NIDDK NIH HHS/United States ; }, mesh = {*Exome ; Genomics ; Humans ; Rare Diseases/diagnosis/genetics ; *Undiagnosed Diseases ; Exome Sequencing ; }, abstract = {Rare diseases affect 30 million people in the USA and more than 300-400 million worldwide, often causing chronic illness, disability, and premature death. Traditional diagnostic techniques rely heavily on heuristic approaches, coupling clinical experience from prior rare disease presentations with the medical literature. A large number of rare disease patients remain undiagnosed for years and many even die without an accurate diagnosis. In recent years, gene panels, microarrays, and exome sequencing have helped to identify the molecular cause of such rare and undiagnosed diseases. These technologies have allowed diagnoses for a sizable proportion (25-35%) of undiagnosed patients, often with actionable findings. However, a large proportion of these patients remain undiagnosed. In this review, we focus on technologies that can be adopted if exome sequencing is unrevealing. We discuss the benefits of sequencing the whole genome and the additional benefit that may be offered by long-read technology, pan-genome reference, transcriptomics, metabolomics, proteomics, and methyl profiling. 
We highlight computational methods to help identify regionally distant patients with similar phenotypes or similar genetic mutations. Finally, we describe approaches to automate and accelerate genomic analysis. The strategies discussed here are intended to serve as a guide for clinicians and researchers in the next steps when encountering patients with non-diagnostic exomes.}, } @article {pmid35217472, year = {2022}, author = {Hübner, S}, title = {Are we there yet? Driving the road to evolutionary graph-pangenomics.}, journal = {Current opinion in plant biology}, volume = {66}, number = {}, pages = {102195}, doi = {10.1016/j.pbi.2022.102195}, pmid = {35217472}, issn = {1879-0356}, mesh = {*Biological Evolution ; *Genomics/methods ; }, abstract = {With increase in the number of sequenced genomes, it is now recognized that graph-based pangenomes can provide a comprehensive platform to study diversity in a population or species, from point mutations to large chromosomal rearrangements. By incorporating concepts from graph theory, a graph-pangenome can be studied directly to identify genomic regions and genes that underlie important evolutionary processes and traits. Here, I discuss how basic concepts in graph theory can be implemented to address questions in evolutionary genomics and guide future breeding efforts. Despite its compelling versatility, a graph-pangenome assembly is still challenging especially in species with large complex genomes. As technology is rapidly improving, the graph-pangenome is expected to become a central platform in genomics studies and applications. 
Thus, development of tools and methods that exploit the graph structure are urged to pave the route to evolutionary graph-pangenomics.}, } @article {pmid35216392, year = {2022}, author = {Tay Fernandez, CG and Nestor, BJ and Danilevicz, MF and Marsh, JI and Petereit, J and Bayer, PE and Batley, J and Edwards, D}, title = {Expanding Gene-Editing Potential in Crop Improvement with Pangenomes.}, journal = {International journal of molecular sciences}, volume = {23}, number = {4}, pages = {}, pmid = {35216392}, issn = {1422-0067}, support = {DP210100296//Australian Research Council/ ; DP200100762//Australian Research Council/ ; DE210100398//Australian Research Council/ ; 9177539//Grains Research and Development Corporation/ ; 9177591//Grains Research and Development Corporation/ ; }, mesh = {CRISPR-Cas Systems/*genetics ; Crops, Agricultural/*genetics ; Gene Editing/methods ; Genome, Plant/*genetics ; Phenotype ; Plant Breeding/methods ; Plants, Genetically Modified/genetics ; }, abstract = {Pangenomes aim to represent the complete repertoire of the genome diversity present within a species or cohort of species, capturing the genomic structural variance between individuals. This genomic information coupled with phenotypic data can be applied to identify genes and alleles involved with abiotic stress tolerance, disease resistance, and other desirable traits. The characterisation of novel structural variants from pangenomes can support genome editing approaches such as Clustered Regularly Interspaced Short Palindromic Repeats and CRISPR associated protein Cas (CRISPR-Cas), providing functional information on gene sequences and new target sites in variant-specific genes with increased efficiency. This review discusses the application of pangenomes in genome editing and crop improvement, focusing on the potential of pangenomes to accurately identify target genes for CRISPR-Cas editing of plant genomes while avoiding adverse off-target effects. 
We consider the limitations of applying CRISPR-Cas editing with pangenome references and potential solutions to overcome these limitations.}, } @article {pmid35214170, year = {2022}, author = {Mahmoudinoodezh, H and Telukutla, SR and Bhangu, SK and Bachari, A and Cavalieri, F and Mantri, N}, title = {The Transdermal Delivery of Therapeutic Cannabinoids.}, journal = {Pharmaceutics}, volume = {14}, number = {2}, pages = {}, pmid = {35214170}, issn = {1999-4923}, abstract = {Recently, several studies have indicated an increased interest in the scientific community regarding the application of Cannabis sativa plants, and their extracts, for medicinal purposes. This plant of enormous medicinal potential has been legalised in an increasing number of countries globally. Due to the recent changes in therapeutic and recreational legislation, cannabis and cannabinoids are now frequently permitted for use in clinical settings. However, with their highly lipophilic features and very low aqueous solubility, cannabinoids are prone to degradation, specifically in solution, as they are light-, temperature-, and auto-oxidation-sensitive. Thus, plant-derived cannabinoids have been developed for oral, nasal-inhalation, intranasal, mucosal (sublingual and buccal), transcutaneous (transdermal), local (topical), and parenteral deliveries. Among these administrations routes, topical and transdermal products usually have a higher bioavailability rate with a prolonged steady-state plasma concentration. Additionally, these administrations have the potential to eliminate the psychotropic impacts of the drug by its diffusion into a nonreactive, dead stratum corneum. This modality avoids oral administration and, thus, the first-pass metabolism, leading to constant cannabinoid plasma levels. 
This review article investigates the practicality of delivering therapeutic cannabinoids via skin in accordance with existing literature.}, } @article {pmid35210526, year = {2022}, author = {Gonzalez-Diaz, A and Carrera-Salinas, A and Pinto, M and Cubero, M and van der Ende, A and Langereis, JD and Domínguez, MÁ and Ardanuy, C and Bajanca-Lavado, P and Marti, S}, title = {Comparative pangenome analysis of capsulated Haemophilus influenzae serotype f highlights their high genomic stability.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {3189}, pmid = {35210526}, issn = {2045-2322}, mesh = {Bacterial Capsules/*genetics ; *Genome, Bacterial ; *Genomic Instability ; Genomics ; Haemophilus Infections/microbiology ; Haemophilus influenzae/classification/*genetics ; Humans ; Multilocus Sequence Typing ; Netherlands ; Phylogeny ; Polymorphism, Single Nucleotide ; Portugal ; Serogroup ; Serotyping/methods ; Spain ; }, abstract = {Haemophilus influenzae is an opportunistic pathogen adapted to the human respiratory tract. Non-typeable H. influenzae are highly heterogeneous, but few studies have analysed the genomic variability of capsulated strains. This study aims to examine the genetic diversity of 37 serotype f isolates from the Netherlands, Portugal, and Spain, and to compare all capsulated genomes available on public databases. Serotype f isolates belonged to CC124 and shared few single nucleotide polymorphisms (SNPs) (n = 10,999), but a high core genome (> 80%). Three main clades were identified by the presence of 75, 60 and 41 exclusive genes for each clade, respectively. Multi-locus sequence type analysis of all capsulated genomes revealed a reduced number of clonal complexes associated with each serotype. Pangenome analysis showed a large pool of genes (n = 6360), many of which were accessory genome (n = 5323). Phylogenetic analysis revealed that serotypes a, b, and f had greater diversity. 
The total number of SNPs in serotype f was significantly lower than in serotypes a, b, and e (p < 0.0001), indicating low variability within the serotype f clonal complexes. Capsulated H. influenzae are genetically homogeneous, with few lineages in each serotype. Serotype f has high genetic stability regardless of time and country of isolation.}, } @article {pmid35208688, year = {2022}, author = {Dai, D and Lu, H and Xing, P and Wu, Q}, title = {Comparative Genomic Analyses of the Genus Nesterenkonia Unravels the Genomic Adaptation to Polar Extreme Environments.}, journal = {Microorganisms}, volume = {10}, number = {2}, pages = {}, pmid = {35208688}, issn = {2076-2607}, support = {QYZDJ-SSW-DQC030//Key Research Program of Frontier Science, Chinese Academy of Sciences/ ; 31730013//National Natural Science Foundation of China/ ; }, abstract = {The members of the Nesterenkonia genus have been isolated from various habitats, like saline soil, salt lake, sponge-associated and the human gut, some of which are even located in polar areas. To identify their stress resistance mechanisms and draw a genomic profile across this genus, we isolated four Nesterenkonia strains from the lakes in the Tibetan Plateau, referred to as the third pole, and compared them with all other 30 high-quality Nesterenkonia genomes that are deposited in NCBI. The Heaps' law model estimated that the pan-genome of this genus is open and the number of core, shell, cloud, and singleton genes were 993 (6.61%), 2782 (18.52%), 4117 (27.40%), and 7132 (47.47%), respectively. Phylogenomic and ANI/AAI analysis indicated that all genomes can be divided into three main clades, named NES-1, NES-2, and NES-3. The strains isolated from lakes in the Tibetan Plateau were clustered with four strains from different sources in the Antarctic and formed a subclade within NES-2, described as NES-AT. 
Genome features of this subclade, including GC (guanine + cytosine) content, tRNA number, carbon/nitrogen atoms per residue side chain (C/N-ARSC), and amino acid composition, in NES-AT individuals were significantly different from other strains, indicating genomic adaptation to cold, nutrient-limited, osmotic, and ultraviolet conditions in polar areas. Functional analysis revealed the enrichment of specific genes involved in bacteriorhodopsin synthesis, biofilm formation, and more diverse nutrient substance metabolism genes in the NES-AT clade, suggesting potential adaptation strategies for energy metabolism in polar environments. This study provides a comprehensive profile of the genomic features of the Nesterenkonia genus and reveals the possible mechanism for the survival of Nesterenkonia isolates in polar areas.}, } @article {pmid35208668, year = {2022}, author = {Silva-Andrade, C and Martin, AJ and Garrido, D}, title = {Comparative Genomics of Clostridium baratii Reveals Strain-Level Diversity in Toxin Abundance.}, journal = {Microorganisms}, volume = {10}, number = {2}, pages = {}, pmid = {35208668}, issn = {2076-2607}, support = {21191605//Agencia Nacional de Investigación y Desarrollo/ ; EQM190070//Agencia Nacional de Investigación y Desarrollo/ ; 1190074//Agencia Nacional de Investigación y Desarrollo/ ; 1181089//Agencia Nacional de Investigación y Desarrollo/ ; }, abstract = {Clostridium baratii strains are rare opportunistic pathogens associated with botulism intoxication. They have been isolated from foods, soil and be carried asymptomatically or cause botulism outbreaks. Is not taxonomically related to Clostridium botulinum, but some strains are equipped with BoNT/F7 cluster. Despite their relationship with diseases, our knowledge regarding the genomic features and phylogenetic characteristics is limited. We analyzed the pangenome of C. baratii to understand the diversity and genomic features of this species. 
We compared existing genomes in public databases, metagenomes, and one newly sequenced strain isolated from an asymptomatic subject. The pangenome was open, indicating it comprises genetically diverse organisms. The core genome contained 28.49% of the total genes of the pangenome. Profiling virulence factors confirmed the presence of phospholipase C in some strains, a toxin capable of disrupting eukaryotic cell membranes. Furthermore, the genomic analysis indicated significant horizontal gene transfer (HGT) events as defined by the presence of prophage genomes. Seven strains were equipped with BoNT/F7 cluster. The active site was conserved in all strains, identifying a missing 7-aa region upstream of the active site in C. baratii genomes. This analysis could be important to advance our knowledge regarding opportunistic clostridia and better understand their contribution to disease.}, } @article {pmid35197996, year = {2022}, author = {Tiwari, JK and Buckseth, T and Zinta, R and Bhatia, N and Dalamu, D and Naik, S and Poonia, AK and Kardile, HB and Challam, C and Singh, RK and Luthra, SK and Kumar, V and Kumar, M}, title = {Germplasm, Breeding, and Genomics in Potato Improvement of Biotic and Abiotic Stresses Tolerance.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {805671}, pmid = {35197996}, issn = {1664-462X}, abstract = {Potato is one of the most important food crops in the world. Late blight, viruses, soil and tuber-borne diseases, insect-pests mainly aphids, whiteflies, and potato tuber moths are the major biotic stresses affecting potato production. Potato is an irrigated and highly fertilizer-responsive crop, and therefore, heat, drought, and nutrient stresses are the key abiotic stresses. The genus Solanum is a reservoir of genetic diversity, however, a little fraction of total diversity has been utilized in potato breeding. The conventional breeding has contributed significantly to the development of potato varieties. 
In recent years, a tremendous progress has been achieved in the sequencing technologies from short-reads to long-reads sequence data, genomes of Solanum species (i.e., pan-genomics), bioinformatics and multi-omics platforms such as genomics, transcriptomics, proteomics, metabolomics, ionomics, and phenomics. As such, genome editing has been extensively explored as a next-generation breeding tool. With the available high-throughput genotyping facilities and tetraploid allele calling softwares, genomic selection would be a reality in potato in the near future. This mini-review covers an update on germplasm, breeding, and genomics in potato improvement for biotic and abiotic stress tolerance.}, } @article {pmid35196218, year = {2022}, author = {Francés-Cuesta, C and Ansari, I and Fernández-Garayzábal, JF and Gibello, A and González-Candelas, F}, title = {Comparative genomics and evolutionary analysis of Lactococcus garvieae isolated from human endocarditis.}, journal = {Microbial genomics}, volume = {8}, number = {2}, pages = {}, pmid = {35196218}, issn = {2057-5858}, mesh = {Animals ; Biological Evolution ; *Endocarditis ; Genomics/methods ; Humans ; *Lactococcus/genetics ; Mammals ; }, abstract = {Lactococcus garvieae is a well-known pathogen of fish, but is rarely involved in infections in humans and other mammals. In humans, the main clinical manifestation of L. garvieae infections is endocarditis usually related to the ingestion of contaminated food, such as undercooked fish and shellfish. This study presents the first complete genomic sequence of a clinical L. garvieae strain isolated from a patient with endocarditis and its comparative analysis with other genomes. This human isolate contains a circular chromosome of 2 099 060 bp and one plasmid of 50 557 bp. In comparison with other fully sequenced L. garvieae strains, the chromosomal DNA of L. garvieae Lg-Granada carries a low proportion of insertion sequence elements and a higher number of putative prophages. 
Our results show that, in general, L. garvieae is a highly recombinogenic species with an open pangenome in which almost 30 % of its genome has undergone horizontal transfers. Within the genus Lactococcus, L. lactis is the main donor of genetic components to L. garvieae but, taking Lg-Granada as a representative, this bacterium tends to import more genes from Bacilli taxa than from other Lactococcus species.}, } @article {pmid35196120, year = {2022}, author = {Yang, Y and Schubert, T and Lv, Y and Li, X and Yan, J}, title = {Comparative Genomic Analysis Reveals Preserved Features in Organohalide-Respiring Sulfurospirillum Strains.}, journal = {mSphere}, volume = {7}, number = {1}, pages = {e0093121}, pmid = {35196120}, issn = {2379-5042}, mesh = {Bacteria/genetics ; *Environmental Pollutants/metabolism ; Genomics ; Humans ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Tetrachloroethylene/metabolism ; }, abstract = {Sulfurospirillum species strains are frequently detected in various pristine and contaminated environments and participate in carbon, sulfur, nitrogen, and halogen elements cycling. Recently we obtained the complete genome sequences of two newly isolated Sulfurospirillum strains, ACSDCE and ACSTCE, capable of dechlorinating tetrachloroethene to cis-1,2-dichloroethene and trichloroethene under low-pH conditions, but a detailed analysis of these two genomes in reference to other Sulfurospirillum genomes for an improved understanding of Sulfurospirillum evolution and ecophysiology has not been accomplished. Here, we performed phylogenetic and pangenome analyses with 12 completed Sulfurospirillum genomes, including those of strain ACSTCE and strain ACSDCE, to unravel the evolutionary and metabolic potentials in the genus Sulfurospirillum. Based on 16S rRNA gene and whole-genome phylogenies, strains ACSTCE, ACSDCE, and JPD-1 could be clustered into a single species, proposed as "Candidatus Sulfurospirillum acididehalogenans." 
TimeTree analysis suggested that the organohalide-respiring (OHR) Sulfurospirillum might acquire the ability to use chlorinated electron acceptors later than other energy conservation processes. Nevertheless, the ambiguity of the phylogenetic relations among Sulfurospirillum strains complicated the interpretation of acquisition and loss of metabolic traits. Interestingly, all OHR Sulfurospirillum genomes except the ones of Sulfurospirillum multivorans strains harbor a well-aligned and conserved region comprising the genetic components required for the organohalide respiration chain. Pangenome results further revealed that a total of 34,620 gene products, annotated from the 12 Sulfurospirillum genomes, can be classified into 4,118 homolog families and 2,075 singleton families. Various Sulfurospirillum species strains have conserved metabolisms as well as individual enzymes and biosynthesis capabilities. For instance, only the OHR Sulfurospirillum species strains possess the quinone-dependent pyruvate dehydrogenase (PoxB) gene, and only "Ca. Sulfurospirillum acididehalogenans" strains harbor urea transporter and urease genes. The plasmids found in strain ACSTCE and strain ACSDCE feature genes coding for type II toxin-antitoxin systems and transposases and are promising tools for the development of robust gene editing tools for Sulfurospirillum. IMPORTANCE Organohalide-respiring bacteria (OHRB) play critical roles in the detoxification of chlorinated pollutants and bioremediation of subsurface environments (e.g., groundwater and sediment) impacted by anthropogenic chlorinated solvents. The majority of known OHRB cannot perform reductive dechlorination below neutral pH, hampering the applications of OHRB for remediating acidified groundwater due to fermentation and reductive dechlorination. 
Previously we isolated two Sulfurospirillum strains, ACSTCE and ACSDCE, capable of dechlorinating tetrachloroethene under acidic conditions (e.g., pH 5.5), and obtained the complete genomes of both strains. Notably, two plasmid sequences were identified in the genomes of strain ACSTCE and strain ACSDCE that may be conducive to unraveling the genetic modification mechanisms in the genus Sulfurospirillum. Our findings improve the current understanding of Sulfurospirillum species strains regarding their biogeographic evolution, genome dynamics, and functional diversity. This study has applied values for the bioremediation of toxic and persistent organohalide pollutants in low-pH environments.}, } @article {pmid35195510, year = {2022}, author = {Saati-Santamaría, Z and Selem-Mojica, N and Peral-Aranega, E and Rivas, R and García-Fraile, P}, title = {Unveiling the genomic potential of Pseudomonas type strains for discovering new natural products.}, journal = {Microbial genomics}, volume = {8}, number = {2}, pages = {}, pmid = {35195510}, issn = {2057-5858}, mesh = {*Biological Products/metabolism ; Genomics ; Multigene Family ; Phylogeny ; *Pseudomonas/genetics ; }, abstract = {Microbes host a huge variety of biosynthetic gene clusters that produce an immeasurable array of secondary metabolites with many different biological activities such as antimicrobial, anticarcinogenic and antiviral. Despite the complex task of isolating and characterizing novel natural products, microbial genomic strategies can be useful for carrying out these types of studies. However, although genomic-based research on secondary metabolism is on the increase, there is still a lack of reports focusing specifically on the genus Pseudomonas. 
In this work, we aimed (i) to unveil the main biosynthetic systems related to secondary metabolism in Pseudomonas type strains, (ii) to study the evolutionary processes that drive the diversification of their coding regions and (iii) to select Pseudomonas strains showing promising results in the search for useful natural products. We performed a comparative genomic study on 194 Pseudomonas species, paying special attention to the evolution and distribution of different classes of biosynthetic gene clusters and the coding features of antimicrobial peptides. Using EvoMining, a bioinformatic approach for studying evolutionary processes related to secondary metabolism, we sought to decipher the protein expansion of enzymes related to the lipid metabolism, which may have evolved toward the biosynthesis of novel secondary metabolites in Pseudomonas. The types of metabolites encoded in Pseudomonas type strains were predominantly non-ribosomal peptide synthetases, bacteriocins, N-acetylglutaminylglutamine amides and ß-lactones. Also, the evolution of genes related to secondary metabolites was found to coincide with Pseudomonas species diversification. Interestingly, only a few Pseudomonas species encode polyketide synthases, which are related to the lipid metabolism broadly distributed among bacteria. Thus, our EvoMining-based search may help to discover new types of secondary metabolite gene clusters in which lipid-related enzymes are involved. This work provides information about uncharacterized metabolites produced by Pseudomonas type strains, whose gene clusters have evolved in a species-specific way. Our results provide novel insight into the secondary metabolism of Pseudomonas and will serve as a basis for the prioritization of the isolated strains. 
This article contains data hosted by Microreact.}, } @article {pmid35192531, year = {2022}, author = {Elbir, H and Almathen, F and Almuhasen, FM}, title = {Genomic differences among strains of Corynebacterium cystitidis isolated from uterus of camels.}, journal = {Journal of infection in developing countries}, volume = {16}, number = {1}, pages = {134-146}, doi = {10.3855/jidc.15023}, pmid = {35192531}, issn = {1972-2680}, mesh = {Animals ; *Camelus ; Cattle ; *Corynebacterium/genetics ; Female ; Genomics ; Phylogeny ; Uterus ; }, abstract = {INTRODUCTION: Members of the Corynebacterium cystitidis species are usually isolated from kidney and urine of cow having pyelonephritis. Nevertheless, we have isolated Corynebacterium cystitidis for the first time from uterus of camels, extending the type of mammalian host for this species. Furthermore, it remains unknown whether there are significant genetic variations between strains isolated from different host species and anatomic sites. In this perspective, we investigated the genomic diversity of Corynebacterium cystitidis species, whose pan genome remain unexplored to date.

METHODOLOGY: Thus, we sequenced and compared the genomes of five Corynebacterium cystitidis of camel origin and a public genome of cow associated Corynebacterium cystitidis.

RESULTS: Results revealed an open pan genome of 4,038 gene clusters, and horizontal gene transfer played a role in the extensive genetic diversity. Further, we found an obvious distinction between cow and camel associated C. cystitidis via phylogenomic analysis and by average nucleotide identity value of 95% between the two distant lineages and > 99% within camel associated C. cystitidis strains. Moreover, our data supports the hypothesis that the gene repertoire of cow associated Corynebacterium cystitidis developed so as to become more adaptable to the urine milieu. These genetic potentials are specifically evident for genes required for benzoate breakdown, iron transport, citrate and alanine utilization.

CONCLUSIONS: Our findings confirm the differentiation of strains into camel lineage and cow lineage. These different niches, comprising the uterus of camel and urinary tract of cow probably played a role in shaping the gene repertoire of strains.}, } @article {pmid35189951, year = {2022}, author = {Leger, A and Brettell, I and Monahan, J and Barton, C and Wolf, N and Kusminski, N and Herder, C and Aadepu, N and Becker, C and Gierten, J and Hammouda, OT and Hasel, E and Lischik, C and Lust, K and Sokolova, N and Suzuki, R and Tavhelidse, T and Thumberger, T and Tsingos, E and Watson, P and Welz, B and Naruse, K and Loosli, F and Wittbrodt, J and Birney, E and Fitzgerald, T}, title = {Genomic variations and epigenomic landscape of the Medaka Inbred Kiyosu-Karlsruhe (MIKK) panel.}, journal = {Genome biology}, volume = {23}, number = {1}, pages = {58}, pmid = {35189951}, issn = {1474-760X}, support = {R01 ES029917/ES/NIEHS NIH HHS/United States ; }, mesh = {Animals ; Epigenomics ; Genome ; Genomics/methods ; Humans ; *Oryzias/genetics ; }, abstract = {BACKGROUND: The teleost medaka (Oryzias latipes) is a well-established vertebrate model system, with a long history of genetic research, and multiple high-quality reference genomes available for several inbred strains. Medaka has a high tolerance to inbreeding from the wild, thus allowing one to establish inbred lines from wild founder individuals.

RESULTS: We exploit this feature to create an inbred panel resource: the Medaka Inbred Kiyosu-Karlsruhe (MIKK) panel. This panel of 80 near-isogenic inbred lines contains a large amount of genetic variation inherited from the original wild population. We use Oxford Nanopore Technologies (ONT) long read data to further investigate the genomic and epigenomic landscapes of a subset of the MIKK panel. Nanopore sequencing allows us to identify a large variety of high-quality structural variants, and we present results and methods using a pan-genome graph representation of 12 individual medaka lines. This graph-based reference MIKK panel genome reveals novel differences between the MIKK panel lines and standard linear reference genomes. We find additional MIKK panel-specific genomic content that would be missing from linear reference alignment approaches. We are also able to identify and quantify the presence of repeat elements in each of the lines. Finally, we investigate line-specific CpG methylation and performed differential DNA methylation analysis across these 12 lines.

CONCLUSIONS: We present a detailed analysis of the MIKK panel genomes using long and short read sequence technologies, creating a MIKK panel-specific pan genome reference dataset allowing for investigation of novel variation types that would be elusive using standard approaches.}, } @article {pmid35189248, year = {2022}, author = {Bolourchi, N and Noori Goodarzi, N and Giske, CG and Nematzadeh, S and Haririzadeh Jouriani, F and Solgi, H and Badmasti, F}, title = {Comprehensive pan-genomic, resistome and virulome analysis of clinical OXA-48 producing carbapenem-resistant Serratia marcescens strains.}, journal = {Gene}, volume = {822}, number = {}, pages = {146355}, doi = {10.1016/j.gene.2022.146355}, pmid = {35189248}, issn = {1879-0038}, mesh = {Adult ; Base Composition ; Blood/microbiology ; Bronchoalveolar Lavage Fluid/microbiology ; Carbapenems/*pharmacology ; *Drug Resistance, Multiple, Bacterial ; Genome Size ; Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Hospitalization ; Humans ; Male ; Phylogeny ; Plasmids/genetics ; Prophages/*genetics ; Serratia marcescens/*classification/genetics/isolation & purification/virology ; Virulence Factors/genetics ; Whole Genome Sequencing/*methods ; Young Adult ; beta-Lactamases/genetics ; }, abstract = {BACKGROUND: Carbapenem-resistant Enterobacteriaceae (CRE) have been thoroughly studied as the pathogens associated with hospital acquired infections. However, data on Serratia marcescens are not enough. S. marcescens is now becoming a propensity for its highly antimicrobial-resistant clinical infections.

METHODS: Four carbapenem-resistant S. marcescens (CR-SM) isolates were obtained from hospitalized patients through routine microbiological experiments. We assembled the isolates' genomes using whole genome sequencing (WGS) and compared their resistome and virulome patterns.

RESULTS: The average length and CG content of chromosomes was 5.33 Mbp and 59.8%, respectively. The number of coding sequences (CDSs) ranged from 4,959 to 4,989. All strains had one single putative conjugative plasmid with IncL incompatibility (Inc) group. The strains harbored blaCTX-M-15, blaTEM-1 and blaSHV-134. All plasmids were positive for blaOXA-48. No blaNDM-1, blaKPC, blaVIM and blaIMP were identified. The blaSRT-2 and aac(6')-Ic genes were chromosomally-encoded. Class 1 integron was detected in strains P8, P11 and P14. The Escher_RCS47 and Salmon_SJ46 prophages played a major role in plasmid-mediated carriage of extended spectrum β-lactamases (ESBLs). The CR-SM strains were equipped with typical virulence factors of opportunistic pathogens including biofilm formation, adhesins, secretory systems and siderophores. The strains did not have the ability to produce prodigiosin but were positive for chitinase and EstA.

CONCLUSION: The presence of conjugative plasmids harboring major β-lactamases within prophage and class 1 integron structures highlights the role of different mobile genetic elements (MGEs) in distribution of AMR factors and more specifically carbapenemases. More molecular studies are required to determine the status of carbapenem resistance in clinical strains. However, appropriate strategies to control the global dissemination of CR-SM are urgent.}, } @article {pmid35184193, year = {2022}, author = {Yu, T and Bai, Y and Liu, Z and Wang, Z and Yang, Q and Wu, T and Feng, S and Zhang, Y and Shen, S and Li, Q and Gu, L and Song, X}, title = {Large-scale analyses of heat shock transcription factors and database construction based on whole-genome genes in horticultural and representative plants.}, journal = {Horticulture research}, volume = {9}, number = {}, pages = {}, pmid = {35184193}, issn = {2662-6810}, abstract = {Heat shock transcription factor (Hsf) plays a critical role in regulating heat resistance. Here, 2950 Hsf family genes were identified from 111 horticultural and representative plants. More Hsf genes were detected in higher plants than lower plants. Based on all Hsf genes, we constructed a phylogenetic tree, which indicated that Hsf genes of each branch evolved independently after species differentiation. Furthermore, we uncovered the evolutionary trajectories of Hsf genes by motif analysis. There were only 6 motifs (M1 to M6) in lower plants, and then 4 novel motifs (M7-M10) appeared in higher plants. However, the motifs of some Hsf genes were lost in higher plant, indicating that Hsf genes have undergone sequence variation during the evolution. The number of Hsf gene loss was more than duplication after whole-genome duplication in higher plants. The heat response network was constructed using 24 Hsf genes, 2421 downstream, and 222 upstream genes of Arabidopsis. 
Further enrichment analysis revealed that Hsf genes and other transcription factors interacted with each other to response heat resistance. The global expression maps were illustrated for Hsf genes under various abiotic, biotic stresses, and several developmental stages in Arabidopsis. The syntenic and phylogenetic analyses were conducted using Hsf genes of Arabidopsis and Pan-genome of 18 Brassica rapa accessions. We also performed the expression pattern analysis of Hsf and six Hsp family genes using expression values from different tissues and heat treatments in B. rapa. The interaction network between Hsf and Hsp gene families was constructed in B. rapa, and several core genes were detected in the network. Finally, we constructed a Hsf database (http://hsfdb.bio2db.com) for researchers to retrieve Hsf gene family information. Therefore, our study will provide rich resources for the evolution and functional study of Hsf genes.}, } @article {pmid35182233, year = {2022}, author = {Bu, F and Zhong, M and Chen, Q and Wang, Y and Zhao, X and Zhang, Q and Li, X and Booth, KT and Azaiez, H and Lu, Y and Cheng, J and Smith, RJH and Yuan, H}, title = {DVPred: a disease-specific prediction tool for variant pathogenicity classification for hearing loss.}, journal = {Human genetics}, volume = {141}, number = {3-4}, pages = {401-411}, pmid = {35182233}, issn = {1432-1203}, support = {2017YFC0907503//the national key research and development program of china/ ; ZYJC20002//1 3 5 project for disciplines of excellence west china hospital, sichuan university/ ; }, mesh = {*Deafness ; Genomics ; *Hearing Loss/genetics ; High-Throughput Nucleotide Sequencing ; Humans ; Virulence ; }, abstract = {Numerous computational prediction tools have been introduced to estimate the functional impact of variants in the human genome based on evolutionary constraints and biochemical metrics. 
However, their implementation in diagnostic settings to classify variants faced challenges with accuracy and validity. Most existing tools are pan-genome and pan-diseases, which neglected gene- and disease-specific properties and limited the accessibility of curated data. As a proof-of-concept, we developed a disease-specific prediction tool named Deafness Variant deleteriousness Prediction tool (DVPred) that focused on the 157 genes reportedly causing genetic hearing loss (HL). DVPred applied the gradient boosting decision tree (GBDT) algorithm to the dataset consisting of expert-curated pathogenic and benign variants from a large in-house HL patient cohort and public databases. With the incorporation of variant-level and gene-level features, DVPred outperformed the existing universal tools. It boasts an area under the curve (AUC) of 0.98, and showed consistent performance (AUC = 0.985) in an independent assessment dataset. We further demonstrated that multiple gene-level metrics, including low complexity genomic regions and substitution intolerance scores, were the top features of the model. A comprehensive analysis of missense variants showed a gene-specific ratio of predicted deleterious and neutral variants, implying varied tolerance or intolerance to variation in different genes. DVPred explored the utility of disease-specific strategy in improving the deafness variant prediction tool. It can improve the prioritization of pathogenic variants among massive variants identified by high-throughput sequencing on HL genes. 
It also shed light on the development of variant prediction tools for other genetic disorders.}, } @article {pmid35179459, year = {2022}, author = {Park, S and Jung, D and O'Brien, B and Ruffini, J and Dussault, F and Dube-Duquette, A and Demontier, É and Lucier, JF and Malouin, F and Dufour, S and Ronholm, J}, title = {Comparative genomic analysis of Staphylococcus aureus isolates associated with either bovine intramammary infections or human infections demonstrates the importance of restriction-modification systems in host adaptation.}, journal = {Microbial genomics}, volume = {8}, number = {2}, pages = {}, pmid = {35179459}, issn = {2057-5858}, mesh = {Animals ; Cattle ; DNA Restriction-Modification Enzymes ; Female ; Genomics ; Host Adaptation ; Humans ; *Mastitis, Bovine/microbiology ; *Staphylococcal Infections/microbiology ; Staphylococcus aureus ; }, abstract = {Staphylococcus aureus is a major etiological agent of clinical and subclinical bovine mastitis. The versatile and adaptative evolutionary strategies of this bacterium have challenged mastitis control and prevention globally, and the high incidence of S. aureus mastitis increases concerns about antimicrobial resistance (AMR) and zoonosis. This study aims to describe the evolutionary relationship between bovine intramammary infection (IMI)-associated S. aureus and human pathogenic S. aureus and further elucidate the specific genetic composition that leads to the emergence of successful bovine IMI-associated S. aureus lineages. We performed a phylogenomic analysis of 187 S. aureus isolates that originated from either dairy cattle or humans. Our results revealed that bovine IMI-associated S. aureus isolates showed distinct clades compared to human-originated S. aureus isolates. From a pan-genome analysis, 2070 core genes were identified. Host-specific genes and clonal complex (CC)-specific genes were also identified in bovine S. aureus isolates, mostly located in mobile genetic elements (MGEs). 
Additionally, the genome sequences of three apparent human-adapted isolates (two from CC97 and one from CC8), isolated from bovine mastitis samples, may provide a snapshot of the genomic characteristics in early host spillover events. Virulence and AMR genes were not conserved among bovine IMI-associated S. aureus isolates. Restriction-modification (R-M) genes in bovine IMI-associated S. aureus demonstrated that the Type I R-M system was lineage-specific and Type II R-M system was sequence type (ST)-specific. The distribution of exclusive, virulence, and AMR genes were closely correlated with the presence of R-M systems in S. aureus, suggesting that R-M systems may contribute to shaping clonal diversification by providing a genetic barrier to the horizontal gene transfer (HGT). Our findings indicate that the CC or ST lineage-specific R-M systems may limit genetic exchange between bovine-adapted S. aureus isolates from different lineages.}, } @article {pmid35175339, year = {2022}, author = {Pretorius, IS}, title = {Visualizing the next frontiers in wine yeast research.}, journal = {FEMS yeast research}, volume = {22}, number = {1}, pages = {}, pmid = {35175339}, issn = {1567-1364}, mesh = {Artificial Intelligence ; Computing Methodologies ; Fermentation ; Quantum Theory ; Saccharomyces cerevisiae/genetics ; *Wine/analysis ; }, abstract = {A range of game-changing biodigital and biodesign technologies are coming of age all around us, transforming our world in complex ways that are hard to predict. Not a day goes by without news of how data-centric engineering, algorithm-driven modelling, and biocyber technologies-including the convergence of artificial intelligence, machine learning, automated robotics, quantum computing, and genome editing-will change our world. 
If we are to be better at expecting the unexpected in the world of wine, we need to gain deeper insights into the potential and limitations of these technological developments and advances along with their promise and perils. This article anticipates how these fast-expanding bioinformational and biodesign toolkits might lead to the creation of synthetic organisms and model systems, and ultimately new understandings of biological complexities could be achieved. A total of four future frontiers in wine yeast research are discussed in this article: the construction of fully synthetic yeast genomes, including minimal genomes; supernumerary pan-genome neochromosomes; synthetic metagenomes; and synthetic yeast communities. These four concepts are at varying stages of development with plenty of technological pitfalls to overcome before such model chromosomes, genomes, strains, and yeast communities could illuminate some of the ill-understood aspects of yeast resilience, fermentation performance, flavour biosynthesis, and ecological interactions in vineyard and winery settings. From a winemaker's perspective, some of these ideas might be considered as far-fetched and, as such, tempting to ignore. However, synthetic biologists know that exploring these futuristic concepts in the laboratory could well forge new research frontiers to deepen our understanding of the complexities of consistently producing fine wines with different fermentation processes from distinctive viticultural terroirs. As the saying goes in the disruptive technology industry, it takes years to create an overnight success. The purpose of this article is neither to glorify any of these concepts as a panacea to all ills nor to crucify them as a danger to winemaking traditions. 
Rather, this article suggests that these proposed research endeavours deserve due consideration because they are likely to cast new light on the genetic blind spots of wine yeasts, and how they interact as communities in vineyards and wineries. Future-focussed research is, of course, designed to be subject to revision as new data and technologies become available. Successful dislodging of old paradigms with transformative innovations will require open-mindedness and pragmatism, not dogmatism-and this can make for a catch-22 situation in an archetypal traditional industry, such as the wine industry, with its rich territorial and socio-cultural connotations.}, } @article {pmid35171009, year = {2022}, author = {Dong, B and Lin, X and Jing, X and Hu, T and Zhou, J and Chen, J and Xiao, L and Wang, B and Chen, Z and Liu, J and Hu, Y and Liu, G and Liu, S and Liu, J and Wei, W and Zou, Y}, title = {A Bacterial Genome and Culture Collection of Gut Microbial in Weanling Piglet.}, journal = {Microbiology spectrum}, volume = {10}, number = {1}, pages = {e0241721}, pmid = {35171009}, issn = {2165-0497}, support = {201918/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Bacteria/classification/genetics/*isolation & purification ; Feces/microbiology ; Female ; *Gastrointestinal Microbiome ; *Genome, Bacterial ; Male ; Metagenomics ; Phylogeny ; Swine/growth & development/*microbiology ; }, abstract = {The microbiota hosted in the pig gastrointestinal tract are important to health of this biomedical model. However, the individual species and functional repertoires that make up the pig gut microbiome remain largely undefined. Here we comprehensively investigated the genomes and functions of the piglet gut microbiome using culture-based and metagenomics approaches. A collection included 266 cultured genomes and 482 metagenome-assembled genomes (MAGs) that were clustered to 428 species across 10 phyla was established. 
Among these clustered species, 333 genomes represent potential new species. Less matches between cultured genomes and MAGs revealed a substantial bias for the acquisition of reference genomes by the two strategies. Glycoside hydrolases was the dominant category of carbohydrate-active enzymes. Four-hundred forty-five secondary metabolite biosynthetic genes were predicted from 292 genomes with bacteriocin being the most. Pan genome analysis of Limosilactobacillus reuteri uncover the biosynthesis of reuterin was strain-specific and the production was experimentally determined. This study provides a comprehensive view of the microbiome composition and the function landscape of the gut of weanling piglets and a valuable bacterial resource for further experimentations. IMPORTANCE The microorganism communities resided in mammalian gastrointestinal tract impacted the health and disease of the host. Our study complements metagenomic analysis with culture-based approach to establish a bacteria and genome collection and comprehensively investigate the microbiome composition and function of the gut of weanling piglets. 
We provide a valuable resource for further study of gut microbiota of weanling piglet and development of probiotics for prevention of disease.}, } @article {pmid35168548, year = {2022}, author = {García-Martín, AB and Roder, T and Schmitt, S and Zeeh, F and Bruggmann, R and Perreten, V}, title = {Whole-genome analyses reveal a novel prophage and cgSNPs-derived sublineages of Brachyspira hyodysenteriae ST196.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {131}, pmid = {35168548}, issn = {1471-2164}, support = {1.16.04//Swiss Federal Food Safety and Veterinary Office (SFVO)/ ; 1.19.05//Swiss Federal Food Safety and Veterinary Office (SFVO)/ ; 1.16.04//Swiss Federal Food Safety and Veterinary Office (SFVO)/ ; 1.16.04//Swiss Federal Food Safety and Veterinary Office (SFVO)/ ; }, mesh = {Animals ; Anti-Bacterial Agents ; *Brachyspira ; *Brachyspira hyodysenteriae/genetics ; *Gram-Negative Bacterial Infections ; Macrolides ; Prophages/genetics ; Swine ; *Swine Diseases ; }, abstract = {BACKGROUND: Brachyspira (B.) hyodysenteriae is a fastidious anaerobe spirochete that can cause swine dysentery, a severe mucohaemorragic colitis that affects pig production and animal welfare worldwide. In Switzerland, the population of B. hyodysenteriae is characterized by the predominance of macrolide-lincosamide-resistant B. hyodysenteriae isolates of sequence type (ST) ST196, prompting us to obtain deeper insights into the genomic structure and variability of ST196 using pangenome and whole genome variant analyses.

RESULTS: The draft genome of 14 B. hyodysenteriae isolates of ST196, sampled during a 7-year period from geographically distant pig herds, was obtained by whole-genome sequencing (WGS) and compared to the complete genome of the B. hyodysenteriae isolate Bh743-7 of ST196 used as reference. Variability results revealed the existence of 30 to 52 single nucleotide polymorphisms (SNPs), resulting in eight sublineages of ST196. The pangenome analysis led to the identification of a novel prophage, pphBhCH20, of the Siphoviridae family in a single isolate of ST196, which suggests that horizontal gene transfer events may drive changes in genomic structure.

CONCLUSIONS: This study contributes to the catalogue of publicly available genomes and provides relevant bioinformatic tools and information for further comparative genomic analyses for B. hyodysenteriae. It reveals that Swiss B. hyodysenteriae isolates of the same ST may have evolved independently over time by point mutations and acquisition of larger genetic elements. In line with this, the third type of mobile genetic element described so far in B. hyodysenteriae, the novel prophage pphBhCH20, has been identified in a single isolate of B. hyodysenteriae of ST196.}, } @article {pmid35166928, year = {2022}, author = {Wang, H and Shen, Y and Li, P and Xiao, Y and Li, Y and Hu, X and Wang, Z and Cheng, Z and Wang, Z and Liu, Q and Qin, S and Huo, X and Ma, K and Zhang, W and Zhang, H and Wang, L}, title = {Characterization and genomic analysis of a Demerecviridae phage SP76 with lytic multiple-serotypes of Salmonella.}, journal = {Archives of microbiology}, volume = {204}, number = {3}, pages = {175}, pmid = {35166928}, issn = {1432-072X}, support = {U1803109//Innovative Research Group Project of the National Natural Science Foundation of China/ ; BE2019304//Jiangsu Provincial Key Research and Development Program/ ; wzykjtd202002//University-level science and technology team of Wuhu institute of technology/ ; 2018YFC1602500//National key research and development program of China/ ; }, mesh = {*Bacteriophages/genetics ; Genome, Viral ; Genomics ; Salmonella enteritidis ; Serogroup ; }, abstract = {With the increase in antimicrobial resistance of Salmonella, phages have been paid more attention to as an alternative to antibiotics. In this study, a phage designated as SP76 was isolated from sewage. It can lyse several serotypes of Salmonella, including S. typhimurium (21/33), S. enteritidis (7/7), S. dublin (4/4), S. pullorum (2/2) and S. choleraesuis (1/2). 
SP76 showed a latent time of about 10 min, and maintained good lytic activity at a pH range of 3-10 and temperatures between 4 and 37 °C. Moreover, its optimal multiplicity of infection (MOI) was 0.0001. Based on the results of genomic sequence and analysis, SP76 was found to have a genome of 111,639 bp that encoded 166 predicted ORFs and belong to the Demerecviridae family, order Caudovirales. No virulence or lysogen formation gene clusters were identified in the SP76 genome. A pan-genome analysis based on 100 phages within the subfamily Markadamsvirinae indicated that SP76 had 23 core genes and 1199 accessory genes. We grouped the subfamily Markadamsvirinae and found that the main difference was in group III. In vitro bacteriostasis, experiments showed that the phage SP76 reduced planktonic bacteria by 1.52 log10 CFU/mL, and biofilms (24 h old) by 0.372 log10 CFU/mL, respectively. Thus, we isolated a safe and efficient phage that might be a good antibacterial agent.}, } @article {pmid35166563, year = {2022}, author = {Pudlo, NA and Urs, K and Crawford, R and Pirani, A and Atherly, T and Jimenez, R and Terrapon, N and Henrissat, B and Peterson, D and Ziemer, C and Snitkin, E and Martens, EC}, title = {Phenotypic and Genomic Diversification in Complex Carbohydrate-Degrading Human Gut Bacteria.}, journal = {mSystems}, volume = {7}, number = {1}, pages = {e0094721}, pmid = {35166563}, issn = {2379-5077}, support = {K01 DK084214/DK/NIDDK NIH HHS/United States ; P30 DK034933/DK/NIDDK NIH HHS/United States ; R01 DK118024/DK/NIDDK NIH HHS/United States ; R01 DK125445/DK/NIDDK NIH HHS/United States ; }, mesh = {Humans ; *Polysaccharides/chemistry ; Bacteria/metabolism ; Dietary Carbohydrates/metabolism ; *Microbiota ; Dietary Fiber/metabolism ; Genomics ; Mucins/metabolism ; }, abstract = {Symbiotic bacteria are responsible for the majority of complex carbohydrate digestion in the human colon. 
Since the identities and amounts of dietary polysaccharides directly impact the gut microbiota, determining which microorganisms consume specific nutrients is central for defining the relationship between diet and gut microbial ecology. Using a custom phenotyping array, we determined carbohydrate utilization profiles for 354 members of the Bacteroidetes, a dominant saccharolytic phylum. There was wide variation in the numbers and types of substrates degraded by individual bacteria, but phenotype-based clustering grouped members of the same species indicating that each species performs characteristic roles. The ability to utilize dietary polysaccharides and endogenous mucin glycans was negatively correlated, suggesting exclusion between these niches. By analyzing related Bacteroides ovatus/Bacteroides xylanisolvens strains that vary in their ability to utilize mucin glycans, we addressed whether gene clusters that confer this complex, multilocus trait are being gained or lost in individual strains. Pangenome reconstruction of these strains revealed a remarkably mosaic architecture in which genes involved in polysaccharide metabolism are highly variable and bioinformatics data provide evidence of interspecies gene transfer that might explain this genomic heterogeneity. Global transcriptomic analyses suggest that the ability to utilize mucin has been lost in some lineages of B. ovatus and B. xylanisolvens, which harbor residual gene clusters that are involved in mucin utilization by strains that still actively express this phenotype. Our data provide insight into the breadth and complexity of carbohydrate metabolism in the microbiome and the underlying genomic events that shape these behaviors. IMPORTANCE Nonharmful bacteria are the primary microbial symbionts that inhabit the human gastrointestinal tract. These bacteria play many beneficial roles and in some cases can modify disease states, making it important to understand which nutrients sustain specific lineages. 
This knowledge will in turn lead to strategies to intentionally manipulate the gut microbial ecosystem. We designed a scalable, high-throughput platform for measuring the ability of gut bacteria to utilize polysaccharides, of which many are derived from dietary fiber sources that can be manipulated easily. Our results provide paths to expand phenotypic surveys of more diverse gut bacteria to understand their functions and also to leverage dietary fibers to alter the physiology of the gut microbial community.}, } @article {pmid35165358, year = {2022}, author = {Kuang, X and Wang, F and Hernandez, KM and Zhang, Z and Grossman, RL}, title = {Accurate and rapid prediction of tuberculosis drug resistance from genome sequence data using traditional machine learning algorithms and CNN.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {2427}, pmid = {35165358}, issn = {2045-2322}, mesh = {Antitubercular Agents/*pharmacology/*therapeutic use ; Cohort Studies ; *Data Accuracy ; *Deep Learning ; Drug Resistance, Multiple, Bacterial/*genetics ; Genome, Bacterial/*drug effects ; Humans ; Microbial Sensitivity Tests ; Mutation ; Mycobacterium tuberculosis/*genetics/isolation & purification ; Phenotype ; Phylogeny ; Prognosis ; Tuberculosis, Multidrug-Resistant/*drug therapy/microbiology ; Whole Genome Sequencing/*methods ; }, abstract = {Effective and timely antibiotic treatment depends on accurate and rapid in silico antimicrobial-resistant (AMR) predictions. Existing statistical rule-based Mycobacterium tuberculosis (MTB) drug resistance prediction methods using bacterial genomic sequencing data often achieve varying results: high accuracy on some antibiotics but relatively low accuracy on others. Traditional machine learning (ML) approaches have been applied to classify drug resistance for MTB and have shown more stable performance. 
However, there is no study that uses deep learning architecture like Convolutional Neural Network (CNN) on a large and diverse cohort of MTB samples for AMR prediction. We developed 24 binary classifiers of MTB drug resistance status across eight anti-MTB drugs and three different ML algorithms: logistic regression, random forest and 1D CNN using a training dataset of 10,575 MTB isolates collected from 16 countries across six continents, where an extended pan-genome reference was used for detecting genetic features. Our 1D CNN architecture was designed to integrate both sequential and non-sequential features. In terms of F1-scores, 1D CNN models are our best classifiers that are also more accurate and stable than the state-of-the-art rule-based tool Mykrobe predictor (81.1 to 93.8%, 93.7 to 96.2%, 93.1 to 94.8%, 95.9 to 97.2% and 97.1 to 98.2% for ethambutol, rifampicin, pyrazinamide, isoniazid and ofloxacin respectively). We applied filter-based feature selection to find AMR relevant features. All selected variant features are AMR-related ones in CARD database. 78.8% of them are also in the catalogue of MTB mutations that were recently identified as drug resistance-associated ones by WHO. To facilitate ML model development for AMR prediction, we packaged every step into an automated pipeline and shared the source code at https://github.com/KuangXY3/MTB-AMR-classification-CNN .}, } @article {pmid35161414, year = {2022}, author = {Kaashyap, M and Ford, R and Mann, A and Varshney, RK and Siddique, KHM and Mantri, N}, title = {Comparative Flower Transcriptome Network Analysis Reveals DEGs Involved in Chickpea Reproductive Success during Salinity.}, journal = {Plants (Basel, Switzerland)}, volume = {11}, number = {3}, pages = {}, pmid = {35161414}, issn = {2223-7747}, abstract = {Salinity is increasingly becoming a significant problem for the most important yet intrinsically salt-sensitive grain legume chickpea. 
Chickpea is extremely sensitive to salinity during the reproductive phase. Therefore, it is essential to understand the molecular mechanisms by comparing the transcriptomic dynamics between the two contrasting genotypes in response to salt stress. Chickpea exhibits considerable genetic variation amongst improved cultivars, which show better yields in saline conditions but still need to be enhanced for sustainable crop production. Based on previous extensive multi-location physiological screening, two identified genotypes, JG11 (salt-tolerant) and ICCV2 (salt-sensitive), were subjected to salt stress to evaluate their phenological and transcriptional responses. RNA-Sequencing is a revolutionary tool that allows for comprehensive transcriptome profiling to identify genes and alleles associated with stress tolerance and sensitivity. After the first flowering, the whole flower from stress-tolerant and sensitive genotypes was collected. A total of ~300 million RNA-Seq reads were sequenced, resulting in 2022 differentially expressed genes (DEGs) in response to salt stress. Genes involved in flowering time such as FLOWERING LOCUS T (FT) and pollen development such as ABORTED MICROSPORES (AMS), rho-GTPase, and pollen-receptor kinase were significantly differentially regulated, suggesting their role in salt tolerance. In addition to this, we identify a suite of essential genes such as MYB proteins, MADS-box, and chloride ion channel genes, which are crucial regulators of transcriptional responses to salinity tolerance. 
The gene set enrichment analysis and functional annotation of these genes in flower development suggest that they can be potential candidates for chickpea crop improvement for salt tolerance.}, } @article {pmid35151053, year = {2022}, author = {Sun, Q and Cheng, J and Lin, R and Li, J and Zhang, Y and Liang, X and Su, Y and Pang, R and Xue, L and Zeng, H and Gu, Q and Ding, Y and Wu, Q and Chen, M and Zhang, J}, title = {A novel multiplex PCR method for simultaneous identification of hypervirulent Listeria monocytogenes clonal complex 87 and CC88 strains in China.}, journal = {International journal of food microbiology}, volume = {366}, number = {}, pages = {109558}, doi = {10.1016/j.ijfoodmicro.2022.109558}, pmid = {35151053}, issn = {1879-3460}, mesh = {Animals ; Food Microbiology ; *Listeria monocytogenes ; Milk ; Multilocus Sequence Typing ; Multiplex Polymerase Chain Reaction ; }, abstract = {Listeria monocytogenes is an important foodborne pathogen worldwide, with 20-30% fatality rate in vulnerable persons. The hypervirulent L. monocytogenes clonal complex (CC) 87 strains have emerging both in food production environments and clinic cases. The objective of this study was to develop a multiplex PCR to simultaneously detect L. monocytogenes CC87 and CC88 strains based on pan-genome analysis. A novel multiplex PCR comprised of genes A6K41_13255 (specific for CC87 and 88), BCW_4260_01987 group_8135 (specific for CC88) and 02-1103_01073 group_5869 (specific for L. monocytogenes) were designed. The specificity of this multiplex PCR was robust verified with other CCs of L. monocytogenes and other species strains. The detection limit of this multiplex PCR for CC87 and CC88 were 1.7 × 10^4 cfu/mL and 2.1 × 10^4 cfu/mL, respectively. This multiplex PCR could accurately detect CC87 and CC88 strains with the interference of different ratios of L. monocytogenes CC8, CC9, CC121, CC155, and L. innocua strains. 
Furthermore, this multiplex PCR method could successfully detect 1.9 × 10^4 cfu/mL of L. monocytogenes CC87 and 1.7 × 10^4 cfu/mL CC88 strains in artificially contaminated milk after 9 h enrichment, respectively. In addition, this multiplex PCR could accurately detect CC87 isolates in food samples within 48 h, which was faster than the routine MLST analysis. In conclusion, this novel multiplex PCR offers a promising approach for accurate, inexpensive, and rapid detection of L. monocytogenes CC87 and CC88 strains simultaneously, which could apply to surveillance the prevalence of CC87 and CC88 strains in both food and food production environments and to evaluate the effect of disinfection measures for controlling the persistent L. monocytogenes contamination.}, } @article {pmid35146465, year = {2022}, author = {van Hal, SJ and Willems, RJL and Gouliouris, T and Ballard, SA and Coque, TM and Hammerum, AM and Hegstad, K and Pinholt, M and Howden, BP and Malhotra-Kumar, S and Werner, G and Yanagihara, K and Earl, AM and Raven, KE and Corander, J and Bowden, R and others}, title = {The interplay between community and hospital Enterococcus faecium clones within health-care settings: a genomic analysis.}, journal = {The Lancet. Microbe}, volume = {3}, number = {2}, pages = {e133--e141}, pmid = {35146465}, issn = {2666-5247}, support = {U19 AI110818/AI/NIAID NIH HHS/United States ; HHSN272200900018C/AI/NIAID NIH HHS/United States ; 203141/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Clone Cells ; *Enterococcus faecium/genetics ; Genome, Bacterial/genetics ; Genomics ; Hospitals ; Humans ; Phylogeny ; }, abstract = {BACKGROUND: The genomic relationships among Enterococcus faecium isolates are the subject of ongoing research that seeks to clarify the origins of observed lineages and the extent of horizontal gene transfer between them, and to robustly identify links between genotypes and phenotypes. 
E faecium is considered to form distinct groups—A and B—corresponding to isolates derived from patients who were hospitalised (A) and isolates from humans in the community (B). The additional separation of A into the so-called clades A1 and A2 remains an area of uncertainty. We aimed to investigate the relationships between A1 and non-A1 groups and explore the potential role of non-A1 isolates in shaping the population structure of hospital E faecium.

METHODS: We collected short-read sequence data from invited groups that had previously published E faecium genome data. This hospital-based isolate collection could be separated into three groups (or clades, A1, A2, and B) by augmenting the study genomes with published sequences derived from human samples representing the previously defined genomic clusters. We performed phylogenetic analyses, by constructing maximum-likelihood phylogenetic trees, and identified historical recombination events. We assessed the pan-genome, did resistome analysis, and examined the genomic data to identify mobile genetic elements. Each genome underwent chromosome painting by use of ChromoPainter within FineSTRUCTURE software to assess ancestry and identify hybrid groups. We further assessed highly admixed regions to infer recombination directionality.

FINDINGS: We assembled a collection of 1095 hospital E faecium sequences from 34 countries, further augmented by 33 published sequences. 997 (88%) of 1128 genomes clustered as A1, 92 (8%) as A2, and 39 (4%) as B. We showed that A1 probably emerged as a clone from within A2 and that, because of ongoing gene flow, hospital isolates currently identified as A2 represent a genetic continuum between A1 and community E faecium. This interchange of genetic material between isolates from different groups results in the emergence of hybrid genomes between clusters. Of the 1128 genomes, 49 (4%) hybrid genomes were identified: 33 previously labelled as A2 and 16 previously labelled as A1. These interactions were fuelled by a directional pattern of recombination mediated by mobile genetic elements. By contrast, the contribution of B group genetic material to A1 was limited to a few small regions of the genome and appeared to be driven by genomic sweep events.

INTERPRETATION: A2 and B isolates coming into the hospital form an important reservoir for ongoing A1 adaptation, suggesting that effective long-term control of the effect of E faecium could benefit from strategies to reduce these genomic interactions, such as a focus on reducing the acquisition of hospital A1 strains by patients entering the hospital.

FUNDING: Wellcome Trust.}, } @article {pmid35143550, year = {2022}, author = {Wyka, S and Mondo, S and Liu, M and Nalam, V and Broders, K}, title = {A large accessory genome and high recombination rates may influence global distribution and broad host range of the fungal plant pathogen Claviceps purpurea.}, journal = {PloS one}, volume = {17}, number = {2}, pages = {e0263496}, pmid = {35143550}, issn = {1932-6203}, mesh = {Claviceps/*genetics ; DNA Transposable Elements ; *Genome, Fungal ; Host Specificity ; Polymorphism, Single Nucleotide ; *Recombination, Genetic ; Selection, Genetic ; }, abstract = {Pangenome analyses are increasingly being utilized to study the evolution of eukaryotic organisms. While pangenomes can provide insight into polymorphic gene content, inferences about the ecological and adaptive potential of such organisms also need to be accompanied by additional supportive genomic analyses. In this study we constructed a pangenome of Claviceps purpurea from 24 genomes and examined the positive selection and recombination landscape of an economically important fungal organism for pharmacology and agricultural research. Together, these analyses revealed that C. purpurea has a relatively large accessory genome (~ 38%), high recombination rates (ρ = 0.044), and transposon mediated gene duplication. However, due to observations of relatively low transposable element (TE) content (8.8%) and a lack of variability in genome sizes, prolific TE expansion may be controlled by frequent recombination. We additionally identified that within the ergoline biosynthetic cluster the lpsA1 and lpsA2 were the result of a recombination event. However, the high recombination rates observed in C. purpurea may be influencing an overall trend of purifying selection across the genome. 
These results showcase the use of selection and recombination landscapes to identify mechanisms contributing to pangenome structure and primary factors influencing the evolution of an organism.}, } @article {pmid35143500, year = {2022}, author = {Roe, C and Vazquez, AJ and Phillips, PD and Allender, CJ and Bowen, RA and Nottingham, RD and Doyle, A and Wongsuwan, G and Wuthiekanun, V and Limmathurotsakul, D and Peacock, S and Keim, P and Tuanyok, A and Wagner, DM and Sahl, JW}, title = {Multiple phylogenetically-diverse, differentially-virulent Burkholderia pseudomallei isolated from a single soil sample collected in Thailand.}, journal = {PLoS neglected tropical diseases}, volume = {16}, number = {2}, pages = {e0010172}, pmid = {35143500}, issn = {1935-2735}, mesh = {Animals ; Burkholderia pseudomallei/classification/genetics/*isolation & purification/*pathogenicity ; Female ; Genome, Bacterial ; Genomics ; Humans ; Melioidosis/*microbiology ; Mice, Inbred BALB C ; Multilocus Sequence Typing ; *Phylogeny ; *Soil Microbiology ; Thailand ; Virulence ; }, abstract = {Burkholderia pseudomallei is a soil-dwelling bacterium endemic to Southeast Asia and northern Australia that causes the disease, melioidosis. Although the global genomic diversity of clinical B. pseudomallei isolates has been investigated, there is limited understanding of its genomic diversity across small geographic scales, especially in soil. In this study, we obtained 288 B. pseudomallei isolates from a single soil sample (~100g; intensive site 2, INT2) collected at a depth of 30cm from a site in Ubon Ratchathani Province, Thailand. We sequenced the genomes of 169 of these isolates that represent 7 distinct sequence types (STs), including a new ST (ST1820), based on multi-locus sequence typing (MLST) analysis. A core genome SNP phylogeny demonstrated that all identified STs share a recent common ancestor that diverged an estimated 796-1260 years ago. 
A pan-genomics analysis demonstrated recombination between clades and intra-MLST phylogenetic and gene differences. To identify potential differential virulence between STs, groups of BALB/c mice (5 mice/isolate) were challenged via subcutaneous injection (500 CFUs) with 30 INT2 isolates representing 5 different STs; over the 21-day experiment, eight isolates killed all mice, 2 isolates killed an intermediate number of mice (1-2), and 20 isolates killed no mice. Although the virulence results were largely stratified by ST, one virulent isolate and six attenuated isolates were from the same ST (ST1005), suggesting that variably conserved genomic regions may contribute to virulence. Genomes from the animal-challenged isolates were subjected to a bacterial genome-wide association study to identify genomic regions associated with differential virulence. One associated region is a unique variant of Hcp1, a component of the type VI secretion system, which may result in attenuation. The results of this study have implications for comprehensive sampling strategies, environmental exposure risk assessment, and understanding recombination and differential virulence in B. 
pseudomallei.}, } @article {pmid35143386, year = {2022}, author = {Huang, Z and Yu, K and Fu, S and Xiao, Y and Wei, Q and Wang, D}, title = {Genomic analysis reveals high intra-species diversity of Shewanella algae.}, journal = {Microbial genomics}, volume = {8}, number = {2}, pages = {}, pmid = {35143386}, issn = {2057-5858}, mesh = {Adaptation, Biological ; Anti-Bacterial Agents/pharmacology ; CRISPR-Cas Systems ; China ; Drug Resistance, Bacterial/genetics ; *Genetic Variation ; Genome, Bacterial ; Genomic Islands ; *Genomics ; Humans ; Microbial Sensitivity Tests ; Phylogeny ; Prophages/genetics ; Shewanella/classification/drug effects/*genetics/isolation & purification ; Species Specificity ; Virulence/genetics ; }, abstract = {Shewanella algae is widely distributed in marine and freshwater habitats, and has been proved to be an emerging marine zoonotic and human pathogen. However, the genomic characteristics and pathogenicity of Shewanella algae are unclear. Here, the whole-genome features of 55 S. algae strains isolated from different sources were described. Pan-genome analysis yielded 2863 (19.4 %) genes shared among all strains. Functional annotation of the core genome showed that the main functions are focused on basic lifestyle such as metabolism and energy production. Meanwhile, the phylogenetic tree of the single nucleotide polymorphisms (SNPs) of core genome divided the 55 strains into three clades, with the majority of strains from China falling into the first two clades. As for the accessory genome, 167 genomic islands (GIs) and 65 phage-related elements were detected. The CRISPR-Cas system with a high degree of confidence was predicted in 23 strains. The GIs carried a suite of virulence genes and mobile genetic elements, while prophages contained several transposases and integrases. 
Horizontal genes transfer based on homology analysis indicated that these GIs and prophages were parts of major drivers for the evolution and the environmental adaptation of S. algae. In addition, a rich putative virulence-associated gene pool was found. Eight classes of antibiotic-associated resistance genes were detected, and the carriage rate of β-lactam resistance genes was 100 %. In conclusion, S. algae exhibits a high intra-species diversity in the aspects of population structure, virulence-associated genes and potential drug resistance, which is helpful for its evolution in pathogenesis and environmental adaptability.}, } @article {pmid35139905, year = {2022}, author = {Diricks, M and Kohl, TA and Käding, N and Leshchinskiy, V and Hauswaldt, S and Jiménez Vázquez, O and Utpatel, C and Niemann, S and Rupp, J and Merker, M}, title = {Whole genome sequencing-based classification of human-related Haemophilus species and detection of antimicrobial resistance genes.}, journal = {Genome medicine}, volume = {14}, number = {1}, pages = {13}, pmid = {35139905}, issn = {1756-994X}, mesh = {*Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/genetics ; Haemophilus/genetics ; *Haemophilus Infections/epidemiology/microbiology ; Humans ; Whole Genome Sequencing ; }, abstract = {BACKGROUND: Bacteria belonging to the genus Haemophilus cause a wide range of diseases in humans. Recently, H. influenzae was classified by the WHO as priority pathogen due to the wide spread of ampicillin resistant strains. However, other Haemophilus spp. are often misclassified as H. influenzae. Therefore, we established an accurate and rapid whole genome sequencing (WGS) based classification and serotyping algorithm and combined it with the detection of resistance genes.

METHODS: A gene presence/absence-based classification algorithm was developed, which employs the open-source gene-detection tool SRST2 and a new classification database comprising 36 genes, including capsule loci for serotyping. These genes were identified using a comparative genome analysis of 215 strains belonging to ten human-related Haemophilus (sub)species (training dataset). The algorithm was evaluated on 1329 public short read datasets (evaluation dataset) and used to reclassify 262 clinical Haemophilus spp. isolates from 250 patients (German cohort). In addition, the presence of antibiotic resistance genes within the German dataset was evaluated with SRST2 and correlated with results of traditional phenotyping assays.

RESULTS: The newly developed algorithm can differentiate between clinically relevant Haemophilus species including, but not limited to, H. influenzae, H. haemolyticus, and H. parainfluenzae. It can also identify putative haemin-independent H. haemolyticus strains and determine the serotype of typeable Haemophilus strains. The algorithm performed excellently in the evaluation dataset (99.6% concordance with reported species classification and 99.5% with reported serotype) and revealed several misclassifications. Additionally, 83 out of 262 (31.7%) suspected H. influenzae strains from the German cohort were in fact H. haemolyticus strains, some of which associated with mouth abscesses and lower respiratory tract infections. Resistance genes were detected in 16 out of 262 datasets from the German cohort. Prediction of ampicillin resistance, associated with blaTEM-1D, and tetracycline resistance, associated with tetB, correlated well with available phenotypic data.

CONCLUSIONS: Our new classification database and algorithm have the potential to improve diagnosis and surveillance of Haemophilus spp. and can easily be coupled with other public genotyping and antimicrobial resistance databases. Our data also point towards a possible pathogenic role of H. haemolyticus strains, which needs to be further investigated.}, } @article {pmid35134833, year = {2022}, author = {Cunial, F and Denas, O and Belazzougui, D}, title = {Fast and compact matching statistics analytics.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {7}, pages = {1838--1845}, pmid = {35134833}, issn = {1367-4811}, mesh = {*Software ; Sequence Analysis, DNA/methods ; *Algorithms ; Genomics/methods ; Genome ; }, abstract = {MOTIVATION: Fast, lightweight methods for comparing the sequence of ever larger assembled genomes from ever growing databases are increasingly needed in the era of accurate long reads and pan-genome initiatives. Matching statistics is a popular method for computing whole-genome phylogenies and for detecting structural rearrangements between two genomes, since it is amenable to fast implementations that require a minimal setup of data structures. However, current implementations use a single core, take too much memory to represent the result, and do not provide efficient ways to analyze the output in order to explore local similarities between the sequences.

RESULTS: We develop practical tools for computing matching statistics between large-scale strings, and for analyzing its values, faster and using less memory than the state-of-the-art. Specifically, we design a parallel algorithm for shared-memory machines that computes matching statistics 30 times faster with 48 cores in the cases that are most difficult to parallelize. We design a lossy compression scheme that shrinks the matching statistics array to a bitvector that takes from 0.8 to 0.2 bits per character, depending on the dataset and on the value of a threshold, and that achieves 0.04 bits per character in some variants. And we provide efficient implementations of range-maximum and range-sum queries that take a few tens of milliseconds while operating on our compact representations, and that allow computing key local statistics about the similarity between two strings. Our toolkit makes construction, storage and analysis of matching statistics arrays practical for multiple pairs of the largest genomes available today, possibly enabling new applications in comparative genomics.

Our C/C++ code is available at https://github.com/odenas/indexed_ms under GPL-3.0. The data underlying this article are available in NCBI Genome at https://www.ncbi.nlm.nih.gov/genome and in the International Genome Sample Resource (IGSR) at https://www.internationalgenome.org.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35123187, year = {2022}, author = {Wang, Z and Zheng, X and Guo, G and Hu, Z and Miao, J and Dong, Y and Xu, Z and Zhou, Q and Wei, X and Han, X and Liu, Y and Zhang, W}, title = {O145 may be emerging as a predominant serogroup of Avian pathogenic Escherichia coli (APEC) in China.}, journal = {Veterinary microbiology}, volume = {266}, number = {}, pages = {109358}, doi = {10.1016/j.vetmic.2022.109358}, pmid = {35123187}, issn = {1873-2542}, mesh = {Animals ; China/epidemiology ; Escherichia coli/genetics ; *Escherichia coli Infections/epidemiology/veterinary ; *Escherichia coli Proteins/genetics ; Serogroup ; *Shiga-Toxigenic Escherichia coli/genetics ; }, abstract = {Among the numerous serotypes of Avian pathogenic Escherichia coli (APEC), O1, O2 and O78 have long been considered the predominant serogroups. O145, a pivotal serogroup in non-O157 Shiga toxin-producing Escherichia coli, has never been considered an important serogroup among APEC. The prevalence of APEC O145 was determined from the results of molecular serogrouping based on 42 sequenced isolates from Jiangsu and Guangxi Provinces in China. After realizing the potential importance of O145, 224 APEC isolates isolated from Jiangsu, Guangxi, Anhui, Shandong, Henan, Yunnan and Fujian provinces were screened using PCR amplification. The results showed that the proportion of O145 detected was 37.9 % (85/224), which was higher than those of the three traditional APEC serogroups. The virulence evaluation experiment showed that this serogroup may have stronger pathogenicity. Here, we report for the first time that O145 may be emerging as a predominant serogroup of APEC in China. The possible reasons for its prevalence and oversight were analyzed through genomic analysis. Furthermore, pangenome analysis with STEC O145 was performed to assess the potential threat to humans. 
The discovery of the ubiquity of O145 may not be coincidental, which may also account for the failure of vaccines that target the three major serogroups. Therefore, this newly predominant serogroup should be paid more attention and the focus should not be limited to the so-called three major APEC serogroups.}, } @article {pmid35115531, year = {2022}, author = {Reid, CJ and Cummins, ML and Börjesson, S and Brouwer, MSM and Hasman, H and Hammerum, AM and Roer, L and Hess, S and Berendonk, T and Nešporová, K and Haenni, M and Madec, JY and Bethe, A and Michael, GB and Schink, AK and Schwarz, S and Dolejska, M and Djordjevic, SP}, title = {A role for ColV plasmids in the evolution of pathogenic Escherichia coli ST58.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {683}, pmid = {35115531}, issn = {2041-1723}, mesh = {Animals ; Cattle ; Drug Resistance, Microbial/genetics ; Escherichia coli/classification/*genetics/pathogenicity ; Escherichia coli Infections/diagnosis/microbiology/*veterinary ; *Evolution, Molecular ; Genome, Bacterial/genetics ; Genomic Islands/*genetics ; Genomics/methods ; Host Specificity ; Humans ; Phylogeny ; Plasmids/*genetics ; Poultry ; Species Specificity ; Swine ; Virulence/genetics ; Virulence Factors/*genetics ; }, abstract = {Escherichia coli ST58 has recently emerged as a globally disseminated uropathogen that often progresses to sepsis. Unlike most pandemic extra-intestinal pathogenic E. coli (ExPEC), which belong to pathogenic phylogroup B2, ST58 belongs to the environmental/commensal phylogroup B1. Here, we present a pan-genomic analysis of a global collection of 752 ST58 isolates from diverse sources. We identify a large ST58 sub-lineage characterized by near ubiquitous carriage of ColV plasmids, which carry genes encoding virulence factors, and by a distinct accessory genome including genes typical of the Yersiniabactin High Pathogenicity Island. 
This sub-lineage includes three-quarters of all ExPEC sequences in our study and has a broad host range, although poultry and porcine sources predominate. By contrast, strains isolated from cattle often lack ColV plasmids. Our data indicate that ColV plasmid acquisition contributed to the divergence of the major ST58 sub-lineage, and different sub-lineages inhabit poultry, swine and cattle.}, } @article {pmid35115520, year = {2022}, author = {Li, H and Wang, S and Chai, S and Yang, Z and Zhang, Q and Xin, H and Xu, Y and Lin, S and Chen, X and Yao, Z and Yang, Q and Fei, Z and Huang, S and Zhang, Z}, title = {Graph-based pan-genome reveals structural and sequence variations related to agronomic traits and domestication in cucumber.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {682}, pmid = {35115520}, issn = {2041-1723}, mesh = {Chromosomes, Plant/genetics ; Cucumis sativus/classification/*genetics/growth & development ; DNA, Plant/chemistry/genetics ; *Domestication ; Gene Expression Regulation, Plant ; *Genetic Variation ; Genome, Plant/*genetics ; Genome-Wide Association Study/methods ; Genomics/*methods ; Genotype ; INDEL Mutation ; Phylogeny ; Polymorphism, Single Nucleotide ; Quantitative Trait Loci/*genetics ; Reverse Transcriptase Polymerase Chain Reaction ; Sequence Analysis, DNA/methods ; Species Specificity ; Synteny ; }, abstract = {Structural variants (SVs) represent a major source of genetic diversity and are related to numerous agronomic traits and evolutionary events; however, their comprehensive identification and characterization in cucumber (Cucumis sativus L.) have been hindered by the lack of a high-quality pan-genome. Here, we report a graph-based cucumber pan-genome by analyzing twelve chromosome-scale genome assemblies. Genotyping of seven large chromosomal rearrangements based on the pan-genome provides useful information for use of wild accessions in breeding and genetic studies. 
A total of ~4.3 million genetic variants including 56,214 SVs are identified leveraging the chromosome-level assemblies. The pan-genome graph integrating both variant information and reference genome sequences aids the identification of SVs associated with agronomic traits, including warty fruits, flowering times and root growth, and enhances the understanding of cucumber trait evolution. The graph-based cucumber pan-genome and the identified genetic variants provide rich resources for future biological research and genomics-assisted breeding.}, } @article {pmid35114403, year = {2022}, author = {Gong, M and Yang, P and Fang, W and Li, R and Jiang, Y}, title = {Building a cattle pan-genome using more de novo assemblies.}, journal = {Journal of genetics and genomics = Yi chuan xue bao}, volume = {49}, number = {9}, pages = {906-908}, doi = {10.1016/j.jgg.2022.01.003}, pmid = {35114403}, issn = {1673-8527}, mesh = {Animals ; Cattle/genetics ; *Genome/genetics ; *High-Throughput Nucleotide Sequencing ; Sequence Analysis, DNA ; }, } @article {pmid35111182, year = {2021}, author = {Navarro-Payá, D and Santiago, A and Orduña, L and Zhang, C and Amato, A and D'Inca, E and Fattorini, C and Pezzotti, M and Tornielli, GB and Zenoni, S and Rustenholz, C and Matus, JT}, title = {The Grape Gene Reference Catalogue as a Standard Resource for Gene Selection and Genetic Improvement.}, journal = {Frontiers in plant science}, volume = {12}, number = {}, pages = {803977}, pmid = {35111182}, issn = {1664-462X}, abstract = {Effective crop improvement, whether through selective breeding or biotech strategies, is largely dependent on the cumulative knowledge of a species' pangenome and its containing genes. Acquiring this knowledge is specially challenging in grapevine, one of the oldest fruit crops grown worldwide, which is known to have more than 30,000 genes. 
Well-established research communities studying model organisms have created and maintained, through public and private funds, a diverse range of online tools and databases serving as repositories of genomes and gene function data. The lack of such resources for the non-model, but economically important, Vitis vinifera species has driven the need for a standardised collection of genes within the grapevine community. In an effort led by the Integrape COST Action CA17111, we have recently developed the first grape gene reference catalogue, where genes are ascribed to functional data, including their accession identifiers from different genome-annotation versions (https://integrape.eu/resources/genes-genomes/). We present and discuss this gene repository together with a validation-level scheme based on varied supporting evidence found in current literature. The catalogue structure and online submission form provided permits community curation. Finally, we present the Gene Cards tool, developed within the Vitis Visualization (VitViz) platform, to visualize the data collected in the catalogue and link gene function with tissue-specific expression derived from public transcriptomic data. 
This perspective article aims to present these resources to the community as well as highlight their potential use, in particular for plant-breeding applications.}, } @article {pmid35108613, year = {2022}, author = {Svahn, AJ and Chang, SL and Rockett, RJ and Cliff, OM and Wang, Q and Arnott, A and Ramsperger, M and Sorrell, TC and Sintchenko, V and Prokopenko, M}, title = {Genome-wide networks reveal emergence of epidemic strains of Salmonella Enteritidis.}, journal = {International journal of infectious diseases : IJID : official publication of the International Society for Infectious Diseases}, volume = {117}, number = {}, pages = {65-73}, doi = {10.1016/j.ijid.2022.01.056}, pmid = {35108613}, issn = {1878-3511}, mesh = {Disease Outbreaks ; Humans ; Minisatellite Repeats ; Phylogeny ; *Salmonella Infections/epidemiology/microbiology ; *Salmonella enteritidis/genetics ; Whole Genome Sequencing ; }, abstract = {OBJECTIVES: To enhance monitoring of high-burden foodborne pathogens, there is opportunity to combine pangenome data with network analysis.

METHODS: Salmonella enterica subspecies Enterica serovar Enteritidis isolates were referred to the New South Wales (NSW) Enteric Reference Laboratory between August 2015 and December 2019 (1033 isolates in total), inclusive of a confirmed outbreak. All isolates underwent whole genome sequencing. Distances between genomes were quantified by in silico multiple-locus variable-number tandem repeat analysis (MLVA) as well as core single nucleotide polymorphisms (SNPs), which informed the construction of undirected networks. Centrality-prevalence spaces were generated from the undirected networks. Components on the undirected SNP network were considered alongside a phylogenetic tree representation.

RESULTS: Outbreak isolates were identified as distinct components on the MLVA and SNP networks. The MLVA network-based centrality-prevalence space did not delineate the outbreak, whereas the outbreak was delineated in the SNP network-based centrality-prevalence space. Components on the undirected SNP network showed a high concordance to the SNP clusters based on phylogenetic analysis.

CONCLUSIONS: Bacterial whole-genome data in network-based analysis can improve the resolution of population analysis. High concordance of network components and SNP clusters is promising for rapid population analyses of foodborne Salmonella spp. owing to the low overhead of network analysis.}, } @article {pmid35108356, year = {2022}, author = {Rubio, A and Jimenez, J and Pérez-Pulido, AJ}, title = {Assessment of selection pressure exerted on genes from complete pangenomes helps to improve the accuracy in the prediction of new genes.}, journal = {Briefings in bioinformatics}, volume = {23}, number = {2}, pages = {}, doi = {10.1093/bib/bbac010}, pmid = {35108356}, issn = {1477-4054}, mesh = {*Acinetobacter baumannii/genetics/metabolism ; Bacteria/genetics ; Base Sequence ; *Genome, Bacterial ; Humans ; Phylogeny ; Virulence/genetics ; }, abstract = {Bacterial genomes are massively sequenced, and they provide valuable data to better know the complete set of genes of a species. The analysis of thousands of bacterial strains can identify both shared genes and those appearing only in the pathogenic ones. Current computational gene finders facilitate this task but often miss some existing genes. However, the present availability of different genomes from the same species is useful to estimate the selective pressure applied on genes of complete pangenomes. It may assist in evaluating gene predictions either by checking the certainty of a new gene or annotating it as a gene under positive selection. Here, we estimated the selective pressure of 19 271 genes that are part of the pangenome of the human opportunistic pathogen Acinetobacter baumannii and found that most genes in this bacterium are subject to negative selection. However, 23% of them showed values compatible with positive selection. 
These latter were mainly uncharacterized proteins or genes required to evade the host defence system including genes related to resistance and virulence whose changes may be favoured to acquire new functions. Finally, we evaluated the utility of measuring selection pressure in the detection of sequencing errors and the validation of gene prediction.}, } @article {pmid35107701, year = {2022}, author = {Vandamme, P and Peeters, C and Seth-Smith, HMB and Schmid, H and Cnockaert, M and Egli, A and Goldenberger, D}, title = {Description of Pseudoclavibacter triregionum sp. nov. from human blood and Pseudoclavibacter albus comb. nov., and revised classification of the genus Pseudoclavibacter: proposal of Caespitibacter gen. nov., with Caespitibacter soli comb. nov. and Caespitibacter caeni comb. nov.}, journal = {Antonie van Leeuwenhoek}, volume = {115}, number = {4}, pages = {461-472}, pmid = {35107701}, issn = {1572-9699}, mesh = {Bacterial Typing Techniques ; DNA, Bacterial/chemistry/genetics ; *Fatty Acids/analysis ; Humans ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {We present polyphasic taxonomic data to demonstrate that strain 125703-2019[T], a human blood isolate, represents a novel species within the genus Pseudoclavibacter, and to reclassify the illegitimate Zimmermannella alba Lin et al., 2004 as Pseudoclavibacter albus comb. nov. Upon primary isolation, strain 125703-2019[T] could not be identified reliably using MALDI-TOF mass spectrometry during routine diagnostic work, but partial 16S rRNA gene sequence analysis revealed that it belonged to the genus Pseudoclavibacter. Average nucleotide identity and digital DNA-DNA hybridisation analyses confirmed that it represented a novel species within this genus. A detailed physiological characterisation yielded differential tests between the novel species and its nearest neighbor taxa, which could also be differentiated using MALDI-TOF mass spectrometry. 
We propose to formally classify this strain into the novel species Pseudoclavibacter triregionum sp. nov., with strain 125703-2019[T] (= R-76471[T], LMG 31777[T], CCUG 74796[T]) as the type strain. The whole-genome assembly of strain 125703-2019[T] has a size of 2.4 Mb and a G + C content of 72.74%. A Pseudoclavibacter pangenome analysis revealed that 667 gene clusters were exclusively present in strain 125703-2019[T]. While these gene clusters were enriched in several COG functional categories, this analysis did not reveal functions that explained the occurrence of this species in human infection. Finally, several phylogenetic and phylogenomic analyses demonstrated that the genus Pseudoclavibacter is polyphyletic with Pseudoclavibacter soli and Pseudoclavibacter caeni representing a unique and deeply branching line of descent within the family Microbacteriaceae. We therefore also propose to reclassify both species into the novel genus Caespitibacter gen. nov. as Caespitibacter soli comb. nov. and Caespitibacter caeni comb. nov., respectively, and with C. soli comb. nov. 
as the type species.}, } @article {pmid35107332, year = {2022}, author = {Peng, M and Wang, D and Lui, LM and Nielsen, T and Tian, R and Kempher, ML and Tao, X and Pan, C and Chakraborty, R and Deutschbauer, AM and Thorgersen, MP and Adams, MWW and Fields, MW and Hazen, TC and Arkin, AP and Zhou, A and Zhou, J}, title = {Genomic Features and Pervasive Negative Selection in Rhodanobacter Strains Isolated from Nitrate and Heavy Metal Contaminated Aquifer.}, journal = {Microbiology spectrum}, volume = {10}, number = {1}, pages = {e0259121}, pmid = {35107332}, issn = {2165-0497}, mesh = {Base Composition ; Gammaproteobacteria/classification/*genetics/*isolation & purification/metabolism ; Gene Transfer, Horizontal ; Genome Size ; Genome, Bacterial ; Genomic Islands ; Genomics ; Groundwater/microbiology ; Metals, Heavy/analysis/metabolism ; Nitrates/*analysis/metabolism ; Phylogeny ; Water Pollutants, Chemical/*analysis/metabolism ; }, abstract = {Rhodanobacter species dominate in the Oak Ridge Reservation (ORR) subsurface environments contaminated with acids, nitrate, metal radionuclides, and other heavy metals. To uncover the genomic features underlying adaptations to these mixed-waste environments and to guide genetic tool development, we sequenced the whole genomes of eight Rhodanobacter strains isolated from the ORR site. The genome sizes ranged from 3.9 to 4.2 Mb harboring 3,695 to 4,035 protein-coding genes and GC contents approximately 67%. Seven strains were classified as R. denitrificans and one strain, FW510-R12, as R. thiooxydans based on full length 16S rRNA sequences. According to gene annotation, the top two Cluster of Orthologous Groups (COGs) with high pan-genome expansion rates (Pan/Core gene ratio) were "replication, recombination and repair" and "defense mechanisms." The denitrifying genes had high DNA homologies except the predicted protein structure variances in NosZ. 
In contrast, heavy metal resistance genes were diverse, with 7 to 34% of them located in genomic islands, and these results suggested origins from horizontal gene transfer. Analysis of the methylation patterns in four strains revealed the unique 5mC methylation motifs. Most orthologs (78%) had ratios of nonsynonymous to synonymous substitutions (dN/dS) less than one when compared to the type strain 2APBS1, suggesting the prevalence of negative selection. Overall, the results provide evidence for the important roles of horizontal gene transfer and negative selection in genomic adaptation at the contaminated field site. The complex restriction-modification system genes and the unique methylation motifs in Rhodanobacter strains suggest the potential recalcitrance to genetic manipulation. IMPORTANCE Despite the dominance of Rhodanobacter species in the subsurface of the contaminated Oak Ridge Reservation (ORR) site, very little is known about the mechanisms underlying their adaptions to the various stressors present at ORR. Recently, multiple Rhodanobacter strains have been isolated from the ORR groundwater samples from several wells with varying geochemical properties. Using Illumina, PacBio, and Oxford Nanopore sequencing platforms, we obtained the whole genome sequences of eight Rhodanobacter strains. Comparison of the whole genomes demonstrated the genetic diversity, and analysis of the long nanopore reads revealed the heterogeneity of methylation patterns in strains isolated from the same well. Although all strains contained a complete set of denitrifying genes, the predicted tertiary structures of NosZ differed. The sequence comparison results demonstrate the important roles of horizontal gene transfer and negative selection in adaptation. 
In addition, these strains may be recalcitrant to genetic manipulation due to the complex restriction-modification systems and methylations.}, } @article {pmid35104691, year = {2022}, author = {Cummins, EA and Hall, RJ and McInerney, JO and McNally, A}, title = {Prokaryote pangenomes are dynamic entities.}, journal = {Current opinion in microbiology}, volume = {66}, number = {}, pages = {73-78}, doi = {10.1016/j.mib.2022.01.005}, pmid = {35104691}, issn = {1879-0364}, mesh = {*Prokaryotic Cells ; }, abstract = {Prokaryote pangenomes are influenced heavily by environmental factors and the opportunity for gene gain and loss events. As the field of pangenome analysis has expanded, so has the need to fully understand the complexity of how eco-evolutionary dynamics shape pangenomes. Here, we describe current models of pangenome evolution and discuss their suitability and accuracy. We suggest that pangenomes are dynamic entities under constant flux, highlighting the influence of two-way interactions between pangenome and environment. New classifications of core and accessory genes are also considered, underscoring the need for continuous evaluation of nomenclature in a fast-moving field. 
We conclude that future models of pangenome evolution should incorporate eco-evolutionary dynamics to fully encompass their dynamic, changeable nature.}, } @article {pmid35104682, year = {2022}, author = {Basharat, Z and Khan, K and Jalal, K and Ahmad, D and Hayat, A and Alotaibi, G and Al Mouslem, A and Aba Alkhayl, FF and Almatroudi, A}, title = {An in silico hierarchal approach for drug candidate mining and validation of natural product inhibitors against pyrimidine biosynthesis enzyme in the antibiotic-resistant Shigella flexneri.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {98}, number = {}, pages = {105233}, doi = {10.1016/j.meegid.2022.105233}, pmid = {35104682}, issn = {1567-7257}, mesh = {Anti-Bacterial Agents/*pharmacology ; Biological Products/*antagonists & inhibitors ; Computer Simulation ; Drug Discovery/*methods ; Drug Resistance, Bacterial ; Pyrimidines/biosynthesis/*pharmacology ; Shigella flexneri/drug effects/*enzymology ; }, abstract = {Shigella flexneri is the main causative agent of the communicable diarrheal disease, shigellosis. It is estimated that about 80-165 million cases and > 1 million deaths occur every year due to this disease. S. flexneri causes dysentery mostly in young children, elderly and immunocompromised patients, all over the globe. Recently, due to the emergence of S. flexneri antibiotic resistance strains, it is a dire need to predict novel therapeutic drug targets in the bacterium and screen natural products against it, which could eliminate the curse of antibiotic resistance. Therefore, in current study, available antibiotic-resistant genomes (n = 179) of S. flexneri were downloaded from PATRIC database and a pan-genome and resistome analysis was conducted. 
Around 5059 genes made up the accessory, 2469 genes made up the core, and 1558 genes made up the unique genome fraction, with 44, 34, and 13 antibiotic-resistant genes in each fraction, respectively. Core genome fraction (27% of the pan-genome), which was common to all strains, was used for subtractive genomics and resulted in 384 non-homologous, and 85 druggable targets. Dihydroorotase was chosen for further analysis and docked with natural product libraries (Ayurvedic and Streptomycin compounds), while the control was orotic acid or vitamin B13 (which is a natural binder of this protein). Dynamics simulation of 50 ns was carried out to validate findings for top-scored inhibitors. The current study proposed dihydroorotase as a significant drug target in S. flexneri and 4-tritriacontanone & patupilone compounds as potent drugs against shigellosis. Further experiments are required to ascertain validity of our findings.}, } @article {pmid35103490, year = {2022}, author = {Delgado-Blas, JF and Valenzuela Agüi, C and Marin Rodriguez, E and Serna, C and Montero, N and Saba, CKS and Gonzalez-Zorn, B}, title = {Dissemination Routes of Carbapenem and Pan-Aminoglycoside Resistance Mechanisms in Hospital and Urban Wastewater Canalizations of Ghana.}, journal = {mSystems}, volume = {7}, number = {1}, pages = {e0101921}, pmid = {35103490}, issn = {2379-5077}, support = {//Wellcome Trust/United Kingdom ; }, mesh = {*Carbapenems ; *Aminoglycosides ; Wastewater ; Ghana ; Anti-Bacterial Agents ; Bacteria ; Hospitals ; }, abstract = {Wastewater has a major role in antimicrobial resistance (AMR) dynamics and public health. The impact on AMR of wastewater flux at the community-hospital interface in low- and middle-income countries (LMICs) is poorly understood. Therefore, the present study analyzed the epidemiological scenario of resistance genes, mobile genetic elements (MGEs), and bacterial populations in wastewater around the Tamale metropolitan area (Ghana). 
Wastewater samples were collected from the drainage and canalizations before and after three hospitals and one urban waste treatment plant (UWTP). From all carbapenem/pan-aminoglycoside-resistant bacteria, 36 isolates were selected to determine bacterial species and phenotypical resistance profiles. Nanopore sequencing was used to screen resistance genes and plasmids, whereas sequence types, resistome and plasmidome contents, pan-genome structures, and resistance gene variants were analyzed with Illumina sequencing. The combination of these sequencing data allowed for the resolution of the resistance gene-carrying platforms. Hospitals and the UWTP collected genetic and bacterial elements from community wastewater and amplified successful resistance gene-bacterium associations, which reached the community canalizations. Uncommon carbapenemase/β-lactamase gene variants, like blaDIM-1, and novel variants, including blaVIM-71, blaCARB-53, and blaCMY-172, were identified and seem to spread via clonal expansion of environmental Pseudomonas spp. However, blaNDM-1, blaCTX-M-15, and armA genes, among others, were associated with MGEs that allowed for their dissemination between environmental and clinical bacterial hosts. In conclusion, untreated hospital wastewater in Ghana is a hot spot for the emergence and spread of genes and gene-plasmid-bacterium associations that accelerate AMR, including to last-resort antibiotics. Urgent actions must be taken in wastewater management in LMICs in order to delay AMR expansion. IMPORTANCE Antimicrobial resistance (AMR) is one of the major threats to public health today, especially resistance to last-resort compounds for the treatment of critical infections, such as carbapenems and aminoglycosides. Innumerable works have focused on the clinical ambit of AMR, but studies addressing the impact of wastewater cycles on the emergence and dissemination of resistant bacteria are still limited. 
The lack of knowledge is even greater when referring to low- and middle-income countries, where there is an absence of accurate sanitary systems. Furthermore, the combination of short- and long-read sequencing has surpassed former technical limitations, allowing the complete characterization of resistance genes, mobile genetic platforms, plasmids, and bacteria. The present study deciphered the multiple elements and routes involved in AMR dynamics in wastewater canalizations and, therefore, in the local population of Tamale, providing the basis to adopt accurate control measures to preserve and promote public health.}, } @article {pmid35100028, year = {2022}, author = {Tambong, JT and Xu, R and Cuppels, D and Chapados, J and Gerdis, S and Eyres, J and Koziol, A and Dettman, J}, title = {Whole-Genome Resources and Species-Level Taxonomic Validation of 89 Plant-Pathogenic Xanthomonas Strains Isolated from Various Host Plants.}, journal = {Plant disease}, volume = {106}, number = {6}, pages = {1558-1565}, doi = {10.1094/PDIS-11-21-2498-SC}, pmid = {35100028}, issn = {0191-2917}, mesh = {Genome, Bacterial/genetics ; *Solanum lycopersicum/microbiology ; Phylogeny ; Plant Diseases/microbiology ; United States ; *Xanthomonas ; }, abstract = {Bacterial spot disease caused by Xanthomonas spp. is a global threat to tomato and pepper plants. A recent classification of these pathogens indicated the need for a diverse dataset of whole-genome resources. We report whole-genome resources of 89 Xanthomonas strains isolated from Canada (n = 44), the United States (n = 29), Argentina (n = 4), Brazil (n = 3), Costa Rica (n = 3), New Zealand (n = 1), Australia (n = 1), Mexico (n = 1), Taiwan (n = 1), Thailand (n = 1), and unknown (n = 1). Of these strains, 48 were previously identified to species-level based on nongenome-based approaches while 41 strains were classified only at the genus level. The average coverage of the sequencing reads was 103×. 
The draft genome sizes ranged from 4.53 to 5.46 Mbp with a G + C content of 63.53 to 67.78% and comprised 4,233-5,178 protein-coding sequences. Using average nucleotide identity (ANI) and genome-based DNA-DNA hybridization (gDDH) values, the taxonomic classifications were validated for 38 of the 48 strains previously assigned to species level using other methods. Ten strains previously identified as Xanthomonas campestris, X. axonopodis, X. vasicola, and X. arboricola were incorrectly assigned, and new species-level delineations are proposed. Data from ANI, gDDH, and pangenome phylogeny of shared protein families were used to assign the 41 strains, previously identified only to genus level, into five distinct species: X. euvesicatoria (pv. euvesicatoria or pv. perforans), X. hortorum pv. gardneri, X. vesicatoria, X. campestris, and X. arboricola. These 89 whole-genome sequences of Xanthomonas strains, the majority (49.4%) of which are from Canada, could be useful resources in our understanding of the global population structure and evolution of these pathogens.}, } @article {pmid35095786, year = {2021}, author = {Li, X and Yang, Z and Wang, Z and Li, W and Zhang, G and Yan, H}, title = {Comparative Genomics of Pseudomonas stutzeri Complex: Taxonomic Assignments and Genetic Diversity.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {755874}, pmid = {35095786}, issn = {1664-302X}, abstract = {Pseudomonas stutzeri is a species complex with extremely broad phenotypic and genotypic diversity. However, very little is known about its diversity, taxonomy and phylogeny at the genomic scale. To address these issues, we systematically and comprehensively defined the taxonomy and nomenclature for this species complex and explored its genetic diversity using hundreds of sequenced genomes. By combining average nucleotide identity (ANI) evaluation and phylogenetic inference approaches, we identified 123 P. 
stutzeri complex genomes covering at least six well-defined species among all sequenced Pseudomonas genomes; of these, 25 genomes represented novel members of this species complex. ANI values of ≥∼95% and digital DNA-DNA hybridization (dDDH) values of ≥∼60% in combination with phylogenomic analysis consistently and robustly supported the division of these strains into 27 genomovars (most likely species to some extent), comprising 16 known and 11 unknown genomovars. We revealed that 12 strains had mistaken taxonomic assignments, while 16 strains without species names can be assigned to the species level within the species complex. We observed an open pan-genome of the P. stutzeri complex comprising 13,261 gene families, among which approximately 45% gene families do not match any sequence present in the COG database, and a large proportion of accessory genes. The genome contents experienced extensive genetic gain and loss events, which may be one of the major mechanisms driving diversification within this species complex. Surprisingly, we found that the ectoine biosynthesis gene cluster (ect) was present in all genomes of P. stutzeri species complex strains but distributed at very low frequency (43 out of 9548) in other Pseudomonas genomes, suggesting a possible origin of the ancestors of P. stutzeri species complex in high-osmolarity environments. Collectively, our study highlights the potential of using whole-genome sequences to re-evaluate the current definition of the P. 
stutzeri complex, shedding new light on its genomic diversity and evolutionary history.}, } @article {pmid35088248, year = {2022}, author = {{Md Abdullah-Al-Mamun} and Hossain, MS and Debnath, GC and Sultana, S and Rahman, A and Hasan, Z and Das, SR and Ashik, MA and Prodhan, MY and Aktar, S and Cho, KM and Haque, MA}, title = {Unveiling lignocellulolytic trait of a goat omasum inhabitant Klebsiella variicola strain HSTU-AAM51 in light of biochemical and genome analyses.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {53}, number = {1}, pages = {99-130}, pmid = {35088248}, issn = {1678-4405}, support = {17-475 RG/BIO/AS_I//The World Academy of Sciences (IT)/ ; January//The World Academy of Sciences (IT)/ ; 2018 -June//The World Academy of Sciences (IT)/ ; 2020//The World Academy of Sciences (IT)/ ; }, mesh = {Animals ; *Goats ; Klebsiella ; *Omasum ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Klebsiella variicola is generally known as endophyte as well as lignocellulose-degrading strain. However, their roles in goat omasum along with lignocellulolytic genetic repertoire are not yet explored. In this study, five different pectin-degrading bacteria were isolated from a healthy goat omasum. Among them, a new Klebsiella variicola strain HSTU-AAM51 was identified to degrade lignocellulose. The genome of the HSTU-AAM51 strain comprised 5,564,045 bp with a GC content of 57.2% and 5312 coding sequences. The comparison of housekeeping genes (16S rRNA, TonB, gyrase B, RecA) and whole-genome sequence (ANI, pangenome, synteny, DNA-DNA hybridization) revealed that the strain HSTU-AAM51 was clustered with Klebsiella variicola strains, but the HSTU-AAM51 strain was markedly deviated. It consisted of seventeen cellulases (GH1, GH3, GH4, GH5, GH13), fourteen beta-glucosidase (2GH3, 7GH4, 4GH1), two glucosidase, and one pullulanase genes. 
The strain secreted cellulase, pectinase, and xylanase, lignin peroxidase approximately 76-78 U/mL and 57-60 U/mL, respectively, when it was cultured on banana pseudostem for 96 h. The catalytically important residues of extracellular cellulase, xylanase, mannanase, pectinase, chitinase, and tannase proteins (validated 3D model) were bound to their specific ligands. Besides, genes involved in the benzoate and phenylacetate catabolic pathways as well as laccase and DiP-type peroxidase were annotated, which indicated the strain lignin-degrading potentiality. This study revealed a new K. variicola bacterium from goat omasum which harbored lignin and cellulolytic enzymes that could be utilized for the production of bioethanol from lignocelluloses.}, } @article {pmid35087075, year = {2022}, author = {Lee, IPA and Andam, CP}, title = {Frequencies and characteristics of genome-wide recombination in Streptococcus agalactiae, Streptococcus pyogenes, and Streptococcus suis.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {1515}, pmid = {35087075}, issn = {2045-2322}, support = {R35 GM142924/GM/NIGMS NIH HHS/United States ; 1R35GM142924/NH/NIH HHS/United States ; }, mesh = {*Streptococcus suis ; }, abstract = {Streptococcus consists of ecologically diverse species, some of which are important pathogens of humans and animals. We sought to quantify and compare the frequencies and characteristics of within-species recombination in the pan-genomes of Streptococcus agalactiae, Streptococcus pyogenes and Streptococcus suis. We used 1081, 1813 and 1204 publicly available genome sequences of each species, respectively. Based on their core genomes, S. agalactiae had the highest relative rate of recombination to mutation (11.5743) compared to S. pyogenes (1.03) and S. suis (0.57). The proportion of the species pan-genome that have had a history of recombination was 12.85%, 24.18% and 20.50% of the pan-genomes of each species, respectively. 
The composition of recombining genes varied among the three species, and some of the most frequently recombining genes are implicated in adhesion, colonization, oxidative stress response and biofilm formation. For each species, a total of 22.75%, 29.28% and 18.75% of the recombining genes were associated with prophages. The cargo genes of integrative conjugative elements and integrative and mobilizable elements contained genes associated with antimicrobial resistance and virulence. Homologous recombination and mobilizable pan-genomes enable the creation of novel combinations of genes and sequence variants, and the potential for high-risk clones to emerge.}, } @article {pmid35074363, year = {2022}, author = {Fu, S and Wang, Q and Wang, R and Zhang, Y and Lan, R and He, F and Yang, Q}, title = {Horizontal transfer of antibiotic resistance genes within the bacterial communities in aquacultural environment.}, journal = {The Science of the total environment}, volume = {820}, number = {}, pages = {153286}, doi = {10.1016/j.scitotenv.2022.153286}, pmid = {35074363}, issn = {1879-1026}, mesh = {*Anti-Bacterial Agents/pharmacology ; Aquaculture ; *Drug Resistance, Bacterial/genetics ; Enterobacter/genetics ; *Gene Transfer, Horizontal ; Genes, Bacterial ; Genome, Bacterial ; Providencia/genetics ; RNA, Ribosomal, 16S ; Shewanella/genetics ; *Vibrio parahaemolyticus/genetics ; }, abstract = {Very little is known about how microbiome interactions shape the horizontal transfer of antibiotic resistance genes in aquacultural environment. To this end, we first conducted 16S rRNA gene amplicon sequencing to monitor the dynamics of bacterial community compositions in one shrimp farm from 2019 to 2020. Next, co-occurrence analysis was then conducted to reveal the interactions network between Vibrio spp. and other species. Subsequently, 21 V. parahaemolyticus isolates and 15 related bacterial species were selected for whole-genome sequencing (WGS). 
The 16S rDNA amplicon sequencing results identified a remarkable increase of Vibrio and Providencia in September-2019 and a significant rise of Enterobacter and Shewanella in September-2020. Co-occurrence analysis revealed that Vibrio spp. positively interacted with the above species, leading to the sequencing of their isolates to further understand the sharing of the resistant genomic islands (GIs). Subsequent pan-genomic analysis of V. parahaemolyticus genomes identified 278 horizontally transferred genes in 10 GIs, most of which were associated with antibiotic resistance, virulence, and fitness of metabolism. Most of the GIs have also been identified in Providencia, and Enterobacter, suggesting that exchange of genetic traits might occur in V. parahaemolyticus and other cooperative species in a specific niche. No genetic exchange was found between the species with negative relationships. The knowledge generated from this study would greatly improve our capacity to predict and mitigate the emergence of new resistant population and provide practical guidance on the microbial management during the aquacultural activities.}, } @article {pmid35072136, year = {2021}, author = {Rehm, HL and Page, AJH and Smith, L and Adams, JB and Alterovitz, G and Babb, LJ and Barkley, MP and Baudis, M and Beauvais, MJS and Beck, T and Beckmann, JS and Beltran, S and Bernick, D and Bernier, A and Bonfield, JK and Boughtwood, TF and Bourque, G and Bowers, SR and Brookes, AJ and Brudno, M and Brush, MH and Bujold, D and Burdett, T and Buske, OJ and Cabili, MN and Cameron, DL and Carroll, RJ and Casas-Silva, E and Chakravarty, D and Chaudhari, BP and Chen, SH and Cherry, JM and Chung, J and Cline, M and Clissold, HL and Cook-Deegan, RM and Courtot, M and Cunningham, F and Cupak, M and Davies, RM and Denisko, D and Doerr, MJ and Dolman, LI and Dove, ES and Dursi, LJ and Dyke, SOM and Eddy, JA and Eilbeck, K and Ellrott, KP and Fairley, S and Fakhro, KA and Firth, HV and Fitzsimons, MS 
and Fiume, M and Flicek, P and Fore, IM and Freeberg, MA and Freimuth, RR and Fromont, LA and Fuerth, J and Gaff, CL and Gan, W and Ghanaim, EM and Glazer, D and Green, RC and Griffith, M and Griffith, OL and Grossman, RL and Groza, T and Auvil, JMG and Guigó, R and Gupta, D and Haendel, MA and Hamosh, A and Hansen, DP and Hart, RK and Hartley, DM and Haussler, D and Hendricks-Sturrup, RM and Ho, CWL and Hobb, AE and Hoffman, MM and Hofmann, OM and Holub, P and Hsu, JS and Hubaux, JP and Hunt, SE and Husami, A and Jacobsen, JO and Jamuar, SS and Janes, EL and Jeanson, F and Jené, A and Johns, AL and Joly, Y and Jones, SJM and Kanitz, A and Kato, K and Keane, TM and Kekesi-Lafrance, K and Kelleher, J and Kerry, G and Khor, SS and Knoppers, BM and Konopko, MA and Kosaki, K and Kuba, M and Lawson, J and Leinonen, R and Li, S and Lin, MF and Linden, M and Liu, X and Udara Liyanage, I and Lopez, J and Lucassen, AM and Lukowski, M and Mann, AL and Marshall, J and Mattioni, M and Metke-Jimenez, A and Middleton, A and Milne, RJ and Molnár-Gábor, F and Mulder, N and Munoz-Torres, MC and Nag, R and Nakagawa, H and Nasir, J and Navarro, A and Nelson, TH and Niewielska, A and Nisselle, A and Niu, J and Nyrönen, TH and O'Connor, BD and Oesterle, S and Ogishima, S and Wang, VO and Paglione, LAD and Palumbo, E and Parkinson, HE and Philippakis, AA and Pizarro, AD and Prlic, A and Rambla, J and Rendon, A and Rider, RA and Robinson, PN and Rodarmer, KW and Rodriguez, LL and Rubin, AF and Rueda, M and Rushton, GA and Ryan, RS and Saunders, GI and Schuilenburg, H and Schwede, T and Scollen, S and Senf, A and Sheffield, NC and Skantharajah, N and Smith, AV and Sofia, HJ and Spalding, D and Spurdle, AB and Stark, Z and Stein, LD and Suematsu, M and Tan, P and Tedds, JA and Thomson, AA and Thorogood, A and Tickle, TL and Tokunaga, K and Törnroos, J and Torrents, D and Upchurch, S and Valencia, A and Guimera, RV and Vamathevan, J and Varma, S and Vears, DF and Viner, C and Voisin, C and 
Wagner, AH and Wallace, SE and Walsh, BP and Williams, MS and Winkler, EC and Wold, BJ and Wood, GM and Woolley, JP and Yamasaki, C and Yates, AD and Yung, CK and Zass, LJ and Zaytseva, K and Zhang, J and Goodhand, P and North, K and Birney, E}, title = {GA4GH: International policies and standards for data sharing across genomic research and healthcare.}, journal = {Cell genomics}, volume = {1}, number = {2}, pages = {}, pmid = {35072136}, issn = {2666-979X}, support = {OT3 HL142478/HL/NHLBI NIH HHS/United States ; U13 CA221044/CA/NCI NIH HHS/United States ; 75N91019D00024/CA/NCI NIH HHS/United States ; U54 HG006542/HG/NHGRI NIH HHS/United States ; U41 HG006834/HG/NHGRI NIH HHS/United States ; R00 HG010157/HG/NHGRI NIH HHS/United States ; U24 HG011025/HG/NHGRI NIH HHS/United States ; R24 OD011883/OD/NIH HHS/United States ; U01 CA242954/CA/NCI NIH HHS/United States ; HHSN261201500001G/CA/NCI NIH HHS/United States ; 220544//Wellcome Trust/United Kingdom ; HHSN261200800001E/CA/NCI NIH HHS/United States ; UM1 HG009443/HG/NHGRI NIH HHS/United States ; U24 HG006941/HG/NHGRI NIH HHS/United States ; R35 HG011949/HG/NHGRI NIH HHS/United States ; RM1 HG010461/HG/NHGRI NIH HHS/United States ; U2C OD023196/OD/NIH HHS/United States ; K99 HG010157/HG/NHGRI NIH HHS/United States ; MC_PC_19024/MRC_/Medical Research Council/United Kingdom ; U24 CA231877/CA/NCI NIH HHS/United States ; R35 GM128636/GM/NIGMS NIH HHS/United States ; 206194//Wellcome Trust/United Kingdom ; U24 HG010262/HG/NHGRI NIH HHS/United States ; R00 HG007940/HG/NHGRI NIH HHS/United States ; HHSN261201400008C/CA/NCI NIH HHS/United States ; U24 TR002306/TR/NCATS NIH HHS/United States ; 108749//Wellcome Trust/United Kingdom ; HHSN261201500003I/CA/NCI NIH HHS/United States ; U24 CA237719/CA/NCI NIH HHS/United States ; U54 HG007990/HG/NHGRI NIH HHS/United States ; //Wellcome Trust/United Kingdom ; R35 HG011899/HG/NHGRI NIH HHS/United States ; HHSN261200800001C/RC/CCR NIH HHS/United States ; MR/S003703/1/MRC_/Medical 
Research Council/United Kingdom ; R01 CA237118/CA/NCI NIH HHS/United States ; RM1 HG010860/HG/NHGRI NIH HHS/United States ; HHSN261201500001W/CA/NCI NIH HHS/United States ; 201535//Wellcome Trust/United Kingdom ; U41 HG006627/HG/NHGRI NIH HHS/United States ; }, abstract = {The Global Alliance for Genomics and Health (GA4GH) aims to accelerate biomedical advances by enabling the responsible sharing of clinical and genomic data through both harmonized data aggregation and federated approaches. The decreasing cost of genomic sequencing (along with other genome-wide molecular assays) and increasing evidence of its clinical utility will soon drive the generation of sequence data from tens of millions of humans, with increasing levels of diversity. In this perspective, we present the GA4GH strategies for addressing the major challenges of this data revolution. We describe the GA4GH organization, which is fueled by the development efforts of eight Work Streams and informed by the needs of 24 Driver Projects and other key stakeholders. We present the GA4GH suite of secure, interoperable technical standards and policy frameworks and review the current status of standards, their relevance to key domains of research and clinical care, and future plans of GA4GH. Broad international participation in building, adopting, and deploying GA4GH standards and frameworks will catalyze an unprecedented effort in data sharing that will be critical to advancing genomic medicine and ensuring that all populations can access its benefits.}, } @article {pmid35062865, year = {2022}, author = {Nouioui, I and Ha, SM and Baek, I and Chun, J and Goodfellow, M}, title = {Genome insights into the pharmaceutical and plant growth promoting features of the novel species Nocardia alni sp. 
nov.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {70}, pmid = {35062865}, issn = {1471-2164}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial ; Fatty Acids/analysis ; Frankia ; Nucleic Acid Hybridization ; *Pharmaceutical Preparations ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; *Soil Microbiology ; }, abstract = {BACKGROUND: Recent studies highlighted the biosynthetic potential of nocardiae to produce diverse novel natural products comparable to that of Streptomyces, thereby making them an attractive source of new drug leads. Many of the 119 Nocardia validly named species were isolated from natural habitats but little is known about the diversity and the potential of the endophytic nocardiae of root nodule of actinorhizal plants.

RESULTS: The taxonomic status of an actinobacterium strain, designated ncl2[T], was established in a genome-based polyphasic study. The strain was Gram-stain-positive, produced substrate and aerial hyphae that fragmented into coccoid and rod-like elements and showed chemotaxonomic properties that were also typical of the genus Nocardia. It formed a distinct branch in the Nocardia 16S rRNA gene tree and was most closely related to the type strains of Nocardia nova (98.6%), Nocardia jiangxiensis (98.4%), Nocardia miyuensis (97.8%) and Nocardia vaccinii (97.7%). A comparison of the draft genome sequence generated for the isolate with the whole genome sequences of its closest phylogenetic neighbours showed that it was most closely related to the N. jiangxiensis, N. miyuensis and N. vaccinii strains, a result underpinned by average nucleotide identity and digital DNA-DNA hybridization data. Corresponding taxogenomic data, including those from a pan-genome sequence analysis showed that strain ncl2[T] was most closely related to N. vaccinii DSM 43285[T]. A combination of genomic, genotypic and phenotypic data distinguished these strains from one another. Consequently, it is proposed that strain ncl2[T] (= DSM 110931[T] = CECT 30122[T]) represents a new species within the genus Nocardia, namely Nocardia alni sp. nov. The genomes of the N. alni and N. vaccinii strains contained 36 and 29 natural product-biosynthetic gene clusters, respectively, many of which were predicted to encode for a broad range of novel specialised products, notably antibiotics. Genome mining of the N. alni strain and the type strains of its closest phylogenetic neighbours revealed the presence of genes associated with direct and indirect mechanisms that promote plant growth. The core genomes of these strains mainly consisted of genes involved in amino acid transport and metabolism, energy production and conversion and transcription.

CONCLUSIONS: Our genome-based taxonomic study showed that isolate ncl2[T] formed a new centre of evolutionary variation within the genus Nocardia. This novel endophytic strain contained natural product biosynthetic gene clusters predicted to synthesize novel specialised products, notably antibiotics and genes associated with the expression of plant growth promoting compounds.}, } @article {pmid35061539, year = {2022}, author = {Casey, JR and Boiteau, RM and Engqvist, MKM and Finkel, ZV and Li, G and Liefer, J and Müller, CL and Muñoz, N and Follows, MJ}, title = {Basin-scale biogeography of marine phytoplankton reflects cellular-scale optimization of metabolism and physiology.}, journal = {Science advances}, volume = {8}, number = {3}, pages = {eabl4930}, pmid = {35061539}, issn = {2375-2548}, abstract = {Extensive microdiversity within Prochlorococcus, the most abundant marine cyanobacterium, occurs at scales from a single droplet of seawater to ocean basins. To interpret the structuring role of variations in genetic potential, as well as metabolic and physiological acclimation, we developed a mechanistic constraint-based modeling framework that incorporates the full suite of genes, proteins, metabolic reactions, pigments, and biochemical compositions of 69 sequenced isolates spanning the Prochlorococcus pangenome. Optimizing each strain to the local, observed physical and chemical environment along an Atlantic Ocean transect, we predicted variations in strain-specific patterns of growth rate, metabolic configuration, and physiological state, defining subtle niche subspaces directly attributable to differences in their encoded metabolic potential. Predicted growth rates covaried with observed ecotype abundances, affirming their significance as a measure of fitness and inferring a nonlinear density dependence of mortality. 
Our study demonstrates the potential to interpret global-scale ecosystem organization in terms of cellular-scale processes.}, } @article {pmid35056644, year = {2022}, author = {Buttimer, C and Bottacini, F and Shkoporov, AN and Draper, LA and Ross, P and Hill, C}, title = {Selective Isolation of Eggerthella lenta from Human Faeces and Characterisation of the Species Prophage Diversity.}, journal = {Microorganisms}, volume = {10}, number = {1}, pages = {}, pmid = {35056644}, issn = {2076-2607}, support = {GOIPD/2019/1097//Irish Research Council/ ; }, abstract = {Eggerthella lenta is an anaerobic, high GC, Gram-positive bacillus commonly found in the human digestive tract that belongs to the class Coriobacteriia of the phylum Actinobacteria. This species has been of increasing interest as an important player in the metabolism of xenobiotics and dietary compounds. However, little is known regarding its susceptibility to bacteriophage predation and how this may influence its fitness. Here, we report the isolation of seven novel E. lenta strains using cefotaxime and ceftriaxone as selective agents. We conducted comparative and pangenome analyses of these strains and those publicly available to investigate the diversity of prophages associated with this species. Prophage gene products represent a minimum of 5.8% of the E. lenta pangenome, comprising at least ten distantly related prophage clades that display limited homology to currently known bacteriophages. All clades possess genes implicated in virion structure, lysis, lysogeny and, to a limited extent, DNA replication. Some prophages utilise tyrosine recombinases and diversity generating retroelements to generate phase variation among targeted genes. The prophages have differing levels of sensitivity to the CRISPR/cas systems of their hosts, with spacers from 44 E. lenta isolates found to target only five out of the ten identified prophage clades. 
Furthermore, using a PCR-based approach targeting the prophage attP site, we were able to determine that several of these elements can excise from the host chromosome, thus supporting the notion that these are active prophages. The findings of this study provide further insights into the diversity of prophages infecting species of the phylum Actinobacteria.}, } @article {pmid35041495, year = {2022}, author = {Rossi, M and Oliva, M and Langmead, B and Gagie, T and Boucher, C}, title = {MONI: A Pangenomic Index for Finding Maximal Exact Matches.}, journal = {Journal of computational biology : a journal of computational molecular cell biology}, volume = {29}, number = {2}, pages = {169-187}, pmid = {35041495}, issn = {1557-8666}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R01 AI141810/AI/NIAID NIH HHS/United States ; }, mesh = {*Algorithms ; Computational Biology ; Databases, Genetic/statistics & numerical data ; Genome, Bacterial ; Genome, Human ; Genomics/*statistics & numerical data ; High-Throughput Nucleotide Sequencing/statistics & numerical data ; Humans ; Salmonella/genetics ; Sequence Alignment/*statistics & numerical data ; Sequence Analysis, DNA/statistics & numerical data ; *Software ; Wavelet Analysis ; }, abstract = {Recently, Gagie et al. proposed a version of the FM-index, called the r-index, that can store thousands of human genomes on a commodity computer. Then Kuhnle et al. showed how to build the r-index efficiently via a technique called prefix-free parsing (PFP) and demonstrated its effectiveness for exact pattern matching. Exact pattern matching can be leveraged to support approximate pattern matching, but the r-index itself cannot support efficiently popular and important queries such as finding maximal exact matches (MEMs). To address this shortcoming, Bannai et al. 
introduced the concept of thresholds, and showed that storing them together with the r-index enables efficient MEM finding—but they did not say how to find those thresholds. We present a novel algorithm that applies PFP to build the r-index and find the thresholds simultaneously and in linear time and space with respect to the size of the prefix-free parse. Our implementation called MONI can rapidly find MEMs between reads and large-sequence collections of highly repetitive sequences. Compared with other read aligners—PuffAligner, Bowtie2, BWA-MEM, and CHIC—MONI used 2-11 times less memory and was 2-32 times faster for index construction. Moreover, MONI was less than one thousandth the size of competing indexes for large collections of human chromosomes. Thus, MONI represents a major advance in our ability to perform MEM finding against very large collections of related references.}, } @article {pmid35040700, year = {2022}, author = {Moller, AG and Petit, RA and Read, TD}, title = {Species-Scale Genomic Analysis of Staphylococcus aureus Genes Influencing Phage Host Range and Their Relationships to Virulence and Antibiotic Resistance Genes.}, journal = {mSystems}, volume = {7}, number = {1}, pages = {e0108321}, pmid = {35040700}, issn = {2379-5077}, support = {R21 AI138079/AI/NIAID NIH HHS/United States ; }, mesh = {Humans ; Staphylococcus aureus/genetics ; *Bacteriophages ; Virulence ; Host Specificity ; Anti-Bacterial Agents ; *Superinfection ; Genomics ; *Staphylococcal Infections/microbiology ; Drug Resistance, Microbial ; }, abstract = {Phage therapy has been proposed as a possible alternative treatment for infections caused by the ubiquitous bacterial pathogen Staphylococcus aureus. However, successful therapy requires understanding the genetic basis of host range—the subset of strains in a species that could be killed by a particular phage. We searched diverse sets of S. 
aureus public genome sequences against a database of genes suggested from prior studies to influence host range to look for patterns of variation across the species. We found that genes encoding biosynthesis of molecules that were targets of S. aureus phage adsorption to the outer surface of the cell were the most conserved in the pangenome. Putative phage resistance genes that were core components of the pangenome genes had similar nucleotide diversity, ratio of nonsynonymous to synonymous substitutions, and functionality (measured by delta-bitscore) to other core genes. However, phage resistance genes that were not part of the core genome were significantly less consistent with the core genome phylogeny than all noncore genes in this set, suggesting more frequent movement between strains by horizontal gene transfer. Only superinfection immunity genes encoded by temperate phages inserted in the genome correlated with experimentally determined temperate phage resistance. Taken together, these results suggested that, while phage adsorption genes are heavily conserved in the S. aureus species, HGT may play a significant role in strain-specific evolution of host range patterns. IMPORTANCE Staphylococcus aureus is a widespread, hospital- and community-acquired pathogen that is commonly antibiotic resistant. It causes diverse diseases affecting both the skin and internal organs. Its ubiquity, antibiotic resistance, and disease burden make new therapies urgent, such as phage therapy, in which viruses specific to infecting bacteria clear infection. S. aureus phage host range not only determines whether phage therapy will be successful by killing bacteria but also horizontal gene transfer through transduction of host genetic material by phages. In this work, we comprehensively reviewed existing literature to build a list of S. aureus phage resistance genes and searched our database of almost 43,000 S. 
aureus genomes for these genes to understand their patterns of evolution, finding that prophages' superinfection immunity correlates best with phage resistance and HGT. These findings improved our understanding of the relationship between known phage resistance genes and phage host range in the species.}, } @article {pmid35038925, year = {2022}, author = {Alvarez-Fraga, L and Phan, MD and Goh, KGK and Nhu, NTK and Hancock, SJ and Allsopp, LP and Peters, KM and Forde, BM and Roberts, LW and Sullivan, MJ and Totsika, M and Beatson, SA and Ulett, GC and Schembri, MA}, title = {Differential Afa/Dr Fimbriae Expression in the Multidrug-Resistant Escherichia coli ST131 Clone.}, journal = {mBio}, volume = {13}, number = {1}, pages = {e0351921}, pmid = {35038925}, issn = {2150-7511}, mesh = {Humans ; Adhesins, Bacterial/metabolism ; Anti-Bacterial Agents/metabolism ; Clone Cells ; DNA Transposable Elements ; *Drug Resistance, Multiple, Bacterial/genetics ; *Escherichia coli Infections/genetics ; *Urinary Tract Infections/genetics ; *Uropathogenic Escherichia coli/genetics/pathogenicity ; Virulence/genetics ; }, abstract = {Many antibiotic resistant uropathogenic Escherichia coli (UPEC) strains belong to clones defined by their multilocus sequence type (ST), with ST131 being the most dominant. Although we have a good understanding of resistance development to fluoroquinolones and third-generation cephalosporins by ST131, our understanding of the virulence repertoire that has contributed to its global dissemination is limited. Here we show that the genes encoding Afa/Dr fimbriae, a group of adhesins strongly associated with UPEC that cause gestational pyelonephritis and recurrent cystitis, are found in approximately one third of all ST131 strains. Sequence comparison of the AfaE adhesin protein revealed a unique allelic variant carried by 82.9% of afa-positive ST131 strains. 
We identify the afa regulatory region as a hotspot for the integration of insertion sequence (IS) elements, all but one of which alter afa transcription. Close investigation demonstrated that the integration of an IS1 element in the afa regulatory region leads to increased expression of Afa/Dr fimbriae, promoting enhanced adhesion to kidney epithelial cells and suggesting a mechanism for altered virulence. Finally, we provide evidence for a more widespread impact of IS1 on ST131 genome evolution, suggesting that IS dynamics contribute to strain level microevolution that impacts ST131 fitness. IMPORTANCE E. coli ST131 is the most common antibiotic resistant UPEC clone associated with human urinary tract and bloodstream infections. Understanding the features of ST131 that have driven its global dissemination remains a critical priority if we are to counter its increasing antibiotic resistance. Here, we utilized a large collection of ST131 isolates to investigate the prevalence, regulation, and function of Afa/Dr fimbriae, a well-characterized UPEC colonization and virulence factor. We show that the afa genes are found frequently in ST131 and demonstrate how the integration of IS elements in the afa regulatory region modulates Afa expression, presenting an example of altered virulence capacity. 
We also exploit a curated set of ST131 genomes to map the integration of the antibiotic resistance-associated IS1 element in the ST131 pangenome, providing evidence for its widespread impact on ST131 genome evolution.}, } @article {pmid35037212, year = {2022}, author = {Ruperao, P and Gandham, P and Rathore, A}, title = {Construction of Practical Haplotype Graph (PHG) with the Whole-Genome Sequence Data.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2443}, number = {}, pages = {273-284}, pmid = {35037212}, issn = {1940-6029}, mesh = {*Genome ; *Genomics ; Haplotypes/genetics ; Sequence Analysis, DNA ; }, abstract = {With the emerging sequencing technologies and cost reduction, the sequence data generation has accelerated from a single individual to multiple (thousands of) individuals of a species. The terabytes of sequence data generated from thousands of individuals include the majority of the redundant sequence which depends on the level of sequence similarity within the population of individuals. Managing large datasets and creating the unique catalogue sequence from such a large population is challenging to analyze, store, and retrieve the information. In this chapter, we discuss the practical haplotype graph (PHG) which addresses the above said challenges and also able to retrieve required information such as variants and sequences more efficiently, which enable researchers to manage and assess large genomic data.}, } @article {pmid35037211, year = {2022}, author = {Tay Fernandez, C}, title = {Making a Pangenome Using the Iterative Mapping Approach.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2443}, number = {}, pages = {259-271}, pmid = {35037211}, issn = {1940-6029}, abstract = {Pangenomes have replaced single reference genomes as genetic references, as they contain a better scope of the diversity found in a single species. 
This protocol outlines the iterative mapping approach in constructing a pangenome, including how to check the raw data, align the data to a reference, how to assemble the data, and how to remove potential contaminants from the final assembly.}, } @article {pmid35037204, year = {2022}, author = {Kamal, N and Lux, T and Jayakodi, M and Haberer, G and Gundlach, H and Mayer, KFX and Mascher, M and Spannagl, M}, title = {The Barley and Wheat Pan-Genomes.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2443}, number = {}, pages = {147-159}, pmid = {35037204}, issn = {1940-6029}, mesh = {Crops, Agricultural/genetics ; Genome, Plant ; Genomics ; *Hordeum/genetics ; Triticum/genetics ; }, abstract = {To unlock the genetic potential in crops, multi-genome comparisons are an essential tool. Decreasing costs and improved sequencing technologies have democratized plant genome sequencing and led to a vast increase in the amount of available reference sequences on the one hand and enabled the assembly of even the largest and most complex and repetitive crops genomes such as wheat and barley. These developments have led to the era of pan-genomics in recent years. Pan-genome projects enable the definition of the core and dispensable genome for various crop species as well as the analysis of structural and functional variation and hence offer unprecedented opportunities for exploring and utilizing the genetic basis of natural variation in crops. 
Comparing, analyzing, and visualizing these multiple reference genomes and their diversity requires powerful and specialized computational strategies and tools.}, } @article {pmid35037202, year = {2022}, author = {Tello-Ruiz, MK and Jaiswal, P and Ware, D}, title = {Gramene: A Resource for Comparative Analysis of Plants Genomes and Pathways.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2443}, number = {}, pages = {101-131}, pmid = {35037202}, issn = {1940-6029}, mesh = {Crops, Agricultural/genetics ; *Databases, Genetic ; *Genome, Plant ; Genomics/methods ; Phylogeny ; }, abstract = {Gramene is an integrated bioinformatics resource for accessing, visualizing, and comparing plant genomes and biological pathways. Originally targeting grasses, Gramene has grown to host annotations for over 90 plant genomes including agronomically important cereals (e.g., maize, sorghum, wheat, teff), fruits and vegetables (e.g., apple, watermelon, clementine, tomato, cassava), specialty crops (e.g., coffee, olive tree, pistachio, almond), and plants of special or emerging interest (e.g., cotton, tobacco, cannabis, or hemp). For some species, the resource includes multiple varieties of the same species, which has paved the road for the creation of species-specific pan-genome browsers. The resource also features plant research models, including Arabidopsis and C4 warm-season grasses and brassicas, as well as other species that fill phylogenetic gaps for plant evolution studies. Its strength derives from the application of a phylogenetic framework for genome comparison and the use of ontologies to integrate structural and functional annotation data. 
This chapter outlines system requirements for end-users and database hosting, data types and basic navigation within Gramene, and provides examples of how to (1) explore Gramene's search results, (2) explore gene-centric comparative genomics data visualizations in Gramene, and (3) explore genetic variation associated with a gene locus. This is the first publication describing in detail Gramene's integrated search interface—intended to provide a simplified entry portal for the resource's main data categories (genomic location, phylogeny, gene expression, pathways, and external references) to the most complete and up-to-date set of plant genome and pathway annotations.}, } @article {pmid35037201, year = {2022}, author = {Redsun, S and Hokin, S and Cameron, CT and Cleary, AM and Berendzen, J and Dash, S and Brown, AV and Wilkey, A and Campbell, JD and Huang, W and Kalberer, SR and Weeks, NT and Cannon, SB and Farmer, AD}, title = {Doing Genetic and Genomic Biology Using the Legume Information System and Associated Resources.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2443}, number = {}, pages = {81-100}, pmid = {35037201}, issn = {1940-6029}, mesh = {Databases, Genetic ; *Fabaceae/genetics ; Genome, Plant ; Genomics ; Plant Breeding ; }, abstract = {In this chapter, we introduce the main components of the Legume Information System (https://legumeinfo.org) and several associated resources. Additionally, we provide an example of their use by exploring a biological question: is there a common molecular basis, across legume species, that underlies the photoperiod-mediated transition from vegetative to reproductive development, that is, days to flowering? The Legume Information System (LIS) holds genetic and genomic data for a large number of crop and model legumes and provides a set of online bioinformatic tools designed to help biologists address questions and tasks related to legume biology. 
Such tasks include identifying the molecular basis of agronomic traits; identifying orthologs/syntelogs for known genes; determining gene expression patterns; accessing genomic datasets; identifying markers for breeding work; and identifying genetic similarities and differences among selected accessions. LIS integrates with other legume-focused informatics resources such as SoyBase (https://soybase.org), PeanutBase (https://peanutbase.org), and projects of the Legume Federation (https://legumefederation.org).}, } @article {pmid35035886, year = {2021}, author = {Sutton, G and Fogel, GB and Abramson, B and Brinkac, L and Michael, T and Liu, ES and Thomas, S}, title = {Horizontal transfer and evolution of wall teichoic acid gene cassettes in Bacillus subtilis.}, journal = {F1000Research}, volume = {10}, number = {}, pages = {354}, pmid = {35035886}, issn = {2046-1402}, mesh = {*Bacillus subtilis/genetics ; *Bacterial Proteins ; Cell Wall/genetics ; Teichoic Acids ; }, abstract = {Background: Wall teichoic acid (WTA) genes are essential for production of cell walls in gram-positive bacteria and necessary for survival and variability in the cassette has led to recent antibiotic resistance acquisition in pathogenic bacteria. Methods: Using a pan-genome approach, we examined the evolutionary history of WTA genes in Bacillus subtilis ssp. subtilis. Results: Our analysis reveals an interesting pattern of evolution from the type-strain WTA gene cassette possibly resulting from horizontal acquisition from organisms with similar gene sequences. The WTA cassettes have a high level of variation which may be due to one or more independent horizontal transfer events during the evolution of Bacillus subtilis ssp. subtilis. 
This swapping of entire WTA cassettes and smaller regions within the WTA cassettes is an unusual feature in the evolution of the Bacillus subtilis genome and highlights the importance of horizontal transfer of gene cassettes through homologous recombination within B. subtilis or other bacterial species. Conclusions: Reduced sequence conservation of these WTA cassettes may indicate a modified function like the previously documented WTA ribitol/glycerol variation. An improved understanding of high-frequency recombination of gene cassettes has ramifications for synthetic biology and the use of B. subtilis in industry.}, } @article {pmid35026436, year = {2022}, author = {Hoopes, G and Meng, X and Hamilton, JP and Achakkagari, SR and de Alves Freitas Guesdes, F and Bolger, ME and Coombs, JJ and Esselink, D and Kaiser, NR and Kodde, L and Kyriakidou, M and Lavrijssen, B and van Lieshout, N and Shereda, R and Tuttle, HK and Vaillancourt, B and Wood, JC and de Boer, JM and Bornowski, N and Bourke, P and Douches, D and van Eck, HJ and Ellis, D and Feldman, MJ and Gardner, KM and Hopman, JCP and Jiang, J and De Jong, WS and Kuhl, JC and Novy, RG and Oome, S and Sathuvalli, V and Tan, EH and Ursum, RA and Vales, MI and Vining, K and Visser, RGF and Vossen, J and Yencho, GC and Anglin, NL and Bachem, CWB and Endelman, JB and Shannon, LM and Strömvik, MV and Tai, HH and Usadel, B and Buell, CR and Finkers, R}, title = {Phased, chromosome-scale genome assemblies of tetraploid potato reveal a complex genome, transcriptome, and predicted proteome landscape underpinning genetic diversity.}, journal = {Molecular plant}, volume = {15}, number = {3}, pages = {520-536}, doi = {10.1016/j.molp.2022.01.003}, pmid = {35026436}, issn = {1752-9867}, mesh = {Alleles ; Chromosomes ; Plant Breeding ; Proteome/genetics ; *Solanum tuberosum/genetics ; *Tetraploidy ; Transcriptome/genetics ; }, abstract = {Cultivated potato is a clonally propagated autotetraploid species with a highly 
heterogeneous genome. Phased assemblies of six cultivars including two chromosome-scale phased genome assemblies revealed extensive allelic diversity, including altered coding and transcript sequences, preferential allele expression, and structural variation that collectively result in a highly complex transcriptome and predicted proteome, which are distributed across the homologous chromosomes. Wild species contribute to the extensive allelic diversity in tetraploid cultivars, demonstrating ancestral introgressions predating modern breeding efforts. As a clonally propagated autotetraploid that undergoes limited meiosis, dysfunctional and deleterious alleles are not purged in tetraploid potato. Nearly a quarter of the loci bore mutations are predicted to have a high negative impact on protein function, complicating breeder's efforts to reduce genetic load. The StCDF1 locus controls maturity, and analysis of six tetraploid genomes revealed that 12 allelic variants of StCDF1 are correlated with maturity in a dosage-dependent manner. Knowledge of the complexity of the tetraploid potato genome with its rampant structural variation and embedded deleterious and dysfunctional alleles will be key not only to implementing precision breeding of tetraploid cultivars but also to the construction of homozygous, diploid potato germplasm containing favorable alleles to capitalize on heterosis in F1 hybrids.}, } @article {pmid35022669, year = {2022}, author = {Sakkour, A and Mascher, M and Himmelbach, A and Haberer, G and Lux, T and Spannagl, M and Stein, N and Kawamoto, S and Sato, K}, title = {Chromosome-scale assembly of barley cv. 'Haruna Nijo' as a resource for barley genetics.}, journal = {DNA research : an international journal for rapid publication of reports on genes and genomes}, volume = {29}, number = {1}, pages = {}, pmid = {35022669}, issn = {1756-1663}, support = {18076896//JST Mirai Program/ ; 031A536B//German Ministry of Education and Research project de. 
NBI/ ; 031B0190A//SHAPE I/ ; }, mesh = {Chromosomes ; Genome ; Genotype ; *Hordeum/genetics ; Molecular Sequence Annotation ; }, abstract = {Cultivated barley (Hordeum vulgare ssp. vulgare) is used for food, animal feed, and alcoholic beverages and is widely grown in temperate regions. Both barley and its wild progenitor (H. vulgare ssp. spontaneum) have large 5.1-Gb genomes. High-quality chromosome-scale assemblies for several representative barley genotypes, both wild and domesticated, have been constructed recently to populate the nascent barley pan-genome infrastructure. Here, we release a chromosome-scale assembly of the Japanese elite malting barley cultivar 'Haruna Nijo' using a similar methodology as in the barley pan-genome project. The 4.28-Gb assembly had a scaffold N50 size of 18.9 Mb. The assembly showed high collinearity with the barley reference genome 'Morex' cultivar, with some inversions. The pseudomolecule assembly was characterized using transcript evidence of gene projection derived from the reference genome and de novo gene annotation achieved using published full-length cDNA sequences and RNA-Seq data for 'Haruna Nijo'. We found good concordance between our whole-genome assembly and the publicly available BAC clone sequence of 'Haruna Nijo'. 
Interesting phenotypes have since been identified in Haruna Nijo; its genome sequence assembly will facilitate the identification of the underlying genes.}, } @article {pmid35020968, year = {2022}, author = {Maqbool, S and Hassan, MA and Xia, X and York, LM and Rasheed, A and He, Z}, title = {Root system architecture in cereals: progress, challenges and perspective.}, journal = {The Plant journal : for cell and molecular biology}, volume = {110}, number = {1}, pages = {23-42}, doi = {10.1111/tpj.15669}, pmid = {35020968}, issn = {1365-313X}, mesh = {Crops, Agricultural/genetics ; *Edible Grain/genetics ; Phenotype ; Plant Breeding ; *Plant Roots/genetics ; }, abstract = {Roots are essential multifunctional plant organs involved in water and nutrient uptake, metabolite storage, anchorage, mechanical support, and interaction with the soil environment. Understanding of this 'hidden half' provides potential for manipulation of root system architecture (RSA) traits to optimize resource use efficiency and grain yield in cereal crops. Unfortunately, root traits are highly neglected in breeding due to the challenges of phenotyping, but could have large rewards if the variability in RSA traits can be fully exploited. Until now, a plethora of genes have been characterized in detail for their potential role in improving RSA. The use of forward genetics approaches to find sequence variations in genes underpinning desirable RSA would be highly beneficial. Advances in computer vision applications have allowed image-based approaches for high-throughput phenotyping of RSA traits that can be used by any laboratory worldwide to make progress in understanding root function and dissection of the genetics. At the same time, the frontiers of root measurement include non-invasive methods like X-ray computer tomography and magnetic resonance imaging that facilitate new types of temporal studies. Root physiology and ecology are further supported by spatiotemporal root simulation modeling. 
The discovery of component traits providing improved resilience and yield advantage in target environments is a key necessity for mainstreaming root-based cereal breeding. The integrated use of pan-genome resources, now available in most cereals, coupled with new in-field phenotyping platforms has the potential for precise selection of superior genotypes with improved RSA.}, } @article {pmid35017390, year = {2022}, author = {Kherraf, ZE and Cazin, C and Lestrade, F and Muronova, J and Coutton, C and Arnoult, C and Thierry-Mieg, N and Ray, PF}, title = {From azoospermia to macrozoospermia, a phenotypic continuum due to mutations in the ZMYND15 gene.}, journal = {Asian journal of andrology}, volume = {24}, number = {3}, pages = {243-247}, pmid = {35017390}, issn = {1745-7262}, mesh = {Animals ; *Azoospermia/genetics ; Humans ; *Infertility, Male/genetics ; Male ; Membrane Proteins/genetics ; Mice ; Mutation ; *Oligospermia/genetics ; Repressor Proteins/*metabolism ; *Teratozoospermia/genetics ; }, abstract = {Thanks to tremendous advances in sequencing technologies and in particular to whole exome sequencing (WES), many genes have now been linked to severe sperm defects. A precise genetic diagnosis is obtained for a minority of patients and only for the most severe defects like azoospermia or macrozoospermia which is very often due to defects in the aurora kinase C (AURKC) gene. Here, we studied a subject with a severe oligozoospermia and a phenotypic diagnosis of macrozoospermia. AURKC analysis did not reveal any deleterious variant. WES was then initiated which permitted to identify a homozygous loss of function variant in the zinc finger MYND-type containing 15 (ZMYND15) gene. ZMYND15 has been described to serve as a switch for haploid gene expression, and mice devoid of ZMYND15 were shown to be sterile due to nonobstructive azoospermia (NOA). In man, ZMYND15 has been associated with NOA and severe oligozoospermia. 
We confirm here that the presence of a bi-allelic ZMYND15 variant induces a severe oligozoospermia. In addition, we show that severe oligozoospermia can be associated with macrozoospermia, and that a phenotypic misdiagnosis is possible, potentially delaying the genetic diagnosis. In conclusion, genetic defects in ZMYND15 can induce complete NOA or severe oligozoospermia associated with a very severe teratozoospermia. In our experience, severe oligozoospermia is often associated with severe teratozoospermia and can sometimes be misinterpreted as macrozoospermia or globozoospermia. In these instances, specific AURKC or dpy-19 like 2 (DPY19L2) diagnosis is usually negative and we recommend the direct use of a pan-genomic technique such as WES.}, } @article {pmid35015132, year = {2022}, author = {Gladman, N and Olson, A and Wei, S and Chougule, K and Lu, Z and Tello-Ruiz, M and Meijs, I and Van Buren, P and Jiao, Y and Wang, B and Kumar, V and Kumari, S and Zhang, L and Burke, J and Chen, J and Burow, G and Hayes, C and Emendack, Y and Xin, Z and Ware, D}, title = {SorghumBase: a web-based portal for sorghum genetic information and community advancement.}, journal = {Planta}, volume = {255}, number = {2}, pages = {35}, pmid = {35015132}, issn = {1432-2048}, support = {S10 OD028632/OD/NIH HHS/United States ; 8062-21000-041-00D//Agricultural Research Service/ ; S10OD028632-01/NH/NIH HHS/United States ; }, mesh = {Databases, Genetic ; Edible Grain ; Genome, Plant/genetics ; Genomics ; Internet ; Plant Breeding ; *Sorghum/genetics ; }, abstract = {SorghumBase provides a community portal that integrates genetic, genomic, and breeding resources for sorghum germplasm improvement. Public research and development in agriculture rely on proper data and resource sharing within stakeholder communities. 
For plant breeders, agronomists, molecular biologists, geneticists, and bioinformaticians, centralizing desirable data into a user-friendly hub for crop systems is essential for successful collaborations and breakthroughs in germplasm development. Here, we present the SorghumBase web portal (https://www.sorghumbase.org), a resource for the sorghum research community. SorghumBase hosts a wide range of sorghum genomic information in a modular framework, built with open-source software, to provide a sustainable platform. This initial release of SorghumBase includes: (1) five sorghum reference genome assemblies in a pan-genome browser; (2) genetic variant information for natural diversity panels and ethyl methanesulfonate (EMS)-induced mutant populations; (3) search interface and integrated views of various data types; (4) links supporting interconnectivity with other repositories including genebank, QTL, and gene expression databases; and (5) a content management system to support access to community news and training materials. SorghumBase offers sorghum investigators improved data collation and access that will facilitate the growth of a robust research community to support genomics-assisted breeding.}, } @article {pmid35005658, year = {2022}, author = {MacKenzie, K and Marshall, J and Wright, F and Gunn, G and Holden, N}, title = {Phylogeny and potential virulence of cryptic clade Escherichia coli species complex isolates derived from an arable field trial.}, journal = {Current research in microbial sciences}, volume = {3}, number = {}, pages = {100093}, pmid = {35005658}, issn = {2666-5174}, abstract = {Analysis of Escherichia coli taxonomy has expanded into a species-complex with the identification of divergent cryptic clades. A key question is the evolutionary trajectory of these clades and their relationship to isolates of clinical or veterinary importance. Since they have some environmental association, we screened a collection of E. 
coli isolated from a long-term spring barley field trial for their presence. While most isolates clustered into the enteric-clade, four of them clustered into Clade-V, and one in Clade-IV. The Clade-V isolates shared >96% intra-clade average nucleotide sequence identity but <91% with other clades. Although pan-genomics analysis confirmed their taxonomy as Clade-V (E. marmotae), retrospective phylogroup PCR did not discriminate them correctly. Differences in metabolic and adherence gene alleles occurred in the Clade-V isolates compared to E. coli sensu stricto. They also encoded the bacteriophage phage-associated cyto-lethal distending toxin (CDT) and antimicrobial resistance (AMR) genes, including an ESBL, blaOXA-453. Thus, the isolate collection encompassed a genetic diversity, and included cryptic clade isolates that encode potential virulence factors. The analysis has determined the phylogenetic relationship of cryptic clade isolates with E. coli sensu stricto and indicates a potential for horizontal transfer of virulence factors.}, } @article {pmid34996906, year = {2022}, author = {Neupane, S and Bonilla, SI and Manalo, AM and Pelz-Stelinski, KS}, title = {Complete de novo assembly of Wolbachia endosymbiont of Diaphorina citri Kuwayama (Hemiptera: Liviidae) using long-read genome sequencing.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {125}, pmid = {34996906}, issn = {2045-2322}, support = {D19AP00013//Defense Advanced Research Projects Agency/ ; D19AP00013//Defense Advanced Research Projects Agency/ ; D19AP00013//Defense Advanced Research Projects Agency/ ; D19AP00013//Defense Advanced Research Projects Agency/ ; }, mesh = {Animals ; Cell Line ; *Chromosomes, Bacterial ; DNA, Bacterial/*genetics ; DNA, Circular/*genetics ; *Genes, Bacterial ; *Genome, Bacterial ; Hemiptera/*microbiology ; Phylogeny ; Sequence Analysis, DNA ; Symbiosis ; *Whole Genome Sequencing ; Wolbachia/*genetics ; }, abstract = {Wolbachia, a gram-negative 
α-proteobacterium, is an endosymbiont found in some arthropods and nematodes. Diaphorina citri Kuwayama, the vector of 'Candidatus Liberibacter asiaticus' (CLas), are naturally infected with a strain of Wolbachia (wDi), which has been shown to colocalize with the bacteria pathogens CLas, the pathogen associated with huanglongbing (HLB) disease of citrus. The relationship between wDi and CLas is poorly understood in part because the complete genome of wDi has not been available. Using high-quality long-read PacBio circular consensus sequences, we present the largest complete circular wDi genome among supergroup-B members. The assembled circular chromosome is 1.52 megabases with 95.7% genome completeness with contamination of 1.45%, as assessed by checkM. We identified Insertion Sequences (ISs) and prophage genes scattered throughout the genomes. The proteins were annotated using Pfam, eggNOG, and COG that assigned unique domains and functions. The wDi genome was compared with previously sequenced Wolbachia genomes using pangenome and phylogenetic analyses. The availability of a complete circular chromosome of wDi will facilitate understanding of its role within the insect vector, which may assist in developing tools for disease management. 
This information also provides a baseline for understanding phylogenetic relationships among Wolbachia of other insect vectors.}, } @article {pmid34996379, year = {2022}, author = {Liang, Y and Huang, Y and Chen, K and Kong, X and Li, M}, title = {Characterization of non-specific lipid transfer protein (nsLtp) gene families in the Brassica napus pangenome reveals abundance variation.}, journal = {BMC plant biology}, volume = {22}, number = {1}, pages = {21}, pmid = {34996379}, issn = {1471-2229}, mesh = {Ascomycota/*pathogenicity ; Brassica napus/*genetics/*immunology/*microbiology ; Carrier Proteins/*genetics ; Crops, Agricultural/genetics/immunology/microbiology ; Disease Resistance/*genetics ; Gene Expression Regulation, Plant ; Genes, Plant ; Genetic Variation ; Genome, Plant ; Plant Diseases/*genetics ; }, abstract = {BACKGROUND: Brassica napus is an important agricultural species, and improving stress resistance is one of the main breeding goals at present. Non-specific lipid transfer proteins (nsLTPs) are small, basic proteins which are involved in some biotic or abiotic stress responses. B. napus is susceptible to a variety of fungal diseases, so identifying the BnLTPs and their expression in disease responses is very important. The common reference genome of B. napus does not contain all B. napus genes because of gene presence/absence variations between individuals. Therefore, it was necessary to search for candidate BnLTP genes in the B. napus pangenome.

RESULTS: In the present study, the BnLTP genes were identified throughout the pangenome, and different BnLTP genes were presented among varieties. Totally, 246 BnLTP genes were identified and could be divided into five types (1, 2, C, D, and G). The classification, phylogenetic reconstruction, chromosome distribution, functional annotation, and gene expression were analyzed. We also identified potential cis-elements that respond to biotic and abiotic stresses in the 2 kb upstream regions of all BnLTP genes. RNA sequencing analysis showed that the BnLTP genes were involved in the response to Sclerotinia sclerotiorum infection. We identified 32 BnLTPs linked to blackleg resistance quantitative trait locus (QTL).

CONCLUSION: The identification and analysis of LTP genes in the B. napus pangenome could help to elucidate the function of BnLTP family members and provide new information for future molecular breeding in B. napus.}, } @article {pmid34983642, year = {2022}, author = {Yang, SM and Kim, E and Lee, W and Kim, HY}, title = {Genomic characteristics and comparative genomics of Salmonella enterica subsp. enterica serovar Schwarzengrund strain S16 isolated from chicken feces.}, journal = {Gut pathogens}, volume = {14}, number = {1}, pages = {1}, pmid = {34983642}, issn = {1757-4749}, support = {19162MFDS042//Ministry of Food and Drug Safety/ ; }, abstract = {BACKGROUND: Salmonella enterica subsp. enterica serovar Schwarzengrund (S. Schwarzengrund) is most frequently isolated from commensals humans or poultry. Here we report S. Schwarzengrund strain S16, the first sequenced genome in the Republic of Korea. Additionally, genome sequencing for strain S16 was performed and compared with other S. Schwarzengrund genomes obtained from public database.

RESULTS: Strain S16 was isolated from chicken feces. The complete genome consists of one chromosome and one plasmid. The genome size is 4,822,755 bp with 4852 coding sequences. Strain S16 was determined as serovar Schwarzengrund by in silico serotyping and typed as sequence type (ST) 96. Forty-six S. Schwarzengrund genomes yielded a pangenome of 7112 genes, core-genome of 3374 genes, accessory-genome of 2906 genes, and unique-genome of 835 genes. Eighty-one genes were unique to strain S16, including hypothetical proteins and transcriptional regulators. Genotypic analysis of antibiotic resistance of strain S16 confirmed resistance to amikacin, ciprofloxacin, sulfamethoxazole, streptomycin, and tetracycline. Unlike other S. Schwarzengrund genomes, strain S16 had a mutation of gyrB. Moreover, similar to other S. Schwarzengrund genomes reported in other countries, strain S16 harbored 153 virulence genes including Saf operon and cdtB gene. All the antibiotic resistance genes and virulence genes were present in the core- or accessory-genomes.

CONCLUSIONS: Complete genome of strain S16 was sequenced. Comparative genomic analysis revealed several genes responsible for antibiotic resistance and specific genomic features of strain S16 and identified virulence factors that might contribute to the human and animal pathogenicity of other S. Schwarzengrund genomes.}, } @article {pmid34983386, year = {2022}, author = {Hyun, JC and Monk, JM and Palsson, BO}, title = {Comparative pangenomics: analysis of 12 microbial pathogen pangenomes reveals conserved global structures of genetic and functional diversity.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {7}, pmid = {34983386}, issn = {1471-2164}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {*Phylogeny ; }, abstract = {BACKGROUND: With the exponential growth of publicly available genome sequences, pangenome analyses have provided increasingly complete pictures of genetic diversity for many microbial species. However, relatively few studies have scaled beyond single pangenomes to compare global genetic diversity both within and across different species. We present here several methods for "comparative pangenomics" that can be used to contextualize multi-pangenome scale genetic diversity with gene function for multiple species at multiple resolutions: pangenome shape, genes, sequence variants, and positions within variants.

RESULTS: Applied to 12,676 genomes across 12 microbial pathogenic species, we observed several shared resolution-specific patterns of genetic diversity: First, pangenome openness is associated with species' phylogenetic placement. Second, relationships between gene function and frequency are conserved across species, with core genomes enriched for metabolic and ribosomal genes and accessory genomes for trafficking, secretion, and defense-associated genes. Third, genes in core genomes with the highest sequence diversity are functionally diverse. Finally, certain protein domains are consistently mutation enriched across multiple species, especially among aminoacyl-tRNA synthetases where the extent of a domain's mutation enrichment is strongly function-dependent.

CONCLUSIONS: These results illustrate the value of each resolution at uncovering distinct aspects in the relationship between genetic and functional diversity across multiple species. With the continued growth of the number of sequenced genomes, these methods will reveal additional universal patterns of genetic diversity at the pangenome scale.}, } @article {pmid34982234, year = {2022}, author = {Li, L and Zhou, J and Li, M and Yu, Z and Gao, K and Yang, J and Cheng, P and Yang, J and Zhang, W and Yu, Z and Sun, H}, title = {Comparative Genomic Analysis of Streptococcus pneumoniae Strains: Penicillin Non-susceptible Multi-drug-Resistant Serotype 19A Isolates.}, journal = {Current microbiology}, volume = {79}, number = {2}, pages = {49}, pmid = {34982234}, issn = {1432-0991}, support = {202102310395//Scientific and Technological Projects of Henan Province/ ; 31900116//National Natural Science Foundation of China/ ; LHGJ20190955//Medical Science and Technology Projects of Henan Province/ ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Genomics ; Humans ; Microbial Sensitivity Tests ; Penicillins ; *Pharmaceutical Preparations ; *Pneumococcal Infections ; Serogroup ; Serotyping ; Streptococcus pneumoniae/genetics ; }, abstract = {Streptococcus pneumoniae can cause several diseases including otitis media, sinusitis, pneumonia, sepsis and meningitis. The introduction of pneumococcal vaccines has changed the molecular epidemiological and antibiotic resistance profiles of related diseases. Analysis of molecular patterns and genome sequences of clinical strains may facilitate the identification of novel drug resistance mechanism. Three multidrug resistance 19A isolates were verified, serotyped and the complete genomes were sequenced combining the Pacific Biosciences and the Illumina Miseq platform. Genomic annotation revealed that similar central networks were found in the clinical isolates, and Mauve alignments indicated high similarity between different strains. 
The pan-genome analysis showed the shared and unique cluster in the strains. Mobile elements were predicted in the isolates including prophages and CRISPR systems, which may participate in the virulence and antibiotic resistance of the strains. The presence of 31 virulence factor genes was predicted from other pathogens for PRSP 19339 and 19343, while 30 for PRSP 19087. Meanwhile, 33 antibiotic resistance genes were predicted including antibiotic resistance genes, antibiotic-target genes and antibiotic biosynthesis genes. Further analysis of the antibiotic resistance genes revealed new mutations in the isolates. By comparative genomic analysis, we contributed to the understanding of resistance mechanism of the clinical isolates with other serotype strains, which could facilitate the concrete drug resistance mechanism study.}, } @article {pmid34980383, year = {2022}, author = {Zhou, H and Zhang, J and Shao, Y and Wang, J and Xu, W and Liu, Y and Yu, S and Ye, Q and Pang, R and Wu, S and Gu, Q and Xue, L and Zhang, J and Li, H and Wu, Q and Ding, Y}, title = {Development of a high resolution melting method based on a novel molecular target for discrimination between Bacillus cereus and Bacillus thuringiensis.}, journal = {Food research international (Ottawa, Ont.)}, volume = {151}, number = {}, pages = {110845}, doi = {10.1016/j.foodres.2021.110845}, pmid = {34980383}, issn = {1873-7145}, mesh = {*Bacillus cereus/genetics ; *Bacillus thuringiensis/genetics ; Genomics ; Multilocus Sequence Typing ; }, abstract = {Delimitation within the Bacillus cereus group is confusing due to the highly similar genetic background of its constituent bacteria. This study aimed to develop a rapid and efficient method for the identification of Bacillus cereus and Bacillus thuringiensis, two closely related species within the B. cereus group. Using average nucleotide identity analysis (ANI) and ribosomal multilocus sequence typing (rMLST), the authenticity of the genomes of B. 
cereus and B. thuringiensis was determined. Emetic B. cereus and Bacillus bombysepticus were also included to provide novel genomic insights into the boundaries within the B. cereus group. Using pan-genome analysis, ispD, a novel core and single-copy molecular target, was identified for the differentiation between B. cereus and B. thuringiensis. Based on the single nucleotide polymorphism within ispD, a high resolution melting (HRM) method for the determination of B. cereus and B. thuringiensis was developed. This method can not only distinguish B. cereus and B. thuringiensis, but can also separate B. cereus from other foodborne pathogenic bacteria. The detection limit of this method could reach 1 pg of pure genomic DNA and 3.7 × 10[2] cfu/mL of pure culture. Moreover, this new method could effectively differentiate B. cereus and B. thuringiensis in spiked, mixed, and real food samples. Collectively, the established HRM method can provide a new reference paradigm for the sensitive and specific nucleic acid detection of pathogens with identical genomes.}, } @article {pmid34977588, year = {2021}, author = {Khazaei, H and O'Sullivan, DM and Stoddard, FL and Adhikari, KN and Paull, JG and Schulman, AH and Andersen, SU and Vandenberg, A}, title = {Recent advances in faba bean genetic and genomic tools for crop improvement.}, journal = {Legume science}, volume = {3}, number = {3}, pages = {e75}, pmid = {34977588}, issn = {2639-6181}, support = {BB/P023509/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/J/000CA392/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Faba bean (Vicia faba L.), a member of the Fabaceae family, is one of the important food legumes cultivated in cool temperate regions. It holds great importance for human consumption and livestock feed because of its high protein content, dietary fibre, and nutritional value. 
Major faba bean breeding challenges include its mixed breeding system, unknown wild progenitor, and genome size of ~13 Gb, which is the largest among diploid field crops. The key breeding objectives in faba bean include improved resistance to biotic and abiotic stress and enhanced seed quality traits. Regarding quality traits, major progress on reduction of vicine-convicine and seed coat tannins, the main anti-nutritional factors limiting faba bean seed usage, have been recently achieved through gene discovery. Genomic resources are relatively less advanced compared with other grain legume species, but significant improvements are underway due to a recent increase in research activities. A number of bi-parental populations have been constructed and mapped for targeted traits in the last decade. Faba bean now benefits from saturated synteny-based genetic maps, along with next-generation sequencing and high-throughput genotyping technologies that are paving the way for marker-assisted selection. Developing a reference genome, and ultimately a pan-genome, will provide a foundational resource for molecular breeding. 
In this review, we cover the recent development and deployment of genomic tools for faba bean breeding.}, } @article {pmid34973586, year = {2022}, author = {Basharat, Z and Akhtar, U and Khan, K and Alotaibi, G and Jalal, K and Abbas, MN and Hayat, A and Ahmad, D and Hassan, SS}, title = {Differential analysis of Orientia tsutsugamushi genomes for therapeutic target identification and possible intervention through natural product inhibitor screening.}, journal = {Computers in biology and medicine}, volume = {141}, number = {}, pages = {105165}, doi = {10.1016/j.compbiomed.2021.105165}, pmid = {34973586}, issn = {1879-0534}, mesh = {*Biological Products/pharmacology/therapeutic use ; Genomics ; Humans ; *Orientia tsutsugamushi/genetics ; *Scrub Typhus/drug therapy/epidemiology ; }, abstract = {Orientia tsutsugamushi (Ott) is a causative agent of scrub typhus, and one of the emerging pathogens that could affect a large human population. It is one of the misdiagnosed and under-reported, febrile illnesses that infects various body organs (skin, heart, lung, kidney, and brain). The control of this infection is hampered due to the lack of drugs or vaccine against it. This study was undertaken to identify potential drug targets from the core genome of Ott and investigate novel natural product inhibitors against them. Hence, the available genomes for 22 strains of Ott were downloaded from the PATRIC database, and pan-genomic analysis was performed. Only 202 genes were present in the core region. Among these, 94 were identified as essential, 32 non-homologous to humans, nine non-homologous to useful gut flora and a single gene dapD as a drug target. Product of this gene (2,3,4,5-tetrahydropyridine-2-carboxylate N-succinyltransferase) was modeled and docked against traditional Indian (Ayurvedic) and Chinese phytochemical libraries, with best hits selected for docking, based on multiple target-drug/s interactions and minimum energy scores. 
ADMET profiling and molecular dynamics simulation was performed for top three compounds from each library to assess the toxicity and stability, respectively. We presume that these compounds (ZINC8214635, ZINC32793028, ZINC08101133, ZINC85625167, ZINC06018678, and ZINC13377938) could be successful inhibitors of Ott. However, in-depth experimental and clinical research is needed for further validation.}, } @article {pmid34961889, year = {2021}, author = {Xu, L and Ying, JJ and Fang, YC and Zhang, R and Hua, J and Wu, M and Han, BN and Sun, C}, title = {Halomonas populi sp. nov. isolated from Populus euphratica.}, journal = {Archives of microbiology}, volume = {204}, number = {1}, pages = {86}, pmid = {34961889}, issn = {1432-072X}, support = {LQ19C010006//natural science foundation of zhejiang province/ ; LQ19C010005//natural science foundation of zhejiang province/ ; JB1805//scientific research fund of the second institute of oceanography, mnr/ ; JB2003//scientific research fund of the second institute of oceanography, mnr/ ; 32000001//national natural science foundation of china/ ; 31900003//national natural science foundation of china/ ; 2019M652042//china postdoctoral science foundation/ ; 2018M642382//china postdoctoral science foundation/ ; 2019FY100700//national science and technology fundamental resources investigation program of china/ ; 2021FY100908//national science and technology fundamental resources investigation program of china/ ; }, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/analysis ; *Halomonas/genetics ; Phospholipids/analysis ; Phylogeny ; *Populus ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Three yellow-pigmented, Gram-stain-negative, aerobic, motile by flagella and rod-shaped strains, designated as MC[T], PC and RC, were isolated from stems of Populus euphratica. Growth of those three strains occurs at 4-40 °C, pH 6.0-10.0 and with 0.5-18.0% (w/v) NaCl. 
Respiratory quinones contained ubiquinone-9 and ubiquinone-8 as major and minor components, respectively. Major fatty acids (> 10%) were summed feature 8 (C18:1ω6c and/or C18:1ω7c), summed feature 3 (C16:1ω6c and/or C16:1ω7c) and C16:0. Polar lipids included diphosphatidylglycerol, phosphatidylethanolamine, phosphatidylglycerol, two unidentified phospholipids, one unidentified aminolipid, one unidentified glycolipid and four unidentified lipids. Strains MC[T], PC and RC shared pairwise 16S rRNA gene sequence similarities of 99.9-100.0%, and showed higher similarities of 98.4-98.5% with Halomonas songnenensis NEAU-ST10-39[T] and 98.3-98.4% with Halomonas nanhaiensis YIM M 13059[T] than to other Halomonas type strains. Genomic comparisons revealed that those three strains had the pan-genome consisting of 4446 orthologous clusters, among which 676 orthologous clusters were absent in other Halomonas type strains. Phylogenomic tree indicated that strains MC[T], PC and RC formed an independently stable clade with Halomonas nanhaiensis YIM M 13059[T] and Halomonas songnenensis NEAU-ST10-39[T]. The average nucleotide identity and digital DNA-DNA hybridization values between those three strains and other Halomonas type strains were < 89.9% and < 39.3%, respectively. Based upon phenotypic, chemotaxonomic, phylogenetic and genomic results, strains MC[T], PC and RC represent a novel species in the genus Halomonas, for which the name Halomonas populi sp. nov. is proposed. 
The type strain is MC[T] (= JCM 33545[T] = MCCC 1K03942[T]).}, } @article {pmid34961779, year = {2021}, author = {Almeida-Silva, F and Venancio, TM}, title = {Integration of genome-wide association studies and gene coexpression networks unveils promising soybean resistance genes against five common fungal pathogens.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {24453}, pmid = {34961779}, issn = {2045-2322}, mesh = {Fusarium/physiology ; *Gene Expression Regulation, Plant ; Gene Regulatory Networks ; Genome-Wide Association Study ; Host-Pathogen Interactions ; Phakopsora pachyrhizi/physiology ; Plant Diseases/*genetics/*microbiology ; Plant Proteins/genetics ; Soybeans/*genetics/microbiology ; }, abstract = {Soybean is one of the most important legume crops worldwide. However, soybean yield is dramatically affected by fungal diseases, leading to economic losses of billions of dollars yearly. Here, we integrated publicly available genome-wide association studies and transcriptomic data to prioritize candidate genes associated with resistance to Cadophora gregata, Fusarium graminearum, Fusarium virguliforme, Macrophomina phaseolina, and Phakopsora pachyrhizi. We identified 188, 56, 11, 8, and 3 high-confidence candidates for resistance to F. virguliforme, F. graminearum, C. gregata, M. phaseolina and P. pachyrhizi, respectively. The prioritized candidate genes are highly conserved in the pangenome of cultivated soybeans and are heavily biased towards fungal species-specific defense responses. The vast majority of the prioritized candidate resistance genes are related to plant immunity processes, such as recognition, signaling, oxidative stress, systemic acquired resistance, and physical defense. Based on the number of resistance alleles, we selected the five most resistant accessions against each fungal species in the soybean USDA germplasm. 
Interestingly, the most resistant accessions do not reach the maximum theoretical resistance potential. Hence, they can be further improved to increase resistance in breeding programs or through genetic engineering. Finally, the coexpression network generated here is available in a user-friendly web application (https://soyfungigcn.venanciogroup.uenf.br/) and an R/Shiny package (https://github.com/almeidasilvaf/SoyFungiGCN) that serve as a public resource to explore soybean-pathogenic fungi interactions at the transcriptional level.}, } @article {pmid34960001, year = {2021}, author = {Kaashyap, M and Cohen, M and Mantri, N}, title = {Microbial Diversity and Characteristics of Kombucha as Revealed by Metagenomic and Physicochemical Analysis.}, journal = {Nutrients}, volume = {13}, number = {12}, pages = {}, pmid = {34960001}, issn = {2072-6643}, mesh = {Acetobacter/isolation & purification ; Bacteria/classification ; Chemical Phenomena ; Fermentation ; Humans ; Kombucha Tea/*analysis/*microbiology ; Metagenomics/*methods ; *Microbiota ; Phenols/analysis ; Probiotics/analysis ; Proteins/analysis ; RNA, Ribosomal, 16S/genetics ; Tea/chemistry ; Yeasts/classification ; }, abstract = {Kombucha is a fermented tea made from a Symbiotic Culture of Bacteria and Yeast (SCOBY) with a long history of use as a health tonic. It is likely that most health benefits come from the tea and fermentation metabolites from specific microbial communities. Despite its growing importance as a functional health drink, the microbial ecosystem present in kombucha has not been fully documented. To characterize the microbial composition and biochemical properties of 'The Good Brew' original base kombucha, we used metagenomics amplicon (16S rRNA and ITS) sequencing to identify the microbial communities at the taxonomic level. We identified 34 genera with 200 microbial species yet described in kombucha. 
The dominance of organic acid producing microorganisms Acetobacter, Komagataeibacter and Starmerella are healthy for the human gut and their glucose metabolising activities have a putative role in preventing conditions such as diabetes and obesity. Kombucha contains high protein (3.31 µg/mL), high phenolic content (290.4 mg/100 mL) and low sugars (glucose: 1.87 g/L; sucrose 1.11 g/L; fructose: 0.05 g/L) as compared to green tea. The broad microbial diversity with proven health benefits for the human gut suggests kombucha is a powerful probiotic. These findings are important to improve the commercial value of kombucha and uncover the immense prospects for health benefits.}, } @article {pmid34959560, year = {2021}, author = {Pérez-Duque, A and Gonzalez-Muñoz, A and Arboleda-Valencia, J and Vivas-Aguas, LJ and Córdoba-Meza, T and Rodriguez-Rey, GT and Díaz-Guevara, P and Martinez-Urtaza, J and Wiesner-Reyes, M}, title = {Comparative Genomics of Clinical and Environmental Isolates of Vibrio spp. of Colombia: Implications of Traits Associated with Virulence and Resistance.}, journal = {Pathogens (Basel, Switzerland)}, volume = {10}, number = {12}, pages = {}, pmid = {34959560}, issn = {2076-0817}, support = {777 of 2017//Ministry of Science, Technology, and Innovation of Colombia/ ; }, abstract = {There is widespread concern about the increase in cases of human and animal infections caused by pathogenic Vibrio species due to the emergence of epidemic lineages. In Colombia, active surveillance by the National Institute of Health (INS) has confirmed the presence of Vibrio; however, in routine surveillance, these isolates are not genomically characterized. This study focused on the pangenome analysis of six Vibrio species: V. parahaemolyticus, V. vulnificus, V. alginolyticus, V. fluvialis, V. diabolicus and V. furnissii to determine the genetic architectures of potentially virulent and antimicrobial resistance traits. 
Isolates from environmental and clinical samples were genome sequenced, assembled and annotated. The most important species in public health were further characterized by multilocus sequence typing and phylogenomics. For V. parahaemolyticus, we found the virulent ST3 and ST120 genotypes. For V. vulnificus, we identified isolates belonging to lineages 1 and 2. Virulence gene homologues between species were found even in non-pathogenic species such as V. diabolicus. Annotations related to the mobilome, integrative mobile and conjugative elements and resistance genes were obtained from environmental and clinical isolates. This study contributes genomic information to the intensified surveillance program implemented by the INS to establish potential sources of vibriosis in Colombia.}, } @article {pmid34958071, year = {2021}, author = {Pavlovikj, N and Gomes-Neto, JC and Benson, AK}, title = {Heuristic Mining of Hierarchical Genotypes and Accessory Genome Loci in Bacterial Populations.}, journal = {Journal of visualized experiments : JoVE}, volume = {}, number = {178}, pages = {}, doi = {10.3791/63115}, pmid = {34958071}, issn = {1940-087X}, mesh = {*Genome, Bacterial ; Genotype ; *Heuristics ; Phylogeny ; Reproducibility of Results ; Whole Genome Sequencing ; }, abstract = {Routine and systematic use of bacterial whole-genome sequencing (WGS) is enhancing the accuracy and resolution of epidemiological investigations carried out by Public Health laboratories and regulatory agencies. Large volumes of publicly available WGS data can be used to study pathogenic populations at a large scale. Recently, a freely available computational platform called ProkEvo was published to enable reproducible, automated, and scalable hierarchical-based population genomic analyses using bacterial WGS data. This implementation of ProkEvo demonstrated the importance of combining standard genotypic mapping of populations with mining of accessory genomic content for ecological inference. 
In particular, the work highlighted here used ProkEvo-derived outputs for population-scaled hierarchical analyses using the R programming language. The main objective was to provide a practical guide for microbiologists, ecologists, and epidemiologists by showing how to: i) use a phylogeny-guided mapping of hierarchical genotypes; ii) assess frequency distributions of genotypes as a proxy for ecological fitness; iii) determine kinship relationships and genetic diversity using specific genotypic classifications; and iv) map lineage differentiating accessory loci. To enhance reproducibility and portability, R markdown files were used to demonstrate the entire analytical approach. The example dataset contained genomic data from 2,365 isolates of the zoonotic foodborne pathogen Salmonella Newport. Phylogeny-anchored mapping of hierarchical genotypes (Serovar -> BAPS1 -> ST -> cgMLST) revealed the population genetic structure, highlighting sequence types (STs) as the keystone differentiating genotype. Across the three most dominant lineages, ST5 and ST118 shared a common ancestor more recently than with the highly clonal ST45 phylotype. ST-based differences were further highlighted by the distribution of accessory antimicrobial resistance (AMR) loci. Lastly, a phylogeny-anchored visualization was used to combine hierarchical genotypes and AMR content to reveal the kinship structure and lineage-specific genomic signatures. 
Combined, this analytical approach provides some guidelines for conducting heuristic bacterial population genomic analyses using pan-genomic information.}, } @article {pmid34957454, year = {2021}, author = {Son, S and Lee, R and Park, SM and Lee, SH and Lee, HK and Kim, Y and Shin, D}, title = {Complete genome sequencing and comparative genomic analysis of Lactobacillus acidophilus C5 as a potential canine probiotics.}, journal = {Journal of animal science and technology}, volume = {63}, number = {6}, pages = {1411--1422}, pmid = {34957454}, issn = {2055-0391}, abstract = {Lactobacillus acidophilus is a gram-positive, microaerophilic, and acidophilic bacterial species. L. acidophilus strains in the gastrointestinal tracts of humans and other animals have been profiled, but strains found in the canine gut have not been studied yet. Our study helps in understanding the genetic features of the L. acidophilus C5 strain found in the canine gut, determining its adaptive features evolved to survive in the canine gut environment, and in elucidating its probiotic functions. To examine the canine L. acidophilus C5 genome, we isolated the C5 strain from a Korean dog and sequenced it using PacBio SMRT sequencing technology. A comparative genomic approach was used to assess genetic relationships between C5 and six other strains and study the distinguishing features related to different hosts. We found that most genes in the C5 strain were related to carbohydrate transport and metabolism. The pan-genome of seven L. acidophilus strains contained 2,254 gene families, and the core genome contained 1,726 gene families. The phylogenetic tree of the core genes in the canine L. acidophilus C5 strain was very close to that of two strains (DSM20079 and NCFM) from humans. We identified 30 evolutionarily accelerated genes in the L. acidophilus C5 strain in the ratio of non-synonymous to synonymous substitutions (dN/dS) analysis. 
Five of these thirty genes were associated with carbohydrate transport and metabolism. This study provides insights into genetic features and adaptations of the L. acidophilus C5 strain to survive the canine intestinal environment. It also suggests that the evolution of the L. acidophilus genome is closely related to the host's evolutionary adaptation process.}, } @article {pmid34956133, year = {2021}, author = {Thomas, P and Abdel-Glil, MY and Subbaiyan, A and Busch, A and Eichhorn, I and Wieler, LH and Neubauer, H and Pletz, M and Seyboldt, C}, title = {First Comparative Analysis of Clostridium septicum Genomes Provides Insights Into the Taxonomy, Species Genetic Diversity, and Virulence Related to Gas Gangrene.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {771945}, pmid = {34956133}, issn = {1664-302X}, abstract = {Clostridium septicum is a Gram-positive, toxin-producing, and spore-forming bacterium that is recognized, together with C. perfringens, as the most important etiologic agent of progressive gas gangrene. Clostridium septicum infections are almost always fatal in humans and animals. Despite its clinical and agricultural relevance, there is currently limited knowledge of the diversity and genome structure of C. septicum. This study presents the complete genome sequence of C. septicum DSM 7534[T] type strain as well as the first comparative analysis of five C. septicum genomes. The taxonomy of C. septicum, as revealed by 16S rRNA analysis as well as by genomic wide indices such as protein-based phylogeny, average nucleotide identity, and digital DNA-DNA hybridization indicates a stable clade. The composition and presence of prophages, CRISPR elements and accessory genetic material was variable in the investigated genomes. This is in contrast to the limited genetic variability described for the phylogenetically and phenotypically related species Clostridium chauvoei. The restriction-modification (RM) systems between two C. 
septicum genomes were heterogeneous for the RM types they encoded. C. septicum has an open pangenome with 2,311 genes representing the core genes and 1,429 accessory genes. The core genome SNP divergence between genome pairs varied up to 4,886 pairwise SNPs. A vast arsenal of potential virulence genes was detected in the genomes studied. Sequence analysis of these genes revealed that sialidase, hemolysin, and collagenase genes are conserved compared to the α-toxin and hyaluronidase genes. In addition, a conserved gene found in all C. septicum genomes was predicted to encode a leucocidin homolog (beta-channel forming cytolysin) similar (71.10% protein identity) to Clostridium chauvoei toxin A (CctA), which is a potent toxin. In conclusion, our results provide first, valuable insights into strain relatedness and genomic plasticity of C. septicum and contribute to our understanding of the virulence mechanisms of this important human and animal pathogen.}, } @article {pmid34954348, year = {2022}, author = {Hussain, JN and Cohen, MM and Mantri, N and O'Malley, CJ and Greaves, RF}, title = {Infrared sauna as exercise-mimetic? Physiological responses to infrared sauna vs exercise in healthy women: A randomized controlled crossover trial.}, journal = {Complementary therapies in medicine}, volume = {64}, number = {}, pages = {102798}, doi = {10.1016/j.ctim.2021.102798}, pmid = {34954348}, issn = {1873-6963}, mesh = {Blood Pressure ; Cross-Over Studies ; Exercise ; Female ; Heart Rate ; Humans ; Male ; *Steam Bath ; }, abstract = {BACKGROUND: Passive heat therapies have been reported to have similar effects on the cardiovascular system as exercise. Studies supporting these findings in healthy populations have predominantly been done with men using warm water immersions or traditional saunas, rather than newer infrared-based saunas.

OBJECTIVE: To explore short-term thermal and cardiovascular responses in women using an infrared sauna as compared to moderate-intensity exercise.

STUDY DESIGN: Randomized controlled crossover trial with balanced allocations.

SETTING: Brisbane, Australia (August 2019 - March 2020) PARTICIPANTS: Ten healthy women (36 ± 9 years) INTERVENTIONS: 45 min of resting, infrared sauna or indoor bicycling PRIMARY OUTCOME MEASURES: tympanic/skin temperatures; respiratory rate; blood pressure; arterial stiffness; heart rate variability RESULTS: Tympanic temperatures were elevated during infrared sauna as compared to both control (mean diff = +1.05 °C ± SEM 0.12 °C, 95% C.I.: 0.73 - 1.36, p < 0.0005) and exercise (mean diff = +0.79 °C ± SEM 0.12 °C, 95% C.I.: 0.49 - 1.08, p < 0.0005). Respiratory rates were higher during exercise as compared to both control (mean diff = +7.66 ± SEM 1.37, 95% C.I.: 4.09 - 11.23, p < 0.0005) and infrared sauna (mean diff = +6.66 ± SEM 1.33, 95% C.I.: 3.20 - 10.11, p < 0.0005). No significant differences in non-invasive measures of blood pressure, arterial stiffness or heart rate variability were detected between any of the interventions.

CONCLUSIONS: These findings suggest the physiological effects of infrared sauna bathing are underpinned by thermoregulatory-induced responses, more so than exercise-mimetic cardiorespiratory or cardiovascular activations.}, } @article {pmid34944959, year = {2021}, author = {Stenman, A and Yang, M and Paulsson, JO and Zedenius, J and Paulsson, K and Juhlin, CC}, title = {Pan-Genomic Sequencing Reveals Actionable CDKN2A/2B Deletions and Kataegis in Anaplastic Thyroid Carcinoma.}, journal = {Cancers}, volume = {13}, number = {24}, pages = {}, pmid = {34944959}, issn = {2072-6694}, support = {Junior Clinical Investigator Award//Swedish Cancer Society/ ; Stora Etableringsanslaget//Swedish Society for Medical Research/ ; Project funding//Karolinska Institute/ ; Project funding//Stockholm County Council/ ; }, abstract = {Anaplastic thyroid carcinoma (ATC) is a lethal malignancy characterized by poor response to conventional therapies. Whole-genome sequencing (WGS) analyses of this tumor type are limited, and we therefore interrogated eight ATCs using WGS and RNA sequencing. Five out of eight cases (63%) displayed cyclin-dependent kinase inhibitor 2A (CDKN2A) abnormalities, either copy number loss (n = 4) or truncating mutations (n = 1). All four cases with loss of the CDKN2A locus (encoding p16 and p14arf) also exhibited loss of the neighboring CDKN2B gene (encoding p15ink4b), and displayed reduced CDKN2A/2B mRNA levels. Mutations in established ATC-related genes were observed, including TP53, BRAF, ARID1A, and RB1, and overrepresentation of mutations were also noted in 13 additional cancer genes. One of the more predominant mutational signatures was intimately coupled to the activity of Apolipoprotein B mRNA-editing enzyme, the catalytic polypeptide-like (APOBEC) family of cytidine deaminases implied in kataegis, a focal hypermutation phenotype, which was observed in 4/8 (50%) cases. 
We corroborate the roles of CDKN2A/2B in ATC development and identify kataegis as a recurrent phenomenon. Our findings pinpoint clinically relevant alterations, which may indicate response to CDK inhibitors, and focal hypermutational phenotypes that may be coupled to improved responses using immune checkpoint inhibitors.}, } @article {pmid34943759, year = {2021}, author = {Decano, AG and Pettigrew, K and Sabiiti, W and Sloan, DJ and Neema, S and Bazira, J and Kiiru, J and Onyango, H and Asiimwe, B and Holden, MTG}, title = {Pan-Resistome Characterization of Uropathogenic Escherichia coli and Klebsiella pneumoniae Strains Circulating in Uganda and Kenya, Isolated from 2017-2018.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {10}, number = {12}, pages = {}, pmid = {34943759}, issn = {2079-6382}, support = {MR/S004785/1/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Urinary tract infection (UTI) develops after a pathogen adheres to the inner lining of the urinary tract. Cases of UTIs are predominantly caused by several Gram-negative bacteria and account for high morbidity in the clinical and community settings. Of greater concern are the strains carrying antimicrobial resistance (AMR)-conferring genes. The gravity of a UTI is also determined by a spectrum of other virulence factors. This study represents a pilot project to investigate the burden of AMR among uropathogens in East Africa. We examined bacterial samples isolated in 2017-2018 from in- and out-patients in Kenya (KY) and Uganda (UG) that presented with clinical symptoms of UTI. We reconstructed the evolutionary history of the strains, investigated their population structure, and performed comparative analysis of their pangenome contents. We found 55 Escherichia coli and 19 Klebsiella pneumoniae strains confirmed uropathogenic following screening for the prevalence of UTI virulence genes including fimH, iutA, feoA/B/C, mrkD, and foc. We identified 18 different sequence types in E. 
coli population while all K. pneumoniae strains belong to ST11. The most prevalent E. coli sequence types were ST131 (26%), ST335/1193 (10%), and ST10 (6%). Diverse plasmid types were observed in both collections such as Incompatibility (IncF/IncH/IncQ1/IncX4) and Col groups. Pangenome analysis of each set revealed a total of 2862 and 3464 genes comprised the core genome of E. coli and K. pneumoniae population, respectively. Among these are acquired AMR determinants including fluoroquinolone resistance-conferring genes aac(3)-Ib-cr and other significant genes: aad, tet, sul1, sul2, and cat, which are associated with aminoglycoside, tetracycline, sulfonamide, and chloramphenicol resistance, respectively. In the accessory genomes of both species collections, several β-lactamase genes were detected: blaCTX-M, blaTEM and blaOXA, or blaNDM. Overall, 93% are multi-drug resistant in the E. coli collection while 100% of the K. pneumoniae strains contained genes that are associated with resistance to three or more antibiotic classes. Our findings illustrate the abundant acquired resistome and virulome repertoire in uropathogenic E. coli and K. pneumoniae, which are mainly disseminated via clonal and horizontal transfer, circulating in the East African region. 
We further demonstrate here that routine genomic surveillance is necessary for high-resolution bacterial epidemiology of these important AMR pathogens.}, } @article {pmid34925418, year = {2021}, author = {Zenda, T and Liu, S and Dong, A and Li, J and Wang, Y and Liu, X and Wang, N and Duan, H}, title = {Omics-Facilitated Crop Improvement for Climate Resilience and Superior Nutritive Value.}, journal = {Frontiers in plant science}, volume = {12}, number = {}, pages = {774994}, pmid = {34925418}, issn = {1664-462X}, abstract = {Novel crop improvement approaches, including those that facilitate for the exploitation of crop wild relatives and underutilized species harboring the much-needed natural allelic variation are indispensable if we are to develop climate-smart crops with enhanced abiotic and biotic stress tolerance, higher nutritive value, and superior traits of agronomic importance. Top among these approaches are the "omics" technologies, including genomics, transcriptomics, proteomics, metabolomics, phenomics, and their integration, whose deployment has been vital in revealing several key genes, proteins and metabolic pathways underlying numerous traits of agronomic importance, and aiding marker-assisted breeding in major crop species. Here, citing several relevant examples, we appraise our understanding on the recent developments in omics technologies and how they are driving our quest to breed climate resilient crops. Large-scale genome resequencing, pan-genomes and genome-wide association studies are aiding the identification and analysis of species-level genome variations, whilst RNA-sequencing driven transcriptomics has provided unprecedented opportunities for conducting crop abiotic and biotic stress response studies. Meanwhile, single cell transcriptomics is slowly becoming an indispensable tool for decoding cell-specific stress responses, although several technical and experimental design challenges still need to be resolved. 
Additionally, the refinement of the conventional techniques and advent of modern, high-resolution proteomics technologies necessitated a gradual shift from the general descriptive studies of plant protein abundances to large scale analysis of protein-metabolite interactions. Especially, metabolomics is currently receiving special attention, owing to the role metabolites play as metabolic intermediates and close links to the phenotypic expression. Further, high throughput phenomics applications are driving the targeting of new research domains such as root system architecture analysis, and exploration of plant root-associated microbes for improved crop health and climate resilience. Overall, coupling these multi-omics technologies to modern plant breeding and genetic engineering methods ensures an all-encompassing approach to developing nutritionally-rich and climate-smart crops whose productivity can sustainably and sufficiently meet the current and future food, nutrition and energy demands.}, } @article {pmid34921716, year = {2022}, author = {Jin, X and Yang, Y and Cao, H and Gao, B and Zhao, Z}, title = {Eco-phylogenetic analyses reveal divergent evolution of vitamin B12 metabolism in the marine bacterial family 'Psychromonadaceae'.}, journal = {Environmental microbiology reports}, volume = {14}, number = {1}, pages = {147--163}, doi = {10.1111/1758-2229.13036}, pmid = {34921716}, issn = {1758-2229}, mesh = {*Cyanobacteria/metabolism ; *Gammaproteobacteria/metabolism ; Phylogeny ; Vitamin B 12/metabolism ; Vitamins ; }, abstract = {Cobalamin (vitamin B12) is an essential micronutrient required by both prokaryotes and eukaryotes. Nevertheless, with high genetic and metabolic cost, de novo cobalamin biosynthesis is exclusive to a subset of prokaryotic taxa. Many Cyanobacterial and Archaeal taxa have been implicated in de novo cobalamin biosynthesis in epi- and mesopelagic ocean respectively. 
However, the contributions of Gammaproteobacteria, particularly the family 'Psychromonadaceae', are largely unknown. Through phylo-pangenomic analyses using concatenated single-copy proteins and homologous gene clusters respectively, the phylogenies within 'Psychromonadaceae' recapitulate both their taxonomic delineations and environmental distributions. Moreover, uneven distribution of cobalamin de novo biosynthetic operon and cobalamin-dependent light-responsive regulon were observed, and of which the linkages to the environmental conditions where cobalamin availability and light regime can be varied respectively were discussed, suggesting the impacts of ecological divergence in shaping their disparate cobalamin-related metabolisms. Functional analysis demonstrated a varying degree of cobalamin dependency for both central metabolic processes and cobalamin-mediated light-responsive regulation, and underlying sequence characteristics of cis- and trans-regulatory elements were revealed. Our findings emphasized the potential roles of cobalamin in shaping the ecological distributions and driving the metabolic evolution in the marine bacterial family 'Psychromonadaceae', and have further implications for an improved understanding of nutritional interdependencies and community metabolism modulated by cobalamin.}, } @article {pmid34920102, year = {2022}, author = {Yi, B and Dalpke, AH}, title = {Revisiting the intrageneric structure of the genus Pseudomonas with complete whole genome sequence information: Insights into diversity and pathogen-related genetic determinants.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {97}, number = {}, pages = {105183}, doi = {10.1016/j.meegid.2021.105183}, pmid = {34920102}, issn = {1567-7257}, mesh = {Genetic Variation ; *Genome, Bacterial ; Host Specificity ; Phylogeny ; Plant Diseases/microbiology ; Plants ; 
Pseudomonas/classification/*genetics ; Species Specificity ; Virulence ; *Whole Genome Sequencing ; }, abstract = {Pseudomonas spp. exhibit considerable differences in host specificity and virulence. Most Pseudomonas species were isolated exclusively from environmental sources, ranging from soil to plants, but some Pseudomonas species have been detected from versatile sources, including both human host and environmental sources. Understanding genome variations that generate the tremendous diversity in Pseudomonas biology is important in controlling the incidence of infections. With a data set of 704 Pseudomonas complete whole genome sequences representing 186 species, Pseudomonas intrageneric structure was investigated by hierarchical clustering based on average nucleotide identity, and by phylogeny analysis based on concatenated core-gene alignment. Further comparative functional analyses indicated that Pseudomonas species only living in natural habitats lack multiple functions that are important in the regulation of bacterial pathogenesis, indicating the possession of these functions might be characteristic of Pseudomonas human pathogens. Moreover, we have performed pan-genome based homogeneity analyses, and detected genes with conserved structures but diversified functions across the Pseudomonas genomes, suggesting these genes play a role in driving diversity. 
In summary, this study provided insights into the dynamics of genome diversity and pathogen-related genetic determinants in Pseudomonas, which might help the development of more targeted antibiotics for the treatment of Pseudomonas infections.}, } @article {pmid34919780, year = {2022}, author = {Lian, X and Zhang, H and Jiang, C and Gao, F and Yan, L and Zheng, X and Cheng, J and Wang, W and Wang, X and Ye, X and Li, J and Zhang, L and Li, Z and Tan, B and Feng, J}, title = {De novo chromosome-level genome of a semi-dwarf cultivar of Prunus persica identifies the aquaporin PpTIP2 as responsible for temperature-sensitive semi-dwarf trait and PpB3-1 for flower type and size.}, journal = {Plant biotechnology journal}, volume = {20}, number = {5}, pages = {886-902}, pmid = {34919780}, issn = {1467-7652}, mesh = {*Aquaporins/genetics ; Chromosomes ; Flowers/genetics ; Fruit/genetics ; Genome-Wide Association Study ; Plant Breeding ; *Prunus persica/genetics ; Temperature ; }, abstract = {Peach (Prunus persica) is one of the most important fruit crops globally, but its cultivation can be hindered by large tree size. 'Zhongyoutao 14' (CN14) is a temperature-sensitive semi-dwarf (TSSD) cultivar which might be useful as breeding stock. The genome of CN14 was sequenced and assembled de novo using single-molecule real-time sequencing and chromosome conformation capture assembly. A high-quality genome was assembled and annotated, with 228.82 Mb mapped to eight chromosomes. Eighty-six re-sequenced F1 individuals and 334 previously re-sequenced accessions were used to identify candidate genes controlling TSSD and flower type and size. An aquaporin tonoplast intrinsic protein (PpTIP2) was a strong candidate gene for control of TSSD. Sequence variations in the upstream regulatory region of PpTIP2 correlated with different transcriptional activity at different temperatures. 
PpB3-1, a candidate gene for flower type (SH) and flower size, contributed to petal development and promoted petal enlargement. The locus of another 12 agronomic traits was identified through genome-wide association study. Most of these loci exhibited consistent and precise association signals, except for flesh texture and flesh adhesion. A 6015-bp insertion in exon 3 and a 26-bp insertion upstream of PpMYB25 were associated with fruit hairless. Along with a 70.5-Kb gap at the F-M locus in CN14, another two new alleles were identified in peach accessions. Our findings will not only promote genomic research and agronomic breeding in peach but also provide a foundation for the peach pan-genome.}, } @article {pmid34916613, year = {2022}, author = {Yuan, Z and Wu, Q and Xu, L and Druzhinina, IS and Stukenbrock, EH and Nieuwenhuis, BPS and Zhong, Z and Liu, ZJ and Wang, X and Cai, F and Kubicek, CP and Shan, X and Wang, J and Shi, G and Peng, L and Martin, FM}, title = {Genomic landscape of a relict fir-associated fungus reveals rapid convergent adaptation towards endophytism.}, journal = {The ISME journal}, volume = {16}, number = {5}, pages = {1294-1305}, pmid = {34916613}, issn = {1751-7370}, mesh = {Acclimatization ; Endophytes ; *Genes, Mating Type, Fungal ; *Genomics ; Reproduction ; }, abstract = {Comparative and pan-genomic analyses of the endophytic fungus Pezicula neosporulosa (Helotiales, Ascomycota) from needles of the relict fir, Abies beshanzuensis, showed expansions of carbohydrate metabolism and secondary metabolite biosynthetic genes characteristic for unrelated plant-beneficial helotialean, such as dark septate endophytes and ericoid mycorrhizal fungi. The current species within the relatively young Pliocene genus Pezicula are predominantly saprotrophic, while P. neosporulosa lacks such features. To understand the genomic background of this putatively convergent evolution, we performed population analyses of 77 P. neosporulosa isolates. 
This revealed a mosaic structure of a dozen non-recombining and highly genetically polymorphic subpopulations with a unique mating system structure. We found that one idiomorph of a probably duplicated mat1-2 gene was found in putatively heterothallic isolates, while the other co-occurred with mat1-1 locus suggesting homothallic reproduction for these strains. Moreover, 24 and 81 genes implicated in plant cell-wall degradation and secondary metabolite biosynthesis, respectively, showed signatures of the balancing selection. These findings highlight the evolutionary pattern of the two gene families for allowing the fungus a rapid adaptation towards endophytism and facilitating diverse symbiotic interactions.}, } @article {pmid34915867, year = {2021}, author = {Anjos, WF and Lanes, GC and Azevedo, VA and Santos, AR}, title = {GENPPI: standalone software for creating protein interaction networks from genomes.}, journal = {BMC bioinformatics}, volume = {22}, number = {1}, pages = {596}, pmid = {34915867}, issn = {1471-2105}, mesh = {Animals ; Phylogeny ; *Protein Interaction Maps/genetics ; Sheep ; *Software ; }, abstract = {BACKGROUND: Bacterial genomes are being deposited into online databases at an increasing rate. Genome annotation represents one of the first efforts to understand organisms and their diseases. Some evolutionary relationships capable of being annotated only from genomes are conserved gene neighbourhoods (CNs), phylogenetic profiles (PPs), and gene fusions. At present, there is no standalone software that enables networks of interactions among proteins to be created using these three evolutionary characteristics with efficient and effective results.

RESULTS: We developed GENPPI software for the ab initio prediction of interaction networks using predicted proteins from a genome. In our case study, we employed 50 genomes of the genus Corynebacterium. Based on the PP relationship, GENPPI differentiated genomes between the ovis and equi biovars of the species Corynebacterium pseudotuberculosis and created groups among the other species analysed. If we inspected only the CN relationship, we could not entirely separate biovars, only species. Our software GENPPI was determined to be efficient because, for example, it creates interaction networks from the central genomes of 50 species/lineages with an average size of 2200 genes in less than 40 min on a conventional computer. Moreover, the interaction networks that our software creates reflect correct evolutionary relationships between species, which we confirmed with average nucleotide identity analyses. Additionally, this software enables the user to define how he or she intends to explore the PP and CN characteristics through various parameters, enabling the creation of customized interaction networks. For instance, users can set parameters regarding the genus, metagenome, or pangenome. In addition to the parameterization of GENPPI, it is also the user's choice regarding which set of genomes they are going to study.

CONCLUSIONS: GENPPI can help fill the gap concerning the considerable number of novel genomes assembled monthly and our ability to process interaction networks considering the noncore genes for all completed genome versions. With GENPPI, a user dictates how many and how evolutionarily correlated the genomes answer a scientific query.}, } @article {pmid34914532, year = {2021}, author = {Sirén, J and Monlong, J and Chang, X and Novak, AM and Eizenga, JM and Markello, C and Sibbesen, JA and Hickey, G and Chang, PC and Carroll, A and Gupta, N and Gabriel, S and Blackwell, TW and Ratan, A and Taylor, KD and Rich, SS and Rotter, JI and Haussler, D and Garrison, E and Paten, B}, title = {Pangenomics enables genotyping of known structural variants in 5202 diverse genomes.}, journal = {Science (New York, N.Y.)}, volume = {374}, number = {6574}, pages = {abg8871}, pmid = {34914532}, issn = {1095-9203}, support = {U01 HG010961/HG/NHGRI NIH HHS/United States ; U01 HL137183/HL/NHLBI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; N02 HL64278/HL/NHLBI NIH HHS/United States ; R01 HG010485/HG/NHGRI NIH HHS/United States ; OT2 OD026682/OD/NIH HHS/United States ; OT3 HL142481/HL/NHLBI NIH HHS/United States ; }, mesh = {Algorithms ; Alleles ; Computational Biology ; *Genetic Variation ; Genome, Fungal ; *Genome, Human ; Genomics/*methods ; Genotype ; *Genotyping Techniques ; Haplotypes ; High-Throughput Nucleotide Sequencing ; Humans ; Polymorphism, Single Nucleotide ; Quantitative Trait Loci ; Saccharomyces/genetics ; Saccharomyces cerevisiae/genetics ; Sequence Analysis, DNA ; }, abstract = {We introduce Giraffe, a pangenome short-read mapper that can efficiently map to a collection of haplotypes threaded through a sequence graph. Giraffe maps sequencing reads to thousands of human genomes at a speed comparable to that of standard methods mapping to a single reference genome. 
The increased mapping accuracy enables downstream improvements in genome-wide genotyping pipelines for both small variants and larger structural variants. We used Giraffe to genotype 167,000 structural variants, discovered in long-read studies, in 5202 diverse human genomes that were sequenced using short reads. We conclude that pangenomics facilitates a more comprehensive characterization of variation and, as a result, has the potential to improve many genomic analyses.}, } @article {pmid34910572, year = {2022}, author = {Zhao, S and Zhang, C and Rogers, MJ and Zhao, X and He, J}, title = {Differentiating Closely Affiliated Dehalococcoides Lineages by a Novel Genetic Marker Identified via Computational Pangenome Analysis.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {4}, pages = {e0218121}, pmid = {34910572}, issn = {1098-5336}, mesh = {Biodegradation, Environmental ; *Chloroflexi/metabolism ; Dehalococcoides ; Genetic Markers ; Phylogeny ; }, abstract = {As a group, the genus Dehalococcoides dehalogenates a wide range of organohalide pollutants, but the range of organohalide compounds that can be utilized for reductive dehalogenation differs among Dehalococcoides strains. Dehalococcoides lineages cannot be reliably disambiguated in mixed communities using typical phylogenetic markers, which often confounds bioremediation efforts. Here, we describe a computational approach to identify Dehalococcoides genetic markers with improved discriminatory resolution. Screening core genes from the Dehalococcoides pangenome for degree of similarity and frequency of 100% identity found a candidate genetic marker encoding a bacterial neuraminidase repeat (BNR)-containing protein of unknown function. This gene exhibits the fewest completely identical amino acid sequences and has among the lowest average amino acid sequence identity in the core pangenome. 
Primers targeting BNR could effectively discriminate between 40 available BNR sequences (in silico) and 10 different Dehalococcoides isolates (in vitro). Amplicon sequencing of BNR fragments generated from 22 subsurface soil samples revealed a total of 109 amplicon sequence variants, suggesting a high diversity of Dehalococcoides distributed in the environment. Therefore, the BNR gene can serve as an alternative genetic marker to differentiate strains of Dehalococcoides in complicated microbial communities. IMPORTANCE The challenge of discriminating between phylogenetically similar but functionally distinct bacterial lineages is particularly relevant to the development of technologies seeking to exploit the metabolic or physiological characteristics of specific members of bacterial genera. A computational approach was developed to expedite screening of potential genetic markers among phylogenetically affiliated bacteria. Using this approach, a gene encoding a bacterial neuraminidase repeat (BNR)-containing protein of unknown function was selected and evaluated as a genetic marker to differentiate strains of Dehalococcoides, an environmentally relevant genus of bacteria whose members can transform and detoxify a range of halogenated organic solvents and persistent organic pollutants, in complex microbial communities to demonstrate the validity of the approach. 
Moreover, many apparently phylogenetically distinct, currently uncharacterized Dehalococcoides were detected in environmental samples derived from contaminated sites.}, } @article {pmid34904403, year = {2022}, author = {Zanini, SF and Bayer, PE and Wells, R and Snowdon, RJ and Batley, J and Varshney, RK and Nguyen, HT and Edwards, D and Golicz, AA}, title = {Pangenomics in crop improvement-from coding structural variations to finding regulatory variants with pangenome graphs.}, journal = {The plant genome}, volume = {15}, number = {1}, pages = {e20177}, doi = {10.1002/tpg2.20177}, pmid = {34904403}, issn = {1940-3372}, support = {BBS/E/J/000PR9788/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Genome, Plant ; Genomics ; *Hordeum/genetics ; *Oryza/genetics ; Sequence Analysis, DNA ; Soybeans/genetics ; Triticum/genetics ; }, abstract = {Since the first reported crop pangenome in 2014, advances in high-throughput and cost-effective DNA sequencing technologies facilitated multiple such studies including the pangenomes of oilseed rape (Brassica napus L.), soybean [Glycine max (L.) Merr.], rice (Oryza sativa L.), wheat (Triticum aestivum L.), and barley (Hordeum vulgare L.). Compared with single-reference genomes, pangenomes provide a more accurate representation of the genetic variation present in a species. By combining the genomic data of multiple accessions, pangenomes allow for the detection and annotation of complex DNA polymorphisms such as structural variations (SVs), one of the major determinants of genetic diversity within a species. In this review we summarize the current literature on crop pangenomics, focusing on their application to find candidate SVs involved in traits of agronomic interest. We then highlight the potential of pangenomes in the discovery and functional characterization of noncoding regulatory sequences and their variations. 
We conclude with a summary and outlook on innovative data structures representing the complete content of plant pangenomes including annotations of coding and noncoding elements and outcomes of transcriptomic and epigenomic experiments.}, } @article {pmid34901703, year = {2021}, author = {Pronozin, AY and Bragina, MK and Salina, EA}, title = {Crop pangenomes.}, journal = {Vavilovskii zhurnal genetiki i selektsii}, volume = {25}, number = {1}, pages = {57-63}, doi = {10.18699/VJ21.007}, pmid = {34901703}, issn = {2500-0462}, abstract = {Progress in genome sequencing, assembly and analysis allows for a deeper study of agricultural plants' chromosome structures, gene identification and annotation. The published genomes of agricultural plants proved to be a valuable tool for studying gene functions and for marker-assisted and genomic selection. However, large structural genome changes, including gene copy number variations (CNVs) and gene presence/absence variations (PAVs), prevail in crops. These genomic variations play an important role in the functional set of genes and the gene composition in individuals of the same species and provide the genetic determination of the agronomically important properties of crops. A high degree of genomic variation observed indicates that single reference genomes do not represent the diversity within a species, leading to the pangenome concept. The pangenome represents information about all genes in a taxon: those that are common to all taxon members and those that are variable and are partially or completely specific for particular individuals. Pangenome sequencing and analysis technologies provide a large-scale study of genomic variation and resources for evolutionary research, functional genomics and crop breeding. This review provides an analysis of agricultural plants' pangenome studies. 
Pangenome structural features, methods and programs for bioinformatic analysis of pangenomic data are described.}, } @article {pmid34897437, year = {2022}, author = {Zimin, AV and Shumate, A and Shinder, I and Heinz, J and Puiu, D and Pertea, M and Salzberg, SL}, title = {A reference-quality, fully annotated genome from a Puerto Rican individual.}, journal = {Genetics}, volume = {220}, number = {2}, pages = {}, pmid = {34897437}, issn = {1943-2631}, support = {R01 HG006677/HG/NHGRI NIH HHS/United States ; R01 MH123567/MH/NIMH NIH HHS/United States ; R35 GM130151/GM/NIGMS NIH HHS/United States ; }, mesh = {*Black People ; *Genome, Human ; Hispanic or Latino/genetics ; Humans ; Molecular Sequence Annotation ; }, abstract = {Until 2019, the human genome was available in only one fully annotated version, GRCh38, which was the result of 18 years of continuous improvement and revision. Despite dramatic improvements in sequencing technology, no other genome was available as an annotated reference until 2019, when the genome of an Ashkenazi individual, Ash1, was released. In this study, we describe the assembly and annotation of a second individual genome, from a Puerto Rican individual whose DNA was collected as part of the Human Pangenome project. The new genome, called PR1, is the first true reference genome created from an individual of African descent. Due to recent improvements in both sequencing and assembly technology, and particularly to the use of the recently completed CHM13 human genome as a guide to assembly, PR1 is more complete and more contiguous than either GRCh38 or Ash1. Annotation revealed 37,755 genes (of which 19,999 are protein coding), including 12 additional gene copies that are present in PR1 and missing from CHM13. 
Fifty-seven genes have fewer copies in PR1 than in CHM13, 9 map only partially, and 3 genes (all noncoding) from CHM13 are entirely missing from PR1.}, } @article {pmid34890406, year = {2021}, author = {Gautreau, G and Bazin, A and Gachet, M and Planel, R and Burlot, L and Dubois, M and Perrin, A and Médigue, C and Calteau, A and Cruveiller, S and Matias, C and Ambroise, C and Rocha, EPC and Vallenet, D}, title = {Correction: PPanGGOLiN: Depicting microbial diversity via a partitioned pangenome graph.}, journal = {PLoS computational biology}, volume = {17}, number = {12}, pages = {e1009687}, pmid = {34890406}, issn = {1553-7358}, abstract = {[This corrects the article DOI: 10.1371/journal.pcbi.1007732.].}, } @article {pmid34889729, year = {2021}, author = {Siani, R and Stabl, G and Gutjahr, C and Schloter, M and Radl, V}, title = {Acidovorax pan-genome reveals specific functional traits for plant beneficial and pathogenic plant-associations.}, journal = {Microbial genomics}, volume = {7}, number = {12}, pages = {}, pmid = {34889729}, issn = {2057-5858}, mesh = {Bacterial Proteins/*genetics ; Comamonadaceae/*classification/genetics/isolation & purification ; Genome, Bacterial ; Genomics ; High-Throughput Nucleotide Sequencing ; Lotus/*growth & development/microbiology ; Phylogeny ; Plant Diseases/microbiology ; Sequence Analysis, DNA/*methods ; Symbiosis ; }, abstract = {Beta-proteobacteria belonging to the genus Acidovorax have been described from various environments. Many strains can interact with a range of hosts, including humans and plants, forming neutral, beneficial or detrimental associations. In the frame of this study, we investigated the genomic properties of 52 bacterial strains of the genus Acidovorax, isolated from healthy roots of Lotus japonicus, with the intent of identifying traits important for effective plant-growth promotion. Based on single-strain inoculation bioassays with L. 
japonicus, performed in a gnotobiotic system, we distinguished seven robust plant-growth promoting strains from strains with no significant effects on plant-growth. We showed that the genomes of the two groups differed prominently in protein families linked to sensing and transport of organic acids, production of phytohormones, as well as resistance and production of compounds with antimicrobial properties. In a second step, we compared the genomes of the tested isolates with those of plant pathogens and free-living strains of the genus Acidovorax sourced from public repositories. Our pan-genomics comparison revealed features correlated with commensal and pathogenic lifestyle. We showed that commensals and pathogens differ mostly in their ability to use plant-derived lipids and in the type of secretion-systems being present. Most free-living Acidovorax strains did not harbour any secretion-systems. Overall, our data indicate that Acidovorax strains undergo extensive adaptations to their particular lifestyle by horizontal uptake of novel genetic information and loss of unnecessary genes.}, } @article {pmid34887548, year = {2022}, author = {Conrad, RE and Viver, T and Gago, JF and Hatt, JK and Venter, SN and Rossello-Mora, R and Konstantinidis, KT}, title = {Toward quantifying the adaptive role of bacterial pangenomes during environmental perturbations.}, journal = {The ISME journal}, volume = {16}, number = {5}, pages = {1222-1234}, pmid = {34887548}, issn = {1751-7370}, support = {1831582//National Science Foundation (NSF)/ ; 1759831//National Science Foundation (NSF)/ ; }, mesh = {Bacteria/genetics ; *Genome, Bacterial ; Metagenome ; Metagenomics ; *Microbiota ; }, abstract = {Metagenomic surveys have revealed that natural microbial communities are predominantly composed of sequence-discrete, species-like populations but the genetic and/or ecological processes that maintain such populations remain speculative, limiting our understanding of population speciation 
and adaptation to perturbations. To address this knowledge gap, we sequenced 112 Salinibacter ruber isolates and 12 companion metagenomes from four adjacent saltern ponds in Mallorca, Spain that were experimentally manipulated to dramatically alter salinity and light intensity, the two major drivers of this ecosystem. Our analyses showed that the pangenome of the local Sal. ruber population is open and similar in size (~15,000 genes) to that of randomly sampled Escherichia coli genomes. While most of the accessory (noncore) genes were isolate-specific and showed low in situ abundances based on the metagenomes compared to the core genes, indicating that they were functionally unimportant and/or transient, 3.5% of them became abundant when salinity (but not light) conditions changed and encoded for functions related to osmoregulation. Nonetheless, the ecological advantage of these genes, while significant, was apparently not strong enough to purge diversity within the population. Collectively, our results provide an explanation for how this immense intrapopulation gene diversity is maintained, which has implications for the prokaryotic species concept.}, } @article {pmid34884620, year = {2021}, author = {Jalal, K and Khan, K and Ahmad, D and Hayat, A and Basharat, Z and Abbas, MN and Alghamdi, S and Almehmadi, M and Sahibzada, MUK}, title = {Pan-Genome Reverse Vaccinology Approach for the Design of Multi-Epitope Vaccine Construct against Escherichia albertii.}, journal = {International journal of molecular sciences}, volume = {22}, number = {23}, pages = {}, pmid = {34884620}, issn = {1422-0067}, mesh = {Bacterial Vaccines/*immunology ; Computational Biology ; Epitopes, B-Lymphocyte/*immunology ; Epitopes, T-Lymphocyte/*immunology ; Escherichia/genetics/*immunology ; *Genome, Bacterial ; Humans ; Molecular Docking Simulation ; Molecular Dynamics Simulation ; Vaccines, Subunit/*immunology ; Vaccinology ; }, abstract = {Escherichia albertii is characterized as an 
emerging pathogen, causing enteric infections. It is responsible for high mortality rate, especially in children, elderly, and immunocompromised people. To the best of our knowledge, no vaccine exists to curb this pathogen. Therefore, in current study, we aimed to identify potential vaccine candidates and design chimeric vaccine models against Escherichia albertii from the analysis of publicly available data of 95 strains, using a reverse vaccinology approach. Outer-membrane proteins (n = 4) were identified from core genome as vaccine candidates. Eventually, outer membrane Fimbrial usher (FimD) protein was selected as a promiscuous vaccine candidate and utilized to construct a potential vaccine model. It resulted in three epitopes, leading to the design of twelve vaccine constructs. Amongst these, V6 construct was found to be highly immunogenic, non-toxic, non-allergenic, antigenic, and most stable. This was utilized for molecular docking and simulation studies against six HLA and two TLR complexes. This construct can therefore be used for pan-therapy against different strains of E. 
albertii and needs to be tested in vitro and in vivo.}, } @article {pmid34874249, year = {2021}, author = {Bohr, LL and Youngblom, MA and Eldholm, V and Pepperell, CS}, title = {Genome reorganization during emergence of host-associated Mycobacterium abscessus.}, journal = {Microbial genomics}, volume = {7}, number = {12}, pages = {}, pmid = {34874249}, issn = {2057-5858}, support = {R01 AI113287/AI/NIAID NIH HHS/United States ; T32 AI055396/AI/NIAID NIH HHS/United States ; }, mesh = {Adaptation, Physiological ; Australia ; Chromosomes, Bacterial/*genetics ; Databases, Genetic ; Denmark ; Gene Transfer, Horizontal ; High-Throughput Nucleotide Sequencing ; Humans ; Mycobacterium Infections, Nontuberculous/*microbiology ; Mycobacterium abscessus/*classification/genetics/isolation & purification ; Netherlands ; Norway ; Phylogeny ; Phylogeography ; Plasmids/*genetics ; Prophages/*genetics ; Recombination, Genetic ; United Kingdom ; Whole Genome Sequencing/*methods ; }, abstract = {Mycobacterium abscessus is a rapid growing, free-living species of bacterium that also causes lung infections in humans. Human infections are usually acquired from the environment; however, dominant circulating clones (DCCs) have emerged recently in both M. abscessus subsp. massiliense and subsp. abscessus that appear to be transmitted among humans and are now globally distributed. These recently emerged clones are potentially informative about the ecological and evolutionary mechanisms of pathogen emergence and host adaptation. The geographical distribution of DCCs has been reported, but the genomic processes underlying their transition from environmental bacterium to human pathogen are not well characterized. To address this knowledge gap, we delineated the structure of M. abscessus subspecies abscessus and massiliense using genomic data from 200 clinical isolates of M. abscessus from seven geographical regions. 
We identified differences in overall patterns of lateral gene transfer (LGT) and barriers to LGT between subspecies and between environmental and host-adapted bacteria. We further characterized genome reorganization that accompanied bacterial host adaptation, inferring selection pressures acting at both genic and intergenic loci. We found that both subspecies encode an expansive pangenome with many genes at rare frequencies. Recombination appears more frequent in M. abscessus subsp. massiliense than in subsp. abscessus, consistent with prior reports. We found evidence suggesting that phage are exchanged between subspecies, despite genetic barriers evident elsewhere throughout the genome. Patterns of LGT differed according to niche, with less LGT observed among host-adapted DCCs versus environmental bacteria. We also found evidence suggesting that DCCs are under distinct selection pressures at both genic and intergenic sites. Our results indicate that host adaptation of M. abscessus was accompanied by major changes in genome evolution, including shifts in the apparent frequency of LGT and impacts of selection. Differences were evident among the DCCs as well, which varied in the degree of gene content remodelling, suggesting they were placed differently along the evolutionary trajectory toward host adaptation. 
These results provide insight into the evolutionary forces that reshape bacterial genomes as they emerge into the pathogenic niche.}, } @article {pmid34871726, year = {2022}, author = {Lata, KS and Kumar, S and Vindal, V and Patel, S and Das, J}, title = {A core and pan gene map of Leptospira genus and its interactions with human host.}, journal = {Microbial pathogenesis}, volume = {162}, number = {}, pages = {105347}, doi = {10.1016/j.micpath.2021.105347}, pmid = {34871726}, issn = {1096-1208}, mesh = {Animals ; Genome, Bacterial ; Genomics ; Humans ; *Leptospira/genetics ; *Leptospirosis/genetics ; Zoonoses ; }, abstract = {Leptospira species are the etiological agent of an emerging zoonotic disease known as "Leptospirosis" that substantially affects both human health and economy across the globe. Despite the global importance of the disease, knowledge of the pathogenetic features, host-adaptation and proper diagnosis of these bacteria remains lacking. To address these gaps, the pan-genome of the Leptospira genus was explored in the present study. The pan-genome of Leptospira genus was comprised of core (692) and accessory parts (softcore:1804, shell:6432, cloud:16,600). The functional analysis revealed the abundance of "Translation, ribosomal structure and biogenesis" COG class in core-genes; whereas in accessory parts, genes involved in signal transduction were the most abundant. Furthermore, pathogen-host interaction (PHI) analysis of core and accessory proteins with human proteins showed the presence of a total of 599 and 510 interactions, respectively. There were eight hubs in core PHI network and five hubs in PHI network of accessory proteins. The human proteins involved in these interactions were found functionally enriched in metabolic processes, responses to stimulus and immune system processes. Further, pan-genome based phylogeny separated the Leptospira genus in three major clades (belonging to P1, P2 and S) which relate to their pathogenicity level. 
Additionally, pathogenic and saprophytic clade specific genes of Leptospira have also been identified and functionally annotated for COG, KEGG and virulence factors. The results revealed the presence of 102 pathogenic and 215 saprophytic group specific gene clusters. The COG functional annotation of pathogen specific genes showed that defence mechanism followed by signal transduction mechanisms category were most significantly enriched COG categories; whereas in saprophytic group, signal transduction mechanisms was the most abundant COG, suggesting their role in adaptation and hence important for microbe's evolution and survival. In conclusion, this study provides a new insight of genomic features of Leptospira genus which may further be implemented for development of better control actions of the disease.}, } @article {pmid34867904, year = {2021}, author = {Lin, S and Sun, B and Shi, X and Xu, Y and Gu, Y and Gu, X and Ma, X and Wan, T and Xu, J and Su, J and Lou, Y and Zheng, M}, title = {Comparative Genomic and Pan-Genomic Characterization of Staphylococcus epidermidis From Different Sources Unveils the Molecular Basis and Potential Biomarkers of Pathogenic Strains.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {770191}, pmid = {34867904}, issn = {1664-302X}, abstract = {Coagulase-negative Staphylococcus (CoNS) is the most common pathogen causing traumatic endophthalmitis. Among CoNS, Staphylococcus epidermidis is the most common species that colonizes human skin, eye surfaces, and nasal cavity. It is also the main cause of nosocomial infection, especially foreign body-related bloodstream infections (FBR-BSIs). Although some studies have reported the genome characteristics of S. epidermidis, the genome of ocular trauma-sourced S. epidermidis strain and a comprehensive understanding of its pathogenicity are still lacking. Our study sequenced, analyzed, and reported the whole genomes of 11 ocular trauma-sourced samples of S. 
epidermidis that caused traumatic endophthalmitis. By integrating publicly available genomes, we obtained a total of 187 S. epidermidis samples from healthy and diseased eyes, skin, respiratory tract, and blood. Combined with pan-genome, phylogenetic, and comparative genomic analyses, our study showed that S. epidermidis, regardless of niche source, exhibits two founder lineages with different pathogenicity. Moreover, we identified several potential biomarkers associated with the virulence of S. epidermidis, including essD, uhpt, sdrF, sdrG, fbe, and icaABCDR. EssD and uhpt have high homology with esaD and hpt in Staphylococcus aureus, showing that the genomes of S. epidermidis and S. aureus may have communicated during evolution. SdrF, sdrG, fbe, and icaABCDR are related to biofilm formation. Compared to S. epidermidis from blood sources, ocular-sourced strains causing intraocular infection had no direct relationship with biofilm formation. In conclusion, this study provided additional data resources for studies on S. epidermidis and improved our understanding of the evolution and pathogenicity among strains of different sources.}, } @article {pmid34867903, year = {2021}, author = {Wu, S and Pang, R and Huang, J and Zhang, F and Cai, Z and Zhang, J and Chen, M and Xue, L and Gu, Q and Wang, J and Ding, Y and Wan, Q and Wu, Q}, title = {Evolutionary Divergence of the Novel Staphylococcal Species Staphylococcus argenteus.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {769642}, pmid = {34867903}, issn = {1664-302X}, abstract = {Currently, invasive infections caused by Staphylococcus argenteus, which is a recently named staphylococcal species, are increasingly reported worldwide. However, only a few genomic studies of S. argenteus have offered comprehensive information regarding its genetic diversity, epidemiological characteristics, antimicrobial resistance genes (ARGs), virulence genes and other profiles. 
Here, we describe a comparative genomic analysis by population structure, pangenome, panmobilome, region-specific accessory genes confer an adaptive advantage in 153 S. argenteus strains which comprised 24 strains sequenced in this study and 129 strains whose genome sequences were available from GenBank. As a result, the population of S. argenteus comprised seven genetically distinct clades, including two major clades (C1 and C2), with distinct isolation source patterns. Pangenome analysis revealed that S. argenteus has an open pangenome composed of 7,319 genes and a core genome composed of 1,508 genes. We further determined the distributions of 75 virulence factors (VFs) and 30 known ARGs and identified at least four types of plasmids and 93 complete or partial putative prophages. It indicate that S. argenteus may show a similar level of pathogenicity to that of S. aureus. This study also provides insights into the evolutionary divergence of this pathogen, indicating that the geographical distribution was a potential driving force behind the evolutionary divergence of S. argenteus. The preferential horizontal acquisition of particular elements, such as staphylococcal cassette chromosome mec elements and plasmids, was observed in specific regions, revealing potential gene exchange between S. argenteus strains and local S. aureus strains. Moreover, multiple specific genes related to environmental adaptation were identified in strains isolated from East Asia. However, these findings may help promote our understanding of the evolutionary divergence of this bacterium at a high genetic resolution by providing insights into the epidemiology of S. 
argenteus and may help combat its spread.}, } @article {pmid34865982, year = {2022}, author = {Bohra, A and Bansal, KC and Graner, A}, title = {The 3366 chickpea genomes for research and breeding.}, journal = {Trends in plant science}, volume = {27}, number = {3}, pages = {217-219}, doi = {10.1016/j.tplants.2021.11.017}, pmid = {34865982}, issn = {1878-4372}, mesh = {*Cicer/genetics ; Crops, Agricultural/genetics ; Genome, Plant/genetics ; Genomics ; Plant Breeding ; }, abstract = {Genome sequences provide an unprecedented resource to rapidly develop modern crops. A recent paper by Varshney et al. provides genome variation maps of 3366 chickpea accessions. Here, we highlight how this breakthrough research can fundamentally change breeding practices of chickpea and potentially other crops.}, } @article {pmid34850891, year = {2021}, author = {Saw, JH and Cardona, T and Montejano, G}, title = {Complete Genome Sequencing of a Novel Gloeobacter Species from a Waterfall Cave in Mexico.}, journal = {Genome biology and evolution}, volume = {13}, number = {12}, pages = {}, pmid = {34850891}, issn = {1759-6653}, support = {MR/T017546/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {*Cyanobacteria/genetics ; Mexico ; Photosystem II Protein Complex/genetics ; Sequence Analysis, DNA ; Whole Genome Sequencing ; }, abstract = {Only two complete genomes of the cyanobacterial genus Gloeobacter from two very different regions of the world currently exist. Here, we present the complete genome sequence of a third member of the genus isolated from a waterfall cave in Mexico. Analysis of the average nucleotide identities (ANIs) between published Gloeobacter genomes revealed that the complete genome of this new member is only 92.7% similar to Gloeobacter violaceus and therefore we determined it to be a new species. We propose to name this new species Gloeobacter morelensis after the location in Mexico where it was isolated. 
The complete genome consists of one circular chromosome (4,921,229 bp), one linear plasmid (172,328 bp), and one circular plasmid (8,839 bp). Its genome is the largest of all completely sequenced genomes of Gloeobacter species. Pangenomic comparisons revealed that G. morelensis encodes 759 genes not shared with other Gloeobacter species. Despite being more closely related to G. violaceus, it features an extremely divergent psbA gene encoding an atypical D1 core subunit of Photosystem II previously only found within the genome of Gloeobacter kilaueensis. In addition, we detected evidence of concerted evolution of psbA genes encoding identical D1 in all three Gloeobacter genomes, a characteristic that seems widespread in cyanobacteria and may therefore be traced back to their last common ancestor.}, } @article {pmid34849810, year = {2021}, author = {Noshay, JM and Liang, Z and Zhou, P and Crisp, PA and Marand, AP and Hirsch, CN and Schmitz, RJ and Springer, NM}, title = {Stability of DNA methylation and chromatin accessibility in structurally diverse maize genomes.}, journal = {G3 (Bethesda, Md.)}, volume = {11}, number = {8}, pages = {}, pmid = {34849810}, issn = {2160-1836}, mesh = {Chromatin/genetics ; *DNA Methylation ; Gene Expression Regulation, Plant ; Genome, Plant ; Humans ; *Zea mays/genetics ; }, abstract = {Accessible chromatin and unmethylated DNA are associated with many genes and cis-regulatory elements. Attempts to understand natural variation for accessible chromatin regions (ACRs) and unmethylated regions (UMRs) often rely upon alignments to a single reference genome. This limits the ability to assess regions that are absent in the reference genome assembly and monitor how nearby structural variants influence variation in chromatin state. In this study, de novo genome assemblies for four maize inbreds (B73, Mo17, Oh43, and W22) are utilized to assess chromatin accessibility and DNA methylation patterns in a pan-genome context. 
A more complete set of UMRs and ACRs can be identified when chromatin data are aligned to the matched genome rather than a single reference genome. While there are UMRs and ACRs present within genomic regions that are not shared between genotypes, these features are 6- to 12-fold enriched within regions between genomes. Characterization of UMRs present within shared genomic regions reveals that most UMRs maintain the unmethylated state in other genotypes with only ∼5% being polymorphic between genotypes. However, the majority (71%) of UMRs that are shared between genotypes only exhibit partial overlaps suggesting that the boundaries between methylated and unmethylated DNA are dynamic. This instability is not solely due to sequence variation as these partially overlapping UMRs are frequently found within genomic regions that lack sequence variation. The ability to compare chromatin properties among individuals with structural variation enables pan-epigenome analyses to study the sources of variation for accessible chromatin and unmethylated DNA.}, } @article {pmid34848115, year = {2022}, author = {Brockhurst, MA and Harrison, E}, title = {Ecological and evolutionary solutions to the plasmid paradox.}, journal = {Trends in microbiology}, volume = {30}, number = {6}, pages = {534-543}, doi = {10.1016/j.tim.2021.11.001}, pmid = {34848115}, issn = {1878-4380}, support = {BB/R006253/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R014884/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Bacteria/genetics ; *Biological Evolution ; *Gene Transfer, Horizontal ; Genome, Bacterial ; Plasmids/genetics ; }, abstract = {The 'plasmid paradox' arises because, although plasmids are common features of bacterial genomes, theoretically they should not exist: rates of conjugation were believed insufficient to allow plasmids to persist by infectious transmission, whereas the costs of plasmid maintenance meant that 
plasmids should be purged by negative selection regardless of whether they encoded beneficial accessory traits because these traits should eventually be captured by the chromosome, enabling the loss of the redundant plasmid. In the decade since the plasmid paradox was described, new data and theory show that a range of ecological and evolutionary mechanisms operate in bacterial populations and communities to explain the widespread distribution and stable maintenance of plasmids. We conclude, therefore, that multiple solutions to the plasmid paradox are now well understood. The current challenge for the field, however, is to better understand how these solutions operate in natural bacterial communities to explain and predict the distribution of plasmids and the dynamics of the horizontal gene transfer that they mediate in bacterial (pan)genomes.}, } @article {pmid34845943, year = {2022}, author = {Wang, Z and Zheng, X and Guo, G and Dong, Y and Xu, Z and Wei, X and Han, X and Liu, Y and Zhang, W}, title = {Combining pangenome analysis to identify potential cross-protective antigens against avian pathogenic Escherichia coli.}, journal = {Avian pathology : journal of the W.V.P.A}, volume = {51}, number = {1}, pages = {66-75}, doi = {10.1080/03079457.2021.2005240}, pmid = {34845943}, issn = {1465-3338}, mesh = {Animals ; Chickens ; Escherichia coli/genetics ; *Escherichia coli Infections/prevention & control/veterinary ; *Escherichia coli Vaccines ; *Poultry Diseases/prevention & control ; }, abstract = {RESEARCH HIGHLIGHTS: Pan-RV analysis was used for the first time in the discovery of APEC-protective proteins. A total of 53 potential protective proteins were screened out. Four proteins were verified as potential vaccine candidates using western blotting.}, } @article {pmid34840474, year = {2021}, author = {Saha, O and Islam, MR and Rahman, MS and Hoque, MN and Hossain, MA and Sultana, M}, title = {First report from Bangladesh on genetic diversity of multidrug-resistant 
Pasteurella multocida type B:2 in fowl cholera.}, journal = {Veterinary world}, volume = {14}, number = {9}, pages = {2527-2542}, pmid = {34840474}, issn = {0972-8988}, abstract = {BACKGROUND AND AIM: Fowl cholera (FC) caused by Pasteurella multocida is a highly contagious bacterial disease of global importance for poultry production. The severity and incidence of FC caused by P. multocida may vary considerably depending on several factors associated with the host (including species and age of infected birds), the environment, and the bacterial strain. This study aimed to investigate the genetic diversity of multidrug-resistant P. multocida strains isolated from FC outbreaks in laying hens from commercial farms of Bangladesh.

MATERIALS AND METHODS: We collected 57 samples of suspected FC, including 36 live and 21 dead laying hens. P. multocida isolates were characterized by biochemical and molecular-biological methods.

RESULTS: Twenty-two strains of P. multocida were isolated from these samples through phenotypic and genotypic characterization. The strains were grouped into two distinct random amplification of polymorphic DNA (RAPD) biotypes harboring a range of pathogenic genes; exbB, ompH, ptfA, nanB, sodC, and hgbA. In this study, 90.90% and 81.82% P. multocida strains were multidrug-resistant and biofilm formers, respectively. Whole-genome sequencing of the two representative RAPD phylotypes confirmed as P. multocida type B: L2:ST122, harboring a number of virulence factors-associated genes (VFGs), and antimicrobial resistance (AMR) genes (ARGs). In addition, pan-genome analysis revealed 90 unique genes in the genomes of P. multocida predicted to be associated with versatile metabolic functions, pathogenicity, virulence, and AMR.

CONCLUSION: This is first-ever report on the association of P. multocida genotype B: L2:ST122 and related VFGs and ARGs in the pathogenesis of FC in laying hens. This study also provides a genetic context for future researches on the evolutionary diversity of P. multocida strains and their host adaptation.}, } @article {pmid34839707, year = {2022}, author = {Hall, JPJ and Botelho, J and Cazares, A and Baltrus, DA}, title = {What makes a megaplasmid?}, journal = {Philosophical transactions of the Royal Society of London. Series B, Biological sciences}, volume = {377}, number = {1842}, pages = {20200472}, pmid = {34839707}, issn = {1471-2970}, support = {/WT_/Wellcome Trust/United Kingdom ; }, mesh = {*Plasmids/genetics ; }, abstract = {Naturally occurring plasmids come in different sizes. The smallest are less than a kilobase of DNA, while the largest can be over three orders of magnitude larger. Historically, research has tended to focus on smaller plasmids that are usually easier to isolate, manipulate and sequence, but with improved genome assemblies made possible by long-read sequencing, there is increased appreciation that very large plasmids---known as megaplasmids---are widespread, diverse, complex, and often encode key traits in the biology of their host microorganisms. Why are megaplasmids so big? What other features come with large plasmid size that could affect bacterial ecology and evolution? Are megaplasmids 'just' big plasmids, or do they have distinct characteristics? In this perspective, we reflect on the distribution, diversity, biology, and gene content of megaplasmids, providing an overview to these large, yet often overlooked, mobile genetic elements. 
This article is part of the theme issue 'The secret lives of microbial mobile genetic elements'.}, } @article {pmid34836555, year = {2021}, author = {Hwang, Y and Schulze-Makuch, D and Arens, FL and Saenz, JS and Adam, PS and Sager, C and Bornemann, TLV and Zhao, W and Zhang, Y and Airo, A and Schloter, M and Probst, AJ}, title = {Leave no stone unturned: individually adapted xerotolerant Thaumarchaeota sheltered below the boulders of the Atacama Desert hyperarid core.}, journal = {Microbiome}, volume = {9}, number = {1}, pages = {234}, pmid = {34836555}, issn = {2049-2618}, mesh = {Archaea/genetics ; Bacteria/genetics ; *Desert Climate ; *Microbiota ; Soil Microbiology ; }, abstract = {BACKGROUND: The hyperarid core of the Atacama Desert is an extremely harsh environment thought to be colonized by only a few heterotrophic bacterial species. Current concepts for understanding this extreme ecosystem are mainly based on the diversity of these few species, yet a substantial area of the Atacama Desert hyperarid topsoil is covered by expansive boulder accumulations, whose underlying microbiomes have not been investigated so far. With the hypothesis that these sheltered soils harbor uniquely adapted microbiomes, we compared metagenomes and geochemistry between soils below and beside boulders across three distantly located boulder accumulations in the Atacama Desert hyperarid core.

RESULTS: Genome-resolved metagenomics of eleven samples revealed substantially different microbial communities in soils below and beside boulders, despite the presence of shared species. Archaea were found in significantly higher relative abundance below the boulders across all samples within distances of up to 205 km. These key taxa belong to a novel genus of ammonia-oxidizing Thaumarchaeota, Candidatus Nitrosodeserticola. We resolved eight mid-to-high quality genomes of this genus and used comparative genomics to analyze its pangenome and site-specific adaptations. Ca. Nitrosodeserticola genomes contain genes for ammonia oxidation, the 3-hydroxypropionate/4-hydroxybutyrate carbon fixation pathway, and acetate utilization indicating a chemolithoautotrophic and mixotrophic lifestyle. They also possess the capacity for tolerating extreme environmental conditions as highlighted by the presence of genes against oxidative stress and DNA damage. Site-specific adaptations of the genomes included the presence of additional genes for heavy metal transporters, multiple types of ATP synthases, and divergent genes for aquaporins.

CONCLUSION: We provide the first genomic characterization of hyperarid soil microbiomes below the boulders in the Atacama Desert, and report abundant and highly adapted Thaumarchaeota with ammonia oxidation and carbon fixation potential. Ca. Nitrosodeserticola genomes provide the first metabolic and physiological insight into a thaumarchaeal lineage found in globally distributed terrestrial habitats characterized by various environmental stresses. We consequently expand not only the known genetic repertoire of Thaumarchaeota but also the diversity and microbiome functioning in hyperarid ecosystems. Video Abstract.}, } @article {pmid34835497, year = {2021}, author = {Pardo-Esté, C and Lorca, D and Castro-Severyn, J and Krüger, G and Alvarez-Thon, L and Zepeda, P and Sulbaran-Bracho, Y and Hidalgo, A and Tello, M and Molina, F and Molina, L and Remonsellez, F and Castro-Nallar, E and Saavedra, C}, title = {Genetic Characterization of Salmonella Infantis with Multiple Drug Resistance Profiles Isolated from a Poultry-Farm in Chile.}, journal = {Microorganisms}, volume = {9}, number = {11}, pages = {}, pmid = {34835497}, issn = {2076-2607}, support = {ANID-FONDECYT Regular 1210633//ANID (Agencia Nacional de Investigación y Desarrollo de Chile)/ ; ECOS-ANID 170023//ANID/ ; ANID 2021 Post-Doctoral FONDECYT 3210156//ANID/ ; FONDECYT N° 1191019//ANID/ ; UNAB Regular Grants DI-15-19/RG//UNAB/ ; }, abstract = {Salmonella comprises over 2500 serotypes and foodborne contamination associated with this pathogen remains an important health concern worldwide. During the last decade, a shift in serotype prevalence has occurred as traditionally less prevalent serotypes are increasing in frequency of infections, especially those related to poultry meat contamination. S. Infantis is one of the major emerging serotypes, and these strains commonly display antimicrobial resistance and can persist despite cleaning protocols. Thus, this work aimed to isolate S. 
Infantis strains from a poultry meat farm in Santiago, Chile and to characterize genetic variations present in them. We determined their genomic and phenotypic profiles at different points along the production line. The results indicate that the strains encompass 853 polymorphic sites (core-SNPs) with isolates differing from one another by 0-347 core SNPs, suggesting variation among them; however, we found discrete correlations with the source of the sample in the production line. Furthermore, the pan-genome was composed of 4854 total gene clusters of which 2618 (53.9%) corresponds to the core-genome and only 181 (3.7%) are unique genes (those present in one particular strain). This preliminary analysis will enrich the surveillance of Salmonella, yet further studies are required to assess their evolution and phylogeny.}, } @article {pmid34829120, year = {2021}, author = {Kim, E and Yang, SM and Won, JE and Kim, DY and Kim, DS and Kim, HY}, title = {Real-Time PCR Method for the Rapid Detection and Quantification of Pathogenic Staphylococcus Species Based on Novel Molecular Target Genes.}, journal = {Foods (Basel, Switzerland)}, volume = {10}, number = {11}, pages = {}, pmid = {34829120}, issn = {2304-8158}, support = {2020R1A6A3A01100168//Ministry of Education/ ; }, abstract = {Coagulase-positive Staphylococcus aureus is a foodborne pathogen considered one of the causes of food-related disease outbreaks. Like S. aureus, Staphylococcus capitis, Staphylococcus caprae, and S. epidermidis are opportunistic pathogens causing clinical infections and food contamination. The objective of our study was to develop a rapid, accurate, and monitoring technique to detect four Staphylococcus species in food. Four novel molecular targets (GntR family transcriptional regulator for S. aureus, phosphomannomutase for S. epidermidis, FAD-dependent urate hydroxylase for S. capitis, and Gram-positive signal peptide protein for S. caprae) were mined based on pan-genome analysis. 
Primers targeting molecular target genes showed 100% specificity for 100 non-target reference strains. The detection limit in pure cultures and artificially contaminated food samples was $10^{2}$ colony-forming unit/mL for S. aureus, S. capitis, S. caprae, and S. epidermidis. Moreover, real-time polymerase chain reaction successfully detected strains isolated from various food matrices. Thus, our method allows an accurate and rapid monitoring of Staphylococcus species and may help control staphylococcal contamination of food.}, } @article {pmid34828313, year = {2021}, author = {Peng, S and Petersen, JL and Bellone, RR and Kalbfleisch, T and Kingsley, NB and Barber, AM and Cappelletti, E and Giulotto, E and Finno, CJ}, title = {Decoding the Equine Genome: Lessons from ENCODE.}, journal = {Genes}, volume = {12}, number = {11}, pages = {}, pmid = {34828313}, issn = {2073-4425}, mesh = {Animals ; Gene Expression Profiling/*veterinary ; Genome ; Genomics/*methods ; Horses/*genetics ; Molecular Sequence Annotation ; }, abstract = {The horse reference genome assemblies, EquCab2.0 and EquCab3.0, have enabled great advancements in the equine genomics field, from tools to novel discoveries. However, significant gaps of knowledge regarding genome function remain, hindering the study of complex traits in horses. In an effort to address these gaps and with inspiration from the Encyclopedia of DNA Elements (ENCODE) project, the equine Functional Annotation of Animal Genome (FAANG) initiative was proposed to bridge the gap between genome and gene expression, providing further insights into functional regulation within the horse genome. Three years after launching the initiative, the equine FAANG group has generated data from more than 400 experiments using over 50 tissues, targeting a variety of regulatory features of the equine genome. In this review, we examine how valuable lessons learned from the ENCODE project informed our decisions in the equine FAANG project. 
We report the current state of the equine FAANG project and discuss how FAANG can serve as a template for future expansion of functional annotation in the equine genome and be used as a reference for studies of complex traits in horse. A well-annotated reference functional atlas will also help advance equine genetics in the pan-genome and precision medicine era.}, } @article {pmid34819642, year = {2021}, author = {Barber, AE and Sae-Ong, T and Kang, K and Seelbinder, B and Li, J and Walther, G and Panagiotou, G and Kurzai, O}, title = {Aspergillus fumigatus pan-genome analysis identifies genetic variants associated with human infection.}, journal = {Nature microbiology}, volume = {6}, number = {12}, pages = {1526-1536}, pmid = {34819642}, issn = {2058-5276}, mesh = {Aspergillosis/*microbiology ; Aspergillus fumigatus/*genetics/isolation & purification/physiology ; Fungal Proteins/genetics ; Genetic Variation ; *Genome, Fungal ; Genome-Wide Association Study ; Humans ; Virulence Factors/genetics ; }, abstract = {Aspergillus fumigatus is an environmental saprobe and opportunistic human fungal pathogen. Despite an estimated annual occurrence of more than 300,000 cases of invasive disease worldwide, a comprehensive survey of the genomic diversity present in A. fumigatus-including the relationship between clinical and environmental isolates and how this genetic diversity contributes to virulence and antifungal drug resistance-has been lacking. In this study we define the pan-genome of A. fumigatus using a collection of 300 globally sampled genomes (83 clinical and 217 environmental isolates). We found that 7,563 of the 10,907 unique orthogroups (69%) are core and present in all isolates and the remaining 3,344 show presence/absence of variation, representing 16-22% of the genome of each isolate. 
Using this large genomic dataset of environmental and clinical samples, we found an enrichment for clinical isolates in a genetic cluster whose genomes also contain more accessory genes, including genes coding for transmembrane transporters and proteins with iron-binding activity, and genes involved in both carbohydrate and amino-acid metabolism. Finally, we leverage the power of genome-wide association studies to identify genomic variation associated with clinical isolates and triazole resistance as well as characterize genetic variation in known virulence factors. This characterization of the genomic diversity of A. fumigatus allows us to move away from a single reference genome that does not necessarily represent the species as a whole and better understand its pathogenic versatility, ultimately leading to better management of these infections.}, } @article {pmid34817761, year = {2022}, author = {Almeida, OGG and Gimenez, MP and De Martinis, ECP}, title = {Comparative pangenomic analyses and biotechnological potential of cocoa-related Acetobacter senegalensis strains.}, journal = {Antonie van Leeuwenhoek}, volume = {115}, number = {1}, pages = {111-123}, pmid = {34817761}, issn = {1572-9699}, support = {2018/13564-3//Fundação de Amparo à Pesquisa do Estado de São Paulo/ ; 18/13564-3//Fundação de Amparo à Pesquisa do Estado de São Paulo/ ; 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; 306330/2019-9//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; }, mesh = {Acetic Acid ; *Acetobacter/genetics ; Biotechnology ; *Cacao ; }, abstract = {Acetobacter senegalensis belongs to the group of acetic acid bacteria (AAB) that present potential biotechnological applications, for production of D-gluconate, cellulose and acetic acid. AAB can overcome heat and acid stresses by using strategies involving the overexpression of heat-shock proteins and enzymes from the complex pyrroquinoline-ADH, besides alcohol dehydrogenases (ADH). 
Nonetheless, the isolation of A. senegalensis and other AAB from food may be challenging due to presence of viable but non-culturable (VBNC) cells and due to uncertainties about nutritional requirements. To contribute for a better understanding of the ecology of AAB, this paper reports on the pangenome analysis of five strains of A. senegalensis recently isolated from a Brazilian spontaneous cocoa fermentation. The results showed biosynthetic clusters exclusively found in some cocoa-related AAB, such as those related to terpene pathways, which are important for flavour development. Genes related to oxidative stress were conserved in all the genomes, with multiple clusters. Moreover, there were genes coding for ADH and putative ABC transporters distributed in core, shell and cloud genomes, while chaperonin-encoding genes were present only in the core and soft-core genomes. Regarding quorum sensing, a response regulator gene was in the shell genome, and the gene encoding for acyl-homoserine lactone efflux protein was in the soft-core genome. There were quorum quenching-related genes, mainly encoding for lactonases, but also for acylases. Moreover, A. 
senegalensis did not have determinants of virulence or antibiotic resistance, which are good traits for strains intended to be applied in food fermentation.}, } @article {pmid34817285, year = {2021}, author = {Xiao, Y and Jiang, R and Wu, X and Zhong, Q and Li, Y and Wang, H}, title = {Comparative Genomic Analysis of Stenotrophomonas maltophilia Strain W18 Reveals Its Adaptative Genomic Features for Degrading Polycyclic Aromatic Hydrocarbons.}, journal = {Microbiology spectrum}, volume = {9}, number = {3}, pages = {e0142021}, pmid = {34817285}, issn = {2165-0497}, mesh = {Adaptation, Physiological/genetics ; Alcohol Dehydrogenase/genetics ; *Biodegradation, Environmental ; Environmental Pollutants/metabolism ; Genome, Bacterial/*genetics ; Genomics ; Phylogeny ; Polycyclic Aromatic Hydrocarbons/*metabolism ; Stenotrophomonas maltophilia/*genetics/*metabolism ; }, abstract = {Polycyclic aromatic hydrocarbons (PAHs) are hazardous pollutants that are ubiquitous in the environment. Numerous bacteria have evolved to have degrading genes or pathways to degrade PAHs. Stenotrophomonas maltophilia strain W18 was found to be able to degrade PAHs. Including 43 other complete genome sequences of S. maltophilia strains, we performed a comparative genomic analysis of 44 S. maltophilia strains by running OrthoFinder. A KEGG pathway enrichment analysis of environmental and clinical isolates of S. maltophilia revealed that environmental isolates tended to enhance gene functions such as "energy metabolism," "amino acid metabolism," "xenobiotic biodegradation and metabolism," and "folding, sorting, and degradation." The pangenome of the 44 S. maltophilia strains was open, while the core genome was estimated to reach a steady plateau. Based on gene annotations, we inferred that most of the degradation potential came from the core genome of S. maltophilia, while character genes and accessory genes also contributed to the degradation ability of S. maltophilia W18. 
The genes expression level of core genes, character genes and accessory genes were proved by RT-qPCR experiment, and accessory genes encoding alcohol dehydrogenase were upregulated most compared with genes with similar functions. We performed a credible comparative genomic analysis of S. maltophilia strains. S. maltophilia W18 was set as a model PAH-degrading bacterium of this species in this study, which would provide guidance for understanding and predicting the degradation mechanisms of other PAH-degrading S. maltophilia strains lacking complete genome data or waiting to be determined. IMPORTANCE This study provided the latest comparative genomic analysis on Stenotrophomonas maltophilia strains and focused on analyzing their genomic features that allow them to adapt to natural environments. In this study, we set S. maltophilia W18 as a typical PAH-degrading strain of this species. By discussing the genomic adaptative features of degrading PAH, we can predict genomic adaptative features of other S. maltophilia PAH-degrading strains since the core function of this species is stable. The gene functions of how S. maltophilia environmental isolates are enhanced for adaptation to various natural environments compared with clinical isolates have been revealed. Combined with a pangenome analysis and RT-qPCR results, we have proved that core genes, character genes, and accessory genes are all involved in PAH degradation. 
Accessory genes encoding alcohol dehydrogenase were upregulated most compared with core and character genes with similar functions, which suggests that PAH metabolization potential might be enhanced by horizontal gene transfer.}, } @article {pmid34815206, year = {2021}, author = {Bian, PP and Zhang, Y and Jiang, Y}, title = {Pan-genome: setting a new standard for high-quality reference genomes.}, journal = {Yi chuan = Hereditas}, volume = {43}, number = {11}, pages = {1023-1037}, doi = {10.16288/j.yczz.21-214}, pmid = {34815206}, issn = {0253-9772}, mesh = {*Genome/genetics ; *Genomics ; }, abstract = {With the release of high-quality reference genomes assembled by long reads from the third-generation sequencing technology, as well as extensive re-sequencing and population genetic analysis, researchers found that a single reference genome does not represent the diversity within a species. The missing sequences on the reference genome result in an incomplete population genetic polymorphism map. The emergence of pan-genome can well repair the deficiency of single reference genome, which include core genome (responsible for basic biological functions and the main phenotypic characteristics within a species) and the variable genome (related to the genetic diversity or biological characteristics). According to the core and variable genome proportion, the types of pan-genomes can be either open or closed. Here, we review the current exploring of pan-genome for a range of species, to discuss the characteristics of pan-genome in various biological groups. The pan-genome of mammals are more likely closed, while the pan-genomes of microbes, angiosperms, and some invertebrates are likely non-closed. 
It is possible to complete the reference genome and obtain complete variation information through the pan-genomic study, which will contribute to the study of molecular mechanism for genetic diversity and phenotypic evolution.}, } @article {pmid34803983, year = {2021}, author = {Cobian, N and Garlet, A and Hidalgo-Cantabrana, C and Barrangou, R}, title = {Comparative Genomic Analyses and CRISPR-Cas Characterization of Cutibacterium acnes Provide Insights Into Genetic Diversity and Typing Applications.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {758749}, pmid = {34803983}, issn = {1664-302X}, abstract = {Cutibacterium acnes is an important member of the human skin microbiome and plays a critical role in skin health and disease. C. acnes encompasses different phylotypes that have been found to be associated with different skin phenotypes, suggesting a genetic basis for their impact on skin health. Here, we present a comprehensive comparative analysis of 255 C. acnes genomes to provide insights into the species genetic diversity and identify unique features that define various phylotypes. Results revealed a relatively small and open pan genome (6,240 genes) with a large core genome (1,194 genes), and three distinct phylogenetic clades, with multiple robust sub-clades. Furthermore, we identified several unique gene families driving differences between distinct C. acnes clades. Carbohydrate transporters, stress response mechanisms and potential virulence factors, potentially involved in competitive growth and host colonization, were detected in type I strains, which are presumably responsible for acne. Diverse type I-E CRISPR-Cas systems and prophage sequences were detected in select clades, providing insights into strain divergence and adaptive differentiation. Collectively, these results enable to elucidate the fundamental differences among C. 
acnes phylotypes, characterize genetic elements that potentially contribute to type I-associated dominance and disease, and other key factors that drive the differentiation among clades and sub-clades. These results enable the use of comparative genomics analyses as a robust method to differentiate among the C. acnes genotypes present in the skin microbiome, opening new avenues for the development of biotherapeutics to manipulate the skin microbiota.}, } @article {pmid34803963, year = {2021}, author = {Jiang, ZM and Zhang, BH and Sun, HM and Zhang, T and Yu, LY and Zhang, YQ}, title = {Properties of Modestobacter deserti sp. nov., a Kind of Novel Phosphate-Solubilizing Actinobacteria Inhabited in the Desert Biological Soil Crusts.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {742798}, pmid = {34803963}, issn = {1664-302X}, abstract = {Three Gram-stain-positive, aerobic, motile actinobacterial strains designated as CPCC 205119[T], CPCC 205215, and CPCC 205251 were isolated from different biological soil crust samples collected from Tengger Desert, China. The 16S rRNA gene sequence comparison of these three strains showed they had almost identical 16S rRNA genes, which were closely related to members of the family Geodermatophilaceae, with the highest similarities of 96.3-97.3% to the species of Modestobacter. In the phylogenetic tree based on 16S rRNA gene sequences, these isolates clustered into a subclade next to the branch containing the species of Modestobacter lapidis and Modestobacter multiseptatus, within the lineage of the genus Modestobacter. The comparative genomic characteristics (values of ANI, dDDH, AAI, and POCP) and the phenotypic properties (morphological, physiological, and chemotaxonomic characteristics) of these isolates readily supported to affiliate them to the genus Modestobacter as a single separate species. 
For which, we proposed that the isolates CPCC 205119[T], CPCC 205215, and CPCC 205251 represent a novel species of the genus Modestobacter as Modestobacter deserti sp. nov. CPCC 205119[T] (=I12A-02624=NBRC 113528[T]=KCTC 49201[T]) is the type strain. The genome of strain CPCC 205119[T] consisted of one chromosome (4,843,235bp) containing 4,424 coding genes, 48 tRNA genes, five rRNA genes, three other ncRNA genes, and 101 pseudogenes, with G+C content of 74.7%. The whole-genome sequences analysis indicated that this species contained alkaline phosphatase genes (phoA/phoD), phosphate transport-related genes (phoU, phnC, phnD, phnE, phoB, phoH, phoP, phoR, pitH, ppk, pstA, pstB, pstC, and pstS), trehalose-phosphate synthase gene (otsA), trehalose 6-phosphate phosphatase gene (otsB) and other encoding genes for the properties that help the microorganisms to adapt to harsh environmental conditions prevalent in deserts. Strains of this species could solubilize tricalcium phosphate [Ca3(PO4)2] and phytin, assimilate pyrophosphate, thiophosphate, dithiophosphate, phosphoenol pyruvate, 2-deoxy-d-glucose-6-phosphate, and cysteamine-S-phosphate.}, } @article {pmid34801677, year = {2022}, author = {Pan, Y and Zeng, Z and Niu, H and Huang, L and Hu, J and Li, G and Li, Y}, title = {Whole-genome epidemiology and characterisation of mcr-1-encoding Escherichia coli in aquatic bird farms from the Pearl River Delta, China, 2019-2020.}, journal = {International journal of antimicrobial agents}, volume = {59}, number = {1}, pages = {106478}, doi = {10.1016/j.ijantimicag.2021.106478}, pmid = {34801677}, issn = {1872-7913}, mesh = {Animals ; Aquatic Organisms/microbiology ; Birds/*microbiology ; China/epidemiology ; Colistin/*analysis ; Drug Resistance, Multiple, Bacterial/*genetics ; Escherichia coli/*genetics/*isolation & purification ; Escherichia coli Proteins/*genetics ; *Farms ; Feces/microbiology ; Genetic Variation ; Genome-Wide Association Study ; Genotype ; }, abstract = {Due 
to their unique breeding pattern, aquatic bird farms are increasingly considered as hotspots in the development and spread of antimicrobial resistance. However, comprehensive studies addressing the whole-genomic features of colistin-resistant bacteria in aquatic bird farms are scarce. Over a 2-year period, we conducted surveillance to determine the whole-genome epidemiology and characterisation of mcr-1-positive Escherichia coli in aquatic bird farms in southeastern coastal China. A total of 100 mcr-1-producing isolates among 654 E. coli strains were recovered from 781 samples collected in 11 aquatic bird farms and 1 veterinary clinic in the Pearl River Delta area. Higher resistance phenotypes to 17 antibiotics were found in mcr-1-positive isolates compared with other isolates. Subsequently, 20 mcr-1-carrying isolates were sequenced to analyse the whole-genomic features. Molecular typing as well as antimicrobial resistance gene and virulence factor profiles of the isolates showed considerable diversity. Three types of genetic backbones of mcr-1 in the isolates were assembled and were identified in diverse broad-host-range plasmids and bacterial species. Pangenome analyses revealed a large genetic pool composed of the isolates. Furthermore, phylogenetic trees both of the isolates in this study and a global data set were built, indicating the spread of the three mcr-1 backbones and the mcr-1-positive isolates among different habitats, farms and even countries. This study highlights that aquatic bird farms may act as an important reservoir for mcr-1-producing E. 
coli, from which colistin resistance may be spread to diverse habitats, different geographical locations and even across bacterial species.}, } @article {pmid34796561, year = {2021}, author = {Ferrer, L and García-Fonticoba, R and Pérez, D and Viñes, J and Fàbregas, N and Madroñero, S and Meroni, G and Martino, PA and Martínez, S and Maté, ML and Sánchez-Bruni, S and Cuscó, A and Migura-García, L and Francino, O}, title = {Whole genome sequencing and de novo assembly of Staphylococcus pseudintermedius: a pangenome approach to unravelling pathogenesis of canine pyoderma.}, journal = {Veterinary dermatology}, volume = {32}, number = {6}, pages = {654--663}, doi = {10.1111/vde.13040}, pmid = {34796561}, issn = {1365-3164}, support = {RTI2018-101991-B-100//Ministerio de Ciencia e Innovación, Spain/ ; 2017DI037//Agència de Gestió d'Ajuts Universitaris i de Recerca/ ; Torres Quevedo Project PTQ2018-009961//Ministerio de Ciencia e Innovación, Spain/ ; }, mesh = {Animals ; *Dog Diseases ; Dogs ; *Pyoderma/veterinary ; Staphylococcus/genetics ; Whole Genome Sequencing/veterinary ; }, abstract = {BACKGROUND: Staphylococcus pseudintermedius is the main aetiological agent of canine pyoderma. Whole genome sequencing is the most comprehensive way of obtaining relevant genomic information about micro-organisms.

HYPOTHESIS/OBJECTIVES: Oxford Nanopore technology enables quality sequencing and de novo assembly of the whole genome of S. pseudintermedius. Whole genome analysis of S. pseudintermedius may help to better understand the pathogenesis of canine pyodermas.

METHODS AND MATERIALS: Twenty-two strains of S. pseudintermedius isolated from the skin of five healthy dogs and 33 strains isolated from skin of 33 dogs with pyoderma were analysed. DNA was extracted and sequenced using Oxford Nanopore MinION, a new technology that delivers longer reads in a hand-held device. The pangenome was analysed and visualised with Anvi'o 6.1.

RESULTS: Nanopore technology allowed the sequencing and de novo assembly of the genomes of 55 S. pseudintermedius strains isolated from healthy dogs and from dogs with pyoderma. The average genome size of S. pseudintermedius was 2.62 Mbp, with 48% being core genome. Pyoderma isolates contained a higher number of antimicrobial resistance genes, yet the total number of virulence factors genes did not change between isolates from healthy dogs and from dogs with pyoderma. Genomes of meticillin-resistant S. pseudintermedius (MRSP) strains were larger than those of meticillin-susceptible (MSSP) strains (2.80 Mbp versus 2.59 Mbp), as a consequence of a greater presence of antimicrobial resistance genes, phages and prophages.

This technique allows much more precise and easier characterisation of canine S. pseudintermedius populations and may lead to a better understanding of the pathogenesis of canine pyodermas.}, } @article {pmid34792602, year = {2022}, author = {Watson, AK and Lopez, P and Bapteste, E}, title = {Hundreds of Out-of-Frame Remodeled Gene Families in the Escherichia coli Pangenome.}, journal = {Molecular biology and evolution}, volume = {39}, number = {1}, pages = {}, pmid = {34792602}, issn = {1537-1719}, mesh = {*Escherichia coli/genetics ; *Evolution, Molecular ; Open Reading Frames ; Phylogeny ; Reading Frames ; }, abstract = {All genomes include gene families with very limited taxonomic distributions that potentially represent new genes and innovations in protein-coding sequence, raising questions on the origins of such genes. Some of these genes are hypothesized to have formed de novo, from noncoding sequences, and recent work has begun to elucidate the processes by which de novo gene formation can occur. A special case of de novo gene formation, overprinting, describes the origin of new genes from noncoding alternative reading frames of existing open reading frames (ORFs). We argue that additionally, out-of-frame gene fission/fusion events of alternative reading frames of ORFs and out-of-frame lateral gene transfers could contribute to the origin of new gene families. To demonstrate this, we developed an original pattern-search in sequence similarity networks, enhancing the use of these graphs, commonly used to detect in-frame remodeled genes. We applied this approach to gene families in 524 complete genomes of Escherichia coli. We identified 767 gene families whose evolutionary history likely included at least one out-of-frame remodeling event. These genes with out-of-frame components represent ∼2.5% of all genes in the E. 
coli pangenome, suggesting that alternative reading frames of existing ORFs can contribute to a significant proportion of de novo genes in bacteria.}, } @article {pmid34792466, year = {2021}, author = {Pacheco-Moreno, A and Stefanato, FL and Ford, JJ and Trippel, C and Uszkoreit, S and Ferrafiat, L and Grenga, L and Dickens, R and Kelly, N and Kingdon, AD and Ambrosetti, L and Nepogodiev, SA and Findlay, KC and Cheema, J and Trick, M and Chandra, G and Tomalin, G and Malone, JG and Truman, AW}, title = {Pan-genome analysis identifies intersecting roles for Pseudomonas specialized metabolites in potato pathogen inhibition.}, journal = {eLife}, volume = {10}, number = {}, pages = {}, pmid = {34792466}, issn = {2050-084X}, support = {BB/J004553/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/J/000PR9790/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/J004596/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/J/000PR9797/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/M011216/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Hydrogen Cyanide/metabolism ; Lipopeptides/metabolism ; Peptides, Cyclic/metabolism ; Phytophthora infestans/*physiology ; Plant Diseases/*microbiology ; Pseudomonas fluorescens/*genetics/metabolism ; Solanum tuberosum/*microbiology ; Streptomyces/*physiology ; }, abstract = {Agricultural soil harbors a diverse microbiome that can form beneficial relationships with plants, including the inhibition of plant pathogens. Pseudomonas spp. are one of the most abundant bacterial genera in the soil and rhizosphere and play important roles in promoting plant health. However, the genetic determinants of this beneficial activity are only partially understood. 
Here, we genetically and phenotypically characterize the Pseudomonas fluorescens population in a commercial potato field, where we identify strong correlations between specialized metabolite biosynthesis and antagonism of the potato pathogens Streptomyces scabies and Phytophthora infestans. Genetic and chemical analyses identified hydrogen cyanide and cyclic lipopeptides as key specialized metabolites associated with S. scabies inhibition, which was supported by in planta biocontrol experiments. We show that a single potato field contains a hugely diverse and dynamic population of Pseudomonas bacteria, whose capacity to produce specialized metabolites is shaped both by plant colonization and defined environmental inputs.}, } @article {pmid34791415, year = {2022}, author = {Yates, AD and Allen, J and Amode, RM and Azov, AG and Barba, M and Becerra, A and Bhai, J and Campbell, LI and Carbajo Martinez, M and Chakiachvili, M and Chougule, K and Christensen, M and Contreras-Moreira, B and Cuzick, A and Da Rin Fioretto, L and Davis, P and De Silva, NH and Diamantakis, S and Dyer, S and Elser, J and Filippi, CV and Gall, A and Grigoriadis, D and Guijarro-Clarke, C and Gupta, P and Hammond-Kosack, KE and Howe, KL and Jaiswal, P and Kaikala, V and Kumar, V and Kumari, S and Langridge, N and Le, T and Luypaert, M and Maslen, GL and Maurel, T and Moore, B and Muffato, M and Mushtaq, A and Naamati, G and Naithani, S and Olson, A and Parker, A and Paulini, M and Pedro, H and Perry, E and Preece, J and Quinton-Tulloch, M and Rodgers, F and Rosello, M and Ruffier, M and Seager, J and Sitnik, V and Szpak, M and Tate, J and Tello-Ruiz, MK and Trevanion, SJ and Urban, M and Ware, D and Wei, S and Williams, G and Winterbottom, A and Zarowiecki, M and Finn, RD and Flicek, P}, title = {Ensembl Genomes 2022: an expanding genome resource for non-vertebrates.}, journal = {Nucleic acids research}, volume = {50}, number = {D1}, pages = {D996--D1003}, pmid = {34791415}, issn = {1362-4962}, 
support = {75N93019C00077/AI/NIAID NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; U24 HG002223/HG/NHGRI NIH HHS/United States ; 108749/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; MR/S000453/1/MRC_/Medical Research Council/United Kingdom ; 201535/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; BB/P024602/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/S02011X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/M028372/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P027849/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/S020020/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/T015691/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 222155/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Computational Biology ; *Databases, Genetic ; Genome, Bacterial/genetics ; Genome, Fungal/genetics ; Genome, Plant/genetics ; *Genomics ; *Internet ; Plants/classification/genetics ; *Software ; Vertebrates/classification/genetics ; }, abstract = {Ensembl Genomes (https://www.ensemblgenomes.org) provides access to non-vertebrate genomes and analysis complementing vertebrate resources developed by the Ensembl project (https://www.ensembl.org). The two resources collectively present genome annotation through a consistent set of interfaces spanning the tree of life presenting genome sequence, annotation, variation, transcriptomic data and comparative analysis. Here, we present our largest increase in plant, metazoan and fungal genomes since the project's inception creating one of the world's most comprehensive genomic resources and describe our efforts to reduce genome redundancy in our Bacteria portal. 
We detail our new efforts in gene annotation, our emerging support for pangenome analysis, our efforts to accelerate data dissemination through the Ensembl Rapid Release resource and our new AlphaFold visualization. Finally, we present details of our future plans including updates on our integration with Ensembl, and how we plan to improve our support for the microbial research community. Software and data are made available without restriction via our website, online tools platform and programmatic interfaces (available under an Apache 2.0 license). Data updates are synchronised with Ensembl's release cycle.}, } @article {pmid34783230, year = {2021}, author = {Monsu, M and Comin, M}, title = {Fast alignment of reads to a variation graph with application to SNP detection.}, journal = {Journal of integrative bioinformatics}, volume = {18}, number = {4}, pages = {}, pmid = {34783230}, issn = {1613-4516}, mesh = {*Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; *Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; Software ; }, abstract = {Sequencing technologies has provided the basis of most modern genome sequencing studies due to its high base-level accuracy and relatively low cost. One of the most demanding step is mapping reads to the human reference genome. The reliance on a single reference human genome could introduce substantial biases in downstream analyses. Pangenomic graph reference representations offer an attractive approach for storing genetic variations. Moreover, it is possible to include known variants in the reference in order to make read mapping, variant calling, and genotyping variant-aware. Only recently a framework for variation graphs, vg [Garrison E, Adam MN, Siren J, et al. Variation graph toolkit improves read mapping by representing genetic variation in the reference. Nat Biotechnol 2018;36:875-9], have improved variation-aware alignment and variant calling in general. 
The major bottleneck of vg is its high cost of reads mapping to a variation graph. In this paper we study the problem of SNP calling on a variation graph and we present a fast reads alignment tool, named VG SNP-Aware. VG SNP-Aware is able align reads exactly to a variation graph and detect SNPs based on these aligned reads. The results show that VG SNP-Aware can efficiently map reads to a variation graph with a speedup of 40× with respect to vg and similar accuracy on SNPs detection.}, } @article {pmid34782773, year = {2021}, author = {Lu, Y and Wang, J and Chen, B and Mo, S and Lian, L and Luo, Y and Ding, D and Ding, Y and Cao, Q and Li, Y and Li, Y and Liu, G and Hou, Q and Cheng, T and Wei, J and Zhang, Y and Chen, G and Song, C and Hu, Q and Sun, S and Fan, G and Wang, Y and Liu, Z and Song, B and Zhu, JK and Li, H and Jiang, L}, title = {A donor-DNA-free CRISPR/Cas-based approach to gene knock-up in rice.}, journal = {Nature plants}, volume = {7}, number = {11}, pages = {1445--1452}, pmid = {34782773}, issn = {2055-0278}, mesh = {*CRISPR-Cas Systems ; DNA ; *Gene Editing ; Genes, Plant ; *Oryza/genetics ; Plant Breeding ; Promoter Regions, Genetic ; Ubiquitin/genetics ; }, abstract = {Structural variations (SVs), such as inversion and duplication, contribute to important agronomic traits in crops[1]. Pan-genome studies revealed that SVs were a crucial and ubiquitous force driving genetic diversification[2-4]. Although genome editing can effectively create SVs in plants and animals[5-8], the potential of designed SVs in breeding has been overlooked. Here, we show that new genes and traits can be created in rice by designed large-scale genomic inversion or duplication using CRISPR/Cas9. A 911 kb inversion on chromosome 1 resulted in a designed promoter swap between CP12 and PPO1, and a 338 kb duplication between HPPD and Ubiquitin2 on chromosome 2 created a novel gene cassette at the joint, promoterUbiquitin2::HPPD. 
Since the original CP12 and Ubiquitin2 genes were highly expressed in leaves, the expression of PPO1 and HPPD in edited plants with homozygous SV alleles was increased by tens of folds and conferred sufficient herbicide resistance in field trials without adverse effects on other important agronomic traits. CRISPR/Cas-based genome editing for gene knock-ups has been generally considered very difficult without inserting donor DNA as regulatory elements. Our study challenges this notion by providing a donor-DNA-free strategy, thus greatly expanding the utility of CRISPR/Cas in plant and animal improvements.}, } @article {pmid34782248, year = {2022}, author = {Sun, Y and Shang, L and Zhu, QH and Fan, L and Guo, L}, title = {Twenty years of plant genome sequencing: achievements and challenges.}, journal = {Trends in plant science}, volume = {27}, number = {4}, pages = {391--401}, doi = {10.1016/j.tplants.2021.10.006}, pmid = {34782248}, issn = {1878-4372}, mesh = {Crops, Agricultural/genetics ; *Genome, Plant/genetics ; High-Throughput Nucleotide Sequencing ; *Plant Breeding ; Sequence Analysis, DNA ; }, abstract = {Publication of the complete genome sequence of Arabidopsis thaliana, the first plant reference genome, in December 2000 heralded the beginning of the plant genome era. Over the past 20 years reference genomes have been generated for hundreds of plant species, spanning non-vascular to flowering plants. Releasing these plant genomes has dramatically advanced studies in all disciplines of plant biology. Importantly, multiple reference-level genomes have been generated for the major crops and their progenitors, enabling the creation of pan-genomes and exploration of domestication history and natural variations that can be adopted by modern crop breeding. 
We summarize the progress of plant genome sequencing and the challenges of sequencing more complex plant genomes and generating pan-genomes.}, } @article {pmid34780935, year = {2022}, author = {Bach, E and Sant'Anna, FH and Seger, GDDS and Passaglia, LMP}, title = {Pangenome inventory of Burkholderia sensu lato, Burkholderia sensu stricto, and the Burkholderia cepacia complex reveals the uniqueness of Burkholderia catarinensis.}, journal = {Genomics}, volume = {114}, number = {1}, pages = {398--408}, doi = {10.1016/j.ygeno.2021.11.011}, pmid = {34780935}, issn = {1089-8646}, mesh = {*Burkholderia/genetics ; *Burkholderia cepacia complex/genetics/metabolism ; }, abstract = {Here the pangenome analysis of Burkholderia sensu lato (s.l.) was performed for the first time, together with an updated analysis of the pangenome of Burkholderia sensu stricto, and Burkholderia cepacia complex (Bcc) focusing on the Bcc B. catarinensis specific features of its re-sequenced genome. The pangenome of Burkholderia s.l., Burkholderia s.s., and of the Bcc was open, composed of more than 96% of accessory genes, and more than 62% of unknown genes. Functional annotations showed that secondary metabolism genes belonged to the variable portion of genomes, which might explain their production of several compounds with varied bioactivities. Taken together, this work showed the great variability and uniqueness of these genomes and revealed an underexplored unknown potential in poorly characterized genes. Regarding B. catarinensis 89[T], its genome harbors genes related to hydrolases production and plant growth promotion. 
This draft genome will be valuable for further investigation of its biotechnological potentials.}, } @article {pmid34777930, year = {2021}, author = {Jha, V and Purohit, H and Dafale, NA}, title = {Revealing the potential of Klebsiella pneumoniae PVN-1 for plant beneficial attributes by genome sequencing and analysis.}, journal = {3 Biotech}, volume = {11}, number = {11}, pages = {473}, pmid = {34777930}, issn = {2190-572X}, abstract = {UNLABELLED: Genome sequencing of Klebsiella pneumoniae PVN-1, isolated from effluent treatment plant (ETP), generates a 5.064 Mb draft genome with 57.6% GC content. The draft genome assembled into 19 contigs comprises 4783 proteins, 3 rRNA, 44 tRNA, 8 other RNA, 4911 genes, and 73 pseudogenes. Genome information revealed the presence of phosphate metabolism/solubilizing, potassium solubilizing, auxin production, and other plant benefiting attributes like enterobactin and pyrroloquinoline quinone biosynthesis genes. Presence of gcd and pqq genes in K. pneumoniae PVN-1 genome validates the inorganic phosphate solubilizing potential (528.5 mg/L). Pangenome analysis identified a unique 5'-Nucleotidase that further assists in enhanced phosphate acquisition. Additionally, the genetic potential for complete benzoate, catechol, and phenylacetate degradation with stress response and heavy metal (Cu, Zn, Ni, Co) resistance was identified in K. pneumoniae PVN-1. Functioning of annotated plant benefiting genes validates by the metabolic activity of auxin production (7.40 µg/mL), nitrogen fixation, catalase activity, potassium solubilization (solubilization index-3.47), and protease activity (proteolytic index-2.27). In conclusion, the K. pneumoniae PVN-1 genome has numerous beneficial qualities that can be employed to enhance plant growth as well as for phytoremediation.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-021-03020-2.}, } @article {pmid34768579, year = {2021}, author = {Lintas, C and Sacco, R and Azzarà, A and Cassano, I and Gurrieri, F}, title = {Genotype-Phenotype Correlations in Relation to Newly Emerging Monogenic Forms of Autism Spectrum Disorder and Associated Neurodevelopmental Disorders: The Importance of Phenotype Reevaluation after Pangenomic Results.}, journal = {Journal of clinical medicine}, volume = {10}, number = {21}, pages = {}, pmid = {34768579}, issn = {2077-0383}, abstract = {ASD genetic diagnosis has dramatically improved due to NGS technologies, and many new causative genes have been discovered. Consequently, new ASD phenotypes have emerged. An extensive exome sequencing study carried out by the Autism Sequencing Consortium (ASC) was published in February 2020. The study identified 102 genes which are de novo mutated in subjects affected by autism spectrum disorder (ASD) or similar neurodevelopmental disorders (NDDs). The majority of these genes was already known to be implicated in ASD or NDDs, whereas approximately 30 genes were considered "novel" as either they were not previously associated with ASD/NDDs or very little information about them was present in the literature. The aim of this work is to review the current literature since the publication of the ASC paper to see if new data mainly concerning genotype-phenotype correlations of the novel genes have been added to the existing one. We found new important clinical and molecular data for 6 of the 30 novel genes. Though the broad and overlapping neurodevelopmental phenotypes observed in most monogenic forms of NDDs make it difficult for the clinical geneticist to address gene-specific tests, knowledge of these new data can at least help to prioritize and interpret results of pangenomic tests to some extent. 
Indeed, for some of the new emerging genes analyzed in the present work, specific clinical features emerged that may help the clinical geneticist to make the final diagnosis by associating the genetic test results with the phenotype. The importance of this relatively new approach known as "reverse phenotyping" will be discussed.}, } @article {pmid34765088, year = {2021}, author = {Cheng, X and Yang, B and Zheng, J and Wei, H and Feng, X and Yin, Y}, title = {Cadmium stress triggers significant metabolic reprogramming in Enterococcus faecium CX 2-6.}, journal = {Computational and structural biotechnology journal}, volume = {19}, number = {}, pages = {5678--5687}, pmid = {34765088}, issn = {2001-0370}, support = {R01 GM140370/GM/NIGMS NIH HHS/United States ; }, abstract = {Heavy metal pollutions in the soils are increasingly threatening the global crop and food production. Using plant associated bacteria to remediate heavy metal contamination is a promising approach. We have isolated a cadmium (Cd) resistant Enterococcus faecium strain CX 2-6 from a heavy metal contaminated farmland. We have shown that: (i) CX 2-6 can tolerate cadmium (Cd) with a slower growth rate; (ii) The CX 2-6 complete genome is fully assembled using PacBio long reads; (iii) Differential expression analysis found 47% of CX 2-6 genes are significantly affected by Cd treatment and form three gene groups with distinct expression profiles; (iv) Differentially expressed genes (DEGs) form physically linked gene clusters in the CX 2-6 genome, and one of the gene clusters corresponds to a prophage that is unique to CX 2-6 and is strongly activated when Cd concentration is higher; (v) A majority of DEGs responding to Cd treatment are present in the core genome; and (vi) 55 noncoding RNA genes are identified and 49 of them are DEGs responding to cadmium stress. Our pan-genome analysis and comparative RNA-seq data analysis has significantly improved our understanding of the metabolic reprogramming of E. 
faecium CX 2-6 under Cd stress.}, } @article {pmid34761737, year = {2021}, author = {Urhan, A and Abeel, T}, title = {A comparative study of pan-genome methods for microbial organisms: Acinetobacter baumannii pan-genome reveals structural variation in antimicrobial resistance-carrying plasmids.}, journal = {Microbial genomics}, volume = {7}, number = {11}, pages = {}, pmid = {34761737}, issn = {2057-5858}, mesh = {*Acinetobacter baumannii ; Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/genetics ; Plasmids/genetics ; beta-Lactam Resistance ; }, abstract = {Microbial organisms have diverse populations, where using a single linear reference sequence in comparative studies introduces reference-bias in downstream analyses, and leads to a failure to account for variability in the population. Recently, pan-genome graphs have emerged as an alternative to the traditional linear reference with many successful applications and a rapid increase in the number of methods available in the literature. Despite this enthusiasm, there has been no attempt at exploring these graph construction methods in depth, demonstrating their practical use. In this study, we aim to develop a general guide to help researchers who may want to incorporate pan-genomes in their analyses of microbial organisms. We evaluated the state-of-the art pan-genome construction tools to model a collection of 70 Acinetobacter baumannii strains. Our results suggest that all tools produced pan-genome graphs conforming to our expectations based on previous literature, and that their approach to homologue detection is likely to be the most influential in determining the final size and complexity of the pan-genome. The graphs overlapped most in the core pan-genome content while the cloud genes varied significantly among tools. 
We propose an alternative approach for pan-genome construction by combining two of the tools, Panaroo and Ptolemy, to further exploit them in downstream analyses, and demonstrate the effectiveness of our pipeline for structural variant calling in beta-lactam resistance genes in the same set of A. baumannii isolates, identifying various transposon structures for carbapenem resistance in chromosome, as well as plasmids. We identify a novel plasmid structure in two multidrug-resistant clinical isolates that had previously been studied, and which could be important for their resistance phenotypes.}, } @article {pmid34759320, year = {2021}, author = {Varshney, RK and Roorkiwal, M and Sun, S and Bajaj, P and Chitikineni, A and Thudi, M and Singh, NP and Du, X and Upadhyaya, HD and Khan, AW and Wang, Y and Garg, V and Fan, G and Cowling, WA and Crossa, J and Gentzbittel, L and Voss-Fels, KP and Valluri, VK and Sinha, P and Singh, VK and Ben, C and Rathore, A and Punna, R and Singh, MK and Tar'an, B and Bharadwaj, C and Yasin, M and Pithia, MS and Singh, S and Soren, KR and Kudapa, H and Jarquín, D and Cubry, P and Hickey, LT and Dixit, GP and Thuillet, AC and Hamwieh, A and Kumar, S and Deokar, AA and Chaturvedi, SK and Francis, A and Howard, R and Chattopadhyay, D and Edwards, D and Lyons, E and Vigouroux, Y and Hayes, BJ and von Wettberg, E and Datta, SK and Yang, H and Nguyen, HT and Wang, J and Siddique, KHM and Mohapatra, T and Bennetzen, JL and Xu, X and Liu, X}, title = {A chickpea genetic variation map based on the sequencing of 3,366 genomes.}, journal = {Nature}, volume = {599}, number = {7886}, pages = {622--627}, pmid = {34759320}, issn = {1476-4687}, mesh = {Cicer/*genetics ; Crops, Agricultural/genetics ; *Genetic Variation ; Genome, Plant/*genetics ; Haplotypes/genetics ; Plant Breeding ; Polymorphism, Single Nucleotide/genetics ; *Sequence Analysis, DNA ; }, abstract = {Zero hunger and good health could be realized by 2030 through effective conservation, 
characterization and utilization of germplasm resources[1]. So far, few chickpea (Cicer arietinum) germplasm accessions have been characterized at the genome sequence level[2]. Here we present a detailed map of variation in 3,171 cultivated and 195 wild accessions to provide publicly available resources for chickpea genomics research and breeding. We constructed a chickpea pan-genome to describe genomic diversity across cultivated chickpea and its wild progenitor accessions. A divergence tree using genes present in around 80% of individuals in one species allowed us to estimate the divergence of Cicer over the last 21 million years. Our analysis found chromosomal segments and genes that show signatures of selection during domestication, migration and improvement. The chromosomal locations of deleterious mutations responsible for limited genetic diversity and decreased fitness were identified in elite germplasm. We identified superior haplotypes for improvement-related traits in landraces that can be introgressed into elite breeding lines through haplotype-based breeding, and found targets for purging deleterious alleles through genomics-assisted breeding and/or gene editing. Finally, we propose three crop breeding strategies based on genomic prediction to enhance crop productivity for 16 traits while avoiding the erosion of genetic diversity through optimal contribution selection (OCS)-based pre-breeding. 
The predicted performance for 100-seed weight, an important yield-related trait, increased by up to 23% and 12% with OCS- and haplotype-based genomic approaches, respectively.}, } @article {pmid34758735, year = {2021}, author = {Her, HL and Lin, PT and Wu, YW}, title = {PangenomeNet: a pan-genome-based network reveals functional modules on antimicrobial resistome for Escherichia coli strains.}, journal = {BMC bioinformatics}, volume = {22}, number = {1}, pages = {548}, pmid = {34758735}, issn = {1471-2105}, support = {TMU-NTUST-107-10//TMU-NTUST Joint Research Program/ ; TMU-NTUST-107-10//TMU-NTUST Joint Research Program/ ; MOST108-2628-E-038-002-MY3//Ministry of Science and Technology, Taiwan/ ; }, mesh = {Anti-Bacterial Agents/pharmacology ; *Escherichia coli/genetics ; *Escherichia coli Infections/drug therapy ; Humans ; beta-Lactamases/genetics ; }, abstract = {BACKGROUND: Discerning genes crucial to antimicrobial resistance (AMR) mechanisms is becoming more and more important to accurately and swiftly identify AMR pathogenic strains. Pangenome-wide association studies (e.g. Scoary) identified numerous putative AMR genes. However, only a tiny proportion of the putative resistance genes are annotated by AMR databases or Gene Ontology. In addition, many putative resistance genes are of unknown function (termed hypothetical proteins). An annotation tool is crucially needed in order to reveal the functional organization of the resistome and expand our knowledge of the AMR gene repertoire.

RESULTS: We developed an approach (PangenomeNet) for building co-functional networks from pan-genomes to infer functions for hypothetical genes. Using Escherichia coli as an example, we demonstrated that it is possible to build co-functional network from its pan-genome using co-inheritance, domain-sharing, and protein-protein-interaction information. The investigation of the network revealed that it fits the characteristics of biological networks and can be used for functional inferences. The subgraph consisting of putative meropenem resistance genes consists of clusters of stress response genes and resistance gene acquisition pathways. Resistome subgraphs also demonstrate drug-specific AMR genes such as beta-lactamase, as well as functional roles shared among multiple classes of drugs, mostly in the stress-related pathways.

CONCLUSIONS: By demonstrating the idea of pan-genome-based co-functional network on the E. coli species, we showed that the network can infer functional roles of the genes, including those without functional annotations, and provides holistic views on the putative antimicrobial resistomes. We hope that the pan-genome network idea can help formulate hypothesis for targeted experimental works.}, } @article {pmid36824593, year = {2020}, author = {Braich, S and Baillie, RC and Spangenberg, GC and Cogan, NOI}, title = {A new and improved genome sequence of Cannabis sativa.}, journal = {GigaByte (Hong Kong, China)}, volume = {2020}, number = {}, pages = {gigabyte10}, pmid = {36824593}, issn = {2709-4715}, abstract = {Cannabis is a diploid species (2n = 20), the estimated haploid genome sizes of the female and male plants using flow cytometry are 818 and 843 Mb respectively. Although the genome of Cannabis has been sequenced (from hemp, wild and high-THC strains), all assemblies have significant gaps. In addition, there are inconsistencies in the chromosome numbering which limits their use. A new comprehensive draft genome sequence assembly (∼900 Mb) has been generated from the medicinal cannabis strain Cannbio-2, that produces a balanced ratio of cannabidiol and delta-9-tetrahydrocannabinol using long-read sequencing. The assembly was subsequently analysed for completeness by ordering the contigs into chromosome-scale pseudomolecules using a reference genome assembly approach, annotated and compared to other existing reference genome assemblies. The Cannbio-2 genome sequence assembly was found to be the most complete genome sequence available based on nucleotides assembled and BUSCO evaluation in Cannabis sativa with a comprehensive genome annotation. 
The new draft genome sequence is an advancement in Cannabis genomics permitting pan-genome analysis, genomic selection as well as genome editing.}, } @article {pmid34968242, year = {2020}, author = {Bruschi, M}, title = {The Epigenetic Progenitor Origin of Cancer Reassessed: DNA Methylation Brings Balance to the Stem Force.}, journal = {Epigenomes}, volume = {4}, number = {2}, pages = {}, pmid = {34968242}, issn = {2075-4655}, abstract = {Cancer initiation and progression toward malignant stages occur as the results of accumulating genetic alterations and epigenetic dysregulation. During the last decade, the development of next generation sequencing (NGS) technologies and the increasing pan-genomic knowledge have revolutionized how we consider the evolving epigenetic landscapes during homeostasis and tumor progression. DNA methylation represents the best studied mark and is considered as a common mechanism of epigenetic regulation in normal homeostasis and cancer. A remarkable amount of work has recently started clarifying the central role played by DNA methylation dynamics on the maintenance of cell identity and on cell fate decisions during the different steps of normal development and tumor evolution. Importantly, a growing number of studies show that DNA methylation is key in the maintenance of adult stemness and in orchestrating commitment in multiple ways. Perturbations of the normal DNA methylation patterns impair the homeostatic balance and can lead to tumor initiation. 
Therefore, DNA methylation represents an interesting therapeutic target to recover homeostasis in tumor stem cells.}, } @article {pmid34765994, year = {2020}, author = {Ma, S and Zhang, Y}, title = {Profiling chromatin regulatory landscape: insights into the development of ChIP-seq and ATAC-seq.}, journal = {Molecular biomedicine}, volume = {1}, number = {1}, pages = {9}, pmid = {34765994}, issn = {2662-8651}, abstract = {Chromatin regulatory landscape plays a critical role in many disease processes and embryo development. Epigenome sequencing technologies such as chromatin immunoprecipitation sequencing (ChIP-seq) and assay for transposase-accessible chromatin with high-throughput sequencing (ATAC-seq) have enabled us to dissect the pan-genomic regulatory landscape of cells and tissues in both time and space dimensions by detecting specific chromatin state and its corresponding transcription factors. Pioneered by the advancement of chromatin immunoprecipitation-chip (ChIP-chip) technology, abundant epigenome profiling technologies have become available such as ChIP-seq, DNase I hypersensitive site sequencing (DNase-seq), ATAC-seq and so on. The advent of single-cell sequencing has revolutionized the next-generation sequencing, applications in single-cell epigenetics are enriched rapidly. Epigenome sequencing technologies have evolved from low-throughput to high-throughput and from bulk sample to the single-cell scope, which unprecedentedly benefits scientists to interpret life from different angles. In this review, after briefly introducing the background knowledge of epigenome biology, we discuss the development of epigenome sequencing technologies, especially ChIP-seq & ATAC-seq and their current applications in scientific research. 
Finally, we provide insights into future applications and challenges.}, } @article {pmid34754514, year = {2021}, author = {Sun, TW and Ku, C}, title = {Unraveling gene content variation across eukaryotic giant viruses based on network analyses and host associations.}, journal = {Virus evolution}, volume = {7}, number = {2}, pages = {veab081}, pmid = {34754514}, issn = {2057-1577}, abstract = {The nucleocytoplasmic large DNA viruses (NCLDVs, phylum Nucleocytoviricota) infect vertebrates, invertebrates, algae, amoebae, and other unicellular organisms across supergroups of eukaryotes and in various ecosystems. The expanding collection of their genome sequences has revolutionized our view of virus genome size and coding capacity. Phylogenetic trees based on a few core genes are commonly used as a model to understand their evolution. However, the tree topology can differ between analyses, and the vast majority of encoded genes might not share a common evolutionary history. To explore the whole-genome variation and evolution of NCLDVs, we dissected their gene contents using clustering, network, and comparative analyses. Our updated core-gene tree served as a framework to classify NCLDVs into families and intrafamilial lineages, but networks of individual genomes and family pangenomes showed patterns of gene sharing that contradict with the tree topology, in particular at higher taxonomic levels. Clustering of NCLDV genomes revealed variable granularity and degrees of gene sharing within each family, which cannot be inferred from the tree. At the level of NCLDV families, a correlation exists between gene content variation, but not core-gene sequence divergence, and host supergroup diversity. In addition, there is significantly higher gene sharing between divergent viruses that infect similar host types. The identified shared genes would be a useful resource for further functional analyses of NCLDV-host interactions. 
Overall this study provides a comprehensive view of gene repertoire variation in NCLDVs at different taxonomic levels, as well as a novel approach to studying the extremely diverse giant virus genomes.}, } @article {pmid34747029, year = {2022}, author = {Yildirir, G and Sperschneider, J and Malar C, M and Chen, ECH and Iwasaki, W and Cornell, C and Corradi, N}, title = {Long reads and Hi-C sequencing illuminate the two-compartment genome of the model arbuscular mycorrhizal symbiont Rhizophagus irregularis.}, journal = {The New phytologist}, volume = {233}, number = {3}, pages = {1097-1107}, doi = {10.1111/nph.17842}, pmid = {34747029}, issn = {1469-8137}, mesh = {Fungi ; Genome, Fungal ; *Glomeromycota/genetics/metabolism ; *Mycorrhizae/physiology ; Plants/genetics ; }, abstract = {Chromosome folding links genome structure with gene function by generating distinct nuclear compartments and topologically associating domains. In mammals, these undergo preferential interactions and regulate gene expression. However, their role in fungal genome biology is unclear. Here, we combine Nanopore (ONT) sequencing with chromatin conformation capture sequencing (Hi-C) to reveal chromosome and epigenetic diversity in a group of obligate plant symbionts: the arbuscular mycorrhizal fungi (AMF). We find that five phylogenetically distinct strains of the model AMF Rhizophagus irregularis carry 33 chromosomes with substantial within-species variability in size, as well as in gene and repeat content. Strain-specific Hi-C contact maps reveal a 'checkerboard' pattern that underline two dominant euchromatin (A) and heterochromatin (B) compartments. Each compartment differs in the level of gene transcription, regulation of candidate effectors and methylation frequencies. The A-compartment is more gene-dense and contains most core genes, while the B-compartment is more repeat-rich and has higher rates of chromosomal rearrangement. 
While the B-compartment is transcriptionally repressed, it has significantly more secreted proteins and in planta upregulated candidate effectors, suggesting a possible host-induced change in chromosome conformation. Overall, this study provides a fine-scale view into the genome biology and evolution of model plant symbionts, and opens avenues to study the epigenetic mechanisms that modify chromosome folding during host-microbe interactions.}, } @article {pmid34745068, year = {2021}, author = {Ravin, NV and Rudenko, TS and Smolyakov, DD and Beletsky, AV and Rakitin, AL and Markov, ND and Fomenkov, A and Sun, L and Roberts, RJ and Novikov, AA and Karnachuk, OV and Grabovich, MY}, title = {Comparative Genome Analysis of the Genus Thiothrix Involving Three Novel Species, Thiothrix subterranea sp. nov. Ku-5, Thiothrix litoralis sp. nov. AS and "Candidatus Thiothrix anitrata" sp. nov. A52, Revealed the Conservation of the Pathways of Dissimilatory Sulfur Metabolism and Variations in the Genetic Inventory for Nitrogen Metabolism and Autotrophic Carbon Fixation.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {760289}, pmid = {34745068}, issn = {1664-302X}, abstract = {Two strains of filamentous, colorless sulfur bacteria were isolated from bacterial fouling in the outflow of hydrogen sulfide-containing waters from a coal mine (Thiothrix sp. Ku-5) and on the seashore of the White Sea (Thiothrix sp. AS). Metagenome-assembled genome (MAG) A52 was obtained from a sulfidic spring in the Volgograd region, Russia. Phylogenetic analysis based on the 16S rRNA gene sequences showed that all genomes represented the genus Thiothrix. Based on their average nucleotide identity and digital DNA-DNA hybridization data these new isolates and the MAG represent three species within the genus Thiothrix with the proposed names Thiothrix subterranea sp. nov. Ku-5[T], Thiothrix litoralis sp. nov. AS[T], and "Candidatus Thiothrix anitrata" sp. nov. A52. 
The complete genome sequences of Thiothrix fructosivorans Q[T] and Thiothrix unzii A1[T] were determined. Complete genomes of seven Thiothrix isolates, as well as two MAGs, were used for pangenome analysis. The Thiothrix core genome consisted of 1,355 genes, including ones for the glycolysis, the tricarboxylic acid cycle, the aerobic respiratory chain, and the Calvin cycle of carbon fixation. Genes for dissimilatory oxidation of reduced sulfur compounds, namely the branched SOX system (SoxAXBYZ), direct (soeABC) and indirect (aprAB, sat) pathways of sulfite oxidation, sulfur oxidation complex Dsr (dsrABEFHCEMKLJONR), sulfide oxidation systems SQR (sqrA, sqrF), and FCSD (fccAB) were found in the core genome. Genomes differ in the set of genes for dissimilatory reduction of nitrogen compounds, nitrogen fixation, and the presence of various types of RuBisCO.}, } @article {pmid34739370, year = {2021}, author = {Cuny, H and Offret, C and Boukerb, AM and Parizadeh, L and Lesouhaitier, O and Le Chevalier, P and Jégou, C and Bazire, A and Brillet, B and Fleury, Y}, title = {Pseudoalteromonas ostreae sp. nov., a new bacterial species harboured by the flat oyster Ostrea edulis.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {71}, number = {11}, pages = {}, doi = {10.1099/ijsem.0.005070}, pmid = {34739370}, issn = {1466-5034}, mesh = {Animals ; Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; France ; Nucleic Acid Hybridization ; *Ostrea/microbiology ; *Phylogeny ; *Pseudoalteromonas/classification/isolation & purification ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Three bacterial strains, named hOe-66[T], hOe-124 and hOe-125, were isolated from the haemolymph of different specimens of the flat oyster Ostrea edulis collected in Concarneau bay (Finistère, France). 
These strains were characterized by a polyphasic approach, including (i) whole genome analyses with 16S rRNA gene sequence alignment and pangenome analysis, determination of the G+C content, average nucleotide identity (ANI), and in silico DNA-DNA hybridization (isDDH), and (ii) fatty acid methyl ester and other phenotypic analyses. Strains hOe-66[T], hOe-124 and hOe-125 were closely related to both type strains Pseudoalteromonas rhizosphaerae RA15[T] and Pseudoalteromonas neustonica PAMC 28425[T] with less than 93.3% ANI and 52.3% isDDH values. Regarding their phenotypic traits, the three strains were Gram-negative, 1-2 µm rod-shaped, aerobic, motile and non-spore-forming bacteria. Cells grew optimally at 25 °C in 2.5% NaCl and at 7-8 pH. The most abundant fatty acids were summed feature 3 (C16:1 ω7c/C16:1 ω6c), C16:0 and C17:1 ω8c. The strains carried a genome average size of 4.64 Mb and a G+C content of 40.28 mol%. The genetic and phenotypic results suggested that strains hOe-66[T], hOe-124 and hOe-125 belong to a new species of the genus Pseudoalteromonas. In this context, we propose the name Pseudoalteromonas ostreae sp. nov. The type strain is hOe-66[T] (=CECT 30303[T]=CIP 111911[T]).}, } @article {pmid34737728, year = {2021}, author = {Vázquez-Campos, X and Kinsela, AS and Bligh, MW and Payne, TE and Wilkins, MR and Waite, TD}, title = {Genomic Insights Into the Archaea Inhabiting an Australian Radioactive Legacy Site.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {732575}, pmid = {34737728}, issn = {1664-302X}, abstract = {During the 1960s, small quantities of radioactive materials were co-disposed with chemical waste at the Little Forest Legacy Site (LFLS, Sydney, Australia). The microbial function and population dynamics in a waste trench during a rainfall event have been previously investigated revealing a broad abundance of candidate and potentially undescribed taxa in this iron-rich, radionuclide-contaminated environment. 
Applying genome-based metagenomic methods, we recovered 37 refined archaeal MAGs, mainly from undescribed DPANN Archaea lineages without standing in nomenclature and 'Candidatus Methanoperedenaceae' (ANME-2D). Within the undescribed DPANN, the newly proposed orders 'Ca. Gugararchaeales', 'Ca. Burarchaeales' and 'Ca. Anstonellales', constitute distinct lineages with a more comprehensive central metabolism and anabolic capabilities within the 'Ca. Micrarchaeota' phylum compared to most other DPANN. The analysis of new and extant 'Ca. Methanoperedens spp.' MAGs suggests metal ions as the ancestral electron acceptors during the anaerobic oxidation of methane while the respiration of nitrate/nitrite via molybdopterin oxidoreductases would have been a secondary acquisition. The presence of genes for the biosynthesis of polyhydroxyalkanoates in most 'Ca. Methanoperedens' also appears to be a widespread characteristic of the genus for carbon accumulation. This work expands our knowledge about the roles of the Archaea at the LFLS, especially, DPANN Archaea and 'Ca. Methanoperedens', while exploring their diversity, uniqueness, potential role in elemental cycling, and evolutionary history.}, } @article {pmid34733249, year = {2021}, author = {Kurilung, A and Perreten, V and Prapasarakul, N}, title = {Comparative Genomic Analysis and a Novel Set of Missense Mutation of the Leptospira weilii Serogroup Mini From the Urine of Asymptomatic Dogs in Thailand.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {731937}, pmid = {34733249}, issn = {1664-302X}, abstract = {Leptospira weilii belongs to the pathogenic Leptospira group and is a causal agent of human and animal leptospirosis in many world regions. L. weilii can produce varied clinical presentations from asymptomatic through acute to chronic infections and occupy several ecological niches. Nevertheless, the genomic feature and genetic basis behind the host adaptability of L. 
weilii remain elusive due to limited information. Therefore, this study aimed to examine the complete circular genomes of two new L. weilii serogroup Mini strains (CUDO6 and CUD13) recovered from the urine of asymptomatic dogs in Thailand and then compared with the 17 genomes available for L. weilii. Variant calling analysis (VCA) was also undertaken to gain potential insight into the missense mutations, focusing on the known pathogenesis-related genes. Whole genome sequences revealed that the CUDO6 and CUD13 strains each contained two chromosomes and one plasmid, with average genome size and G+C content of 4.37 Mbp and 40.7%, respectively. Both strains harbored almost all the confirmed pathogenesis-related genes in Leptospira. Two novel plasmid sequences, pDO6 and pD13, were identified in the strains CUDO6 and CUD13. Both plasmids contained genes responsible for stress response that may play important roles in bacterial adaptation during persistence in the kidneys. The core-single nucleotide polymorphisms phylogeny demonstrated that both strains had a close genetic relationship. Amongst the 19 L. weilii strains analyzed, the pan-genome analysis showed an open pan-genome structure, correlated with their high genetic diversity. VCA identified missense mutations in genes involved in endoflagella, lipopolysaccharide (LPS) structure, mammalian cell entry protein, and hemolytic activities, and may be associated with host-adaptation in the strains. Missense mutations of the endoflagella genes of CUDO6 and CUD13 were associated with loss of motility. 
These findings extend the knowledge about the pathogenic molecular mechanisms and genomic evolution of this important zoonotic pathogen.}, } @article {pmid34723785, year = {2021}, author = {Heikema, AP and Strepis, N and Horst-Kreft, D and Huynh, S and Zomer, A and Kelly, DJ and Cooper, KK and Parker, CT}, title = {Biomolecule sulphation and novel methylations related to Guillain-Barré syndrome-associated Campylobacter jejuni serotype HS:19.}, journal = {Microbial genomics}, volume = {7}, number = {11}, pages = {}, pmid = {34723785}, issn = {2057-5858}, mesh = {*Campylobacter Infections/complications ; *Campylobacter jejuni/genetics ; *Guillain-Barre Syndrome/etiology ; Humans ; Methylation ; Serogroup ; }, abstract = {Campylobacter jejuni strains that produce sialylated lipooligosaccharides (LOS) can cause the immune-mediated disease Guillain-Barré syndrome (GBS). The risk of GBS after infection with C. jejuni Penner serotype HS:19 is estimated to be at least six times higher than the average risk. Aside from LOS biosynthesis genes, genomic characteristics that promote an increased risk for GBS following C. jejuni HS:19 infection, remain uncharacterized. We hypothesized that strains with the HS:19 serotype have unique genomic features that explain the increased risk for GBS. We performed genome sequencing, alignments, single nucleotide polymorphisms' analysis and methylome characterization on a subset, and pan-genome analysis on a large number of genomes to compare HS:19 with non-HS:19 C. jejuni genome sequences. Comparison of 36 C. jejuni HS:19 with 874 C. jejuni non-HS:19 genome sequences led to the identification of three single genes and ten clusters containing contiguous genes that were significantly associated with C. jejuni HS:19. One gene cluster of seven genes, localized downstream of the capsular biosynthesis locus, was related to sulphation of biomolecules. This cluster also encoded the campylobacter sialyl transferase Cst-I. 
Interestingly, sulphated bacterial biomolecules such as polysaccharides can promote immune responses and, therefore, (in the presence of sialic acid) may play a role in the development of GBS. Additional gene clusters included those involved in persistence-mediated pathogenicity and gene clusters involved in restriction-modification systems. Furthermore, characterization of methylomes of two HS:19 strains exhibited novel methylation patterns (5′-CATG-3′ and 5′-[m6]AGTNNNNNNRTTG-3′) that could differentially affect gene-expression patterns of C. jejuni HS:19 strains. Our study provides novel insight into specific genetic features and possible virulence factors of C. jejuni associated with the HS:19 serotype that may explain the increased risk of GBS.}, } @article {pmid34716462, year = {2021}, author = {Caicedo-Montoya, C and Gómez-Román, MP and Vázquez-Hernández, M and Mora-Rincón, RA and Rodriguez-Luna, SD and Rodríguez-Sanoja, R and Sanchez, S}, title = {Evolutionary genomics and biosynthetic potential of novel environmental Actinobacteria.}, journal = {Applied microbiology and biotechnology}, volume = {105}, number = {23}, pages = {8805-8822}, pmid = {34716462}, issn = {1432-0614}, support = {IN-205922//Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México/ ; A-S1-9143//Consejo Nacional de Ciencia y Tecnología/ ; }, mesh = {*Actinobacteria/genetics ; Genomics ; Phylogeny ; Polyketide Synthases/genetics ; *Streptomyces/genetics ; }, abstract = {Actinobacteria embroil Gram-positive microbes with high guanine and cytosine contents in their DNA. They are the source of most antimicrobials of bacterial origin utilized in medicine today. Their genomes are among the richest in novel secondary metabolites with high biotechnological potential. Actinobacteria reveal complex patterns of evolution, responses, and adaptations to their environment, which are not yet well understood. 
We analyzed three novel plant isolates and explored their habitat adaptation, evolutionary patterns, and potential secondary metabolite production. The phylogenomically characterized isolates belonged to Actinoplanes sp. TFC3, Streptomyces sp. L06, and Embleya sp. NF3. Positively selected genes, relevant in strain evolution, encoded enzymes for stress resistance in all strains, including porphyrin, chlorophyll, and ubiquinone biosynthesis in Embleya sp. NF3. Streptomyces sp. L06 encoded for pantothenate and proteins for CoA biosynthesis with evidence of positive selection; furthermore, Actinoplanes sp. TFC3 encoded for a c-di-GMP synthetase, with adaptive mutations. Notably, the genomes harbored many genes involved in the biosynthesis of at least ten novel secondary metabolites, with many avenues for future new bioactive compound characterization—specifically, Streptomyces sp. L06 could make new ribosomally synthesized and post-translationally modified peptides, while Embleya sp. NF3 could produce new non-ribosomal peptide synthetases and ribosomally synthesized and post-translationally modified peptides. At the same time, TFC3 is particularly enriched in terpene and polyketide synthases. All the strains harbored conserved genes in response to diverse environmental stresses, plant growth promotion factors, and degradation of various carbohydrates, which supported their endophytic lifestyle and showed their capacity to colonize other niches. This study aims to provide a comprehensive estimation of the genomic features of novel Actinobacteria. It sets the groundwork for future research into experimental tests with new bioactive metabolites with potential application in medicine, as biofertilizers, and in plant biomass residue utilization. 
KEY POINTS: • Potential of novel environmental bacteria for secondary metabolites production • Exploring the genomes of three novel endophytes isolated from a medicinal tree • Pan-genome analysis of Actinobacteria genera.}, } @article {pmid34715447, year = {2022}, author = {Rodrigues, GL and Matteoli, FP and Gazara, RK and Rodrigues, PSL and Dos Santos, ST and Alves, AF and Pedrosa-Silva, F and Oliveira-Pinheiro, I and Canedo-Alvarenga, D and Olivares, FL and Venancio, TM}, title = {Characterization of cellular, biochemical and genomic features of the diazotrophic plant growth-promoting bacterium Azospirillum sp. UENF-412522, a novel member of the Azospirillum genus.}, journal = {Microbiological research}, volume = {254}, number = {}, pages = {126896}, doi = {10.1016/j.micres.2021.126896}, pmid = {34715447}, issn = {1618-0623}, mesh = {*Azospirillum/chemistry/classification/genetics ; *Genome, Bacterial/genetics ; Genomics ; Passiflora/microbiology ; Phosphates/metabolism ; Phylogeny ; }, abstract = {Given their remarkable beneficial effects on plant growth, several Azospirillum isolates currently integrate the formulations of various commercial inoculants. Our research group isolated a new strain, Azospirillum sp. UENF-412522, from passion fruit rhizoplane. This isolate uses carbon sources that are partially distinct from closely-related Azospirillum isolates. Scanning electron microscopy analysis and population counts demonstrate the ability of Azospirillum sp. UENF-412522 to colonize the surface of passion fruit roots. In vitro assays demonstrate the ability of Azospirillum sp. UENF-412522 to fix atmospheric nitrogen, to solubilize phosphate and to produce indole-acetic acid. Passion fruit plantlets inoculated with Azospirillum sp. UENF-412522 showed increased shoot and root fresh matter by 13.8% and 88.6% respectively, as well as root dry matter by 61.4%, further highlighting its biotechnological potential for agriculture. 
We sequenced the genome of Azospirillum sp. UENF-412522 to investigate the genetic basis of its plant-growth promotion properties. We identified the key nif genes for nitrogen fixation, the complete PQQ operon for phosphate solubilization, the acdS gene that alleviates ethylene effects on plant growth, and the napCAB operon, which produces nitrite under anoxic conditions. We also found several genes conferring resistance to common soil antibiotics, which are critical for Azospirillum sp. UENF-412522 survival in the rhizosphere. Finally, we also assessed the Azospirillum pangenome and highlighted key genes involved in plant growth promotion. A phylogenetic reconstruction of the genus was also conducted. Our results support Azospirillum sp. UENF-412522 as a good candidate for bioinoculant formulations focused on plant growth promotion in sustainable systems.}, } @article {pmid34714446, year = {2021}, author = {Huang, YS and Lin, CY and Cheng, WC}, title = {Investigating the Transcriptomic and Expression Presence-Absence Variation Exist in Japanese Eel (Anguilla japonica), a Primitive Teleost.}, journal = {Marine biotechnology (New York, N.Y.)}, volume = {23}, number = {6}, pages = {943-954}, pmid = {34714446}, issn = {1436-2236}, support = {MOST107-2321-B002-057//ministry of science and technology, taiwan/ ; }, mesh = {Androgens/metabolism ; *Anguilla/genetics/metabolism ; Animals ; Genome ; Genomics ; Transcriptome ; }, abstract = {The pan-genome was defined as the complete gene set across strains, and it is built upon genes displaying presence-absence variations (PAVs); the pan-transcriptome is defined by recalling the pan-genome. Indeed, a PAV is reflected from the expression presence-absence variation (ePAV). In this study, treated with androgen, eels, which are a primitive fish from the basal lineage of Teleost, with different ovarian developments were chosen and submitted to RNA-sequencing. 
Transcriptomes were the assembly against eel genome scaffolds; a pair was the unit (the same eel before and after treatment) to analyze DEGs (differentially expressed genes); the core, unique, or accessory genes were identified, and the list of DEGs was analyzed to investigate ePAV. The results suggest that there was ePAV in Japanese eel, and the ePAV of eel was analyzed by pathway enrichment. These results signify the importance of genetic differential expression on the variations of phenotypes by androgen, and a transcriptomic approach appears to enable extracting multiple layers of genomic data.}, } @article {pmid34714230, year = {2021}, author = {Reis, AC and Cunha, MV}, title = {The open pan-genome architecture and virulence landscape of Mycobacterium bovis.}, journal = {Microbial genomics}, volume = {7}, number = {10}, pages = {}, pmid = {34714230}, issn = {2057-5858}, mesh = {Animals ; Cattle ; Genome Size ; Genome, Bacterial ; Genomics/*methods ; Lipid Metabolism ; Mycobacterium bovis/genetics/*pathogenicity ; Polymorphism, Single Nucleotide ; Tuberculosis, Bovine ; Virulence ; Whole Genome Sequencing/*methods ; }, abstract = {Animal tuberculosis (TB) is an emergent disease caused by Mycobacterium bovis, one of the animal-adapted ecotypes of the Mycobacterium tuberculosis complex (MTC). In this work, whole-genome comparative analyses of 70 M. bovis were performed to gain insights into the pan-genome architecture. The comparison across M. bovis predicted genome composition enabled clustering into the core- and accessory-genome components, with 2736 CDS for the former, while the accessory moiety included 3897 CDS, of which 2656 are restricted to one/two genomes only. These analyses predicted an open pan-genome architecture, with an average of 32 CDS added by each genome and show the diversification of discrete M. bovis subpopulations supported by both core- and accessory-genome components. 
The functional annotation of the pan-genome classified each CDS into one or several COG (Clusters of Orthologous Groups) categories, revealing ‘transcription’ (total average CDSs, n=258), ‘lipid metabolism and transport’ (n=242), ‘energy production and conversion’ (n=214) and ‘unknown function’ (n=876) as the most represented. The closer analysis of polymorphisms in virulence-related genes in a restricted group of M. bovis from a multi-host system enabled the identification of clade-monomorphic non-synonymous SNPs, illustrating clade-specific virulence landscapes and correlating with disease severity. This first comparative pan-genome study of a diverse collection of M. bovis encompassing all clonal complexes indicates a high percentage of accessory genes and denotes an open, dynamic non-conservative pan-genome structure, with high evolutionary potential, defying the canons of MTC biology. Furthermore, it shows that M. bovis can shape its virulence repertoire, either by acquisition and loss of genes or by SNP-based diversification, likely towards host immune evasion, adaptation and persistence.}, } @article {pmid34712653, year = {2021}, author = {Zin, NM and Ismail, A and Mark, DR and Westrop, G and Schniete, JK and Herron, PR}, title = {Adaptation to Endophytic Lifestyle Through Genome Reduction by Kitasatospora sp. SUK42.}, journal = {Frontiers in bioengineering and biotechnology}, volume = {9}, number = {}, pages = {740722}, pmid = {34712653}, issn = {2296-4185}, abstract = {Endophytic actinobacteria offer great potential as a source of novel bioactive compounds. In order to investigate the potential for the production of secondary metabolites by endophytes, we recovered a filamentous microorganism from the tree Antidesma neurocarpum Miq. After phenotypic analysis and whole genome sequencing we demonstrated that this organism, SUK42 was a member of the actinobacterial genus Kitasatospora. 
This strain has a small genome in comparison with other type strains of this genus and has lost metabolic pathways associated with Stress Response, Nitrogen Metabolism and Secondary Metabolism. Despite this SUK42 can grow well in a laboratory environment and encodes a core genome that is consistent with other members of the genus. Finally, in contrast to other members of Kitasatospora, SUK42 encodes saccharide secondary metabolite biosynthetic gene clusters, one of which with similarity to the acarviostatin cluster, the product of which displays α-amylase inhibitory activity. As extracts of the host plant demonstrate this inhibitory activity, it suggests that the potential medicinal properties of A. neurocarpum Miq might be provided by the endophytic partner and illustrate the potential for exploitation of endophytes for clinical or industrial uses.}, } @article {pmid34710024, year = {2021}, author = {Wu, D and Liu, H and Zhou, Y and Wu, X and Nie, Y and Cai, M}, title = {Roseomonas oleicola sp. nov., isolated from an oil production mixture in Yumen Oilfield, and emended description of Roseomonas frigidaquae.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {71}, number = {10}, pages = {}, doi = {10.1099/ijsem.0.005064}, pmid = {34710024}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; China ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; *Methylobacteriaceae/classification/isolation & purification ; Nucleic Acid Hybridization ; *Oil and Gas Fields/microbiology ; Phospholipids/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Ubiquinone/analogs & derivatives/chemistry ; }, abstract = {A pink, ovoid-shaped, Gram-stain-negative, strictly aerobic and motile bacterial strain, designated ROY-5-3[T], was isolated from an oil production mixture from Yumen Oilfield in PR China. 
The strain grew at 4-42 °C (optimum, 30 °C), at pH 5-10 (optimum, 7) and with 0-5 % (w/v) NaCl (optimum, 0%). The results of phylogenetic analysis based on 16S rRNA gene sequences indicated that ROY-5-3[T] belongs to the genus Roseomonas and shared the highest pairwise similarities with Roseomonas frigidaquae CW67[T] (98.1%), Roseomonas selenitidurans BU-1[T] (97.8%), Roseomonas tokyonensis K-20[T] (97.7%) and Roseomonas stagni HS-69[T] (97.3%). The average nucleotide identity and digital DNA-DNA hybridization values between ROY-5-3[T] and other related type strains of Roseomonas species were less than 84.08 and 28.60 %, respectively, both below the species delineation threshold. Pan-genomic analysis showed that the novel isolate ROY-5-3[T] shared 3265 core gene families with the four closely related type strains in Roseomonas, and the number of strain-specific gene families was 513. The major fatty acids were identified as summed feature 8 (C18 : 1 ω6c/C18 : 1 ω7c), summed feature 3 (C16 : 1 ω6c/C16 : 1 ω7c) and C16 : 0. Strain ROY-5-3[T] contained Q-10 as the main ubiquinone and the genomic DNA G+C content was 69.8 mol%. The major polar lipids were diphosphatidylglycerol, phosphatidylcholine, phosphatidylethanolamine and phosphatidylglycerol. Based on the phylogenetic, morphological, physiological, chemotaxonomic and genome analyses, strain ROY-5-3[T] represents a novel species of the genus Roseomonas for which the name Roseomonas oleicola sp. nov. is proposed. 
The type strain is ROY-5-3[T] (=CGMCC 1.13459[T] =KCTC 82484[T]).}, } @article {pmid34709628, year = {2022}, author = {Wu, H and Yang, ZK and Yang, T and Wang, D and Luo, H and Gao, F}, title = {An Effective Preprocessing Method for High-Quality Pan-Genome Analysis of Bacillus subtilis and Escherichia coli.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2377}, number = {}, pages = {371-390}, pmid = {34709628}, issn = {1940-6029}, mesh = {*Bacillus subtilis/genetics ; *Escherichia coli/genetics ; Genome, Bacterial ; }, abstract = {Bacillus subtilis and Escherichia coli, as widely used microbial species, are of great significance in studying microbial community relationships, adaptive evolution in various niches, engineering cell factories that produce specific products, and designing genome reduction. The pan-genome analysis is an effective method for studying the characteristics and functions of genes among and within species. Many research directions and conclusions usually depend on accurate gene identification and reliable pan-genome results. However, there currently lack enough studies showing how to achieve high-quality pan-genome results between or within certain species. This chapter will take Bacillus subtilis as an example to introduce a stepwise manner for improving the quality of the pan-genome by gradually removing confounding strains step-by-step, and ultimately obtaining a reliable high-quality pan-genome landscape of Bacillus subtilis, which could be used as a quality control protocol in pan-genome analysis pipeline. 
Finally, we suggest further improving the pan-genome analysis results of Escherichia coli to prove the feasibility and credibility of the quality control protocol for obtaining high-quality pan-genome landscape.}, } @article {pmid34704920, year = {2021}, author = {Mullally, CA and Mikucki, A and Wise, MJ and Kahler, CM}, title = {Modelling evolutionary pathways for commensalism and hypervirulence in Neisseria meningitidis.}, journal = {Microbial genomics}, volume = {7}, number = {10}, pages = {}, pmid = {34704920}, issn = {2057-5858}, mesh = {Bacterial Proteins/*genetics ; Evolution, Molecular ; Frameshift Mutation ; Gene Transfer, Horizontal ; Genome, Bacterial ; Genomic Islands ; Humans ; Loss of Function Mutation ; Meningococcal Infections/*microbiology ; Nasopharynx/microbiology ; Neisseria meningitidis/genetics/*pathogenicity ; Symbiosis ; Virulence ; Whole Genome Sequencing/*methods ; }, abstract = {Neisseria meningitidis, the meningococcus, resides exclusively in humans and causes invasive meningococcal disease (IMD). The population of N. meningitidis is structured into stable clonal complexes by limited horizontal recombination in this naturally transformable species. N. meningitidis is an opportunistic pathogen, with some clonal complexes, such as cc53, effectively acting as commensal colonizers, while other genetic lineages, such as cc11, are rarely colonizers but are over-represented in IMD and are termed hypervirulent. This study examined theoretical evolutionary pathways for pathogenic and commensal lineages by examining the prevalence of horizontally acquired genomic islands (GIs) and loss-of-function (LOF) mutations. Using a collection of 4850 genomes from the BIGSdb database, we identified 82 GIs in the pan-genome of 11 lineages (10 hypervirulent and one commensal lineage). A new computational tool, Phaser, was used to identify frameshift mutations, which were examined for statistically significant association with genetic lineage. 
Phaser identified a total of 144 frameshift loci of which 105 were shown to have a statistically significant non-random distribution in phase status. The 82 GIs, but not the LOF loci, were associated with genetic lineage and invasiveness using the disease carriage ratio metric. These observations have been integrated into a new model that infers the early events of the evolution of the human adapted meningococcus. These pathways are enriched for GIs that are involved in modulating attachment to the host, growth rate, iron uptake and toxin expression which are proposed to increase competition within the meningococcal population for the limited environmental niche of the human nasopharynx. We surmise that competition for the host mucosal surface with the nasopharyngeal microbiome has led to the selection of isolates with traits that enable access to cell types (non-phagocytic and phagocytic) in the submucosal tissues leading to an increased risk for IMD.}, } @article {pmid34699013, year = {2023}, author = {Dutra-Silva, L and Matteoli, FP and Arisi, ACM}, title = {Distribution of Genes Related to Probiotic Effects Across Lacticaseibacillus rhamnosus Revealed by Population Structure.}, journal = {Probiotics and antimicrobial proteins}, volume = {15}, number = {3}, pages = {548-557}, pmid = {34699013}, issn = {1867-1314}, mesh = {Humans ; Lacticaseibacillus ; *Lacticaseibacillus rhamnosus/genetics ; Genome, Bacterial ; *Probiotics ; *Bacteriocins/genetics ; }, abstract = {The Gram-positive Lacticaseibacillus rhamnosus has been broadly reported as capable of exerting beneficial health effects. Bacterial genomic diversity may promote niche specialization, thus creating subpatterns within populations. As L. rhamnosus advantageous effects have been widely reported at strain level and little is known regarding the distribution of beneficial genes among L. 
rhamnosus strains, we investigated all publicly available genomes of Lactobacillus and Lacticaseibacillus genera to study the pangenome and general population structure of L. rhamnosus. Core genome multilocus sequence typing detected eight L. rhamnosus phylogroups (PG1 to PG8). L. rhamnosus harbors an open pangenome; PG1, PG3, PG4, and PG5 exhibited highly conserved gene distribution patterns. Genes significantly associated to the PG1, which comprises L. rhamnosus GG, are mainly phage-related. The adhesion operon spaCBA-srtC1 was found in 44 (24.7%) genomes; however, considering only the PG1, the prevalence was of 65%. In PG2 the spaCBA-srtC1 prevalence was of 43%. Nevertheless, both human and milk-derived strains harbored this operon. Further, two main types of bacteriocin clusters were found (Bact1 and Bact2). Bact1 predictions indicate the presence of garQ, encoding the class II bacteriocin garvieacin Q, that is mainly present in the closely related PG8A and a PG2 subcluster. PG2 harbors two distinct subclusters, harboring either spaCBA-srtC1 or Bact1. Our findings provide novel insights on the distribution of biotechnological relevant genes across L. 
rhamnosus population, uncovering intra-species patterns that may bring forth the development of more efficient probiotic products.}, } @article {pmid34697247, year = {2021}, author = {Qiao, Q and Edger, PP and Xue, L and Qiong, L and Lu, J and Zhang, Y and Cao, Q and Yocca, AE and Platts, AE and Knapp, SJ and Van Montagu, M and Van de Peer, Y and Lei, J and Zhang, T}, title = {Evolutionary history and pan-genome dynamics of strawberry (Fragaria spp.).}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {118}, number = {45}, pages = {}, pmid = {34697247}, issn = {1091-6490}, mesh = {*Biological Evolution ; Fragaria/classification/*genetics ; Genetic Variation ; *Genome, Plant ; Phylogeography ; Pigmentation/genetics ; Selection, Genetic ; Whole Genome Sequencing ; }, abstract = {Strawberry (Fragaria spp.) has emerged as a model system for various fundamental and applied research in recent years. In total, the genomes of five different species have been sequenced over the past 10 y. Here, we report chromosome-scale reference genomes for five strawberry species, including three newly sequenced species' genomes, and genome resequencing data for 128 additional accessions to estimate the genetic diversity, structure, and demographic history of key Fragaria species. Our analyses obtained fully resolved and strongly supported phylogenies and divergence times for most diploid strawberry species. These analyses also uncovered a new diploid species (Fragaria emeiensis Jia J. Lei). Finally, we constructed a pan-genome for Fragaria and examined the evolutionary dynamics of gene families. Notably, we identified multiple independent single base mutations of the MYB10 gene associated with white pigmented fruit shared by different strawberry species. 
These reference genomes and datasets, combined with our phylogenetic estimates, should serve as a powerful comparative genomic platform and resource for future studies in strawberry.}, } @article {pmid34696195, year = {2021}, author = {Allemailem, KS}, title = {A Comprehensive Computer Aided Vaccine Design Approach to Propose a Multi-Epitopes Subunit Vaccine against Genus Klebsiella Using Pan-Genomics, Reverse Vaccinology, and Biophysical Techniques.}, journal = {Vaccines}, volume = {9}, number = {10}, pages = {}, pmid = {34696195}, issn = {2076-393X}, support = {Deanship of Scientific Research//Qassim University/ ; }, abstract = {Klebsiella is a genus of nosocomial bacterial pathogens and is placed in the most critical list of World Health Organization (WHO) for development of novel therapeutics. The pathogens of the genus are associated with high mortality and morbidity. Owing to their strong resistance profile against different classes of antibiotics and nonavailability of a licensed vaccine, urgent efforts are required to develop a novel vaccine candidate that can tackle all pathogenic species of the Klebsiella genus. The present study aims to design a broad-spectrum vaccine against all species of the Klebsiella genus with objectives to identify the core proteome of pathogen species, prioritize potential core vaccine proteins, analyze immunoinformatics of the vaccine proteins, construct a multi-epitopes vaccine, and provide its biophysical analysis. Herein, we investigated all reference species of the genus to reveal their core proteome. The core proteins were then subjected to multiple reverse vaccinology checks that are mandatory for the prioritization of potential vaccine candidates. Two proteins (TonB-dependent siderophore receptor and siderophore enterobactin receptor FepA) were found to fulfill all vaccine parameters. 
Both these proteins harbor several potent B-cell-derived T-cell epitopes that are antigenic, nonallergic, nontoxic, virulent, water soluble, IFN-γ producer, and efficient binder of DRB*0101 allele. The selected epitopes were modeled into a multi-epitope peptide comprising linkers and Cholera Toxin B adjuvant. For docking with innate immune and MHC receptors and afterward molecular dynamics simulations and binding free energy analysis, the vaccine structure was modeled for tertiary structure and refined for structural errors. To assess the binding affinity and presentation of the designed vaccine construct, binding mode and interactions analysis were performed using molecular docking and molecular dynamics simulation techniques. These biophysical approaches illustrated the vaccine as a good binder to the immune receptors and revealed robust interactions energies. The vaccine sequence was further translated to nucleotide sequence and cloned into an appropriate vector for expressing it at high rate in Escherichia coli K12 strain. In addition, the vaccine was illustrated to generate a good level of primary, secondary, and tertiary immune responses, proving good immunogenicity of the vaccine. 
Based on the reported results, the vaccine can be a good candidate to be evaluated for effectiveness in wet laboratory validation studies.}, } @article {pmid34694925, year = {2022}, author = {Bendia, AG and Callefo, F and Araújo, MN and Sanchez, E and Teixeira, VC and Vasconcelos, A and Battilani, G and Pellizari, VH and Rodrigues, F and Galante, D}, title = {Metagenome-Assembled Genomes from Monte Cristo Cave (Diamantina, Brazil) Reveal Prokaryotic Lineages As Functional Models for Life on Mars.}, journal = {Astrobiology}, volume = {22}, number = {3}, pages = {293-312}, doi = {10.1089/ast.2021.0016}, pmid = {34694925}, issn = {1557-8070}, mesh = {Brazil ; Caves/microbiology ; *Metagenome ; Metagenomics ; *Microbiota/genetics ; Phylogeny ; }, abstract = {Microbial communities have been explored in various terrestrial subsurface ecosystems, showing metabolic potentials that could generate noteworthy morphological and molecular biosignatures. Recent advancements in bioinformatic tools have allowed for descriptions of novel and yet-to-be cultivated microbial lineages in different ecosystems due to the genome reconstruction approach from metagenomic data. Using shotgun metagenomic data, we obtained metagenome-assembled genomes related to cultivated and yet-to-be cultivated prokaryotic lineages from a silica and iron-rich cave (Monte Cristo) in Minas Gerais State, Brazil. The Monte Cristo Cave has been shown to possess a high diversity of genes involved with different biogeochemical cycles, including reductive and oxidative pathways related to carbon, sulfur, nitrogen, and iron. Three genomes were selected for pangenomic analysis, assigned as Truepera sp., Ca. Methylomirabilis sp., and Ca. Koribacter sp. based on their lifestyles (radiation resistance, anaerobic methane oxidation, and potential iron oxidation). These bacteria exhibit genes involved with multiple DNA repair strategies, starvation, and stress response. 
Because these groups have few reference genomes deposited in databases, our study adds important genomic information about these lineages. The combination of techniques applied in this study allowed us to unveil the potential relationships between microbial genomes and their ecological processes with the cave mineralogy and highlight the lineages involved with anaerobic methane oxidation, iron oxidation, and radiation resistance as functional models for the search for extant life-forms outside our planet in silica- and iron-rich environments and potentially on Mars.}, } @article {pmid34691008, year = {2021}, author = {Oshkin, IY and Danilova, OV and But, SY and Miroshnikov, KK and Suleimanov, RZ and Belova, SE and Tikhonova, EN and Kuznetsov, NN and Khmelenina, VN and Pimenov, NV and Dedysh, SN}, title = {Expanding Characterized Diversity and the Pool of Complete Genome Sequences of Methylococcus Species, the Bacteria of High Environmental and Biotechnological Relevance.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {756830}, pmid = {34691008}, issn = {1664-302X}, abstract = {The bacterial genus Methylococcus, which comprises aerobic thermotolerant methanotrophic cocci, was described half-a-century ago. Over the years, a member of this genus, Methylococcus capsulatus Bath, has become a major model organism to study genomic and metabolic basis of obligate methanotrophy. High biotechnological potential of fast-growing Methylococcus species, mainly as a promising source of feed protein, has also been recognized. Despite this big research attention, the currently cultured Methylococcus diversity is represented by members of the two species, M. capsulatus and M. geothermalis, while finished genome sequences are available only for two strains of these methanotrophs. 
This study extends the pool of phenotypically characterized Methylococcus strains with good-quality genome sequences by contributing four novel isolates of these bacteria from activated sludge, landfill cover soil, and freshwater sediments. The determined genome sizes of novel isolates varied between 3.2 and 4.0Mb. As revealed by the phylogenomic analysis, strains IO1, BH, and KN2 affiliate with M. capsulatus, while strain Mc7 may potentially represent a novel species. Highest temperature optima (45-50°C) and highest growth rates in bioreactor cultures (up to 0.3h[-1]) were recorded for strains obtained from activated sludge. The comparative analysis of all complete genomes of Methylococcus species revealed 4,485 gene clusters. Of these, pan-genome core comprised 2,331 genes (on average 51.9% of each genome), with the accessory genome containing 846 and 1,308 genes in the shell and the cloud, respectively. Independently of the isolation source, all strains of M. capsulatus displayed surprisingly high genome synteny and a striking similarity in gene content. Strain Mc7 from a landfill cover soil differed from other isolates by the high content of mobile genetic elements in the genome and a number of genome-encoded features missing in M. 
capsulatus, such as sucrose biosynthesis and the ability to scavenge phosphorus and sulfur from the environment.}, } @article {pmid34688761, year = {2022}, author = {Yu, C and Wang, H and Blaustein, RA and Guo, L and Ye, Q and Fu, Y and Fan, J and Su, X and Hartmann, EM and Shen, C}, title = {Pangenomic and functional investigations for dormancy and biodegradation features of an organic pollutant-degrading bacterium Rhodococcus biphenylivorans TG9.}, journal = {The Science of the total environment}, volume = {809}, number = {}, pages = {151141}, doi = {10.1016/j.scitotenv.2021.151141}, pmid = {34688761}, issn = {1879-1026}, mesh = {Biodegradation, Environmental ; *Environmental Pollutants ; *Polychlorinated Biphenyls ; *Rhodococcus/genetics ; }, abstract = {Environmental bacteria contain a wealth of untapped potential in the form of biodegradative genes. Leveraging this potential can often be confounded by a lack of understanding of fundamental survival strategies, like dormancy, for environmental stress. Investigating bacterial dormancy-to-degradation relationships enables improvement of bioremediation. Here, we couple genomic and functional assessment to provide context for key attributes of the organic pollutant-degrading strain Rhodococcus biphenylivorans TG9. Whole genome sequencing, pangenome analysis and functional characterization were performed to elucidate important genes and gene products, including antimicrobial resistance, dormancy, and degradation. Rhodococcus as a genus has strong potential for degradation and dormancy, which we demonstrate using R. biphenylivorans TG9 as a model. We identified four Resuscitation-promoting factor (Rpf) encoding genes in TG9 involved in dormancy and resuscitation. We demonstrate that R. biphenylivorans TG9 grows on fourteen typical organic pollutants, and exhibits a robust ability to degrade biphenyl and several congeners of polychlorinated biphenyls. 
We further induced TG9 into a dormant state and demonstrated pronounced differences in morphology and activity. Together, these results expand our understanding of the genus Rhodococcus and the relationship between dormancy and biodegradation in the presence of environmental stressors.}, } @article {pmid34683464, year = {2021}, author = {Whitworth, DE and Sydney, N and Radford, EJ}, title = {Myxobacterial Genomics and Post-Genomics: A Review of Genome Biology, Genome Sequences and Related 'Omics Studies.}, journal = {Microorganisms}, volume = {9}, number = {10}, pages = {}, pmid = {34683464}, issn = {2076-2607}, abstract = {Myxobacteria are fascinating and complex microbes. They prey upon other members of the soil microbiome by secreting antimicrobial proteins and metabolites, and will undergo multicellular development if starved. The genome sequence of the model myxobacterium Myxococcus xanthus DK1622 was published in 2006 and 15 years later, 163 myxobacterial genome sequences have now been made public. This explosion in genomic data has enabled comparative genomics analyses to be performed across the taxon, providing important insights into myxobacterial gene conservation and evolution. The availability of myxobacterial genome sequences has allowed system-wide functional genomic investigations into entire classes of genes. It has also enabled post-genomic technologies to be applied to myxobacteria, including transcriptome analyses (microarrays and RNA-seq), proteome studies (gel-based and gel-free), investigations into protein-DNA interactions (ChIP-seq) and metabolism. Here, we review myxobacterial genome sequencing, and summarise the insights into myxobacterial biology that have emerged as a result. We also outline the application of functional genomics and post-genomic approaches in myxobacterial research, highlighting important findings to emerge from seminal studies. 
The review also provides a comprehensive guide to the genomic datasets available in mid-2021 for myxobacteria (including 24 genomes that we have sequenced and which are described here for the first time).}, } @article {pmid34683386, year = {2021}, author = {Kang, IJ and Kim, KS and Beattie, GA and Yang, JW and Sohn, KH and Heu, S and Hwang, I}, title = {Pan-Genome Analysis of Effectors in Korean Strains of the Soybean Pathogen Xanthomonas citri pv. glycines.}, journal = {Microorganisms}, volume = {9}, number = {10}, pages = {}, pmid = {34683386}, issn = {2076-2607}, support = {PJ01357303//the Cooperative Research Program for Agriculture Science & Technology Development of the Rural Development Administration in the Republic of Korea/ ; PJ01574401//the Next-Generation BioGreen 21 Program of the Rural Development Administration in the Republic of Korea/ ; }, abstract = {Xanthomonas citri pv. glycines is a major pathogen of soybean in Korea. Here, we analyzed pathogenicity genes based on a comparative genome analysis of five Korean strains and one strain from the United States, 8ra. Whereas all six strains had nearly identical profiles of carbohydrate-active enzymes, they varied in diversity and number of candidate type III secretion system effector (T3SE) genes. The five Korean strains were similar in their effectors, but differed from the 8ra strain. Across the six strains, transcription activator-like effectors (TALEs) showed diverse repeat sizes and at least six forms of the repeat variable di-residue (RVD) sequences, with differences not correlated with the origin of the strains. However, a phylogenetic tree based on the alignment of RVD sequences showed two distinct clusters with 17.5 repeats, suggesting that two distinct 17.5 RVD clusters have evolved, potentially to adapt Xcg to growth on distinct soybean cultivars. 
The predicted effector binding elements of the TALEs fell into six groups and were strongly overlapping in sequence, suggesting evolving target specificity of the binding domains in soybean cultivars. Our findings reveal the variability and adaptability of T3SEs in the Xcg strains and enhance our understanding of Xcg pathogenicity in soybean.}, } @article {pmid34680899, year = {2021}, author = {Lu, W and Pei, Z and Zang, M and Lee, YK and Zhao, J and Chen, W and Wang, H and Zhang, H}, title = {Comparative Genomic Analysis of Bifidobacterium bifidum Strains Isolated from Different Niches.}, journal = {Genes}, volume = {12}, number = {10}, pages = {}, pmid = {34680899}, issn = {2073-4425}, mesh = {Adult ; Bifidobacterium bifidum/*genetics/isolation & purification ; Feces/microbiology ; Genes, Bacterial ; Humans ; Infant ; }, abstract = {The potential probiotic benefits of Bifidobacterium bifidum have received increasing attention recently. We used comparative genomic analysis to explore the differences in the genome and the physiological characteristics of B. bifidum isolated from the fecal samples of Chinese adults and infants. The relationships between genotypes and phenotypes were analyzed to assess the effects of isolation sources on the genetic variation of B. bifidum. The phylogenetic tree results indicated that the phylogeny of B. bifidum may be related to the geographical features of its isolation source. B. bifidum was found to have an open pan-genome and a conserved core genome. The genetic diversity of B. bifidum is mainly reflected in carbohydrate metabolism- and immune/competition-related factors, such as the glycoside hydrolase gene family, bacteriocin operons, antibiotic resistance genes, and clustered regularly interspaced short palindromic repeats (CRISPR)-Cas. Additionally, the type III A CRISPR-Cas system was discovered in B. bifidum for the first time. B. 
bifidum strains exhibited niche-specific characteristics, and the results of this study provide an improved understanding of the genetics of this species.}, } @article {pmid34680323, year = {2021}, author = {López-Carrasco, A and Berbegall, AP and Martín-Vañó, S and Blanquer-Maceiras, M and Castel, V and Navarro, S and Noguera, R}, title = {Intra-Tumour Genetic Heterogeneity and Prognosis in High-Risk Neuroblastoma.}, journal = {Cancers}, volume = {13}, number = {20}, pages = {}, pmid = {34680323}, issn = {2072-6694}, support = {FAECC2015/006//Asociación Española Contra el Cáncer/ ; PVR00157//NEN Association - Nico contra el cáncer infantil/ ; PI17/01558 and PI20/01107//Instituto de Salud Carlos III (ISCIII, FIS) and FEDER (European Regional Development Fund)/ ; CB16/12/00484//CIBERONC/ ; }, abstract = {Spatial ITH is defined by genomic and biological variations within a tumour acquired by tumour cell evolution under diverse microenvironments, and its role in NB patient prognosis is understudied. In this work, we applied pangenomic techniques to detect chromosomal aberrations in at least two different areas of each tumour and/or in simultaneously obtained solid and liquid biopsies, detecting ITH in the genomic profile of almost 40% of HR-NB. ITH was better detected when comparing one or more tumour pieces and liquid biopsy (50%) than between different tumour pieces (21%). Interestingly, we found that patients with ITH analysed by pangenomic techniques had a significantly better survival rate than those with non-heterogeneous tumours, especially in cases without MYCN amplification. Moreover, all patients in the studied cohort with high ITH (defined as 50% or more genomic aberration differences between areas of a tumour or simultaneously obtained samples) survived after 48 months. 
These results clearly support analysing at least two solid tumour areas (separately or mixed) and liquid samples to provide more accurate genomic diagnosis, prognosis and therapy options in HR-NB.}, } @article {pmid34678108, year = {2021}, author = {Lamback, EB and Wildemberg, LE and Gadelha, MR}, title = {Current opinion on the diagnosis and management of non-functioning pituitary adenomas.}, journal = {Expert review of endocrinology & metabolism}, volume = {16}, number = {6}, pages = {309-320}, doi = {10.1080/17446651.2021.1988851}, pmid = {34678108}, issn = {1744-8417}, mesh = {*Adenoma/diagnosis/therapy ; Humans ; *Pituitary Neoplasms/diagnosis/therapy ; Treatment Outcome ; }, abstract = {INTRODUCTION: Non-functioning pituitary adenomas (NFPAs) are clinically silent tumors and the second most common pituitary adenoma. Surgery is the mainstay of treatment as there is, as yet, no effective medical treatment.

AREAS COVERED: We present current knowledge on the clinical diagnosis, histopathological classification, molecular data, and management strategies in NFPA.

EXPERT OPINION: NFPA is a heterogeneous group of tumors, in respect to their origin and clinical course. In recent years, research on pathology and molecular biology have advanced our knowledge of NFPA pathogenesis. NFPA exhibit, in the majority of cases, an indolent behavior, with satisfactory response to treatment. In aggressive cases, multimodal management is needed; however, even this approach may be insufficient, so the development of new treatments is warranted for better management. In this setting, the understanding of the mechanisms involved in the genesis and progression of NFPA is crucial for the identification and development of directed treatments with higher chances of response.}, } @article {pmid34674747, year = {2021}, author = {Sandholt, AKS and Neimanis, A and Roos, A and Eriksson, J and Söderlund, R}, title = {Genomic signatures of host adaptation in group B Salmonella enterica ST416/ST417 from harbour porpoises.}, journal = {Veterinary research}, volume = {52}, number = {1}, pages = {134}, pmid = {34674747}, issn = {1297-9716}, mesh = {Animals ; *Genome, Bacterial ; *Host Adaptation ; *Phocoena ; Salmonella Infections, Animal/*microbiology ; Salmonella enterica/*genetics ; }, abstract = {A type of monophasic group B Salmonella enterica with the antigenic formula 4,12:a:- ("Fulica-like") has been described as associated with harbour porpoises (Phocoena phocoena), most frequently recovered from lung samples. In the present study, lung tissue samples from 47 porpoises found along the Swedish coast or as bycatch in fishing nets were analysed, two of which were positive for S. enterica. Pneumonia due to the infection was considered the likely cause of death for one of the two animals. The recovered isolates were whole genome sequenced and found to belong to sequence type (ST) 416 and to be closely related to ST416/ST417 porpoise isolates from UK waters as determined by core-genome MLST. 
Serovars Bispebjerg, Fulica and Abortusequi were identified as distantly related to the porpoise isolates, but no close relatives from other host species were found. All ST416/417 isolates had extensive loss of function mutations in key Salmonella pathogenicity islands, but carried accessory genetic elements associated with extraintestinal infection such as iron uptake systems. Gene ontology and pathway analysis revealed reduced secondary metabolic capabilities and loss of function in terms of signalling and response to environmental cues, consistent with adaptation for the extraintestinal niche. A classification system based on machine learning identified ST416/417 as more invasive than classical gastrointestinal serovars. Genome analysis results are thus consistent with ST416/417 as a host-adapted and extraintestinal clonal population of S. enterica, which while found in porpoises without associated pathology can also cause severe opportunistic infections.}, } @article {pmid34672955, year = {2021}, author = {Kirsche, M and Schatz, MC}, title = {Democratizing long-read genome assembly.}, journal = {Cell systems}, volume = {12}, number = {10}, pages = {945-947}, doi = {10.1016/j.cels.2021.09.010}, pmid = {34672955}, issn = {2405-4720}, mesh = {Genome, Human/genetics ; *Genomics ; *High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; }, abstract = {De novo assembled genomes serve as the backbone for modern genomics. In an article in this issue of Cell Systems, Ekim et al. 
present the mdBG assembler that can assemble genomes 100-fold faster than previous methods, including a human genome in under 10 min, which unlocks pan-genomics for many species.}, } @article {pmid34671195, year = {2021}, author = {Jiao, D and Dong, X and Yu, Y and Wei, C}, title = {Gene Presence/Absence Variation analysis of coronavirus family displays its pan-genomic diversity.}, journal = {International journal of biological sciences}, volume = {17}, number = {14}, pages = {3717-3727}, pmid = {34671195}, issn = {1449-2288}, mesh = {Coronaviridae/*genetics ; *Genome, Viral ; *Phylogeny ; }, abstract = {SARS-CoV-2 belongs to the coronavirus family. Comparing genomic features of viral genomes of coronavirus family can improve our understanding about SARS-CoV-2. Here we present the first pan-genome analysis of 3,932 whole genomes of 101 species out of 4 genera from the coronavirus family. We found a total of 181 genes in the pan-genome of coronavirus family, among which only 3 genes, the S gene, M gene and N gene, are highly conserved. We also constructed a pan-genome from 23,539 whole genomes of SARS-CoV-2. There are 13 genes in total in the SARS-CoV-2 pan-genome. All of the 13 genes are core genes for SARS-CoV-2. The pan-genome of coronaviruses shows a lower level of diversity than the pan-genomes of other RNA viruses, which contain no core gene. 
The three highly conserved genes in coronavirus family, which are also core genes in SARS-CoV-2 pan-genome, could be potential targets in developing nucleic acid diagnostic reagents with a decreased possibility of cross-reaction with other coronavirus species.}, } @article {pmid34669449, year = {2022}, author = {Rodrigues, RAL and Queiroz, VF and Ghosh, J and Dunigan, DD and Van Etten, JL}, title = {Functional Genomic Analyses Reveal an Open Pan-genome for the Chloroviruses and a Potential for Genetic Innovation in New Isolates.}, journal = {Journal of virology}, volume = {96}, number = {2}, pages = {e0136721}, pmid = {34669449}, issn = {1098-5514}, mesh = {Chlorella/classification/virology ; DNA, Viral/genetics ; Genetic Variation ; *Genome, Viral/genetics ; Genomics ; Host Specificity ; Phycodnaviridae/classification/*genetics/isolation & purification ; Phylogeny ; Viral Proteins/genetics ; }, abstract = {Chloroviruses (family Phycodnaviridae) are large double-stranded DNA (dsDNA) viruses that infect unicellular green algae present in inland waters. These viruses have been isolated using three main chlorella-like green algal host cells, traditionally called NC64A, SAG, and Pbi, revealing extensive genetic diversity. In this study, we performed a functional genomic analysis on 36 chloroviruses that infected the three different hosts. Phylogenetic reconstruction based on the DNA polymerase B family gene clustered the chloroviruses into three distinct clades. The viral pan-genome consists of 1,345 clusters of orthologous groups of genes (COGs), with 126 COGs conserved in all viruses. Totals of 368, 268, and 265 COGs are found exclusively in viruses that infect NC64A, SAG, and Pbi algal hosts, respectively. Two-thirds of the COGs have no known function, constituting the "dark pan-genome" of chloroviruses, and further studies focusing on these genes may identify important novelties. 
The proportions of functionally characterized COGs composing the pan-genome and the core-genome are similar, but those related to transcription and RNA processing, protein metabolism, and virion morphogenesis are at least 4-fold more represented in the core genome. Bipartite network construction evidencing the COG sharing among host-specific viruses identified 270 COGs shared by at least one virus from each of the different host groups. Finally, our results reveal an open pan-genome for chloroviruses and a well-established core genome, indicating that the isolation of new chloroviruses can be a valuable source of genetic discovery. IMPORTANCE Chloroviruses are large dsDNA viruses that infect unicellular green algae distributed worldwide in freshwater environments. They comprise a genetically diverse group of viruses; however, a comprehensive investigation of the genomic evolution of these viruses is still missing. Here, we performed a functional pan-genome analysis comprising 36 chloroviruses associated with three different algal hosts in the family Chlorellaceae, referred to as zoochlorellae because of their endosymbiotic lifestyle. We identified a set of 126 highly conserved genes, most of which are related to essential functions in the viral replicative cycle. Several genes are unique to distinct isolates, resulting in an open pan-genome for chloroviruses. This profile is associated with generalist organisms, and new insights into the evolution and ecology of chloroviruses are presented. 
Ultimately, our results highlight the potential for genetic diversity in new isolates.}, } @article {pmid34668097, year = {2021}, author = {González-Castillo, A and Carballo, JL and Bautista-Guerrero, E}, title = {Genomics and phylogeny of the proposed phylum 'Candidatus Poribacteria' associated with the excavating sponge Thoosa mismalolli.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {12}, pages = {2163-2174}, pmid = {34668097}, issn = {1572-9699}, support = {254806//CONACYT-SEP/ ; }, mesh = {Animals ; *Bacteria/genetics ; Genomics ; Metagenome ; *Microbiota ; Phylogeny ; }, abstract = {Members of the proposed phylum 'Candidatus Poribacteria' are among the most abundant microorganisms in the highly diverse microbiome of the sponge mesohyl. Genomic and phylogenetic characteristics of this proposed phylum are barely known. In this study, we analyzed metagenome-assembled genomes (MAGs) obtained from the coral reef excavating sponge Thoosa mismalolli from the Mexican Pacific Ocean. Two MAGs were extracted and analyzed together with 32 MAGs and single-amplified genomes (SAGs) obtained from NCBI. The phylogenetic tree based on the sequences of 139 single-copy genes (SCG) showed two clades. Clade A (23 genomes) represented 67.7% of the total of the genomes, while clade B (11 genomes) comprised 32.3% of the genomes. The Average Nucleotide Identity (ANI) showed values between 66 and 99% for the genomes of the proposed phylum, and the pangenome of genomes revealed a total of 37,234 genes that included 1722 core genes. The number of genes used in the phylogenetic analysis increased from 28 (previous studies) to 139 (this study), which allowed a better resolution of the phylogeny of the proposed phylum. The results supported the two previously described classes, 'Candidatus Entoporibacteria' and 'Candidatus Pelagiporibacteria', and the genomes SB0101 and SB0202 obtained in this study belong to two new species of the class 'Candidatus Entoporibacteria'. 
This is the first comparative study that includes MAGs from a non-sponge host (Porites lutea) to elucidate the taxonomy of the poorly known Candidatus phylum in a polyphasic approach. Finally, our study also contributes to the sponge microbiome project by reporting the first MAGs of the proposed phylum 'Candidatus Poribacteria' isolated from the excavating sponge T. mismalolli.}, } @article {pmid34665261, year = {2021}, author = {Douglas, GM and Shapiro, BJ}, title = {Genic Selection Within Prokaryotic Pangenomes.}, journal = {Genome biology and evolution}, volume = {13}, number = {11}, pages = {}, pmid = {34665261}, issn = {1759-6653}, mesh = {*Evolution, Molecular ; *Prokaryotic Cells ; Selection, Genetic ; }, abstract = {Understanding the evolutionary forces shaping prokaryotic pangenome structure is a major goal of microbial evolution research. Recent work has highlighted that a substantial proportion of accessory genes appear to confer niche-specific adaptations. This work has primarily focused on selection acting at the level of individual cells. Herein, we discuss a lower level of selection that also contributes to pangenome variation: genic selection. This refers to cases where genetic elements, rather than individual cells, are the entities under selection. The clearest examples of this form of selection are selfish mobile genetic elements, which are those that have either a neutral or a deleterious effect on host fitness. We review the major classes of these and other mobile elements and discuss the characteristic features of such elements that could be under genic selection. We also discuss how genetic elements that are beneficial to hosts can also be under genic selection, a scenario that may be more prevalent but not widely appreciated, because disentangling the effects of selection at different levels (i.e., organisms vs. genes) is challenging. 
Nonetheless, an appreciation for the potential action and implications of genic selection is important to better understand the evolution of prokaryotic pangenomes.}, } @article {pmid34659348, year = {2021}, author = {Liu, H and Prajapati, V and Prajapati, S and Bais, H and Lu, J}, title = {Comparative Genome Analysis of Bacillus amyloliquefaciens Focusing on Phylogenomics, Functional Traits, and Prevalence of Antimicrobial and Virulence Genes.}, journal = {Frontiers in genetics}, volume = {12}, number = {}, pages = {724217}, pmid = {34659348}, issn = {1664-8021}, abstract = {Bacillus amyloliquefaciens is a gram-positive, nonpathogenic, endospore-forming, member of a group of free-living soil bacteria with a variety of traits including plant growth promotion, production of antifungal and antibacterial metabolites, and production of industrially important enzymes. We have attempted to reconstruct the biogeographical structure according to functional traits and the evolutionary lineage of B. amyloliquefaciens using comparative genomics analysis. All the available 96 genomes of B. amyloliquefaciens strains were curated from the NCBI genome database, having a variety of important functionalities in all sectors keeping a high focus on agricultural aspects. In-depth analysis was carried out to deduce the orthologous gene groups and whole-genome similarity. Pan genome analysis revealed that shell genes, soft core genes, core genes, and cloud genes comprise 17.09, 5.48, 8.96, and 68.47%, respectively, which demonstrates that genomes are very different in the gene content. It also indicates that the strains may have flexible environmental adaptability or versatile functions. Phylogenetic analysis showed that B. amyloliquefaciens is divided into two clades, and clade 2 is further divided into two different clusters. This reflects the difference in the sequence similarity and diversification that happened in the B. amyloliquefaciens genome. 
The majority of plant-associated strains of B. amyloliquefaciens were grouped in clade 2 (73 strains), while food-associated strains were in clade 1 (23 strains). Genome mining has been adopted to deduce antimicrobial resistance and virulence genes and their prevalence among all strains. The genes tmrB and yuaB codes for tunicamycin resistance protein and hydrophobic coat forming protein only exist in clade 2, while clpP, which codes for serine proteases, is only in clade 1. Genome plasticity of all strains of B. amyloliquefaciens reflects their adaption to different niches.}, } @article {pmid34659160, year = {2021}, author = {Thomas, P and Abdel-Glil, MY and Eichhorn, I and Semmler, T and Werckenthin, C and Baumbach, C and Murmann, W and Bodenthin-Drauschke, A and Zimmermann, P and Schotte, U and Galante, D and Slavic, D and Wagner, M and Wieler, LH and Neubauer, H and Seyboldt, C}, title = {Genome Sequence Analysis of Clostridium chauvoei Strains of European Origin and Evaluation of Typing Options for Outbreak Investigations.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {732106}, pmid = {34659160}, issn = {1664-302X}, abstract = {Black quarter caused by Clostridium (C.) chauvoei is an important bacterial disease that affects cattle and sheep with high mortality. A comparative genomics analysis of 64 C. chauvoei strains, most of European origin and a few of non-European and unknown origin, was performed. The pangenome analysis showed limited new gene acquisition for the species. The accessory genome involved prophages and genomic islands, with variations in gene composition observed in a few strains. This limited accessory genome may indicate that the species replicates only in the host or that an active CRISPR/Cas system provides immunity to foreign genetic elements. All strains contained a CRISPR type I-B system and it was confirmed that the unique spacer sequences therein can be used to differentiate strains. 
Homologous recombination events, which may have contributed to the evolution of this pathogen, were less frequent compared to other related species from the genus. Pangenome single nucleotide polymorphism (SNP) based phylogeny and clustering indicate diverse clusters related to geographical origin. Interestingly the identified SNPs were mostly non-synonymous. The study demonstrates the possibility of the existence of polymorphic populations in one host, based on strain variability observed for strains from the same animal and strains from different animals of one outbreak. The study also demonstrates that new outbreak strains are mostly related to earlier outbreak strains from the same farm/region. This indicates the last common ancestor strain from one farm can be crucial to understand the genetic changes and epidemiology occurring at farm level. Known virulence factors for the species were highly conserved among the strains. Genetic elements involved in Nicotinamide adenine dinucleotide (NAD) precursor synthesis (via nadA, nadB, and nadC metabolic pathway) which are known as potential anti-virulence loci are completely absent in C. chauvoei compared to the partial inactivation in C. septicum. A novel core-genome MLST based typing method was compared to sequence typing based on CRISPR spacers to evaluate the usefulness of the methods for outbreak investigations.}, } @article {pmid34659136, year = {2021}, author = {Caicedo-Montoya, C and Manzo-Ruiz, M and Ríos-Estepa, R}, title = {Pan-Genome of the Genus Streptomyces and Prioritization of Biosynthetic Gene Clusters With Potential to Produce Antibiotic Compounds.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {677558}, pmid = {34659136}, issn = {1664-302X}, abstract = {Species of the genus Streptomyces are known for their ability to produce multiple secondary metabolites; their genomes have been extensively explored to discover new bioactive compounds. 
The richness of genomic data currently available allows filtering for high quality genomes, which in turn permits reliable comparative genomics studies and an improved prediction of biosynthetic gene clusters (BGCs) through genome mining approaches. In this work, we used 121 genome sequences of the genus Streptomyces in a comparative genomics study with the aim of estimating the genomic diversity by protein domains content, sequence similarity of proteins and conservation of Intergenic Regions (IGRs). We also searched for BGCs but prioritizing those with potential antibiotic activity. Our analysis revealed that the pan-genome of the genus Streptomyces is clearly open, with a high quantity of unique gene families across the different species and that the IGRs are rarely conserved. We also described the phylogenetic relationships of the analyzed genomes using multiple markers, obtaining a trustworthy tree whose relationships were further validated by Average Nucleotide Identity (ANI) calculations. Finally, 33 biosynthetic gene clusters were detected to have potential antibiotic activity and a predicted mode of action, which might serve up as a guide to formulation of related experimental studies.}, } @article {pmid34656081, year = {2021}, author = {Liang, Q and Lonardi, S}, title = {Reference-agnostic representation and visualization of pan-genomes.}, journal = {BMC bioinformatics}, volume = {22}, number = {1}, pages = {502}, pmid = {34656081}, issn = {1471-2105}, support = {IOS-1543963//National Science Foundation/ ; IIS-1814359//National Science Foundation/ ; }, mesh = {Computational Biology ; *Genome ; *Genomics ; Humans ; Software ; }, abstract = {BACKGROUND: The pan-genome of a species is the union of the genes and non-coding sequences present in all individuals (cultivar, accessions, or strains) within that species.

RESULTS: Here we introduce PGV, a reference-agnostic representation of the pan-genome of a species based on the notion of consensus ordering. Our experimental results demonstrate that PGV enables an intuitive, effective and interactive visualization of a pan-genome by providing a genome browser that can elucidate complex structural genomic variations.

CONCLUSIONS: The PGV software can be installed via conda or downloaded from https://github.com/ucrbioinfo/PGV . The companion PGV browser at http://pgv.cs.ucr.edu can be tested using example bed tracks available from the GitHub page.}, } @article {pmid34650545, year = {2021}, author = {Yuan, S and Wang, Y and Zhao, F and Kang, L}, title = {Complete Genome Sequence of Weissella confusa LM1 and Comparative Genomic Analysis.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {749218}, pmid = {34650545}, issn = {1664-302X}, abstract = {The genus Weissella is attracting an increasing amount of attention because of its multiple functions and probiotic potential. In particular, the species Weissella confusa is known to have great potential in industrial applications and exhibits numerous biological functions. However, the knowledge on this bacterium in insects is not investigated. Here, we isolated and identified W. confusa as the dominant lactic acid bacteria in the gut of the migratory locust. We named this strain W. confusa LM1, which is the first genome of an insect-derived W. confusa strain with one complete chromosome and one complete plasmid. Among all W. confusa strains, W. confusa LM1 had the largest genome. Its genome was the closest to that of W. confusa 1001271B_151109_G12, a strain from human feces. Our results provided accurate evolutionary relationships of known Weissella species and W. confusa strains. Based on genomic analysis, the pan-genome of W. confusa is in an open state. Most strains of W. confusa had the unique genes, indicating that these strains can adapt to different ecological niches and organisms. However, the variation of strain-specific genes did represent significant correlations with their hosts and ecological niches. These strains were predicted to have low potential to produce secondary metabolites. Furthermore, no antibiotic resistance genes were identified. 
At the same time, virulence factors associated with toxin production and secretion system were not found, indicating that W. confusa strains were not sufficient to perform virulence. Our study facilitated the discovery of the functions of W. confusa LM1 in locust biology and their potential application to locust management.}, } @article {pmid34646305, year = {2021}, author = {Zeng, Q and Xie, J and Li, Y and Gao, T and Zhang, X and Wang, Q}, title = {Comprehensive Genomic Analysis of the Endophytic Bacillus altitudinis Strain GLB197, a Potential Biocontrol Agent of Grape Downy Mildew.}, journal = {Frontiers in genetics}, volume = {12}, number = {}, pages = {729603}, pmid = {34646305}, issn = {1664-8021}, abstract = {Bacillus has been extensively studied for agricultural application as a biocontrol agent. B. altitudinis GLB197, an endophytic bacterium isolated from grape leaves, exhibits distinctive inhibition to grape downy mildew based on unknown mechanisms. To determine the genetic traits involved in the mechanism of biocontrol and host-interaction traits, the genome sequence of GLB197 was obtained and further analyzed. The genome of B. altitudinis GLB197 consisted of one plasmid and a 3,733,835-bp circular chromosome with 41.56% G + C content, containing 3,770 protein-coding genes. Phylogenetic analysis of 17 Bacillus strains using the concatenated 1,226 single-copy core genes divided into different clusters was conducted. In addition, average nucleotide identity (ANI) values indicate that the current taxonomy of some B. pumilus group strains is incorrect. Comparative analysis of B. altitudinis GLB197 proteins with other B. altitudinis strains identified 3,157 core genes. Furthermore, we found that the pan-genome of B. altitudinis is open. The genome of B. altitudinis GLB197 contains one nonribosomal peptide synthetase (NRPS) gene cluster which was annotated as lichenysin. Interestingly, the cluster in B. 
altitudinis has two more genes than other Bacillus strains (lgrD and lgrB). The two genes were probably obtained via horizontal gene transfer (HGT) during the evolutionary process from Brevibacillus. Taken together, these observations enable the future application of B. altitudinis GLB197 as a biocontrol agent for control of grape downy mildew and promote our understanding of the beneficial interactions between B. altitudinis GLB197 and plants.}, } @article {pmid34646248, year = {2021}, author = {Lisotto, P and Couto, N and Rosema, S and Lokate, M and Zhou, X and Bathoorn, E and Harmsen, HJM and Friedrich, AW and Rossen, JWA and Chlebowicz-Fliss, MA}, title = {Molecular Characterisation of Vancomycin-Resistant Enterococcus faecium Isolates Belonging to the Lineage ST117/CT24 Causing Hospital Outbreaks.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {728356}, pmid = {34646248}, issn = {1664-302X}, abstract = {Background: Vancomycin-resistant Enterococcus faecium (VREfm) is a successful nosocomial pathogen. The current molecular method recommended in the Netherlands for VREfm typing is based on core genome Multilocus sequence typing (cgMLST), however, the rapid emergence of specific VREfm lineages challenges distinguishing outbreak isolates solely based on their core genome. Here, we explored if a detailed molecular characterisation of mobile genetic elements (MGEs) and accessory genes could support and expand the current molecular typing of VREfm isolates sharing the same genetic background, enhancing the discriminatory power of the analysis. Materials/Methods: The genomes of 39 VREfm and three vancomycin-susceptible E. faecium (VSEfm) isolates belonging to ST117/CT24, as assessed by cgMLST, were retrospectively analysed. The isolates were collected from patients and environmental samples from 2011 to 2017, and their genomes were analysed using short-read sequencing. 
Pangenome analysis was performed on de novo assemblies, which were also screened for known predicted virulence factors, antimicrobial resistance genes, bacteriocins, and prophages. Two representative isolates were also sequenced using long-read sequencing, which allowed a detailed analysis of their plasmid content. Results: The cgMLST analysis showed that the isolates were closely related, with a minimal allelic difference of 10 between each cluster's closest related isolates. The vanB-carrying transposon Tn1549 was present in all VREfm isolates. However, in our data, we observed independent acquisitions of this transposon. The pangenome analysis revealed differences in the accessory genes related to prophages and bacteriocins content, whilst a similar profile was observed for known predicted virulence and resistance genes. Conclusion: In the case of closely related isolates sharing a similar genetic background, a detailed analysis of MGEs and the integration point of the vanB-carrying transposon allows the discriminatory power to be increased compared to the use of cgMLST alone, thus enabling the identification of epidemiological links amongst hospitalised patients.}, } @article {pmid34638661, year = {2021}, author = {Chmielowska, C and Korsak, D and Chapkauskaitse, E and Decewicz, P and Lasek, R and Szuplewska, M and Bartosik, D}, title = {Plasmidome of Listeria spp.-The repA-Family Business.}, journal = {International journal of molecular sciences}, volume = {22}, number = {19}, pages = {}, pmid = {34638661}, issn = {1422-0067}, support = {UMO-2016/21/B/NZ8/00383//National Science Centre, Poland/ ; }, mesh = {DNA Transposable Elements/genetics ; Firmicutes/genetics ; Genes, Bacterial/*genetics ; Genome, Bacterial/genetics ; Listeria/*genetics ; Plasmids/*genetics ; Replicon/genetics ; }, abstract = {Bacteria of the genus Listeria (phylum Firmicutes) include both human and animal pathogens, as well as saprophytic strains. A common component of Listeria spp. 
genomes are plasmids, i.e., extrachromosomal replicons that contribute to gene flux in bacteria. This study provides an in-depth insight into the structure, diversity and evolution of plasmids occurring in Listeria strains inhabiting various environments under different anthropogenic pressures. Apart from the components of the conserved plasmid backbone (providing replication, stable maintenance and conjugational transfer functions), these replicons contain numerous adaptive genes possibly involved in: (i) resistance to antibiotics, heavy metals, metalloids and sanitizers, and (ii) responses to heat, oxidative, acid and high salinity stressors. Their genomes are also enriched by numerous transposable elements, which have influenced the plasmid architecture. The plasmidome of Listeria is dominated by a group of related replicons encoding the RepA replication initiation protein. Detailed comparative analyses provide valuable data on the level of conservation of these replicons and their role in shaping the structure of the Listeria pangenome, as well as their relationship to plasmids of other genera of Firmicutes, which demonstrates the range and direction of flow of genetic information in this important group of bacteria.}, } @article {pmid34633851, year = {2022}, author = {Schildkraut, JA and Coolen, JPM and Burbaud, S and Sangen, JJN and Kwint, MP and Floto, RA and Op den Camp, HJM and Te Brake, LHM and Wertheim, HFL and Neveling, K and Hoefsloot, W and van Ingen, J}, title = {RNA Sequencing Elucidates Drug-Specific Mechanisms of Antibiotic Tolerance and Resistance in Mycobacterium abscessus.}, journal = {Antimicrobial agents and chemotherapy}, volume = {66}, number = {1}, pages = {e0150921}, pmid = {34633851}, issn = {1098-6596}, mesh = {Anti-Bacterial Agents/pharmacology/therapeutic use ; Clarithromycin/pharmacology ; Humans ; Kinetics ; Microbial Sensitivity Tests ; *Mycobacterium Infections, Nontuberculous/drug therapy ; *Mycobacterium abscessus/genetics ; RNA 
; Sequence Analysis, RNA ; }, abstract = {Mycobacterium abscessus is an opportunistic pathogen notorious for its resistance to most classes of antibiotics and low cure rates. M. abscessus carries an array of mostly unexplored defense mechanisms. A deeper understanding of antibiotic resistance and tolerance mechanisms is pivotal in development of targeted therapeutic regimens. We provide the first description of all major transcriptional mechanisms of tolerance to all antibiotics recommended in current guidelines, using RNA sequencing-guided experiments. M. abscessus ATCC 19977 bacteria were subjected to subinhibitory concentrations of clarithromycin (CLR), amikacin (AMK), tigecycline (TIG), cefoxitin (FOX), and clofazimine (CFZ) for 4 and 24 h, followed by RNA sequencing. To confirm key mechanisms of tolerance suggested by transcriptomic responses, we performed time-kill kinetic analysis using bacteria after preexposure to CLR, AMK, or TIG for 24 h and constructed isogenic knockout and knockdown strains. To assess strain specificity, pan-genome analysis of 35 strains from all three subspecies was performed. Mycobacterium abscessus shows both drug-specific and common transcriptomic responses to antibiotic exposure. Ribosome-targeting antibiotics CLR, AMK, and TIG elicit a common response characterized by upregulation of ribosome structural genes, the WhiB7 regulon and transferases, accompanied by downregulation of respiration through NuoA-N. Exposure to any of these drugs decreases susceptibility to ribosome-targeting drugs from multiple classes. The cytochrome bd-type quinol oxidase contributes to CFZ tolerance in M. abscessus, and the sigma factor sigH but not antisigma factor MAB_3542c is involved in TIG resistance. 
The observed transcriptomic responses are not strain-specific, as all genes involved in tolerance, except erm(41), are found in all included strains.}, } @article {pmid34632414, year = {2021}, author = {Ferrés, I and Iraola, G}, title = {Protocol for post-processing of bacterial pangenome data using Pagoo pipeline.}, journal = {STAR protocols}, volume = {2}, number = {4}, pages = {100802}, pmid = {34632414}, issn = {2666-1667}, mesh = {Bacteria/genetics ; Databases, Genetic ; Genome, Bacterial/*genetics ; Genomics/*methods ; *Software ; }, abstract = {Multiple downstream analyses are necessary to interpret the output of bacterial pangenome reconstruction software. This requires integrating diverse kinds of genetic and phenotypic data, which to date are left to each user's criterion. To fill this gap, we created Pagoo, a pangenome post-processing tool that leverages a standardized but flexible and extensible framework for data integration, analysis, and storage. Here, we provide the protocol for running Pagoo and performing from simple to more complex comparative analyses on bacterial pangenome data. 
For complete details on the use and execution of this protocol, please refer to Ferrés and Iraola (2021).}, } @article {pmid34621504, year = {2021}, author = {Mc Cartney, AM and Mahmoud, M and Jochum, M and Agustinho, DP and Zorman, B and Al Khleifat, A and Dabbaghie, F and Kesharwani, RK and Smolka, M and Dawood, M and Albin, D and Aliyev, E and Almabrazi, H and Arslan, A and Balaji, A and Behera, S and Billingsley, K and Cameron, DL and Daw, J and Dawson, ET and De Coster, W and Du, H and Dunn, C and Esteban, R and Jolly, A and Kalra, D and Liao, C and Liu, Y and Lu, TY and Havrilla, JM and Khayat, MM and Marin, M and Monlong, J and Price, S and Rafael Gener, A and Ren, J and Sagayaradj, S and Sapoval, N and Sinner, C and Soto, DC and Soylev, A and Subramaniyan, A and Syed, N and Tadimeti, N and Tater, P and Vats, P and Vaughn, J and Walker, K and Wang, G and Zeng, Q and Zhang, S and Zhao, T and Kille, B and Biederstedt, E and Chaisson, M and English, A and Kronenberg, Z and Treangen, TJ and Hefferon, T and Chin, CS and Busby, B and Sedlazeck, FJ}, title = {An international virtual hackathon to build tools for the analysis of structural variants within species ranging from coronaviruses to vertebrates.}, journal = {F1000Research}, volume = {10}, number = {}, pages = {246}, pmid = {34621504}, issn = {2046-1402}, support = {UM1 HG008898/HG/NHGRI NIH HHS/United States ; T32 HD098068/HD/NICHD NIH HHS/United States ; R01 GM132713/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; *COVID-19 ; Genome, Viral ; Humans ; *SARS-CoV-2 ; Vertebrates ; }, abstract = {In October 2020, 62 scientists from nine nations worked together remotely in the Second Baylor College of Medicine & DNAnexus hackathon, focusing on different related topics on Structural Variation, Pan-genomes, and SARS-CoV-2 related research. The overarching focus was to assess the current status of the field and identify the remaining challenges. 
A further aim was to determine how to combine the strengths of the different interests to drive research and method development forward. Over the four days, eight groups each designed and developed new open-source methods to improve the identification and analysis of variations among species, including humans and SARS-CoV-2. These included improvements in SV calling, genotyping, annotation and filtering, together with advancements in benchmarking existing methods. Furthermore, groups focused on the diversity of SARS-CoV-2. Daily discussion summaries and methods are publicly available at https://github.com/collaborativebioinformatics, providing valuable insights for both participants and the research community.}, } @article {pmid34623430, year = {2021}, author = {Fernández-de-Bobadilla, MD and Talavera-Rodríguez, A and Chacón, L and Baquero, F and Coque, TM and Lanza, VF}, title = {PATO: Pangenome Analysis Toolkit.}, journal = {Bioinformatics (Oxford, England)}, volume = {37}, number = {23}, pages = {4564-4566}, doi = {10.1093/bioinformatics/btab697}, pmid = {34623430}, issn = {1367-4811}, mesh = {Phylogeny ; *Software ; *Genome ; Gene Regulatory Networks ; }, abstract = {MOTIVATION: We present the Pangenome Analysis Toolkit (PATO) designed to simultaneously analyze thousands of genomes using a desktop computer. The tool performs common tasks of pangenome analysis such as core-genome definition and accessory genome properties and includes new features that help characterize population structure, annotate pathogenic features and create gene sharedness networks. PATO has been developed in R to integrate with the large set of tools available for genetic, phylogenetic and statistical analysis in this environment.

RESULTS: PATO can perform the most demanding bioinformatic analyses in minutes with an accuracy comparable to state-of-the-art software but 20-30× faster. PATO also integrates all the necessary functions for the complete analysis of the most common objectives in microbiology studies. Finally, PATO includes the necessary tools for visualizing the results and can be integrated with other analytical packages available in R.

AVAILABILITY AND IMPLEMENTATION: The source code for PATO is freely available at https://github.com/irycisBioinfo/PATO under the GPLv3 license.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid34621261, year = {2021}, author = {Yang, SM and Kim, E and Kim, D and Kim, HB and Baek, J and Ko, S and Kim, D and Yoon, H and Kim, HY}, title = {Rapid Real-Time Polymerase Chain Reaction for Salmonella Serotyping Based on Novel Unique Gene Markers by Pangenome Analysis.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {750379}, pmid = {34621261}, issn = {1664-302X}, abstract = {An accurate diagnostic method for Salmonella serovars is fundamental to preventing the spread of associated diseases. A diagnostic polymerase chain reaction (PCR)-based method has proven to be an effective tool for detecting pathogenic bacteria. However, the gene markers currently used in real-time PCR to detect Salmonella serovars have low specificity and are developed for only a few serovars. Therefore, in this study, we explored the novel unique gene markers for 60 serovars that share similar antigenic formulas and show high prevalence using pangenome analysis and developed a real-time PCR to detect them. Before exploring gene markers, the 535 Salmonella genomes were evaluated, and some genomes had serovars different from the designated serovar information. Based on these analyses, serovar-specific gene markers were explored. These markers were identified as genes present in all strains of target serovar genomes but absent in strains of other serovar genomes. Serovar-specific primer pairs were designed from the gene markers, and a real-time PCR method that can distinguish between 60 of the most common Salmonella serovars in a single 96-well plate assay was developed. As a result, real-time PCR showed 100% specificity for 199 Salmonella and 29 non-Salmonella strains. 
Subsequently, the method developed was applied successfully to both strains with identified serovars and an unknown strain, demonstrating that real-time PCR can accurately detect serovars of strains compared with traditional serotyping methods, such as antisera agglutination. Therefore, our method enables rapid and economical Salmonella serotyping compared with the traditional serotyping method.}, } @article {pmid34113437, year = {2021}, author = {Sutton, G and Fogel, GB and Abramson, B and Brinkac, L and Michael, T and Liu, ES and Thomas, S}, title = {A pan-genome method to determine core regions of the Bacillus subtilis and Escherichia coli genomes.}, journal = {F1000Research}, volume = {10}, number = {}, pages = {286}, pmid = {34113437}, issn = {2046-1402}, mesh = {*Bacillus subtilis/genetics ; Computational Biology ; *Escherichia coli/genetics ; Genome, Bacterial/genetics ; }, abstract = {Background: Synthetic engineering of bacteria to produce industrial products is a burgeoning field of research and application. In order to optimize genome design, designers need to understand which genes are essential, which are optimal for growth, and locations in the genome that will be tolerated by the organism when inserting engineered cassettes. Methods: We present a pan-genome based method for the identification of core regions in a genome that are strongly conserved at the species level. Results: We show that the core regions determined by our method contain all or almost all essential genes. This demonstrates the accuracy of our method as essential genes should be core genes. We show that we outperform previous methods by this measure. We also explain why there are exceptions to this rule for our method. Conclusions: We assert that synthetic engineers should avoid deleting or inserting into these core regions unless they understand and are manipulating the function of the genes in that region. 
Similarly, if the designer wishes to streamline the genome, non-core regions and in particular low penetrance genes would be good targets for deletion. Care should be taken to remove entire cassettes with similar penetrance of the genes within cassettes as they may harbor toxin/antitoxin genes which need to be removed in tandem. The bioinformatic approach introduced here saves considerable time and effort relative to knockout studies on single isolates of a given species and captures a broad understanding of the conservation of genes that are core to a species.}, } @article {pmid34610367, year = {2021}, author = {Bansal, K and Kumar, S and Kaur, A and Singh, A and Patil, PB}, title = {Deep phylo-taxono genomics reveals Xylella as a variant lineage of plant associated Xanthomonas and supports their taxonomic reunification along with Stenotrophomonas and Pseudoxanthomonas.}, journal = {Genomics}, volume = {113}, number = {6}, pages = {3989-4003}, doi = {10.1016/j.ygeno.2021.09.021}, pmid = {34610367}, issn = {1089-8646}, mesh = {Genomics ; Phylogeny ; Stenotrophomonas ; *Xanthomonas/genetics ; *Xylella/genetics ; }, abstract = {Genus Xanthomonas is a group of phytopathogens that is phylogenetically related to Xylella, Stenotrophomonas, and Pseudoxanthomonas, having diverse lifestyles. Xylella is a lethal plant pathogen with a highly reduced genome, atypical GC content and is taxonomically related to these three genera. Deep phylo-taxono genomics reveals that Xylella is a variant Xanthomonas lineage that is sandwiched between Xanthomonas clades. Comparative studies suggest the role of unique pigment and exopolysaccharide gene clusters in the emergence of Xanthomonas and Xylella clades. Pan-genome analysis identified a set of unique genes associated with sub-lineages representing plant-associated Xanthomonas clade and nosocomial origin Stenotrophomonas clade. 
Overall, our study reveals the importance of reconciling classical phenotypic data and genomic findings in reconstituting the taxonomic status of these four genera. SIGNIFICANCE STATEMENT: Xylella fastidiosa is a devastating pathogen of perennial dicots such as grapes, citrus, coffee, and olives. An insect vector transmits the pathogen to its specific host wherein the infection leads to complete wilting of the plants. The genome of X. fastidiosa is significantly reduced both in terms of size (2 Mb) and GC content (50%) when compared with its relatives such as Xanthomonas, Stenotrophomonas, and Pseudoxanthomonas that have higher GC content (65%) and larger genomes (5 Mb). In this study, using systematic and in-depth genome-based taxonomic and phylogenetic criteria and comparative studies, we assert the need to unify Xanthomonas with its relatives (Xylella, Stenotrophomonas and Pseudoxanthomonas). Interestingly, Xylella revealed itself as a minor variant lineage embedded within two major Xanthomonas lineages comprising member species of different hosts.}, } @article {pmid34605764, year = {2021}, author = {Pidcock, SE and Skvortsov, T and Santos, FG and Courtney, SJ and Sui-Ting, K and Creevey, CJ and Huws, SA}, title = {Phylogenetic systematics of Butyrivibrio and Pseudobutyrivibrio genomes illustrate vast taxonomic diversity, open genomes and an abundance of carbohydrate-active enzyme family isoforms.}, journal = {Microbial genomics}, volume = {7}, number = {10}, pages = {}, pmid = {34605764}, issn = {2057-5858}, support = {BB/J0013/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/W/10964A-01/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Base Composition ; Butyrivibrio/*classification/*genetics ; Carbohydrates ; Clostridiales/*classification/*genetics ; DNA, Ribosomal ; Genome, Bacterial ; Haplotypes ; *Phylogeny ; Protein Isoforms ; Rumen/microbiology ; }, abstract = {Butyrivibrio 
and Pseudobutyrivibrio dominate in anaerobic gastrointestinal microbiomes, particularly the rumen, where they play a key role in harvesting dietary energy. Within these genera, five rumen species have been classified (Butyrivibrio fibrisolvens, Butyrivibrio hungatei, Butyrivibrio proteoclasticus, Pseudobutyrivibrio ruminis and Pseudobutyrivibrio xylanivorans) and more recently an additional Butyrivibrio sp. group was added. Given the recent increase in available genomes, we re-investigated the phylogenetic systematics and evolution of Butyrivibrio and Pseudobutyrivibrio. Across 71 genomes, we show using 16S rDNA and 40 gene marker phylogenetic trees that the current six species designations (P. ruminis, P. xylanivorans, B. fibrisolvens, Butyrivibrio sp., B. hungatei and B. proteoclasticus) are found. However, pangenome analysis showed vast genomic variation and a high abundance of accessory genes (91.50–99.34 %), compared with core genes (0.66–8.50 %), within these six taxonomic groups, suggesting incorrectly assigned taxonomy. Subsequent pangenome accessory genomes under varying core gene cut-offs (%) and average nucleotide identity (ANI) analysis suggest the existence of 42 species within 32 genera. Pangenome analysis of those that still group within B. fibrisolvens, B. hungatei and P. ruminis, based on revised ANI phylogeny, also showed possession of very open genomes, illustrating the diversity that exists even within these groups. All strains of both Butyrivibrio and Pseudobutyrivibrio also shared a broad range of clusters of orthologous genes (COGs) (870), indicating recent evolution from a common ancestor. We also demonstrate that the carbohydrate-active enzymes (CAZymes) predominantly belong to glycosyl hydrolase (GH)2, 3, 5, 13 and 43, with numerous within family isoforms apparent, likely facilitating metabolic plasticity and resilience under dietary perturbations. 
This study provides a major advancement in our functional and evolutionary understanding of these important anaerobic bacteria.}, } @article {pmid34605762, year = {2021}, author = {Colombi, E and Perry, BJ and Sullivan, JT and Bekuma, AA and Terpolilli, JJ and Ronson, CW and Ramsay, JP}, title = {Comparative analysis of integrative and conjugative mobile genetic elements in the genus Mesorhizobium.}, journal = {Microbial genomics}, volume = {7}, number = {10}, pages = {}, pmid = {34605762}, issn = {2057-5858}, mesh = {Bacterial Proteins/genetics ; Conjugation, Genetic ; DNA Transposable Elements ; Evolution, Molecular ; Fabaceae ; Gene Transfer, Horizontal ; *Interspersed Repetitive Sequences ; Mesorhizobium/*genetics ; Nitrogen Fixation ; Plasmids ; Quorum Sensing ; Recombination, Genetic ; Symbiosis/genetics ; }, abstract = {Members of the Mesorhizobium genus are soil bacteria that often form nitrogen-fixing symbioses with legumes. Most characterised Mesorhizobium spp. genomes are ~8 Mb in size and harbour extensive pangenomes including large integrative and conjugative elements (ICEs) carrying genes required for symbiosis (ICESyms). Here, we document and compare the conjugative mobilome of 41 complete Mesorhizobium genomes. We delineated 56 ICEs and 24 integrative and mobilizable elements (IMEs) collectively occupying 16 distinct integration sites, along with 24 plasmids. We also demonstrated horizontal transfer of the largest (853,775 bp) documented ICE, the tripartite ICEMspSym[AA22]. The conjugation systems of all identified ICEs and several plasmids were related to those of the paradigm ICESym ICEMlSym[R7A], with each carrying conserved genes for conjugative pilus formation (trb), excision (rdfS), DNA transfer (rlxS) and regulation (fseA). ICESyms have likely evolved from a common ancestor, despite occupying a variety of distinct integration sites and specifying symbiosis with diverse legumes. 
We found extensive evidence for recombination between ICEs and particularly ICESyms, which all uniquely lack the conjugation entry-exclusion factor gene trbK. Frequent duplication, replacement and pseudogenization of genes for quorum-sensing-mediated activation and antiactivation of ICE transfer suggests ICE transfer regulation is constantly evolving. Pangenome-wide association analysis of the ICE identified genes potentially involved in symbiosis, rhizosphere colonisation and/or adaptation to distinct legume hosts. In summary, the Mesorhizobium genus has accumulated a large and dynamic pangenome that evolves through ongoing horizontal gene transfer of large conjugative elements related to ICEMlSym[R7A].}, } @article {pmid34604935, year = {2021}, author = {Cho, ES and Cha, IT and Roh, SW and Seo, MJ}, title = {Haloferax litoreum sp. nov., Haloferax marinisediminis sp. nov., and Haloferax marinum sp. nov., low salt-tolerant haloarchaea isolated from seawater and sediment.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {12}, pages = {2065-2082}, pmid = {34604935}, issn = {1572-9699}, support = {2018//Incheon National University Research Grant/ ; }, mesh = {DNA, Archaeal/genetics ; *Halobacteriaceae/genetics ; *Haloferax/genetics ; Nucleic Acid Hybridization ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Seawater ; Sequence Analysis, DNA ; }, abstract = {Three novel halophilic archaea were isolated from seawater and sediment near Yeoungheungdo Island, Republic of Korea. The genome size and G + C content of the isolates MBLA0076[T], MBLA0077[T], and MBLA0078[T] were 3.56, 3.48, and 3.48 Mb and 61.7, 60.8, and 61.1 mol%, respectively. The three strains shared 98.5-99.5 % sequence similarity of the 16 S rRNA gene, whereas their sequence similarity to the 16 S rRNA gene of type strains was below 98.5 %. Phylogenetic analysis based on sequences of the 16 S rRNA and RNA polymerase subunit beta genes indicated that the isolates belonged to the genus Haloferax. 
The orthologous average nucleotide identity, average amino-acid identity, and in silico DNA-DNA hybridization values were below species delineation thresholds. Pan-genomic analysis indicated that the three novel strains and 11 reference strains had 8981 pan-orthologous groups in total. Fourteen Haloferax strains shared 1766 core pan-genome orthologous groups, which were mainly related to amino acid transport and metabolism. Cells of the three isolates were gram-negative, motile, red-pink pigmented, and pleomorphic. The strains grew optimally at 30 °C (MBLA0076[T]) and 40 °C (MBLA0077[T], MBLA0078[T]) in the presence of 1.28 M (MBLA0077[T]) and 1.7 M (MBLA0076[T], MBLA0078[T]) NaCl and 0.1 M (MBLA0077[T]), 0.2 M (MBLA0076[T]), and 0.3 M (MBLA0078[T]) MgCl2·6H2O at pH 7.0-8.0. Cells of all isolates lysed in distilled water; the minimum NaCl concentration necessary to prevent lysis was 0.43 M. The major polar lipids of the three strains were phosphatidylglycerol, phosphatidylglycerol phosphate methyl ester, and sulphated diglycosyl archaeol-1. Based on their phenotypic and genotypic properties, MBLA0076[T], MBLA0077[T], and MBLA0078[T] were described as novel species of Haloferax, for which we propose the names Haloferax litoreum sp. nov., Haloferax marinisediminis sp. nov., and Haloferax marinum sp. nov., respectively. 
The respective type strains of these species are MBLA0076[T] (= KCTC 4288[T] = JCM 34169[T]), MBLA0077[T] (= KCTC 4289[T] = JCM 34170[T]), and MBLA0078[T] (= KCTC 4290[T] = JCM 34171[T]).}, } @article {pmid34603347, year = {2021}, author = {Razzaq, A and Saleem, F and Wani, SH and Abdelmohsen, SAM and Alyousef, HA and Abdelbacki, AMM and Alkallas, FH and Tamam, N and Elansary, HO}, title = {De-novo Domestication for Improving Salt Tolerance in Crops.}, journal = {Frontiers in plant science}, volume = {12}, number = {}, pages = {681367}, pmid = {34603347}, issn = {1664-462X}, abstract = {Global agriculture production is under serious threat from rapidly increasing population and adverse climate changes. Food security is currently a huge challenge to feed 10 billion people by 2050. Crop domestication through conventional approaches is not good enough to meet the food demands and unable to fast-track the crop yields. Also, intensive breeding and rigorous selection of superior traits causes genetic erosion and eliminates stress-responsive genes, which makes crops more prone to abiotic stresses. Salt stress is one of the most prevailing abiotic stresses that poses severe damages to crop yield around the globe. Recent innovations in state-of-the-art genomics and transcriptomics technologies have paved the way to develop salinity tolerant crops. De novo domestication is one of the promising strategies to produce superior new crop genotypes through exploiting the genetic diversity of crop wild relatives (CWRs). Next-generation sequencing (NGS) technologies open new avenues to identifying the unique salt-tolerant genes from the CWRs. It has also led to the assembly of highly annotated crop pan-genomes to snapshot the full landscape of genetic diversity and recapture the huge gene repertoire of a species. 
The identification of novel genes alongside the emergence of cutting-edge genome editing tools for targeted manipulation renders de novo domestication a way forward for developing salt-tolerance crops. However, some risk associated with gene-edited crops causes hurdles for its adoption worldwide. Halophytes-led breeding for salinity tolerance provides an alternative strategy to identify extremely salt tolerant varieties that can be used to develop new crops to mitigate salinity stress.}, } @article {pmid34601567, year = {2021}, author = {Durant, É and Sabot, F and Conte, M and Rouard, M}, title = {Panache: a web browser-based viewer for linearized pangenomes.}, journal = {Bioinformatics (Oxford, England)}, volume = {37}, number = {23}, pages = {4556-4558}, pmid = {34601567}, issn = {1367-4811}, mesh = {Web Browser ; *Software ; *Genome ; Bacteria ; }, abstract = {MOTIVATION: Pangenomics evolved since its first applications on bacteria, extending from the study of genes for a given population to the study of all of its sequences available. While multiple methods are being developed to construct pangenomes in eukaryotic species there is still a gap for efficient and user-friendly visualization tools. Emerging graph representations come with their own challenges, and linearity remains a suitable option for user-friendliness.

RESULTS: We introduce Panache, a tool for the visualization and exploration of linear representations of gene-based and sequence-based pangenomes. It uses a layout similar to genome browsers to display presence/absence variations and additional tracks along a linear axis with a pangenomics perspective.

AVAILABILITY AND IMPLEMENTATION: Panache is available at github.com/SouthGreenPlatform/panache under the MIT License.}, } @article {pmid34600047, year = {2022}, author = {Jia, J and Liu, M and Feng, L and Wang, Z}, title = {Comparative genomic analysis reveals the evolution and environmental adaptation of Acinetobacter johnsonii.}, journal = {Gene}, volume = {808}, number = {}, pages = {145985}, doi = {10.1016/j.gene.2021.145985}, pmid = {34600047}, issn = {1879-0038}, mesh = {Acinetobacter/*genetics/metabolism ; Adaptation, Biological/genetics ; Biological Evolution ; China ; Databases, Genetic ; Evolution, Molecular ; Genome, Bacterial/genetics ; Genomics/methods ; Phylogeny ; Virulence ; }, abstract = {Genome plasticity is a key determinant of the wide distribution of Acinetobacter johnsonii in natural and clinical environments. However, little attention has been paid to figure out the changes in the genome during A. johnsonii's evolution. Here, a comparative genomic analysis of A. johnsonii isolated from clinical and environmental sources was conducted. In this study, we found A. johnsonii has an open pan-genome and has great adaptability to different environments. Based on the results of the phylogenetic tree, ANI value and the distribution of accessory genes, we found that strains from the same habitat had a high degree of similarity. Though genes associated with the fundamental process were mostly conserved in evolution, clinical-derived isolates accumulate more genes associated with translational modification, β-lactamase and defense mechanisms, whereas environmental-derived isolates enriched more genes related to substances degradation. In addition, clinical-derived strains harbored some "strong" virulence islands and resistance islands. This study highlights the evolutionary relationship of A. 
johnsonii isolates from clinical and environmental sources.}, } @article {pmid34599163, year = {2021}, author = {Brackley, CA and Gilbert, N and Michieletto, D and Papantonis, A and Pereira, MCF and Cook, PR and Marenduzzo, D}, title = {Complex small-world regulatory networks emerge from the 3D organisation of the human genome.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {5756}, pmid = {34599163}, issn = {2041-1723}, mesh = {Chromatin/chemistry/metabolism ; Chromosomes, Human/chemistry/metabolism ; *Gene Regulatory Networks ; *Genome, Human ; Genome-Wide Association Study ; Humans ; *Models, Genetic ; Polymers/chemistry/metabolism ; Quantitative Trait Loci ; Transcription Factors/*metabolism ; Transcription, Genetic ; }, abstract = {The discovery that overexpressing one or a few critical transcription factors can switch cell state suggests that gene regulatory networks are relatively simple. In contrast, genome-wide association studies (GWAS) point to complex phenotypes being determined by hundreds of loci that rarely encode transcription factors and which individually have small effects. Here, we use computer simulations and a simple fitting-free polymer model of chromosomes to show that spatial correlations arising from 3D genome organisation naturally lead to stochastic and bursty transcription as well as complex small-world regulatory networks (where the transcriptional activity of each genomic region subtly affects almost all others). These effects require factors to be present at sub-saturating levels; increasing levels dramatically simplifies networks as more transcription units are pressed into use. Consequently, results from GWAS can be reconciled with those involving overexpression. 
We apply this pan-genomic model to predict patterns of transcriptional activity in whole human chromosomes, and, as an example, the effects of the deletion causing the diGeorge syndrome.}, } @article {pmid34595238, year = {2021}, author = {Awan, F and Ali, MM and Hamid, M and Awan, MH and Mushtaq, MH and Kalsoom, S and Ijaz, M and Mehmood, K and Liu, Y}, title = {Epi-Gene: An R-Package for Easy Pan-Genome Analysis.}, journal = {BioMed research international}, volume = {2021}, number = {}, pages = {5585586}, pmid = {34595238}, issn = {2314-6141}, mesh = {Aeromonas hydrophila/genetics ; Databases, Genetic ; *Genome, Bacterial ; *Genomics ; Multigene Family ; Phylogeny ; Principal Component Analysis ; *Software ; }, abstract = {The main aim of this study was to develop a set of functions that can analyze the genomic data with less time consumption and memory. Epi-gene is presented as a solution to large sequence file handling and computational time problems. It uses less time and less programming skills in order to work with a large number of genomes. In the current study, some features of the Epi-gene R-package were described and illustrated by using a dataset of the 14 Aeromonas hydrophila genomes. The joining, relabeling, and conversion functions were also included in this package to handle the FASTA formatted sequences. To calculate the subsets of core genes, accessory genes, and unique genes, various Epi-gene functions have been used. Heat maps and phylogenetic genome trees were also constructed. This whole procedure was completed in less than 30 minutes. This package can only work on Windows operating systems. 
Different functions from other packages such as dplyr and ggtree were also used that were available in R computing environment.}, } @article {pmid34589074, year = {2021}, author = {Wambui, J and Cernela, N and Stevens, MJA and Stephan, R}, title = {Whole Genome Sequence-Based Identification of Clostridium estertheticum Complex Strains Supports the Need for Taxonomic Reclassification Within the Species Clostridium estertheticum.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {727022}, pmid = {34589074}, issn = {1664-302X}, abstract = {Isolates within the Clostridium estertheticum complex (CEC) have routinely been identified through the 16S rRNA sequence, but the high interspecies sequence similarity reduces the resolution necessary for species level identification and often results in ambiguous taxonomic classification. The current study identified CEC isolates from meat juice (MJS) and bovine fecal samples (BFS) and determined the phylogeny of species within the CEC through whole genome sequence (WGS)-based analyses. About 1,054 MJS were screened for CEC using quantitative real-time PCR (qPCR). Strains were isolated from 33 MJS and 34 BFS qPCR-positive samples, respectively. Pan- and core-genome phylogenomics were used to determine the species identity of the isolates. Average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) were used to validate the species identity. The phylogeny of species within the CEC was determined through a combination of these methods. Twenty-eight clostridia strains were isolated from MJS and BFS samples out of which 13 belonged to CEC. At 95% ANI and 70% dDDH thresholds for speciation, six CEC isolates were identified as genomospecies2 (n=3), Clostridium tagluense (n=2) and genomospecies3 (n=1). Lower thresholds of 94% ANI and 58% dDDH were required for the classification of seven CEC isolates into species C. estertheticum and prevent an overlap between species C. 
estertheticum and Clostridium frigoriphilum. Combination of the two species and abolishment of current subspecies classification within the species C. estertheticum are proposed. These data demonstrate the suitability of phylogenomics to identify CEC isolates and determine the phylogeny within CEC.}, } @article {pmid34588372, year = {2022}, author = {Kim, JM and Fukushima, Y and Yoshida, H and Kim, JS and Takahashi, T}, title = {Comparative Genomic Features of Streptococcus canis Based on Pan-Genome Orthologous Group Analysis According to Sequence Type.}, journal = {Japanese journal of infectious diseases}, volume = {75}, number = {3}, pages = {269-276}, doi = {10.7883/yoken.JJID.2021.533}, pmid = {34588372}, issn = {1884-2836}, mesh = {DNA ; *Genomics ; Phylogeny ; *Streptococcus/genetics ; }, abstract = {Using bacterial pan-genomes obtained through whole genome sequencing (WGS), coding DNA sequences (CDSs) can be clustered into pan-genome orthologous groups (POGs). We aimed to investigate comparative genomic features of Streptococcus canis based on POG analysis and to determine CDSs specific to prevalent sequence type (ST) 9. Twenty WGS datasets from S. canis strains, including invasive and non-invasive specimens, were retrieved from the National Center for Biotechnology Information Assembly database. Based on the WGS data, we performed comparative genome hybridization (CGH), pan- and core-genome prediction, Venn diagram testing with five ST9 strains, and phylogenetic analysis with ST determination. We compared the CDSs of seven ST9 and 13 non-ST9 strains. We observed genomic diversity based on CGH and Venn diagram analyses. The predicted pan- and core-genomes contained 4,772 and 1,403 genes, respectively. We found five clades consisting of different STs (ST1, ST44/2, ST13/14, ST21/15/41, and ST9) based on the phylogenetic tree. 
There were differences in four pathways (DNA restriction-modification system, DNA-mediated transposition, extracellular region, and response to oxidative stress) regulated by CDSs specific to ST9. Our findings describe genomic diversity in CGH and Venn diagram testing, pan- and core-genomes, five clades of genomes consisting of different STs, and unique CDS features associated with ST9.}, } @article {pmid34581566, year = {2021}, author = {Sun, W and Sun, X and Häggblom, MM and Kolton, M and Lan, L and Li, B and Dong, Y and Xu, R and Li, F}, title = {Identification of Antimonate Reducing Bacteria and Their Potential Metabolic Traits by the Combination of Stable Isotope Probing and Metagenomic-Pangenomic Analysis.}, journal = {Environmental science & technology}, volume = {55}, number = {20}, pages = {13902-13912}, doi = {10.1021/acs.est.1c03967}, pmid = {34581566}, issn = {1520-5851}, mesh = {Antimony ; *Bacteria/genetics ; Isotopes ; *Metagenomics ; Oxidation-Reduction ; }, abstract = {Microorganisms play an important role in altering antimony (Sb) speciation, mobility, and bioavailability, but the understanding of the microorganisms responsible for Sb(V) reduction has been limited. In this study, DNA-stable isotope probing (DNA-SIP) and metagenomics analysis were combined to identify potential Sb(V)-reducing bacteria (SbRB) and predict their metabolic pathways for Sb(V) reduction. Soil slurry cultures inoculated with Sb-contaminated paddy soils from two Sb-contaminated sites demonstrated the capability to reduce Sb(V). DNA-SIP identified bacteria belonging to the genera Pseudomonas and Geobacter as putative SbRB in these two Sb-contaminated sites. In addition, bacteria such as Lysinibacillus and Dechloromonas may potentially participate in Sb(V) reduction. 
Nearly complete draft genomes of putative SbRB (i.e., Pseudomonas and Geobacter) were obtained, and the genes potentially responsible for arsenic (As) and Sb reduction (i.e., respiratory arsenate reductase (arrA) and antimonate reductase (anrA)) were examined. Notably, bins affiliated with Geobacter contained arrA and anrA genes, supporting our hypothesis that they are putative SbRB. Further, pangenomic analysis indicated that various Geobacter-associated genomes obtained from diverse habitats also contained arrA and anrA genes. In contrast, Pseudomonas may use a predicted DMSO reductase closely related to sbrA (Sb(V) reductase gene) clade II to reduce Sb(V), which may need further experiments to verify. This current work represents a demonstration of using DNA-SIP and metagenomic-binning to identify SbRB and their key genes involved in Sb(V) reduction and provides valuable data sets to link bacterial identities with Sb(V) reduction.}, }
@article {pmid34576887,
  year     = {2021},
  author   = {Huang, Z and Zhou, X and Stanton, C and Ross, RP and Zhao, J and Zhang, H and Yang, B and Chen, W},
  title    = {Comparative Genomics and Specific Functional Characteristics Analysis of {Lactobacillus} acidophilus.},
  journal  = {Microorganisms},
  volume   = {9},
  number   = {9},
  pages    = {},
  pmid     = {34576887},
  issn     = {2076-2607},
  support  = {32021005, 31820103010//National Natural Science Foundation of China/ ; JUFSTR20180102//National First-Class Discipline Program of Food Science and Technology/ ; JUSRP52003B//Fundamental Research Funds for the Central Universities/ ; Not applicable//Collaborative Innovation Center of Food Safety and Quality Control in Jiangsu Province/ ; },
  abstract = {Lactobacillus acidophilus is a common kind of lactic acid bacteria usually found in the human gastrointestinal tract, oral cavity, vagina, and various fermented foods. At present, many studies have focused on the probiotic function and industrial application of L. acidophilus. Additionally, dozens of L.
acidophilus strains have been genome sequenced, but there has been no research to compare them at the genomic level. In this study, 46 strains of L. acidophilus were performed comparative analyses to explore their genetic diversity. The results showed that all the L. acidophilus strains were divided into two clusters based on ANI values, phylogenetic analysis and whole genome comparison, due to the difference of their predicted gene composition of bacteriocin operon, CRISPR-Cas systems and prophages mainly. Additionally, L. acidophilus was a pan-genome open species with a difference in carbohydrates utilization, antibiotic resistance, EPS operon, surface layer protein operon and other functional gene composition. This work provides a better understanding of L. acidophilus from a genetic perspective, and offers a frame for the biotechnological potentiality of this species.}, }
@article {pmid34576823,
  year     = {2021},
  author   = {Maguvu, TE and Bezuidenhout, CC},
  title    = {Whole Genome Sequencing Based Taxonomic Classification, and Comparative Genomic Analysis of Potentially Human Pathogenic {Enterobacter} spp. Isolated from Chlorinated Wastewater in the {North West Province}, {South Africa}.},
  journal  = {Microorganisms},
  volume   = {9},
  number   = {9},
  pages    = {},
  pmid     = {34576823},
  issn     = {2076-2607},
  support  = {project2019/2020-00224//Water Research Commission, South Africa/ ; },
  abstract = {Comparative genomics, in particular, pan-genome analysis, provides an in-depth understanding of the genetic variability and dynamics of a bacterial species. Coupled with whole-genome-based taxonomic analysis, these approaches can help to provide comprehensive, detailed insights into a bacterial species. Here, we report whole-genome-based taxonomic classification and comparative genomic analysis of potential human pathogenic Enterobacter hormaechei subsp. hoffmannii isolated from chlorinated wastewater.
Genome Blast Distance Phylogeny (GBDP), digital DNA-DNA hybridization (dDDH), and average nucleotide identity (ANI) confirmed the identity of the isolates. The algorithm PathogenFinder predicted the isolates to be human pathogens with a probability of greater than 0.78. The potential pathogenic nature of the isolates was supported by the presence of biosynthetic gene clusters (BGCs), aerobactin, and aryl polyenes (APEs), which are known to be associated with pathogenic/virulent strains. Moreover, analysis of the genome sequences of the isolates reflected the presence of an arsenal of virulence factors and antibiotic resistance genes that augment the predictions of the algorithm PathogenFinder. The study comprehensively elucidated the genomic features of pathogenic Enterobacter isolates from wastewaters, highlighting the role of wastewaters in the dissemination of pathogenic microbes, and the need for monitoring the effectiveness of the wastewater treatment process.}, }
@article {pmid34576801,
  year     = {2021},
  author   = {Díaz, R and Torres-Miranda, A and Orellana, G and Garrido, D},
  title    = {Comparative Genomic Analysis of Novel {Bifidobacterium} longum subsp. longum Strains Reveals Functional Divergence in the Human Gut Microbiota.},
  journal  = {Microorganisms},
  volume   = {9},
  number   = {9},
  pages    = {},
  pmid     = {34576801},
  issn     = {2076-2607},
  support  = {FONDECYT 1190074//Agencia nacional de investigación y desarrollo/ ; Proyecto Interdisciplina II180018//Vicerrectoria de Investigacion PUC/ ; Seed Fund 2020//Escuela de Ingeniería UC/ ; 21200384//ANID Scholarship/ ; },
  abstract = {Bifidobacterium longum subsp. longum is a prevalent group in the human gut microbiome. Its persistence in the intestinal microbial community suggests a close host-microbe relationship according to age. The subspecies adaptations are related to metabolic capabilities and genomic and functional diversity.
In this study, 154 genomes from public databases and four new Chilean isolates were genomically compared through an in silico approach to identify genomic divergence in genes associated with carbohydrate consumption and their possible adaptations to different human intestinal niches. The pangenome of the subspecies was open, which correlates with its remarkable ability to colonize several niches. The new genomes homogenously clustered within subspecies longum, as observed in phylogenetic analysis. B. longum SC664 was different at the sequence level but not in its functions. COG analysis revealed that carbohydrate use is variable among longum subspecies. Glycosyl hydrolases participating in human milk oligosaccharide use were found in certain infant and adult genomes. Predictive genomic analysis revealed that B. longum M12 contained an HMO cluster associated with the use of fucosylated HMOs but only endowed with a GH95, being able to grow in 2-fucosyllactose as the sole carbon source. This study identifies novel genomes with distinct adaptations to HMOs and highlights the plasticity of B. longum subsp. longum to colonize the human gut microbiota.}, }
@article {pmid34574222,
  year     = {2021},
  author   = {Kim, E and Yang, SM and Kim, HY},
  title    = {Differentiation of {Lacticaseibacillus} zeae Using Pan-Genome Analysis and Real-Time {PCR} Method Targeting a Unique Gene.},
  journal  = {Foods (Basel, Switzerland)},
  volume   = {10},
  number   = {9},
  pages    = {},
  pmid     = {34574222},
  issn     = {2304-8158},
  support  = {2020R1A6A3A01100168//National Research Foundation of Korea/ ; },
  abstract = {Lacticaseibacillus zeae strains, isolated from raw milk and fermented dairy products, are closely related to the Lacticaseibacillus species that has beneficial probiotic properties. However, it is difficult to distinguish those using conventional methods. In this study, a unique gene was revealed to differentiate L.
zeae from other strains of the Lacticaseibacillus species and other species by pan-genome analysis, and a real-time PCR method was developed to rapidly and accurately detect the unique gene. The genome analysis of 141 genomes yielded an 17,978 pan-genome. Among them, 18 accessory genes were specifically present in five genomes of L. zeae. The glycosyltransferase family 8 was identified as a unique gene present only in L. zeae and not in 136 other genomes. A primer designed from the unique gene accurately distinguished L. zeae in pure and mixed DNA and successfully constructed the criterion for the quantified standard curve in real-time PCR. The real-time PCR method was applied to 61 strains containing other Lacticaseibacillus species and distinguished L. zeae with 100\% accuracy. Also, the real-time PCR method was proven to be superior to the 16S rRNA gene method in the identification of L. zeae.}, }
@article {pmid34573403,
  year     = {2021},
  author   = {Zhao, Y and Wang, Y and Xia, C and Li, X and Ye, X and Fan, Q and Huang, Y and Li, Z and Zhu, C and Cui, Z},
  title    = {Whole-Genome Sequencing of {Corallococcus} sp. Strain {EGB} Reveals the Genetic Determinants Linking Taxonomy and Predatory Behavior.},
  journal  = {Genes},
  volume   = {12},
  number   = {9},
  pages    = {},
  pmid     = {34573403},
  issn     = {2073-4425},
  mesh     = {Animals ; Biosynthetic Pathways/genetics ; Chitinases/genetics ; Genetic Linkage ; Genome, Bacterial ; Host-Pathogen Interactions/*genetics ; Microbial Interactions/*genetics ; Multigene Family/genetics ; Myxococcales/*classification/*genetics ; Phylogeny ; Plant Diseases/microbiology ; Whole Genome Sequencing ; },
  abstract = {Corallococcus sp. strain EGB is a Gram-negative myxobacteria isolated from saline soil, and has considerable potential for the biocontrol of phytopathogenic fungi. However, the detailed mechanisms related to development and predatory behavior are unclear.
To obtain a comprehensive overview of genetic features, the genome of strain EGB was sequenced, annotated, and compared with 10 other Corallococcus species. The strain EGB genome was assembled as a single circular chromosome of 9.4 Mb with 7916 coding genes. Phylogenomics analysis showed that strain EGB was most closely related to Corallococcus interemptor AB047A, and it was inferred to be a novel species within the Corallococcus genus. Comparative genomic analysis revealed that the pan-genome of Corallococcus genus was large and open. Only a small proportion of genes were specific to strain EGB, and most of them were annotated as hypothetical proteins. Subsequent analyses showed that strain EGB produced abundant extracellular enzymes such as chitinases and β-(1,3)-glucanases, and proteases to degrade the cell-wall components of phytopathogenic fungi. In addition, 35 biosynthetic gene clusters potentially coding for antimicrobial compounds were identified in the strain EGB, and the majority of them were present in the dispensable pan-genome with unexplored metabolites. Other genes related to secretion and regulation were also explored for strain EGB. 
This study opens new perspectives in the greater understanding of the predatory behavior of strain EGB, and facilitates a potential application in the biocontrol of fungal plant diseases in the future.}, }
@article {pmid34570769,
  year     = {2021},
  author   = {Tognon, M and Bonnici, V and Garrison, E and Giugno, R and Pinello, L},
  title    = {{GRAFIMO}: Variant and haplotype aware motif scanning on pangenome graphs.},
  journal  = {PLoS computational biology},
  volume   = {17},
  number   = {9},
  pages    = {e1009444},
  pmid     = {34570769},
  issn     = {1553-7358},
  support  = {R00 HG008399/HG/NHGRI NIH HHS/United States ; R35 HG010717/HG/NHGRI NIH HHS/United States ; },
  mesh     = {Base Sequence ; Binding Sites/genetics ; Computational Biology ; Computer Graphics ; *Genetic Variation ; Genome, Human ; Genomics ; Haplotypes ; Humans ; *Nucleotide Motifs ; Protein Binding/genetics ; *Software ; Transcription Factors/*metabolism ; },
  abstract = {Transcription factors (TFs) are proteins that promote or reduce the expression of genes by binding short genomic DNA sequences known as transcription factor binding sites (TFBS). While several tools have been developed to scan for potential occurrences of TFBS in linear DNA sequences or reference genomes, no tool exists to find them in pangenome variation graphs (VGs). VGs are sequence-labelled graphs that can efficiently encode collections of genomes and their variants in a single, compact data structure. Because VGs can losslessly compress large pangenomes, TFBS scanning in VGs can efficiently capture how genomic variation affects the potential binding landscape of TFs in a population of individuals. Here we present GRAFIMO (GRAph-based Finding of Individual Motif Occurrences), a command-line tool for the scanning of known TF DNA motifs represented as Position Weight Matrices (PWMs) in VGs. GRAFIMO extends the standard PWM scanning procedure by considering variations and alternative haplotypes encoded in a VG.
Using GRAFIMO on a VG based on individuals from the 1000 Genomes project we recover several potential binding sites that are enhanced, weakened or missed when scanning only the reference genome, and which could constitute individual-specific binding events. GRAFIMO is available as an open-source tool, under the MIT license, at https://github.com/pinellolab/GRAFIMO and https://github.com/InfOmics/GRAFIMO.}, }
@article {pmid34568925,
  year     = {2021},
  author   = {Kim, MS and Lee, T and Baek, J and Kim, JH and Kim, C and Jeong, SC},
  title    = {Genome assembly of the popular {Korean} soybean cultivar {Hwangkeum}.},
  journal  = {G3 (Bethesda, Md.)},
  volume   = {11},
  number   = {10},
  pages    = {},
  pmid     = {34568925},
  issn     = {2160-1836},
  mesh     = {Alleles ; *Fabaceae ; High-Throughput Nucleotide Sequencing ; Republic of Korea ; *Soybeans/genetics ; },
  abstract = {Massive resequencing efforts have been undertaken to catalog allelic variants in major crop species including soybean, but the scope of the information for genetic variation often depends on short sequence reads mapped to the extant reference genome. Additional de novo assembled genome sequences provide a unique opportunity to explore a dispensable genome fraction in the pan-genome of a species. Here, we report the de novo assembly and annotation of Hwangkeum, a popular soybean cultivar in Korea. The assembly was constructed using PromethION nanopore sequencing data and two genetic maps and was then error-corrected using Illumina short-reads and PacBio SMRT reads. The 933.12 Mb assembly was annotated as containing 79,870 transcripts for 58,550 genes using RNA-Seq data and the public soybean annotation set. Comparison of the Hwangkeum assembly with the Williams 82 soybean reference genome sequence (Wm82.a2.v1) revealed 1.8 million single-nucleotide polymorphisms, 0.5 million indels, and 25 thousand putative structural variants. However, there was no natural megabase-scale chromosomal rearrangement.
Incidentally, by adding two novel subfamilies, we found that soybean contains four clearly separated subfamilies of centromeric satellite repeats. Analyses of satellite repeats and gene content suggested that the Hwangkeum assembly is a high-quality assembly. This was further supported by comparison of the marker arrangement of anthocyanin biosynthesis genes and of gene arrangement at the Rsv3 locus. Therefore, the results indicate that the de novo assembly of Hwangkeum is a valuable additional reference genome resource for characterizing traits for the improvement of this important crop species.}, }
@article {pmid34568912,
  year     = {2021},
  author   = {Sato, K and Mascher, M and Himmelbach, A and Haberer, G and Spannagl, M and Stein, N},
  title    = {Chromosome-scale assembly of wild barley accession {``OUH602''}.},
  journal  = {G3 (Bethesda, Md.)},
  volume   = {11},
  number   = {10},
  pages    = {},
  pmid     = {34568912},
  issn     = {2160-1836},
  mesh     = {Chromosomes ; Chromosomes, Artificial, Bacterial ; Genome ; Genotype ; *Hordeum/genetics ; },
  abstract = {Barley (Hordeum vulgare) was domesticated from its wild ancestral form ca. 10,000 years ago in the Fertile Crescent and is widely cultivated throughout the world, except for in tropical areas. The genome size of both cultivated barley and its conspecific wild ancestor is approximately 5 Gb. High-quality chromosome-level assemblies of 19 cultivated and one wild barley genotype were recently established by pan-genome analysis. Here, we release another equivalent short-read assembly of the wild barley accession "OUH602." A series of genetic and genomic resources were developed for this genotype in prior studies. Our assembly contains more than 4.4 Gb of sequence, with a scaffold N50 value of over 10 Mb. The haplotype shows high collinearity with the most recently updated barley reference genome, "Morex" V3, with some inversions.
Gene projections based on "Morex" gene models revealed 46,807 protein-coding sequences and 43,375 protein-coding genes. Alignments to publicly available sequences of bacterial artificial chromosome (BAC) clones of "OUH602" confirm the high accuracy of the assembly. Since more loci of interest have been identified in "OUH602," the release of this assembly, with detailed genomic information, should accelerate gene identification and the utilization of this key wild barley accession.}, }
@article {pmid34567086,
  year     = {2021},
  author   = {Mahtha, SK and Purama, RK and Yadav, G},
  title    = {{StAR}-Related Lipid Transfer ({START}) Domains Across the Rice Pangenome Reveal How Ontogeny Recapitulated Selection Pressures During Rice Domestication.},
  journal  = {Frontiers in genetics},
  volume   = {12},
  number   = {},
  pages    = {737194},
  pmid     = {34567086},
  issn     = {1664-8021},
  abstract = {The StAR-related lipid transfer (START) domain containing proteins or START proteins, encoded by a plant amplified family of evolutionary conserved genes, play important roles in lipid binding, transport, signaling, and modulation of transcriptional activity in the plant kingdom, but there is limited information on their evolution, duplication, and associated sub- or neo-functionalization. Here we perform a comprehensive investigation of this family across the rice pangenome, using 10 wild and cultivated varieties. Conservation of START domains across all 10 rice genomes suggests low dispensability and critical functional roles for this family, further supported by chromosomal mapping, duplication and domain structure patterns. Analysis of synteny highlights a preponderance of segmental and dispersed duplication among STARTs, while transcriptomic investigation of the main cultivated variety Oryza sativa var. japonica reveals sub-functionalization amongst genes family members in terms of preferential expression across various developmental stages and anatomical parts, such as flowering.
Ka/Ks ratios confirmed strong negative/purifying selection on START family evolution, implying that ontogeny recapitulated selection pressures during rice domestication. Our findings provide evidence for high conservation of START genes across rice varieties in numbers, as well as in their stringent regulation of Ka/Ks ratio, and showed strong functional dependency of plants on START proteins for their growth and reproductive development. We believe that our findings advance the limited knowledge about plant START domain diversity and evolution, and pave the way for more detailed assessment of individual structural classes of START proteins among plants and their domain specific substrate preferences, to complement existing studies in animals and yeast.}, }
@article {pmid34566921,
  year     = {2021},
  author   = {Jaakkola, K and Virtanen, K and Lahti, P and Keto-Timonen, R and Lindström, M and Korkeala, H},
  title    = {Comparative Genome Analysis and Spore Heat Resistance Assay Reveal a New Component to Population Structure and Genome Epidemiology Within {Clostridium} perfringens Enterotoxin-Carrying Isolates.},
  journal  = {Frontiers in microbiology},
  volume   = {12},
  number   = {},
  pages    = {717176},
  pmid     = {34566921},
  issn     = {1664-302X},
  abstract = {Clostridium perfringens causes a variety of human and animal enteric diseases including food poisoning, antibiotic-associated diarrhea, and necrotic enteritis. Yet, the reservoirs of enteropathogenic enterotoxin-producing strains remain unknown. We conducted a genomic comparison of 290 strains and a heat resistance phenotyping of 30 C. perfringens strains to elucidate the population structure and ecology of this pathogen. C. perfringens genomes shared a conserved genetic backbone with more than half of the genes of an average genome conserved in >95\% of strains.
The cpe-carrying isolates were found to share genetic context: the cpe-carrying plasmids had different distribution patterns within the genetic lineages and the estimated pan genome of cpe-carrying isolates had a larger core genome and a smaller accessory genome compared to that of 290 strains. We characterize cpe-negative strains related to chromosomal cpe-carrying strains elucidating the origin of these strains and disclose two distinct groups of chromosomal cpe-carrying strains with different virulence characteristics, spore heat resistance properties, and, presumably, ecological niche. Finally, an antibiotic-associated diarrhea isolate carrying two copies of the enterotoxin cpe gene and the associated genetic lineage with the potential for the emergence of similar strains are outlined. With C. perfringens as an example, implications of input genome quality for pan genome analysis are discussed. Our study furthers the understanding of genome epidemiology and population structure of enteropathogenic C. perfringens and brings new insight into this important pathogen and its reservoirs.}, }
@article {pmid34566909,
  year     = {2021},
  author   = {Wekesa, CS and Furch, ACU and Oelmüller, R},
  title    = {Isolation and Characterization of High-Efficiency Rhizobia From {Western Kenya} Nodulating With Common Bean.},
  journal  = {Frontiers in microbiology},
  volume   = {12},
  number   = {},
  pages    = {697567},
  pmid     = {34566909},
  issn     = {1664-302X},
  abstract = {Common bean is one of the primary protein sources in third-world countries. They form nodules with nitrogen-fixing rhizobia, which have to be adapted to the local soils. Commercial rhizobial strains such as Rhizobium tropici CIAT899 are often used in agriculture. However, this strain failed to significantly increase the common bean yield in many places, including Kenya, due to the local soils' low pH.
We isolated two indigenous rhizobial strains from the nodules of common bean from two fields in Western Kenya that have never been exposed to commercial inocula. We then determined their ability to fix nitrogen in common beans, solubilize phosphorus, and produce indole acetic acid. In greenhouse experiments, common bean plants inoculated with two isolates, B3 and S2 in sterile vermiculite, performed better than those inoculated with CIAT899 or plants grown with nitrogen fertilizer alone. In contrast to CIAT899, both isolates grew in the media with pH 4.8. Furthermore, isolate B3 had higher phosphate solubilization ability and produced more indole acetic acid than the other two rhizobia. Genome analyses revealed that B3 and S2 are different strains of Rhizobium phaseoli. We recommend fieldwork studies in Kenyan soils to test the efficacy of the two isolates in the natural environment in an effort to produce inoculants specific for these soils.}, }
@article {pmid34564766,
  year     = {2021},
  author   = {Vela Gurovic, MS and Díaz, ML and Gallo, CA and Dietrich, J},
  title    = {Phylogenomics, {CAZyome} and core secondary metabolome of {Streptomyces} albus species.},
  journal  = {Molecular genetics and genomics : MGG},
  volume   = {296},
  number   = {6},
  pages    = {1299--1311},
  pmid     = {34564766},
  issn     = {1617-4623},
  support  = {P-UE 2017//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; PGI 24/B294//Secretaría General de Ciencia y Tecnología , Universidad Nacional del Sur/ ; },
  mesh     = {Cellulases/*genetics ; Databases, Genetic ; Genome, Bacterial/*genetics ; Glycoside Hydrolases/genetics ; Lignin/*metabolism ; Metabolome/genetics ; Mixed Function Oxygenases/genetics ; Multigene Family/genetics ; Phylogeny ; Secondary Metabolism/genetics ; Streptomyces/*genetics/*metabolism ; },
  abstract = {A phylogenomic study conducted with different bioinformatic tools such as TYGS, REALPHY and AAI comparisons revealed a high rate of misidentified Streptomyces albus genomes in GenBank.
Only 9 of the 18 annotated genomes available in the public database were correctly identified as S. albus species. The pangenome of the nine in silico confirmed S. albus genomes was almost closed. Lignocellulosic agroresidues were a common niche among strains of the S. albus clade while carbohydrate active enzymes (CAZymes) were highly conserved. Relevant enzymes for cellulose degradation such as beta glucosidases belonging to the GH1 family, a GH6 cellulase and a monooxygenase AA10-CBM2 were encoded by all S. albus genomes. Among them, one GH1 glycosidase would be regulated by CebR. However, this regulatory mechanism was not confirmed for other genes related to cellulose degradation. Based on AntiSMASH predictions, the core secondary metabolome of S. albus encompassed a total of 23 biosynthetic gene clusters (BGCs), where 4 were related to common metabolites within Streptomyces genus. Species specific BGCs included those related to pseudouridimycin and xantholipin. Additionally, four BGCs encoded putative derivatives of ibomycin, the lasso peptide SSV-2086, the lanthipeptide SapB and the terpene isorenieratene. Known metabolites could not be assigned to ten BGCs and three clusters did not match with any previously described BGC. The core genome of S. 
albus retrieved from nine closely related genomes revealed a high potential for the discovery of novel bioactive metabolites and underexplored regulatory genomic elements related to lignocellulose deconstruction.}, }
@article {pmid34562304,
  year     = {2021},
  author   = {Contreras-Moreira, B and Filippi, CV and Naamati, G and García Girón, C and Allen, JE and Flicek, P},
  title    = {K-mer counting and curated libraries drive efficient annotation of repeats in plant genomes.},
  journal  = {The plant genome},
  volume   = {14},
  number   = {3},
  pages    = {e20143},
  pmid     = {34562304},
  issn     = {1940-3372},
  support  = {108749//Wellcome Trust/United Kingdom ; BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P027849/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; },
  mesh     = {*Genome, Plant ; *Repetitive Sequences, Nucleic Acid ; },
  abstract = {The annotation of repetitive sequences within plant genomes can help in the interpretation of observed phenotypes. Moreover, repeat masking is required for tasks such as whole-genome alignment, promoter analysis, or pangenome exploration. Although homology-based annotation methods are computationally expensive, k-mer strategies for masking are orders of magnitude faster. Here, we benchmarked a two-step approach, where repeats were first called by k-mer counting and then annotated by comparison to curated libraries. This hybrid protocol was tested on 20 plant genomes from Ensembl, with the k-mer-based Repeat Detector (Red) and two repeat libraries (REdat, last updated in 2013, and nrTEplants, curated for this work). Custom libraries produced by RepeatModeler were also tested. We obtained repeated genome fractions that matched those reported in the literature but with shorter repeated elements than those produced directly by sequence homology. Inspection of the masked regions that overlapped genes revealed no preference for specific protein domains.
Most Red-masked sequences could be successfully classified by sequence similarity, with the complete protocol taking less than 2 h on a desktop Linux box. A guide to curating your own repeat libraries and the scripts for masking and annotating plant genomes can be obtained at https://github.com/Ensembl/plant-scripts.}, }
@article {pmid34559043,
  year     = {2021},
  author   = {Horesh, G and Taylor-Brown, A and McGimpsey, S and Lassalle, F and Corander, J and Heinz, E and Thomson, NR},
  title    = {Different evolutionary trends form the twilight zone of the bacterial pan-genome.},
  journal  = {Microbial genomics},
  volume   = {7},
  number   = {9},
  pages    = {},
  pmid     = {34559043},
  issn     = {2057-5858},
  support  = {/WT_/Wellcome Trust/United Kingdom ; 217303/Z/19/Z/WT_/Wellcome Trust/United Kingdom ; },
  mesh     = {Bacteria/*genetics ; Escherichia coli/genetics ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Genomics ; Multigene Family ; Phylogeny ; },
  abstract = {The pan-genome is defined as the combined set of all genes in the gene pool of a species. Pan-genome analyses have been very useful in helping to understand different evolutionary dynamics of bacterial species: an open pan-genome often indicates a free-living lifestyle with metabolic versatility, while closed pan-genomes are linked to host-restricted, ecologically specialized bacteria. A detailed understanding of the species pan-genome has also been instrumental in tracking the phylodynamics of emerging drug resistance mechanisms and drug-resistant pathogens. However, current approaches to analyse a species' pan-genome do not take the species population structure into account, nor do they account for the uneven sampling of different lineages, as is commonplace due to over-sampling of clinically relevant representatives. Here we present the application of a population structure-aware approach for classifying genes in a pan-genome based on within-species distribution.
We demonstrate our approach on a collection of 7500 Escherichia coli genomes, one of the most-studied bacterial species and used as a model for an open pan-genome. We reveal clearly distinct groups of genes, clustered by different underlying evolutionary dynamics, and provide a more biologically informed and accurate description of the species' pan-genome.}, }
@article {pmid34546072,
  year     = {2021},
  author   = {Flores Ramos, S and Brugger, SD and Escapa, IF and Skeete, CA and Cotton, SL and Eslami, SM and Gao, W and Bomar, L and Tran, TH and Jones, DS and Minot, S and Roberts, RJ and Johnston, CD and Lemon, KP},
  title    = {Genomic Stability and Genetic Defense Systems in {Dolosigranulum} pigrum, a Candidate Beneficial Bacterium from the Human Microbiome.},
  journal  = {mSystems},
  volume   = {6},
  number   = {5},
  pages    = {e0042521},
  pmid     = {34546072},
  issn     = {2379-5077},
  support  = {R01 DE027850/DE/NIDCR NIH HHS/United States ; R01 GM117174/GM/NIGMS NIH HHS/United States ; Stimulus Pilot Grant//The Forsyth Institute/ ; P3SMP3_155315//Swiss National Science Foundation and Swiss Foundation for Grants in Biology and Medicine/ ; R01 DE027850/DE/NIDCR NIH HHS/United States ; R01 GM117174/GM/NIGMS NIH HHS/United States ; 16B065//Novartis Stiftung für Medizinisch-Biologische Forschung (Novartis Foundation for Medical-Biological Research)/ ; 1449/M//Promedica Stiftung/ ; },
  abstract = {Dolosigranulum pigrum is positively associated with indicators of health in multiple epidemiological studies of human nasal microbiota. Knowledge of the basic biology of D. pigrum is a prerequisite for evaluating its potential for future therapeutic use; however, such data are very limited. To gain insight into D. pigrum's chromosomal structure, pangenome, and genomic stability, we compared the genomes of 28 D. pigrum strains that were collected across 20 years.
Phylogenomic analysis showed closely related strains circulating over this period and closure of 19 genomes revealed highly conserved chromosomal synteny. Gene clusters involved in the mobilome and in defense against mobile genetic elements (MGEs) were enriched in the accessory genome versus the core genome. A systematic analysis for MGEs identified the first candidate D. pigrum prophage and insertion sequence. A systematic analysis for genetic elements that limit the spread of MGEs, including restriction modification (RM), CRISPR-Cas, and deity-named defense systems, revealed strain-level diversity in host defense systems that localized to specific genomic sites, including one RM system hot spot. Analysis of CRISPR spacers pointed to a wealth of MGEs against which D. pigrum defends itself. These results reveal a role for horizontal gene transfer and mobile genetic elements in strain diversification while highlighting that in D. pigrum this occurs within the context of a highly stable chromosomal organization protected by a variety of defense mechanisms. IMPORTANCE Dolosigranulum pigrum is a candidate beneficial bacterium with potential for future therapeutic use. This is based on its positive associations with characteristics of health in multiple studies of human nasal microbiota across the span of human life. For example, high levels of D. pigrum nasal colonization in adults predicts the absence of Staphylococcus aureus nasal colonization. Also, D. pigrum nasal colonization in young children is associated with healthy control groups in studies of middle ear infections. Our analysis of 28 genomes revealed a remarkable stability of D. pigrum strains colonizing people in the United States across a 20-year span. We subsequently identified factors that can influence this stability, including genomic stability, phage predators, the role of MGEs in strain-level variation, and defenses against MGEs. Finally, these D. 
pigrum strains also lacked predicted virulence factors. Overall, these findings add additional support to the potential for D. pigrum as a therapeutic bacterium.}, } @article {pmid34544144, year = {2021}, author = {Sonnenberg, CB and Haugen, P}, title = {The Pseudoalteromonas multipartite genome: distribution and expression of pangene categories, and a hypothesis for the origin and evolution of the chromid.}, journal = {G3 (Bethesda, Md.)}, volume = {11}, number = {9}, pages = {}, pmid = {34544144}, issn = {2160-1836}, mesh = {Genome, Bacterial ; Genomics ; Humans ; Plasmids ; *Pseudoalteromonas ; Replicon ; }, abstract = {Bacterial genomes typically consist of one large chromosome, but can also include secondary replicons. These so-called multipartite genomes are scattered on the bacterial tree of life with the majority of cases belonging to Proteobacteria. Within the class gamma-proteobacteria, multipartite genomes are restricted to the two families Vibrionaceae and Pseudoalteromonadaceae. Whereas the genome of vibrios is well studied, information on the Pseudoalteromonadaceae genome is much scarcer. We have studied Pseudoalteromonadaceae with respect to the origin of the chromid, how pangene categories are distributed, how genes are expressed relative to their genomic location, and identified chromid hallmark genes. We calculated the Pseudoalteromonadaceae pangenome based on 25 complete genomes and found that core/softcore are significantly overrepresented in late replicating sectors of the chromid, regardless of how the chromid is replicated. On the chromosome, core/softcore and shell/cloud genes are only weakly overrepresented at the chromosomal replication origin and termination sequences, respectively. Gene expression is trending downwards with increasing distance from the chromosomal oriC, whereas the chromidal expression pattern is more complex. 
Moreover, we identified 78 chromid hallmark genes, and BLASTp searches suggest that the majority of them were acquired from the ancestral gene pool of Alteromonadales. Finally, our data strongly suggest that the chromid originates from a plasmid that was acquired in a relatively recent event. In summary, this study extends our knowledge on multipartite genomes, and helps us understand how and why secondary replicons are acquired, why they are maintained, and how they are shaped by evolution.}, } @article {pmid34542586, year = {2021}, author = {Zou, W and Ye, G and Liu, C and Zhang, K and Li, H and Yang, J}, title = {Comparative genome analysis of Clostridium beijerinckii strains isolated from pit mud of Chinese strong flavor baijiu ecosystem.}, journal = {G3 (Bethesda, Md.)}, volume = {11}, number = {11}, pages = {}, pmid = {34542586}, issn = {2160-1836}, mesh = {China ; *Clostridium beijerinckii ; Ecosystem ; Fermentation ; Humans ; Taste ; }, abstract = {Clostridium beijerinckii is a well-known anaerobic solventogenic bacterium which inhabits a wide range of different niches. Previously, we isolated five butyrate-producing C. beijerinckii strains from pit mud (PM) of strong-flavor baijiu (SFB) ecosystems. Genome annotation of the five strains showed that they could assimilate various carbon sources as well as ammonium to produce acetate, butyrate, lactate, hydrogen, and esters but did not produce the undesirable flavors isopropanol and acetone, making them useful for further exploration in SFB production. Our analysis of the genomes of an additional 233 C. beijerinckii strains revealed an open pangenome based on current sampling and will likely change with additional genomes. The core genome, accessory genome, and strain-specific genes comprised 1567, 8851, and 2154 genes, respectively. A total of 298 genes were found only in the five C. beijerinckii strains from PM, among which only 77 genes were assigned to Clusters of Orthologous Genes categories. 
In addition, 15 transposase and 12 phage integrase families were found in all five C. beijerinckii strains from PM. Between 18 and 21 genome islands were predicted for the five C. beijerinckii genomes. The existence of a large number of mobile genetic elements indicated that the genomes of the five C. beijerinckii strains evolved with the loss or insertion of DNA fragments in the PM of SFB ecosystems. This study presents a genomic framework of C. beijerinckii strains from PM that could be used for genetic diversification studies and further exploration of these strains.}, } @article {pmid34533662, year = {2021}, author = {Bansal, K and Kaur, A and Midha, S and Kumar, S and Korpole, S and Patil, PB}, title = {Xanthomonas sontii sp. nov., a non-pathogenic bacterium isolated from healthy basmati rice (Oryza sativa) seeds from India.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {11}, pages = {1935-1947}, pmid = {34533662}, issn = {1572-9699}, support = {MLP0020//Council of Scientific and Industrial Research, India/ ; }, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/analysis ; Nucleic Acid Hybridization ; *Oryza ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Seeds ; Sequence Analysis, DNA ; *Xanthomonas/genetics ; }, abstract = {We report three yellow-pigmented, Gram-negative, aerobic, rod-shaped, motile bacterial isolates designated as PPL1[T], PPL2, and PPL3 from healthy basmati rice seeds. Phenotypic and 16S rRNA gene sequence analysis assigned these isolates to the genus Xanthomonas. The 16S rRNA showed a 99.59% similarity with X. sacchari CFBP 4641[T], a sugarcane pathogen. Further, biochemical and fatty acid analysis revealed it to be closer to X. sacchari. Still, it differed from other species in general and known rice associated species such as X. oryzae (pathogenic) and X. maliensis (non-pathogenic) in particular. 
Interestingly, the isolates in this study were isolated from healthy rice plants but are closely related to species that is pathogenic and isolated from diseased sugarcane. Accordingly, in planta studies revealed that PPL1[T], PPL2, and PPL3 are non-pathogenic to rice plants upon leaf inoculation. Taxonogenomic studies based on orthologous average nucleotide identity (OrthoANI) and digital DNA-DNA hybridization (dDDH) values with type strains of Xanthomonas species were below the recommended threshold values for species delineation. Whole genome-based phylogenomic analysis revealed that these isolates formed a distinct monophyletic clade with X. sacchari CFBP 4641[T] as their closest neighbour. Further, pangenome analysis revealed PPL1[T], PPL2, and PPL3 isolates to comprise NRPS cluster along with a large number of unique genes associated with the novel species. Based on polyphasic and genomic approaches, a novel lineage and species associated with healthy rice seeds for which the name Xanthomonas sontii sp. nov. is proposed. The type strain for the X. sontii sp. nov. 
is PPL1[T] (JCM 33631[T] = CFBP 8688[T] = ICMP 23426[T] = MTCC 12491[T]) and PPL2 (JCM 33632 = CFBP 8689 = ICMP 23427 = MTCC 12492) and PPL3 (JCM 33633 = CFBP 8690 = ICMP 23428 = MTCC 12493) as other strains of the species.}, } @article {pmid34533621, year = {2021}, author = {Zhang, M and Zhang, Y and Han, X and Wang, J and Yang, Y and Ren, B and Xia, M and Li, G and Fang, R and He, H and Jia, Y}, title = {Whole genome sequencing of Enterobacter mori, an emerging pathogen of kiwifruit and the potential genetic adaptation to pathogenic lifestyle.}, journal = {AMB Express}, volume = {11}, number = {1}, pages = {129}, pmid = {34533621}, issn = {2191-0855}, support = {2016YFD0100600//National Key Research and Development Plan/ ; 31972233//National Natural Science Foundation of China/ ; 2019YFSY0025//Sichuan science and technology support project/ ; }, abstract = {Members of the Enterobacter genus are gram-negative bacteria, which are used as plant growth-promoting bacteria, and increasingly recovered from economic plants as emerging pathogens. A new Enterobacter mori strain, designated CX01, was isolated as an emerging bacterial pathogen of a recent outbreak of kiwifruit canker-like disease in China. The main symptoms associated with this syndrome are bleeding cankers on the trunk and branch, and brown leaf spots. The genome sequence of E. mori CX01 was determined as a single chromosome of 4,966,908 bp with 4640 predicted open reading frames (ORFs). To better understand the features of the genus and its potential pathogenic mechanisms, five available Enterobacter genomes were compared and a pan-genome of 4870 COGs with 3158 core COGs were revealed. An important feature of the E. mori CX01 genome is that it lacks a type III secretion system often found in pathogenic bacteria, instead it is equipped with type I, II, and VI secretory systems. 
Besides, the genes encoding putative virulence effectors, two-component systems, nutrient acquisition systems, proteins involved in phytohormone synthesis, which may contribute to the virulence and adaption to the host plant niches are included. The genome sequence of E. mori CX01 has high similarity with that of E. mori LMG 25,706, though the rearrangements occur throughout two genomes. Further pathogenicity assay showed that both strains can either invade kiwifruit or mulberry, indicating they may have similar host range. Comparison with a closely related isolate enabled us to understand its pathogenesis and ecology.}, } @article {pmid34533282, year = {2022}, author = {Yocca, AE and Edger, PP}, title = {Machine learning approaches to identify core and dispensable genes in pangenomes.}, journal = {The plant genome}, volume = {15}, number = {1}, pages = {e20135}, doi = {10.1002/tpg2.20135}, pmid = {34533282}, issn = {1940-3372}, mesh = {*Genomics ; Machine Learning ; *Oryza/genetics ; }, abstract = {A gene in a given taxonomic group is either present in every individual (core) or absent in at least a single individual (dispensable). Previous pangenomic studies have identified certain functional differences between core and dispensable genes. However, identifying if a gene belongs to the core or dispensable portion of the genome requires the construction of a pangenome, which involves sequencing the genomes of many individuals. Here we aim to leverage the previously characterized core and dispensable gene content for two grass species [Brachypodium distachyon (L.) P. Beauv. and Oryza sativa L.] to construct a machine learning model capable of accurately classifying genes as core or dispensable using only a single annotated reference genome. 
Such a model may mitigate the need for pangenome construction, an expensive hurdle especially in orphan crops, which often lack the adequate genomic resources.}, } @article {pmid34529229, year = {2021}, author = {Olanrewaju, OS and Ayilara, MS and Ayangbenro, AS and Babalola, OO}, title = {Genome Mining of Three Plant Growth-Promoting Bacillus Species from Maize Rhizosphere.}, journal = {Applied biochemistry and biotechnology}, volume = {193}, number = {12}, pages = {3949-3969}, pmid = {34529229}, issn = {1559-0291}, support = {UID123634//national research foundation/ ; }, mesh = {Bacillus/*classification/*genetics ; *Genome, Bacterial ; *Rhizosphere ; Zea mays/growth & development/*microbiology ; }, abstract = {Bacillus species genomes are rich in plant growth-promoting genetic elements. Bacillus subtilis and Bacillus velezensis are important plant growth promoters; hence, to further improve their abilities, the genetic elements responsible for these traits were characterized and reported. Genetic elements reported include those of auxin, nitrogen fixation, siderophore production, iron acquisition, volatile organic compounds, and antibiotics. Furthermore, the presence of phages and antibiotic-resistant genes in the genomes are reported. Pan-genome analysis was conducted using ten Bacillus species. From the analysis, pan-genome of Bacillus subtilis and Bacillus velezensis are still open. 
Ultimately, this study brings an insight into the genetic components of the plant growth-promoting abilities of these strains and shows their potential biotechnological applications in agriculture and other relevant sectors.}, } @article {pmid34525345, year = {2021}, author = {Ekim, B and Berger, B and Chikhi, R}, title = {Minimizer-space de Bruijn graphs: Whole-genome assembly of long reads in minutes on a personal computer.}, journal = {Cell systems}, volume = {12}, number = {10}, pages = {958-968.e6}, pmid = {34525345}, issn = {2405-4720}, support = {R01 HG010959/HG/NHGRI NIH HHS/United States ; R35 GM141861/GM/NIGMS NIH HHS/United States ; }, mesh = {*Algorithms ; *Genomics ; Humans ; Metagenomics ; Microcomputers ; Sequence Analysis, DNA/methods ; }, abstract = {DNA sequencing data continue to progress toward longer reads with increasingly lower sequencing error rates. Here, we define an algorithmic approach, mdBG, that makes use of minimizer-space de Bruijn graphs to enable long-read genome assembly. mdBG achieves orders-of-magnitude improvement in both speed and memory usage over existing methods without compromising accuracy. A human genome is assembled in under 10 min using 8 cores and 10 GB RAM, and 60 Gbp of metagenome reads are assembled in 4 min using 1 GB RAM. In addition, we constructed a minimizer-space de Bruijn graph-based representation of 661,405 bacterial genomes, comprising 16 million nodes and 45 million edges, and successfully search it for anti-microbial resistance (AMR) genes in 12 min. We expect our advances to be essential to sequence analysis, given the rise of long-read sequencing in genomics, metagenomics, and pangenomics. 
Code for constructing mdBGs is freely available for download at https://github.com/ekimb/rust-mdbg/.}, } @article {pmid34521456, year = {2021}, author = {Colquhoun, RM and Hall, MB and Lima, L and Roberts, LW and Malone, KM and Hunt, M and Letcher, B and Hawkey, J and George, S and Pankhurst, L and Iqbal, Z}, title = {Pandora: nucleotide-resolution bacterial pan-genomics with reference graphs.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {267}, pmid = {34521456}, issn = {1474-760X}, support = {102541/Z/13/Z/WT_/Wellcome Trust/United Kingdom ; 05279/Z/14/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Algorithms ; Escherichia coli/genetics ; Genetic Variation ; *Genome, Bacterial ; Genomics/*methods ; High-Throughput Nucleotide Sequencing ; Nanopore Sequencing ; Nucleotides ; Sequence Alignment ; Sequence Analysis, DNA ; *Software ; }, abstract = {We present pandora, a novel pan-genome graph structure and algorithms for identifying variants across the full bacterial pan-genome. As much bacterial adaptability hinges on the accessory genome, methods which analyze SNPs in just the core genome have unsatisfactory limitations. Pandora approximates a sequenced genome as a recombinant of references, detects novel variation and pan-genotypes multiple samples. Using a reference graph of 578 Escherichia coli genomes, we compare 20 diverse isolates. 
Pandora recovers more rare SNPs than single-reference-based tools, is significantly better than picking the closest RefSeq reference, and provides a stable framework for analyzing diverse samples without reference bias.}, } @article {pmid34513324, year = {2021}, author = {Da Silva, K and Pons, N and Berland, M and Plaza Oñate, F and Almeida, M and Peterlongo, P}, title = {StrainFLAIR: strain-level profiling of metagenomic samples using variation graphs.}, journal = {PeerJ}, volume = {9}, number = {}, pages = {e11884}, pmid = {34513324}, issn = {2167-8359}, abstract = {Current studies are shifting from the use of single linear references to representation of multiple genomes organised in pangenome graphs or variation graphs. Meanwhile, in metagenomic samples, resolving strain-level abundances is a major step in microbiome studies, as associations between strain variants and phenotype are of great interest for diagnostic and therapeutic purposes. We developed StrainFLAIR with the aim of showing the feasibility of using variation graphs for indexing highly similar genomic sequences up to the strain level, and for characterizing a set of unknown sequenced genomes by querying this graph. On simulated data composed of mixtures of strains from the same bacterial species Escherichia coli, results show that StrainFLAIR was able to distinguish and estimate the abundances of close strains, as well as to highlight the presence of a new strain close to a referenced one and to estimate its abundance. On a real dataset composed of a mix of several bacterial species and several strains for the same species, results show that in a more complex configuration StrainFLAIR correctly estimates the abundance of each strain. 
Hence, results demonstrated how graph representation of multiple close genomes can be used as a reference to characterize a sample at the strain level.}, } @article {pmid34499026, year = {2021}, author = {Hall, RJ and Whelan, FJ and Cummins, EA and Connor, C and McNally, A and McInerney, JO}, title = {Gene-gene relationships in an Escherichia coli accessory genome are linked to function and mobility.}, journal = {Microbial genomics}, volume = {7}, number = {9}, pages = {}, pmid = {34499026}, issn = {2057-5858}, support = {/WT_/Wellcome Trust/United Kingdom ; BB/N018044/2/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {DNA Transposable Elements ; Escherichia coli/*genetics ; Escherichia coli Infections/microbiology ; Evolution, Molecular ; Genes, Bacterial ; *Genome, Bacterial ; Phylogeny ; Virulence/genetics ; }, abstract = {The pangenome contains all genes encoded by a species, with the core genome present in all strains and the accessory genome in only a subset. Coincident gene relationships are expected within the accessory genome, where the presence or absence of one gene is influenced by the presence or absence of another. Here, we analysed the accessory genome of an Escherichia coli pangenome consisting of 400 genomes from 20 sequence types to identify genes that display significant co-occurrence or avoidance patterns with one another. We present a complex network of genes that are either found together or that avoid one another more often than would be expected by chance, and show that these relationships vary by lineage. We demonstrate that genes co-occur by function, and that several highly connected gene relationships are linked to mobile genetic elements. We find that genes are more likely to co-occur with, rather than avoid, another gene in the accessory genome. 
This work furthers our understanding of the dynamic nature of prokaryote pangenomes and implicates both function and mobility as drivers of gene relationships.}, } @article {pmid34491998, year = {2021}, author = {Mazzuoli, MV and Daunesse, M and Varet, H and Rosinski-Chupin, I and Legendre, R and Sismeiro, O and Gominet, M and Kaminski, PA and Glaser, P and Chica, C and Trieu-Cuot, P and Firon, A}, title = {The CovR regulatory network drives the evolution of Group B Streptococcus virulence.}, journal = {PLoS genetics}, volume = {17}, number = {9}, pages = {e1009761}, pmid = {34491998}, issn = {1553-7404}, mesh = {Bacterial Proteins/genetics/*physiology ; Chromosomes, Bacterial ; *Gene Regulatory Networks ; Genes, Bacterial ; Host-Pathogen Interactions ; Humans ; Promoter Regions, Genetic ; Prophages/genetics ; Streptococcus agalactiae/genetics/*pathogenicity ; Transcription, Genetic/physiology ; Virulence/*genetics ; Virulence Factors/genetics/*physiology ; }, abstract = {Virulence of the neonatal pathogen Group B Streptococcus is under the control of the master regulator CovR. Inactivation of CovR is associated with large-scale transcriptome remodeling and impairs almost every step of the interaction between the pathogen and the host. However, transcriptome analyses suggested a plasticity of the CovR signaling pathway in clinical isolates leading to phenotypic heterogeneity in the bacterial population. In this study, we characterized the CovR regulatory network in a strain representative of the CC-17 hypervirulent lineage responsible of the majority of neonatal meningitis. Transcriptome and genome-wide binding analysis reveal the architecture of the CovR network characterized by the direct repression of a large array of virulence-associated genes and the extent of co-regulation at specific loci. 
Comparative functional analysis of the signaling network links strain-specificities to the regulation of the pan-genome, including the two specific hypervirulent adhesins and horizontally acquired genes, to mutations in CovR-regulated promoters, and to variability in CovR activation by phosphorylation. This regulatory adaptation occurs at the level of genes, promoters, and of CovR itself, and allows to globally reshape the expression of virulence genes. Overall, our results reveal the direct, coordinated, and strain-specific regulation of virulence genes by the master regulator CovR and suggest that the intra-species evolution of the signaling network is as important as the expression of specific virulence factors in the emergence of clone associated with specific diseases.}, } @article {pmid34490049, year = {2021}, author = {Li, G and Jiang, T and Li, J and Wang, Y}, title = {PanSVR: Pan-Genome Augmented Short Read Realignment for Sensitive Detection of Structural Variations.}, journal = {Frontiers in genetics}, volume = {12}, number = {}, pages = {731515}, pmid = {34490049}, issn = {1664-8021}, abstract = {The comprehensive discovery of structure variations (SVs) is fundamental to many genomics studies and high-throughput sequencing has become a common approach to this task. However, due the limited length, it is still non-trivial to state-of-the-art tools to accurately align short reads and produce high-quality SV callsets. Pan-genome provides a novel and promising framework to short read-based SV calling since it enables to comprehensively integrate known variants to reduce the incompleteness and bias of single reference to breakthrough the bottlenecks of short read alignments and provide new evidences to the detection of SVs. However, it is still an open problem to develop effective computational approaches to fully take the advantage of pan-genomes. 
Herein, we propose Pan-genome augmented Structure Variation calling tool with read Re-alignment (PanSVR), a novel pan-genome-based SV calling approach. PanSVR uses several tailored methods to implement precise re-alignment for SV-spanning reads against well-organized pan-genome reference with plenty of known SVs. PanSVR enables to greatly improve the quality of short read alignments and produce clear and homogenous SV signatures which facilitate SV calling. Benchmark results on real sequencing data suggest that PanSVR is able to largely improve the sensitivity of SV calling than that of state-of-the-art SV callers, especially for the SVs from repeat-rich regions and/or novel insertions which are difficult to existing tools.}, } @article {pmid34489888, year = {2021}, author = {Karthik, K and Anbazhagan, S and Thomas, P and Ananda Chitra, M and Senthilkumar, TMA and Sridhar, R and Dhinakar Raj, G}, title = {Genome Sequencing and Comparative Genomics of Indian Isolates of Brucella melitensis.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {698069}, pmid = {34489888}, issn = {1664-302X}, abstract = {Brucella melitensis causes small ruminant brucellosis and a zoonotic pathogen prevalent worldwide. Whole genome phylogeny of all available B. melitensis genomes (n = 355) revealed that all Indian isolates (n = 16) clustered in the East Mediterranean lineage except the ADMAS-GI strain. Pangenome analysis indicated the presence of limited accessory genomes with few clades showing specific gene presence/absence pattern. A total of 43 virulence genes were predicted in all the Indian strains of B. melitensis except 2007BM-1 (ricA and wbkA are absent). Multilocus sequence typing (MLST) analysis indicated all except one Indian strain (ADMAS-GI) falling into sequence type (ST 8). In comparison with MLST, core genome phylogeny indicated two major clusters (>70% bootstrap support values) among Indian strains. 
Clusters with <70% bootstrap support values represent strains with diverse evolutionary origins present among animal and human hosts. Genetic relatedness among animal (sheep and goats) and human strains with 100% bootstrap values shows its zoonotic transfer potentiality. SNP-based analysis indicated similar clustering to that of core genome phylogeny. Among the Indian strains, the highest number of unique SNPs (112 SNPs) were shared by a node that involved three strains from Tamil Nadu. The node SNPs involved several peptidase genes like U32, M16 inactive domain protein, clp protease family protein, and M23 family protein and mostly represented non-synonymous (NS) substitutions. Vaccination has been followed in several parts of the world to prevent small ruminant brucellosis but not in India. Comparison of Indian strains with vaccine strains showed that M5 is genetically closer to most of the Indian strains than Rev.1 strain. The presence of most of the virulence genes among all Indian strains and conserved core genome compositions suggest the use of any circulating strain/genotypes for the development of a vaccine candidate for small ruminant brucellosis in India.}, } @article {pmid34489883, year = {2021}, author = {Agarwal, G and Choudhary, D and Stice, SP and Myers, BK and Gitaitis, RD and Venter, SN and Kvitko, BH and Dutta, B}, title = {Pan-Genome-Wide Analysis of Pantoea ananatis Identified Genes Linked to Pathogenicity in Onion.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {684756}, pmid = {34489883}, issn = {1664-302X}, abstract = {Pantoea ananatis, a gram negative and facultative anaerobic bacterium is a member of a Pantoea spp. complex that causes center rot of onion, which significantly affects onion yield and quality. 
This pathogen does not have typical virulence factors like type II or type III secretion systems but appears to require a biosynthetic gene-cluster, HiVir/PASVIL (located chromosomally comprised of 14 genes), for a phosphonate secondary metabolite, and the 'alt' gene cluster (located in plasmid and comprised of 11 genes) that aids in bacterial colonization in onion bulbs by imparting tolerance to thiosulfinates. We conducted a deep pan-genome-wide association study (pan-GWAS) to predict additional genes associated with pathogenicity in P. ananatis using a panel of diverse strains (n = 81). We utilized a red-onion scale necrosis assay as an indicator of pathogenicity. Based on this assay, we differentiated pathogenic (n = 51)- vs. non-pathogenic (n = 30)-strains phenotypically. Pan-genome analysis revealed a large core genome of 3,153 genes and a flexible accessory genome. Pan-GWAS using the presence and absence variants (PAVs) predicted 42 genes, including 14 from the previously identified HiVir/PASVIL cluster associated with pathogenicity, and 28 novel genes that were not previously associated with pathogenicity in onion. Of the 28 novel genes identified, eight have annotated functions of site-specific tyrosine kinase, N-acetylmuramoyl-L-alanine amidase, conjugal transfer, and HTH-type transcriptional regulator. The remaining 20 genes are currently hypothetical. Further, a core-genome SNPs-based phylogeny and horizontal gene transfer (HGT) studies were also conducted to assess the extent of lateral gene transfer among diverse P. ananatis strains. Phylogenetic analysis based on PAVs and whole genome multi locus sequence typing (wgMLST) rather than core-genome SNPs distinguished red-scale necrosis inducing (pathogenic) strains from non-scale necrosis inducing (non-pathogenic) strains of P. ananatis. A total of 1182 HGT events including the HiVir/PASVIL and alt cluster genes were identified. 
These events could be regarded as a major contributing factor to the diversification, niche-adaptation and potential acquisition of pathogenicity/virulence genes in P. ananatis.}, } @article {pmid34488837, year = {2021}, author = {Letcher, B and Hunt, M and Iqbal, Z}, title = {Gramtools enables multiscale variation analysis with genome graphs.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {259}, pmid = {34488837}, issn = {1474-760X}, support = {/WT_/Wellcome Trust/United Kingdom ; 200205/WT_/Wellcome Trust/United Kingdom ; /MRC_/Medical Research Council/United Kingdom ; }, mesh = {*Algorithms ; Alleles ; Antigens, Surface/metabolism ; Computer Simulation ; *Genetic Variation ; *Genome, Human ; Genotyping Techniques ; Haplotypes/genetics ; Humans ; Mycobacterium tuberculosis/genetics ; Plasmodium falciparum/genetics ; Polymorphism, Single Nucleotide/genetics ; Reproducibility of Results ; Sequence Deletion ; }, abstract = {Genome graphs allow very general representations of genetic variation; depending on the model and implementation, variation at different length-scales (single nucleotide polymorphisms (SNPs), structural variants) and on different sequence backgrounds can be incorporated with different levels of transparency. We implement a model which handles this multiscale variation and develop a JSON extension of VCF (jVCF) allowing for variant calls on multiple references, both implemented in our software gramtools. We find gramtools outperforms existing methods for genotyping SNPs overlapping large deletions in M. tuberculosis and is able to genotype on multiple alternate backgrounds in P. 
falciparum, revealing previously hidden recombination.}, } @article {pmid34486143, year = {2021}, author = {Gupta, PK}, title = {GWAS for genetics of complex quantitative traits: Genome to pangenome and SNPs to SVs and k-mers.}, journal = {BioEssays : news and reviews in molecular, cellular and developmental biology}, volume = {43}, number = {11}, pages = {e2100109}, doi = {10.1002/bies.202100109}, pmid = {34486143}, issn = {1521-1878}, mesh = {Genome, Plant/genetics ; *Genome-Wide Association Study ; Humans ; Phenotype ; *Polymorphism, Single Nucleotide/genetics ; }, abstract = {The development of improved methods for genome-wide association studies (GWAS) for genetics of quantitative traits has been an active area of research during the last 25 years. This activity initially started with the use of mixed linear model (MLM), which was variously modified. During the last decade, however, with the availability of high throughput next generation sequencing (NGS) technology, development and use of pangenomes and novel markers including structural variations (SVs) and k-mers for GWAS has taken over as a new thrust area of research. Pangenomes and SVs are now available in humans, livestock, and a number of plant species, so that these resources along with k-mers are being used in GWAS for exploring additional genetic variation that was hitherto not available for analysis. 
These developments have resulted in significant improvement in GWAS methodology for detection of marker-trait associations (MTAs) that are relevant to human healthcare and crop improvement.}, } @article {pmid34485641, year = {2021}, author = {Mann, A and Malik, S and Rana, JS and Nehra, K}, title = {Whole genome sequencing data of Klebsiella aerogenes isolated from agricultural soil of Haryana, India.}, journal = {Data in brief}, volume = {38}, number = {}, pages = {107311}, pmid = {34485641}, issn = {2352-3409}, abstract = {Klebsiella aerogenes, is a Gram-negative bacterium, which was previously known as Enterobacter aerogenes. It is present in all environments such as water, soil, air and hospitals; and is an opportunistic pathogen that causes several types of infections. As compared to other clinically important pathogens included in the ESKAPE category (Enterococcus faecium, Staphylococcus aureus, Klebsiella pneumoniae, Acinetobacter baumannii, Pseudomonas aeruginosa, and Enterobacter species), the pangenome and population structure of Klebsiella aerogenes is still poorly understood. For the present study, the bacterial sample was isolated from agricultural soils of Haryana, India. With an aim to identify the occurrence of multi-drug resistance genes in the agricultural field soil bacterial isolate, whole genome sequencing (WGS) of the bacteria was performed; and the antibiotic resistance causing genes, along with the genes responsible for other major functions of the cell; and the different Single Nucleotide Polymorphisms (SNPs) and Insertions and deletions (InDels) were identified. 
The data presented in this manuscript can be reused by researchers as a reference for determining the antibiotic resistance genes that could be present in different bacterial isolates, and it would also help in determination of functions of various other genes present in other genomes of Klebsiella species.}, } @article {pmid34484138, year = {2021}, author = {Rai, A and Jagadeeshwari, U and Deepshikha, G and Smita, N and Sasikala, C and Ramana, CV}, title = {Phylotaxogenomics for the Reappraisal of the Genus Roseomonas With the Creation of Six New Genera.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {677842}, pmid = {34484138}, issn = {1664-302X}, abstract = {The genus Roseomonas is a significant group of bacteria which is invariably of great clinical and ecological importance. Previous studies have shown that the genus Roseomonas is polyphyletic in nature. Our present study focused on generating a lucid understanding of the phylogenetic framework for the re-evaluation and reclassification of the genus Roseomonas. Phylogenetic studies based on the 16S rRNA gene and 92 concatenated genes suggested that the genus is heterogeneous, forming seven major groups. Existing Roseomonas species were subjected to an array of genomic, phenotypic, and chemotaxonomic analyses in order to resolve the heterogeneity. Genomic similarity indices (dDDH and ANI) indicated that the members were well-defined at the species level. The Percentage of Conserved Proteins (POCP) and the average Amino Acid Identity (AAI) values between the groups of the genus Roseomonas and other interspersing members of the family Acetobacteraceae were below 65 and 70%, respectively. The pan-genome evaluation depicted that the pan-genome was an open type and the members shared 958 core genes. This claim of reclassification was equally supported by the phenotypic and chemotaxonomic differences between the groups. 
Thus, in this study, we propose to re-evaluate and reclassify the genus Roseomonas and propose six novel genera as Pararoseomonas gen. nov., Falsiroseomonas gen. nov., Paeniroseomonas gen. nov., Plastoroseomonas gen. nov., Neoroseomonas gen. nov., and Pseudoroseomonas gen. nov.}, } @article {pmid34480670, year = {2021}, author = {Vandamme, P and Peeters, C and Seth-Smith, HMB and Graf, L and Cnockaert, M and Egli, A and Goldenberger, D}, title = {Gulosibacter hominis sp. nov.: a novel human microbiome bacterium that may cause opportunistic infections.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {11}, pages = {1841-1854}, pmid = {34480670}, issn = {1572-9699}, mesh = {Bacterial Typing Techniques ; DNA, Bacterial/genetics ; Fatty Acids/analysis ; Humans ; *Microbiota ; Nucleic Acid Hybridization ; *Opportunistic Infections ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {We present genomic, phylogenomic, and phenotypic taxonomic data to demonstrate that three human ear isolates represent a novel species within the genus Gulosibacter. These isolates could not be identified reliably using MALDI-TOF mass spectrometry during routine diagnostic work, but partial 16S rRNA gene sequence analysis revealed that they belonged to the genus Gulosibacter. Overall genomic relatedness indices between the draft genome sequences of the three isolates and of the type strains of established Gulosibacter species confirmed that the three isolates represented a single novel Gulosibacter species. A biochemical characterisation yielded differential tests between the novel and established Gulosibacter species, which could also be differentiated using MALDI-TOF mass spectrometry. We propose to formally classify these three isolates into Gulosibacter hominis sp. nov., with 401352-2018[ T] (= LMG 31778[ T], CCUG 74795[ T]) as the type strain. 
The whole-genome sequence of strain 401352-2018[ T] has a size of 2,340,181 bp and a G+C content of 62.04 mol%. A Gulosibacter pangenome analysis revealed 467 gene clusters that were exclusively present in G. hominis genomes. While these G. hominis specific gene clusters were enriched in several COG functional categories, this analysis did not reveal functions that suggested a role in the human microbiome, nor did it explain the occurrence of G. hominis in ear infections. The absence of acquired antimicrobial resistance determinants and virulence factors in the G. hominis genomes, and an analysis of publicly available 16S rRNA gene sequences and 16S rRNA amplicon sequencing data sets suggested that G. hominis is a member of the human skin microbiota that may occasionally be involved in opportunistic infections.}, } @article {pmid34462542, year = {2021}, author = {Li, Q and Tian, S and Yan, B and Liu, CM and Lam, TW and Li, R and Luo, R}, title = {Building a Chinese pan-genome of 486 individuals.}, journal = {Communications biology}, volume = {4}, number = {1}, pages = {1016}, pmid = {34462542}, issn = {2399-3642}, mesh = {China ; *Computational Biology ; *Genome, Human ; Humans ; }, abstract = {Pan-genome sequence analysis of human population ancestry is critical for expanding and better defining human genome sequence diversity. However, the amount of genetic variation still missing from current human reference sequences is still unknown. Here, we used 486 deep-sequenced Han Chinese genomes to identify 276 Mbp of DNA sequences that, to our knowledge, are absent in the current human reference. We classified these sequences into individual-specific and common sequences, and propose that the common sequence size is uncapped with a growing population. The 46.646 Mbp common sequences obtained from the 486 individuals improved the accuracy of variant calling and mapping rate when added to the reference genome. 
We also analyzed the genomic positions of these common sequences and found that they came from genomic regions characterized by high mutation rate and low pathogenicity. Our study authenticates the Chinese pan-genome as representative of DNA sequences specific to the Han Chinese population missing from the GRCh38 reference genome and establishes the newly defined common sequences as candidates to supplement the current human reference.}, } @article {pmid34462533, year = {2021}, author = {Peters, S and Pascoe, B and Wu, Z and Bayliss, SC and Zeng, X and Edwinson, A and Veerabadhran-Gurunathan, S and Jawahir, S and Calland, JK and Mourkas, E and Patel, R and Wiens, T and Decuir, M and Boxrud, D and Smith, K and Parker, CT and Farrugia, G and Zhang, Q and Sheppard, SK and Grover, M}, title = {Campylobacter jejuni genotypes are associated with post-infection irritable bowel syndrome in humans.}, journal = {Communications biology}, volume = {4}, number = {1}, pages = {1015}, pmid = {34462533}, issn = {2399-3642}, support = {K23 DK103911/DK/NIDDK NIH HHS/United States ; R03 DK120745/DK/NIDDK NIH HHS/United States ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Adult ; Campylobacter Infections/*epidemiology/microbiology ; Campylobacter jejuni/*genetics/*pathogenicity ; Female ; *Genotype ; Humans ; Irritable Bowel Syndrome/*epidemiology/microbiology ; Male ; Middle Aged ; Risk Factors ; Virulence/genetics ; }, abstract = {Campylobacter enterocolitis may lead to post-infection irritable bowel syndrome (PI-IBS) and while some C. jejuni strains are more likely than others to cause human disease, genomic and virulence characteristics promoting PI-IBS development remain uncharacterized. We combined pangenome-wide association studies and phenotypic assays to compare C. jejuni isolates from patients who developed PI-IBS with those who did not. 
We show that variation in bacterial stress response (Cj0145_phoX), adhesion protein (Cj0628_CapA), and core biosynthetic pathway genes (biotin: Cj0308_bioD; purine: Cj0514_purQ; isoprenoid: Cj0894c_ispH) were associated with PI-IBS development. In vitro assays demonstrated greater adhesion, invasion, IL-8 and TNFα secretion on colonocytes with PI-IBS compared to PI-no-IBS strains. A risk-score for PI-IBS development was generated using 22 genomic markers, four of which were from Cj1631c, a putative heme oxidase gene linked to virulence. Our finding that specific Campylobacter genotypes confer greater in vitro virulence and increased risk of PI-IBS has potential to improve understanding of the complex host-pathogen interactions underlying this condition.}, } @article {pmid34462475, year = {2021}, author = {Porcellato, D and Smistad, M and Skeie, SB and Jørgensen, HJ and Austbø, L and Oppegaard, O}, title = {Whole genome sequencing reveals possible host species adaptation of Streptococcus dysgalactiae.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {17350}, pmid = {34462475}, issn = {2045-2322}, mesh = {Animals ; Cattle ; Cluster Analysis ; DNA, Bacterial/genetics ; Genes, Bacterial ; *Genome ; Phenotype ; Phylogeny ; Sheep/*microbiology ; Streptococcal Infections/*veterinary ; Streptococcus/*genetics ; Virulence ; Virulence Factors ; Whole Genome Sequencing ; }, abstract = {Streptococcus dysgalactiae (SD) is an emerging pathogen in human and veterinary medicine, and is associated with several host species, disease phenotypes and virulence mechanisms. SD has traditionally been divided into the subspecies dysgalactiae (SDSD) and subsp. equisimilis (SDSE), but recent molecular studies have indicated that the phylogenetic relationships are more complex. Moreover, the genetic basis for the niche versatility of SD has not been extensively investigated. 
To expand the knowledge about virulence factors, phylogenetic relationships and host-adaptation strategies of SD, we analyzed 78 SDSD genomes from cows and sheep, and 78 SDSE genomes from other host species. Sixty SDSD and 40 SDSE genomes were newly sequenced in this study. Phylogenetic analysis supported SDSD as a distinct taxonomic entity, presenting a mean value of the average nucleotide identity of 99%. Bovine and ovine associated SDSD isolates clustered separately on pangenome analysis, but no single gene or genetic region was uniquely associated with host species. In contrast, SDSE isolates were more heterogenous and could be delineated in accordance with host. Although phylogenetic clustering suggestive of cross species transmission was observed, we predominantly detected a host restricted distribution of the SD-lineages. Furthermore, lineage specific virulence factors were detected, several of them located in proximity to hotspots for integration of mobile genetic elements. Our study indicates that SD has evolved to adapt to several different host species and infers a potential role of horizontal genetic transfer in niche specialization.}, } @article {pmid34456897, year = {2021}, author = {Bachert, BA and Richardson, JB and Mlynek, KD and Klimko, CP and Toothman, RG and Fetterer, DP and Luquette, AE and Chase, K and Storrs, JL and Rogers, AK and Cote, CK and Rozak, DA and Bozue, JA}, title = {Development, Phenotypic Characterization and Genomic Analysis of a Francisella tularensis Panel for Tularemia Vaccine Testing.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {725776}, pmid = {34456897}, issn = {1664-302X}, abstract = {Francisella tularensis is one of several biothreat agents for which a licensed vaccine is needed to protect against this pathogen. To aid in the development of a vaccine protective against pneumonic tularemia, we generated and characterized a panel of F. 
tularensis isolates that can be used as challenge strains to assess vaccine efficacy. Our panel consists of both historical and contemporary isolates derived from clinical and environmental sources, including human, tick, and rabbit isolates. Whole genome sequencing was performed to assess the genetic diversity in comparison to the reference genome F. tularensis Schu S4. Average nucleotide identity analysis showed >99% genomic similarity across the strains in our panel, and pan-genome analysis revealed a core genome of 1,707 genes, and an accessory genome of 233 genes. Three of the strains in our panel, FRAN254 (tick-derived), FRAN255 (a type B strain), and FRAN256 (a human isolate) exhibited variation from the other strains. Moreover, we identified several unique mutations within the Francisella Pathogenicity Island across multiple strains in our panel, revealing unexpected diversity in this region. Notably, FRAN031 (Scherm) completely lacked the second pathogenicity island but retained virulence in mice. In contrast, FRAN037 (Coll) was attenuated in a murine pneumonic tularemia model and had mutations in pdpB and iglA which likely led to attenuation. All of the strains, except FRAN037, retained full virulence, indicating their effectiveness as challenge strains for future vaccine testing. Overall, we provide a well-characterized panel of virulent F. tularensis strains that can be utilized in ongoing efforts to develop an effective vaccine against pneumonic tularemia to ensure protection is achieved across a range of F. tularensis strains.}, } @article {pmid34456520, year = {2021}, author = {Outten, J and Warren, A}, title = {Methods and Developments in Graphical Pangenomics.}, journal = {Journal of the Indian Institute of Science}, volume = {101}, number = {3}, pages = {485-498}, pmid = {34456520}, issn = {0970-4140}, abstract = {Pangenomes are organized collections of the genomic information from related individuals or groups. 
Graphical pangenomics is the study of these pangenomes using graphical methods to identify and analyze genes, regions, and mutations of interest to an array of biological questions. This field has seen significant progress in recent years including the development of graph based models that better resolve biological phenomena, and an explosion of new tools for mapping reads, creating graphical genomes, and performing pangenome analysis. In this review, we discuss recent developments in models, algorithms associated with graphical genomes, and comparisons between similar tools. In addition we briefly discuss what these developments may mean for the future of genomics.}, } @article {pmid34454585, year = {2021}, author = {Moolhuijzen, P and See, PT and Moffat, CS}, title = {The first genome assembly of fungal pathogen Pyrenophora tritici-repentis race 1 isolate using Oxford Nanopore MinION sequencing.}, journal = {BMC research notes}, volume = {14}, number = {1}, pages = {334}, pmid = {34454585}, issn = {1756-0500}, support = {CUR00023//Grains Research and Development Corporation/ ; CUR00023//Curtin University of Technology/ ; }, mesh = {*Ascomycota/genetics ; Genome, Fungal/genetics ; High-Throughput Nucleotide Sequencing ; *Nanopores ; }, abstract = {OBJECTIVES: The assembly of fungal genomes using short-reads is challenged by long repetitive and low GC regions. However, long-read sequencing technologies, such as PacBio and Oxford Nanopore, are able to overcome many problematic regions, thereby providing an opportunity to improve fragmented genome assemblies derived from short reads only. Here, a necrotrophic fungal pathogen Pyrenophora tritici-repentis (Ptr) isolate 134 (Ptr134), which causes tan spot disease on wheat, was sequenced on a MinION using Oxford Nanopore Technologies (ONT), to improve on a previous Illumina short-read genome assembly and provide a more complete genome resource for pan-genomic analyses of Ptr.

RESULTS: The genome of Ptr134 sequenced on a MinION using ONT was assembled into 28 contiguous sequences with a total length of 40.79 Mb and GC content of 50.81%. The long-read assembly provided 6.79 Mb of new sequence and 2846 extra annotated protein coding genes as compared to the previous short-read assembly. This improved genome sequence represents near complete chromosomes, an important resource for large scale and pan genomic comparative analyses.}, } @article {pmid34442854, year = {2021}, author = {Mashima, I and Liao, YC and Lin, CH and Nakazawa, F and Haase, EM and Kiyoura, Y and Scannapieco, FA}, title = {Comparative Pan-Genome Analysis of Oral Veillonella Species.}, journal = {Microorganisms}, volume = {9}, number = {8}, pages = {}, pmid = {34442854}, issn = {2076-2607}, support = {19K18975//Japan Society for the Promotion of Science/ ; No grant number//Yakult Bio-Science Foundation/ ; No grant number//Kato Memorial Bioscience Foundation/ ; No grant number//Ohu University/ ; }, abstract = {The genus Veillonella is a common and abundant member of the oral microbiome. It includes eight species, V. atypica, V. denticariosi, V. dispar, V. infantium, V. nakazawae, V. parvula, V. rogosae and V. tobetusensis. They possess important metabolic pathways that utilize lactate as an energy source. However, the overall metabolome of these species has not been studied. To further understand the metabolic framework of Veillonella in the human oral microbiome, we conducted a comparative pan-genome analysis of the eight species of oral Veillonella. Analysis of the oral Veillonella pan-genome revealed features based on KEGG pathway information to adapt to the oral environment. We found that the fructose metabolic pathway was conserved in all oral Veillonella species, and oral Veillonella have conserved pathways that utilize carbohydrates other than lactate as an energy source. 
This discovery may help to better understand the metabolic network among oral microbiomes and will provide guidance for the design of future in silico and in vitro studies.}, } @article {pmid34442840, year = {2021}, author = {Agarwal, G and Gitaitis, RD and Dutta, B}, title = {Pan-Genome of Novel Pantoea stewartii subsp. indologenes Reveals Genes Involved in Onion Pathogenicity and Evidence of Lateral Gene Transfer.}, journal = {Microorganisms}, volume = {9}, number = {8}, pages = {}, pmid = {34442840}, issn = {2076-2607}, support = {AWD00009682//Specialty Crop Block Grant/ ; }, abstract = {Pantoea stewartii subsp. indologenes (Psi) is a causative agent of leafspot on foxtail millet and pearl millet; however, novel strains were recently identified that are pathogenic on onions. Our recent host range evaluation study identified two pathovars; P. stewartii subsp. indologenes pv. cepacicola pv. nov. and P. stewartii subsp. indologenes pv. setariae pv. nov. that are pathogenic on onions and millets or on millets only, respectively. In the current study, we developed a pan-genome using the whole genome sequencing of newly identified/classified Psi strains from both pathovars [pv. cepacicola (n = 4) and pv. setariae (n = 13)]. The full spectrum of the pan-genome contained 7030 genes. Among these, 3546 (present in genomes of all 17 strains) were the core genes that were a subset of 3682 soft-core genes (present in ≥16 strains). The accessory genome included 1308 shell genes and 2040 cloud genes (present in ≤2 strains). The pan-genome showed a clear linear progression with >6000 genes, suggesting that the pan-genome of Psi is open. Comparative phylogenetic analysis showed differences in phylogenetic clustering of Pantoea spp. using PAVs/wgMLST approach in comparison with core genome SNPs-based phylogeny. Further, we conducted a horizontal gene transfer (HGT) study using Psi strains from both pathovars along with strains from other Pantoea species, namely, P. 
stewartii subsp. stewartii LMG 2715[T], P. ananatis LMG 2665[T], P. agglomerans LMG L15, and P. allii LMG 24248[T]. A total of 317 HGT events among four Pantoea species were identified with most gene transfer events occurring between Psi pv. cepacicola and Psi pv. setariae. Pan-GWAS analysis predicted a total of 154 genes, including seven gene-clusters, which were associated with the pathogenicity phenotype (necrosis on seedling) on onions. One of the gene-clusters contained 11 genes with known functions and was found to be chromosomally located.}, } @article {pmid34442814, year = {2021}, author = {Lee, JY and Lee, DH and Kim, DH}, title = {Characterization of Martelella soudanensis sp. nov., Isolated from a Mine Sediment.}, journal = {Microorganisms}, volume = {9}, number = {8}, pages = {}, pmid = {34442814}, issn = {2076-2607}, abstract = {Gram-stain-negative, strictly aerobic, non-spore-forming, non-motile, and rod-shaped bacterial strains, designated NC18[T] and NC20, were isolated from the sediment near-vertical borehole effluent originating 714 m below the subsurface located in the Soudan Iron Mine in Minnesota, USA. The 16S rRNA gene sequence showed that strains NC18[T] and NC20 grouped with members of the genus Martelella, including M. mediterranea DSM 17316[T] and M. limonii YC7034[T]. The genome sizes and G + C content of both NC18[T] and NC20 were 6.1 Mb and 61.8 mol%, respectively. Average nucleotide identity (ANI), the average amino acid identity (AAI), and digital DNA-DNA hybridization (dDDH) values were below the species delineation threshold. Pan-genomic analysis showed that NC18[T], NC20, M. mediterranea DSM 17316[T], M. endophytica YC6887[T], and M. lutilitoris GH2-6[T] had 8470 pan-genome orthologous groups (POGs) in total. 
Five Martelella strains shared 2258 POG core, which were mainly associated with amino acid transport and metabolism, general function prediction only, carbohydrate transport and metabolism, translation, ribosomal structure and biogenesis, and transcription. The two novel strains had major fatty acids (>5%) including summed feature 8 (C18:1 ω7c and/or C18:1 ω6c), C19:0 cyclo ω8c, C16:0, C18:1 ω7c 11-methyl, C18:0, and summed feature 2 (C12:0 aldehyde and/or iso-C16:1 I and/or C14:0 3-OH). The sole respiratory quinone was ubiquinone-10 (Q-10). On the basis of polyphasic taxonomic analyses, strains NC18[T] and NC20 represent novel species of the genus Martelella, for which the name Martelella soudanensis sp. nov. is proposed. The type strain is NC18[T] (=KTCT 82174[T] = NBRC 114661[T]).}, } @article {pmid34442736, year = {2021}, author = {Castillo, D and Donati, VL and Jørgensen, J and Sundell, K and Dalsgaard, I and Madsen, L and Wiklund, T and Middelboe, M}, title = {Comparative Genomic Analyses of Flavobacterium psychrophilum Isolates Reveals New Putative Genetic Determinants of Virulence Traits.}, journal = {Microorganisms}, volume = {9}, number = {8}, pages = {}, pmid = {34442736}, issn = {2076-2607}, support = {BONUS FLAVOPHAGE//EU BONUS and Innovation Fund Denmark/ ; }, abstract = {The fish pathogen Flavobacterium psychrophilum is currently one of the main pathogenic bacteria hampering the productivity of salmonid farming worldwide. Although putative virulence determinants have been identified, the genetic basis for variation in virulence of F. psychrophilum is not fully understood. In this study, we analyzed whole-genome sequences of a collection of 25 F. psychrophilum isolates from Baltic Sea countries and compared genomic information with a previous determination of their virulence in juvenile rainbow trout. The results revealed a conserved population of F. 
psychrophilum that were consistently present across the Baltic Sea countries, with no clear association between genomic repertoire, phylogenomic, or gene distribution and virulence traits. However, analysis of the entire genome of four F. psychrophilum isolates by hybrid assembly provided an unprecedented resolution for discriminating even highly related isolates. The results showed that isolates with different virulence phenotypes harbored genetic variances on a number of consecutive leucine-rich repeat (LRR) proteins, repetitive motifs in gliding motility-associated protein, and the insertion of transposable elements into intergenic and genic regions. Thus, these findings provide novel insights into the genetic variation of these elements and their putative role in the modulation of F. psychrophilum virulence.}, } @article {pmid34442656, year = {2021}, author = {Lin, N and Tao, Y and Gao, P and Xu, Y and Xing, P}, title = {Comparative Genomics Revealing Insights into Niche Separation of the Genus Methylophilus.}, journal = {Microorganisms}, volume = {9}, number = {8}, pages = {}, pmid = {34442656}, issn = {2076-2607}, support = {2014273//the Youth Innovation Promotion Association of CAS/ ; 31670508; 31730013//National Natural Science Foundation of China/ ; }, abstract = {The genus Methylophilus uses methanol as a carbon and energy source, which is widely distributed in terrestrial, freshwater and marine ecosystems. Here, three strains (13, 14 and QUAN) related to the genus Methylophilus, were newly isolated from Lake Fuxian sediments. The draft genomes of strains 13, 14 and QUAN were 3.11 Mb, 3.02 Mb, 3.15 Mb with a G+C content of 51.13, 50.48 and 50.33%, respectively. ANI values between strains 13 and 14, 13 and QUAN, and 14 and QUAN were 81.09, 81.06 and 91.46%, respectively. Pan-genome and core-genome included 3994 and 1559 genes across 18 Methylophilus genomes, respectively. 
Phylogenetic analysis based on 1035 single-copy genes and 16S rRNA genes revealed two clades, one containing strains isolated from aquatic and the other from the leaf surface. Twenty-three aquatic-specific genes, such as 2OG/Fe(II) oxygenase and diguanylate cyclase, reflected the strategy to survive in oxygen-limited water and sediment. Accordingly, 159 genes were identified specific to leaf association. Besides niche separation, Methylophilus could utilize the combination of ANRA and DNRA to convert nitrate to ammonia and reduce sulfate to sulfur according to the complete sulfur metabolic pathway. Genes encoding the cytochrome c protein and riboflavin were detected in Methylophilus genomes, which directly or indirectly participate in electron transfer.}, } @article {pmid34441495, year = {2021}, author = {Chen, Y and Li, N and Zhao, S and Zhang, C and Qiao, N and Duan, H and Xiao, Y and Yan, B and Zhao, J and Tian, F and Zhai, Q and Yu, L and Chen, W}, title = {Integrated Phenotypic-Genotypic Analysis of Latilactobacillus sakei from Different Niches.}, journal = {Foods (Basel, Switzerland)}, volume = {10}, number = {8}, pages = {}, pmid = {34441495}, issn = {2304-8158}, support = {32001665//National Natural Science Foundation of China/ ; }, abstract = {Increasing attention has been paid to the potential probiotic effects of Latilactobacillus sakei. To explore the genetic diversity of L. sakei, 14 strains isolated from different niches (feces, fermented kimchi, and meat products) and 54 published strains were compared and analyzed. The results showed that the average genome size and GC content of L. sakei were 1.98 Mb and 41.22%, respectively. Its core genome mainly encodes translation and transcription, amino acid synthesis, glucose metabolism, and defense functions. L. sakei has open pan-genomic characteristics, and its pan-gene curve shows an upward trend. 
The genetic diversity of L. sakei is mainly reflected in carbohydrate utilization, antibiotic tolerance, and immune/competition-related factors, such as clustering regular interval short palindromic repeat sequence (CRISPR)-Cas. The CRISPR system is mainly IIA type, and a few are IIC types. This work provides a basis for the study of this species.}, } @article {pmid34439262, year = {2021}, author = {Singh, K and Nassar, N and Bachari, A and Schanknecht, E and Telukutla, S and Zomer, R and Piva, TJ and Mantri, N}, title = {The Pathophysiology and the Therapeutic Potential of Cannabinoids in Prostate Cancer.}, journal = {Cancers}, volume = {13}, number = {16}, pages = {}, pmid = {34439262}, issn = {2072-6694}, support = {Not Applicable//MGC Pharmaceuticals Pty Ltd/ ; }, abstract = {Prostate cancer is the second most frequently occurring cancer diagnosed among males. Recent preclinical evidence implicates cannabinoids as powerful regulators of cell growth and differentiation. In this review, we focused on studies that demonstrated anticancer effects of cannabinoids and their possible mechanisms of action in prostate cancer. Besides the palliative effects of cannabinoids, research from the past two decades has demonstrated their promising potential as antitumor agents in a wide variety of cancers. 
This analysis may provide pharmacological insights into the selection of specific cannabinoids for the development of antitumor drugs for the treatment of prostate cancer.}, } @article {pmid34437546, year = {2021}, author = {Xu, S and Li, Z and Huang, Y and Han, L and Che, Y and Hou, X and Li, D and Fan, S and Li, Z}, title = {Whole genome sequencing reveals the genomic diversity, taxonomic classification, and evolutionary relationships of the genus Nocardia.}, journal = {PLoS neglected tropical diseases}, volume = {15}, number = {8}, pages = {e0009665}, pmid = {34437546}, issn = {1935-2735}, mesh = {Chromosome Mapping ; Classification/*methods ; *Genome, Bacterial ; Humans ; Nocardia/*classification/*genetics ; Phylogeny ; Whole Genome Sequencing ; }, abstract = {Nocardia is a complex and diverse genus of aerobic actinomycetes that cause complex clinical presentations, which are difficult to diagnose due to being misunderstood. To date, the genetic diversity, evolution, and taxonomic structure of the genus Nocardia are still unclear. In this study, we investigated the pan-genome of 86 Nocardia type strains to clarify their genetic diversity. Our study revealed an open pan-genome for Nocardia containing 265,836 gene families, with about 99.7% of the pan-genome being variable. Horizontal gene transfer appears to have been an important evolutionary driver of genetic diversity shaping the Nocardia genome and may have caused historical taxonomic confusion from other taxa (primarily Rhodococcus, Skermania, Aldersonia, and Mycobacterium). Based on single-copy gene families, we established a high-accuracy phylogenomic approach for Nocardia using 229 genome sequences. Furthermore, we found 28 potentially new species and reclassified 16 strains. Finally, by comparing the topology between a phylogenomic tree and 384 phylogenetic trees (from 384 single-copy genes from the core genome), we identified a novel locus for inferring the phylogeny of this genus. 
The dapb1 gene, which encodes dipeptidyl aminopeptidase BI, was far superior to commonly used markers for Nocardia and yielded a topology almost identical to that of genome-based phylogeny. In conclusion, the present study provides insights into the genetic diversity, contributes a robust framework for the taxonomic classification, and elucidates the evolutionary relationships of Nocardia. This framework should facilitate the development of rapid tests for the species identification of highly variable species and has given new insight into the behavior of this genus.}, } @article {pmid34434663, year = {2021}, author = {Shapiro, JW and Putonti, C}, title = {Rephine.r: a pipeline for correcting gene calls and clusters to improve phage pangenomes and phylogenies.}, journal = {PeerJ}, volume = {9}, number = {}, pages = {e11950}, pmid = {34434663}, issn = {2167-8359}, abstract = {BACKGROUND: A pangenome is the collection of all genes found in a set of related genomes. For microbes, these genomes are often different strains of the same species, and the pangenome offers a means to compare gene content variation with differences in phenotypes, ecology, and phylogenetic relatedness. Though most frequently applied to bacteria, there is growing interest in adapting pangenome analysis to bacteriophages. However, working with phage genomes presents new challenges. First, most phage families are under-sampled, and homologous genes in related viruses can be difficult to identify. Second, homing endonucleases and intron-like sequences may be present, resulting in fragmented gene calls. Each of these issues can reduce the accuracy of standard pangenome analysis tools.

METHODS: We developed an R pipeline called Rephine.r that takes as input the gene clusters produced by an initial pangenomics workflow. Rephine.r then proceeds in two primary steps. First, it identifies three common causes of fragmented gene calls: (1) indels creating early stop codons and new start codons; (2) interruption by a selfish genetic element; and (3) splitting at the ends of the reported genome. Fragmented genes are then fused to create new sequence alignments. In tandem, Rephine.r searches for distant homologs separated into different gene families using Hidden Markov Models. Significant hits are used to merge families into larger clusters. A final round of fragment identification is then run, and results may be used to infer single-copy core genomes and phylogenetic trees.

RESULTS: We applied Rephine.r to three well-studied phage groups: the Tevenvirinae (e.g., T4), the Studiervirinae (e.g., T7), and the Pbunaviruses (e.g., PB1). In each case, Rephine.r recovered additional members of the single-copy core genome and increased the overall bootstrap support of the phylogeny. The Rephine.r pipeline is provided through GitHub (https://www.github.com/coevoeco/Rephine.r) as a single script for automated analysis and with utility functions to assist in building single-copy core genomes and predicting the sources of fragmented genes.}, } @article {pmid34431768, year = {2021}, author = {Zhang, Y and Qiao, D and Shi, W and Wu, D and Cai, M}, title = {Capnocytophaga periodontitidis sp. nov., isolated from subgingival plaque of periodontitis patient.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {71}, number = {8}, pages = {}, doi = {10.1099/ijsem.0.004979}, pmid = {34431768}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; *Capnocytophaga/classification/isolation & purification ; China ; DNA, Bacterial/genetics ; Dental Plaque/*microbiology ; Fatty Acids/chemistry ; Humans ; Nucleic Acid Hybridization ; *Periodontitis/microbiology ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Two carbon dioxide-requiring, gliding, Gram-stain-negative strains, designated p1a2[T] and 051621, were isolated from subgingival plaque in association with severe periodontitis. The 16S rRNA gene sequence analysis revealed that they represented members of the genus Capnocytophaga and had less than 96.4 % pairwise similarity with species with validly published names in this genus. The whole-genome sequences of those strains had less than 91.9 % average nucleotide identity and 48.4 % digital DNA-DNA hybridization values with the other type strains of species of the genus Capnocytophaga, both below the species delineation threshold. 
The results of pan-genomic analysis indicated that p1a2[T] and 051621 shared 765 core gene families with the other ten species in this genus, and the numbers of strain-specific gene families were 493 and 455, respectively. The major fatty acids were iso-C15 : 0 and C16 : 0. A combination of phenotypic, chemotaxonomic, phylogenetic and genotypic data clearly indicate that p1a2[T] and 051621 should be considered to represent a novel species of the genus Capnocytophaga, for which the name Capnocytophaga periodontitidis sp. nov. is proposed. The type strain is p1a2[T] (=CGMCC 1.17337[T]=JCM 34126[T]).}, } @article {pmid34431197, year = {2021}, author = {Clermont, O and Condamine, B and Dion, S and Gordon, DM and Denamur, E}, title = {The E phylogroup of Escherichia coli is highly diverse and mimics the whole E. coli species population structure.}, journal = {Environmental microbiology}, volume = {23}, number = {11}, pages = {7139-7151}, doi = {10.1111/1462-2920.15742}, pmid = {34431197}, issn = {1462-2920}, mesh = {Animals ; Escherichia coli ; *Escherichia coli Infections ; *Escherichia coli Proteins/genetics ; Mice ; Phylogeny ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {To get a global picture of the population structure of the Escherichia coli phylogroup E, encompassing the O157:H7 EHEC lineage, we analysed the whole genome of 144 strains isolated from various continents, hosts and lifestyles and representative of the phylogroup diversity. The strains possess 4331 to 5440 genes with a core genome of 2771 genes and a pangenome of 33 722 genes. The distribution of these genes among the strains shows an asymmetric U-shaped distribution. E phylogenetic strains have the largest genomes of the species, partly explained by the presence of mobile genetic elements. Sixty-eight lineages were delineated, some of them exhibiting extra-intestinal virulence genes and being virulent in the mouse sepsis model. 
Except for the EHEC lineages and the reference EPEC, EIEC and ETEC strains, very few strains possess intestinal virulence genes. Most of the strains were devoid of acquired resistance genes, but eight strains possessed extended-spectrum beta-lactamase genes. Human strains belong to specific lineages, some of them being virulent and antibiotic-resistant [sequence type complexes (STcs) 350 and 2064]. The E phylogroup mimics all the features of the species as a whole, a phenomenon already observed at the STc level, arguing for a fractal population structure of E. coli.}, } @article {pmid34425698, year = {2021}, author = {Bonnet, R and Beyrouthy, R and Haenni, M and Nicolas-Chanoine, MH and Dalmasso, G and Madec, JY}, title = {Host Colonization as a Major Evolutionary Force Favoring the Diversity and the Emergence of the Worldwide Multidrug-Resistant Escherichia coli ST131.}, journal = {mBio}, volume = {12}, number = {4}, pages = {e0145121}, pmid = {34425698}, issn = {2150-7511}, mesh = {Animals ; Birds/microbiology ; Disease Reservoirs/*microbiology/*veterinary ; Dogs/microbiology ; *Drug Resistance, Multiple, Bacterial/genetics/physiology ; Escherichia coli/*drug effects/*genetics/pathogenicity/physiology ; Escherichia coli Infections/epidemiology/microbiology ; *Evolution, Molecular ; *Genome, Bacterial ; Global Health ; Host-Pathogen Interactions ; Humans ; Male ; Mice ; Virulence Factors/genetics ; }, abstract = {The emergence of multidrug-resistant Escherichia coli ST131 is a major worldwide public health problem in humans. According to the "one health" approach, this study investigated animal reservoirs of ST131, their relationships with human strains, and the genetic features associated with host colonization. High-quality genomes originating from human, avian, and canine hosts were classified on the basis of their accessory gene content using pangenomic. Pangenomic clusters and subclusters were specifically and significantly associated with hosts. 
The functions of clustering accessory genes were mainly enriched in functions involved in DNA acquisition, interactions, and virulence (e.g., pathogenesis, response to biotic stimulus and interaction between organisms). Accordingly, networks of cooccurrent host interaction factors were significantly associated with the pangenomic clusters and the originating hosts. The avian strains exhibited a specific content in virulence factors. Rarely found in humans, they corresponded to pathovars responsible for severe human infections. An emerging subcluster significantly associated with both human and canine hosts was evidenced. This ability to significantly colonize canine hosts in addition to humans was associated with a specific content in virulence factors (VFs) and metabolic functions encoded by a new pathogenicity island in ST131 and an improved fitness that is probably involved in its emergence. Overall, VF content, unlike the determinants of antimicrobial resistance, appeared as a key actor of bacterial host adaptation. The host dimension emerges as a major driver of genetic evolution that shapes ST131 genome, enhances its diversity, and favors its dissemination. IMPORTANCE Until now, there has been no indication that the evolutionary dynamics of Escherichia coli ST131 may reflect independent and host-specific adaptation of this lineage outside humans. In contrast, the limited number of ST131 reports in animals supported the common view that it rather reflects a spillover of the human sector. This study uncovered a link between host, ST131 population structure, and virulence factor content which appeared to reflect adaptation to hosts. 
This study helps to better understand the reservoir of ST131, the putative transmission flux, associated risks and the evolutionary dynamics of this bacterial population and highlights a paradigm in which host colonization stands as a key ecological force of the ST131 evolution.}, } @article {pmid34424587, year = {2021}, author = {Wold, J and Koepfli, KP and Galla, SJ and Eccles, D and Hogg, CJ and Le Lec, MF and Guhlin, J and Santure, AW and Steeves, TE}, title = {Expanding the conservation genomics toolbox: Incorporating structural variants to enhance genomic studies for species of conservation concern.}, journal = {Molecular ecology}, volume = {30}, number = {23}, pages = {5949-5965}, pmid = {34424587}, issn = {1365-294X}, support = {OIA-1826801//NSF Track 2 EPSCoR Program/ ; }, mesh = {Animals ; Endangered Species ; *Genome ; *Genomics ; Humans ; Phenotype ; }, abstract = {Structural variants (SVs) are large rearrangements (>50 bp) within the genome that impact gene function and the content and structure of chromosomes. As a result, SVs are a significant source of functional genomic variation, that is, variation at genomic regions underpinning phenotype differences, that can have large effects on individual and population fitness. While there are increasing opportunities to investigate functional genomic variation in threatened species via single nucleotide polymorphism (SNP) data sets, SVs remain understudied despite their potential influence on fitness traits of conservation interest. In this future-focused Opinion, we contend that characterizing SVs offers the conservation genomics community an exciting opportunity to complement SNP-based approaches to enhance species recovery. 
We also leverage the existing literature—predominantly in human health, agriculture and ecoevolutionary biology—to identify approaches for readily characterizing SVs and consider how integrating these into the conservation genomics toolbox may transform the way we manage some of the world's most threatened species.}, } @article {pmid34424159, year = {2021}, author = {Lee, AHY and Porto, WF and de Faria, C and Dias, SC and Alencar, SA and Pickard, DJ and Hancock, REW and Franco, OL}, title = {Genomic insights into the diversity, virulence and resistance of Klebsiella pneumoniae extensively drug resistant clinical isolates.}, journal = {Microbial genomics}, volume = {7}, number = {8}, pages = {}, pmid = {34424159}, issn = {2057-5858}, support = {/WT_/Wellcome Trust/United Kingdom ; FDN-154287//CIHR/Canada ; }, mesh = {Brazil ; DNA, Bacterial/genetics ; Drug Resistance, Multiple, Bacterial/*genetics ; *Genomics ; Humans ; Klebsiella Infections/epidemiology ; Klebsiella pneumoniae/*genetics/isolation & purification ; Phylogeny ; Virulence/genetics ; Virulence Factors/*genetics ; beta-Lactamases/genetics ; }, abstract = {Klebsiella pneumoniae has been implicated in wide-ranging nosocomial outbreaks, causing severe infections without effective treatments due to antibiotic resistance. Here, we performed genome sequencing of 70 extensively drug resistant clinical isolates, collected from Brasília's hospitals (Brazil) between 2010 and 2014. The majority of strains (60 out of 70) belonged to a single clonal complex (CC), CC258, which has become distributed worldwide in the last two decades. Of these CC258 strains, 44 strains were classified as sequence type 11 (ST11) and fell into two distinct clades, but no ST258 strains were found. These 70 strains had a pan-genome size of 10 366 genes, with a core-genome size of ~4476 genes found in 95 % of isolates. 
Analysis of sequences revealed diverse mechanisms of resistance, including production of multidrug efflux pumps, enzymes with the same target function but with reduced or no affinity to the drug, and proteins that protected the drug target or inactivated the drug. β-Lactamase production provided the most notable mechanism associated with K. pneumoniae. Each strain presented two or three different β-lactamase enzymes, including class A (SHV, CTX-M and KPC), class B and class C AmpC enzymes, although no class D β-lactamase was identified. Strains carrying the NDM enzyme involved three different ST types, suggesting that there was no common genetic origin.}, } @article {pmid34416864, year = {2021}, author = {Woodhouse, MR and Cannon, EK and Portwood, JL and Harper, LC and Gardiner, JM and Schaeffer, ML and Andorf, CM}, title = {A pan-genomic approach to genome databases using maize as a model system.}, journal = {BMC plant biology}, volume = {21}, number = {1}, pages = {385}, pmid = {34416864}, issn = {1471-2229}, support = {5030-21000-068-00-D//Agricultural Research Service (US)/ ; }, mesh = {*Data Accuracy ; Data Collection/*methods ; *Databases as Topic ; Genetic Variation ; *Genome, Plant ; *Genomics ; Zea mays/*genetics ; }, abstract = {Research in the past decade has demonstrated that a single reference genome is not representative of a species' diversity. MaizeGDB introduces a pan-genomic approach to hosting genomic data, leveraging the large number of diverse maize genomes and their associated datasets to quickly and efficiently connect genomes, gene models, expression, epigenome, sequence variation, structural variation, transposable elements, and diversity data across genomes so that researchers can easily track the structural and functional differences of a locus and its orthologs across maize. 
We believe our framework is unique and provides a template for any genomic database poised to host large-scale pan-genomic data.}, } @article {pmid34413844, year = {2021}, author = {Hudec, C and Biessy, A and Novinscak, A and St-Onge, R and Lamarre, S and Blom, J and Filion, M}, title = {Comparative Genomics of Potato Common Scab-Causing Streptomyces spp. Displaying Varying Virulence.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {716522}, pmid = {34413844}, issn = {1664-302X}, abstract = {Common scab of potato causes important economic losses worldwide following the development of necrotic lesions on tubers. In this study, the genomes of 14 prevalent scab-causing Streptomyces spp. isolated from Prince Edward Island, one of the most important Canadian potato production areas, were sequenced and annotated. Their phylogenomic affiliation was determined, their pan-genome was characterized, and pathogenic determinants involved in their virulence, ranging from weak to aggressive, were compared. 13 out of 14 strains clustered with Streptomyces scabiei, while the last strain clustered with Streptomyces acidiscabies. The toxicogenic and colonization genomic regions were compared, and while some atypical gene organizations were observed, no clear correlation with virulence was observed. The production of the phytotoxin thaxtomin A was also quantified and again, contrary to previous reports in the literature, no clear correlation was found between the amount of thaxtomin A secreted, and the virulence observed. Although no significant differences were observed when comparing the presence/absence of the main virulence factors among the strains of S. scabiei, a distinct profile was observed for S. acidiscabies. 
Several mutations predicted to affect the functionality of some virulence factors were identified, including one in the bldA gene that correlates with the absence of thaxtomin A production despite the presence of the corresponding biosynthetic gene cluster in S. scabiei LBUM 1485. These novel findings obtained using a large number of scab-causing Streptomyces strains are challenging some assumptions made so far on Streptomyces' virulence and suggest that other factors, yet to be characterized, are also key contributors.}, } @article {pmid34412679, year = {2021}, author = {Pandey, P and Gao, Y and Kingsford, C}, title = {VariantStore: an index for large-scale genomic variant search.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {231}, pmid = {34412679}, issn = {1474-760X}, support = {R01 GM122935/GM/NIGMS NIH HHS/United States ; }, mesh = {Algorithms ; Genome, Human ; *Genomics ; Humans ; *Software ; }, abstract = {Efficiently scaling genomic variant search indexes to thousands of samples is computationally challenging due to the presence of multiple coordinate systems to avoid reference biases. We present VariantStore, a system that indexes genomic variants from multiple samples using a variation graph and enables variant queries across any sample-specific coordinate system. We show the scalability of VariantStore by indexing genomic variants from the TCGA project in 4 h and the 1000 Genomes project in 3 h. 
Querying for variants in a gene takes between 0.002 and 3 seconds using memory only 10% of the size of the full representation.}, } @article {pmid34411120, year = {2021}, author = {Vaid, RK and Thakur, Z and Anand, T and Kumar, S and Tripathi, BN}, title = {Comparative genome analysis of Salmonella enterica serovar Gallinarum biovars Pullorum and Gallinarum decodes strain specific genes.}, journal = {PloS one}, volume = {16}, number = {8}, pages = {e0255612}, pmid = {34411120}, issn = {1932-6203}, mesh = {Animals ; Bacterial Proteins/*genetics ; Chickens ; Genomics/*methods ; India/epidemiology ; Poultry Diseases/*diagnosis/epidemiology/genetics/microbiology ; Salmonella Infections, Animal/*diagnosis/epidemiology/genetics/microbiology ; Salmonella enterica/classification/*genetics/isolation & purification ; Serogroup ; }, abstract = {Salmonella enterica serovar Gallinarum biovar Pullorum (bvP) and biovar Gallinarum (bvG) are the etiological agents of pullorum disease (PD) and fowl typhoid (FT) respectively, which cause huge economic losses to poultry industry especially in developing countries including India. Vaccination and biosecurity measures are currently being employed to control and reduce the S. Gallinarum infections. High endemicity, poor implementation of hygiene and lack of effective vaccines pose challenges in prevention and control of disease in intensively maintained poultry flocks. Comparative genome analysis unravels similarities and dissimilarities thus facilitating identification of genomic features that aids in pathogenesis, niche adaptation and in tracing of evolutionary history. The present investigation was carried out to assess the genotypic differences amongst S. enterica serovar Gallinarum strains including Indian strain S. Gallinarum Sal40 VTCCBAA614. 
The comparative genome analysis revealed an open pan-genome consisting of 5091 coding sequence (CDS) with 3270 CDS belonging to core-genome, 1254 CDS to dispensable genome and strain specific genes i.e. singletons ranging from 3 to 102 amongst the analyzed strains. Moreover, the investigated strains exhibited diversity in genomic features such as virulence factors, genomic islands, prophage regions, toxin-antitoxin cassettes, and acquired antimicrobial resistance genes. Core genome identified in the study can give important leads in the direction of design of rapid and reliable diagnostics, and vaccine design for effective infection control as well as eradication. Additionally, the identified genetic differences among the S. enterica serovar Gallinarum strains could be used for bacterial typing, structure based inhibitor development by future experimental investigations on the data generated.}, } @article {pmid34408268, year = {2022}, author = {Simonsen, AK}, title = {Environmental stress leads to genome streamlining in a widely distributed species of soil bacteria.}, journal = {The ISME journal}, volume = {16}, number = {2}, pages = {423-434}, pmid = {34408268}, issn = {1751-7370}, mesh = {Bacteria/genetics ; *Evolution, Molecular ; *Genome, Bacterial ; Phylogeny ; Soil ; }, abstract = {Bacteria have highly flexible pangenomes, which are thought to facilitate evolutionary responses to environmental change, but the impacts of environmental stress on pangenome evolution remain unclear. Using a landscape pangenomics approach, I demonstrate that environmental stress leads to consistent, continuous reduction in genome content along four environmental stress gradients (acidity, aridity, heat, salinity) in naturally occurring populations of Bradyrhizobium diazoefficiens (widespread soil-dwelling plant mutualists). 
Using gene-level network and duplication functional traits to predict accessory gene distributions across environments, genes predicted to be superfluous are more likely lost in high stress, while genes with multi-functional roles are more likely retained. Genes with higher probabilities of being lost with stress contain significantly higher proportions of codons under strong purifying and positive selection. Gene loss is widespread across the entire genome, with high gene-retention hotspots in close spatial proximity to core genes, suggesting Bradyrhizobium has evolved to cluster essential-function genes (accessory genes with multifunctional roles and core genes) in discrete genomic regions, which may stabilise viability during genomic decay. In conclusion, pangenome evolution through genome streamlining is an important evolutionary response to environmental change. This raises questions about impacts of genome streamlining on the adaptive capacity of bacterial populations facing rapid environmental change.}, } @article {pmid34403199, year = {2021}, author = {Godoy, P and García-Franco, A and Recio, MI and Ramos, JL and Duque, E}, title = {Synthesis of aromatic amino acids from 2G lignocellulosic substrates.}, journal = {Microbial biotechnology}, volume = {14}, number = {5}, pages = {1931-1943}, pmid = {34403199}, issn = {1751-7915}, mesh = {*Amino Acids, Aromatic ; Glucose ; Lignin ; *Pseudomonas putida/genetics ; Xylose ; }, abstract = {Pseudomonas putida is a highly solvent-resistant microorganism and useful chassis for the production of value-added compounds from lignocellulosic residues, in particular aromatic compounds that are made from phenylalanine. The use of these agricultural residues requires a two-step treatment to release the components of the polysaccharides of cellulose and hemicellulose as monomeric sugars, the most abundant monomers being glucose and xylose. 
Pan-genomic studies have shown that Pseudomonas putida metabolizes glucose through three convergent pathways to yield 6-phosphogluconate and subsequently metabolizes it through the Entner-Doudoroff pathway, but the strains do not degrade xylose. The valorization of both sugars is critical from the point of view of economic viability of the process. For this reason, a P. putida strain was endowed with the ability to metabolize xylose via the xylose isomerase pathway, by incorporating heterologous catabolic genes that convert this C5 sugar into intermediates of the pentose phosphate cycle. In addition, the open reading frame T1E_2822, encoding glucose dehydrogenase, was knocked-out to avoid the production of the dead-end product xylonate. We generated a set of DOT-T1E-derived strains that metabolized glucose and xylose simultaneously in culture medium and that reached high cell density with generation times of around 100 min with glucose and around 300 min with xylose. The strains grew in 2G hydrolysates from diluted acid and steam explosion pretreated corn stover and sugarcane straw. During growth, the strains metabolized > 98% of glucose, > 96% xylose and > 85% acetic acid. In 2G hydrolysates P. putida 5PL, a DOT-T1E derivative strain that carries up to five independent mutations to avoid phenylalanine metabolism, accumulated this amino acid in the medium. We constructed P. putida 5PLΔgcd (xylABE) that produced up to 250 mg l[-1] of phenylalanine when grown in 2G pretreated corn stover or sugarcane straw. These results support as a proof of concept the potential of P. 
putida as a chassis for 2G processes.}, } @article {pmid34402778, year = {2021}, author = {Belloso Daza, MV and Cortimiglia, C and Bassi, D and Cocconcelli, PS}, title = {Genome-based studies indicate that the Enterococcus faecium Clade B strains belong to Enterococcus lactis species and lack of the hospital infection associated markers.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {71}, number = {8}, pages = {}, doi = {10.1099/ijsem.0.004948}, pmid = {34402778}, issn = {1466-5034}, mesh = {Anti-Bacterial Agents ; Bacterial Typing Techniques ; Base Composition ; *Cross Infection/microbiology ; DNA, Bacterial/genetics ; Enterococcus/*classification ; *Enterococcus faecium/classification ; Fatty Acids/chemistry ; Humans ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Enterococcus lactis and the heterotypic synonym Enterococcus xinjiangensis from dairy origin have recently been identified as a novel species based on 16S rRNA gene sequence analysis. Enterococcus faecium type strain NCTC 7171[T] was used as the reference genome for determining E. lactis and E. faecium to be separate species. However, this taxonomic classification did not consider the diverse lineages of E. faecium, and the double nature of hospital-associated (clade A) and community-associated (clade B) isolates. Here, we investigated the taxonomic relationship among isolates of E. faecium of different origins and E. lactis, using a genome-based approach. Additional to 16S rRNA gene sequence analysis, we estimated the relatedness among strains and species using phylogenomics based on the core pangenome, multilocus sequence typing, the average nucleotide identity and digital DNA-DNA hybridization. Moreover, following the available safety assessment schemes, we evaluated the virulence profile and the ampicillin resistance of E. lactis and E. faecium clade B strains. 
Our results confirmed the genetic and evolutionary differences between clade A and the intertwined clade B and E. lactis group. We also confirmed the absence in these strains of virulence gene markers IS16, hylEfm and esp and the lack of the PBP5 allelic profile associated with ampicillin resistance. Taken together, our findings support the reassignment of the strains of E. faecium clade B as E. lactis.}, } @article {pmid34400240, year = {2021}, author = {Matteoli, FP and Pedrosa-Silva, F and Dutra-Silva, L and Giachini, AJ}, title = {The global population structure and beta-lactamase repertoire of the opportunistic pathogen Serratia marcescens.}, journal = {Genomics}, volume = {113}, number = {6}, pages = {3523-3532}, doi = {10.1016/j.ygeno.2021.08.009}, pmid = {34400240}, issn = {1089-8646}, mesh = {Anti-Bacterial Agents ; Humans ; Plasmids/genetics ; *Serratia marcescens/genetics ; Whole Genome Sequencing ; *beta-Lactamases/genetics ; }, abstract = {Serratia marcescens is a global spread nosocomial pathogen. This rod-shaped bacterium displays a broad host range and worldwide geographical distribution. Here we analyze an international collection of this multidrug-resistant, opportunistic pathogen from 35 countries to infer its population structure. We show that S. marcescens comprises 12 lineages; Sm1, Sm4, and Sm10 harbor 78.3% of the known environmental strains. Sm5, Sm6, and Sm7 comprise only human-associated strains which harbor smallest pangenomes, genomic fluidity and lowest levels of core recombination, indicating niche specialization. Sm7 and Sm9 lineages exhibit the most concerning resistome; blaKPC-2 plasmid is widespread in Sm7, whereas Sm9, also an anthropogenic-exclusive lineage, presents highest plasmid/lineage size ratio and plasmid-diversity encoding metallo-beta-lactamases comprising blaNDM-1. The heterogeneity of resistance patterns of S. 
marcescens lineages elucidated herein highlights the relevance of surveillance programs, using whole-genome sequencing, to provide insights into the molecular epidemiology of carbapenemase producing strains of this species.}, } @article {pmid34399613, year = {2021}, author = {Orsi, WD and Magritsch, T and Vargas, S and Coskun, ÖK and Vuillemin, A and Höhna, S and Wörheide, G and D'Hondt, S and Shapiro, BJ and Carini, P}, title = {Genome Evolution in Bacteria Isolated from Million-Year-Old Subseafloor Sediment.}, journal = {mBio}, volume = {12}, number = {4}, pages = {e0115021}, pmid = {34399613}, issn = {2150-7511}, mesh = {*Evolution, Molecular ; Genetic Variation ; *Genome, Bacterial ; Geologic Sediments/*microbiology ; Phylogeny ; *Point Mutation ; RNA, Ribosomal, 16S/genetics ; Rhodospirillaceae/*genetics ; Sequence Analysis, DNA ; Time Factors ; }, abstract = {Beneath the seafloor, microbial life subsists in isolation from the surface world under persistent energy limitation. The nature and extent of genomic evolution in subseafloor microbes have been unknown. Here, we show that the genomes of Thalassospira bacterial populations cultured from million-year-old subseafloor sediments evolve in clonal populations by point mutation, with a relatively low rate of homologous recombination and elevated numbers of pseudogenes. Ratios of nonsynonymous to synonymous substitutions correlate with the accumulation of pseudogenes, consistent with a role for genetic drift in the subseafloor strains but not in type strains of Thalassospira isolated from the surface world. Consistent with this, pangenome analysis reveals that the subseafloor bacterial genomes have a significantly lower number of singleton genes than the type strains, indicating a reduction in recent gene acquisitions. 
Numerous insertion-deletion events and pseudogenes were present in a flagellar operon of the subseafloor bacteria, indicating that motility is nonessential in these million-year-old subseafloor sediments. This genomic evolution in subseafloor clonal populations coincided with a phenotypic difference: all subseafloor isolates have a lower rate of growth under laboratory conditions than the Thalassospira xiamenensis type strain. Our findings demonstrate that the long-term physical isolation of Thalassospira, in the absence of recombination, has resulted in clonal populations whereby reduced access to novel genetic material from neighbors has resulted in the fixation of new mutations that accumulate in genomes over millions of years. IMPORTANCE The nature and extent of genomic evolution in subseafloor microbial populations subsisting for millions of years below the seafloor are unknown. Subseafloor populations have ultralow metabolic rates that are hypothesized to restrict reproduction and, consequently, the spread of new traits. Our findings demonstrate that genomes of cultivated bacterial strains from the genus Thalassospira isolated from million-year-old abyssal sediment exhibit greatly reduced levels of homologous recombination, elevated numbers of pseudogenes, and genome-wide evidence of relaxed purifying selection. These substitutions and pseudogenes are fixed into the population, suggesting that the genome evolution of these bacteria has been dominated by genetic drift. Thus, reduced recombination, stemming from long-term physical isolation, resulted in small clonal populations of Thalassospira that have accumulated mutations in their genomes over millions of years.}, } @article {pmid34392432, year = {2021}, author = {Huang, RR and Yang, SR and Zhen, C and Ge, XF and Chen, XK and Wen, ZQ and Li, YN and Liu, WZ}, title = {Genomic molecular signatures determined characterization of Mycolicibacterium gossypii sp. 
nov., a fast-growing mycobacterial species isolated from cotton field soil.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {10}, pages = {1735-1744}, pmid = {34392432}, issn = {1572-9699}, support = {BK20190703//Natural Science Foundation of Jiangsu Province/ ; 19KJB530010//Natural Science Research of Jiangsu Higher Education Institutions of China (CN)/ ; }, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/analysis ; Genomics ; *Mycobacterium ; Phospholipids/analysis ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; *Soil ; Soil Microbiology ; }, abstract = {A Gram-positive, acid-fast and rapidly growing rod, designated S2-37[ T], that could form yellowish colonies was isolated from one soil sample collected from cotton cropping field located in the Xinjiang region of China. Genomic analyses indicated that strain S2-37[ T] harbored T7SS secretion system and was very likely able to produce mycolic acid, which were typical features of pathogenetic mycobacterial species. 16S rRNA-directed phylogenetic analysis referred that strain S2-37[ T] was closely related to bacterial species belonging to the genus Mycolicibacterium, which was further confirmed by pan-genome phylogenetic analysis. Digital DNA-DNA hybridization and the average nucleotide identity presented that strain S2-37[ T] displayed the highest values of 39.1% (35.7-42.6%) and 81.28% with M. litorale CGMCC 4.5724[ T], respectively. And characterization of conserved molecular signatures further supported the taxonomic position of strain S2-37[ T] belonging to the genus Mycolicibacterium. The main fatty acids were identified as C16:0, C18:0, C20:3ω3 and C22:6ω3. In addition, polar lipids profile was mainly composed of diphosphatidylglycerol, phosphatidylethanolamine and phosphatidylinositol. 
Phylogenetic analyses, distinct fatty acids and antimicrobial resistance profiles indicated that strain S2-37[ T] represented genetically and phenotypically distinct from its closest phylogenetic neighbour, M. litorale CGMCC 4.5724[ T]. Here, we propose a novel species of the genus Mycolicibacterium: Mycolicibacterium gossypii sp. nov. with the type strain S2-37[ T] (= JCM 34327[ T] = CGMCC 1.18817[ T]).}, } @article {pmid34389059, year = {2021}, author = {Boeuf, D and Eppley, JM and Mende, DR and Malmstrom, RR and Woyke, T and DeLong, EF}, title = {Metapangenomics reveals depth-dependent shifts in metabolic potential for the ubiquitous marine bacterial SAR324 lineage.}, journal = {Microbiome}, volume = {9}, number = {1}, pages = {172}, pmid = {34389059}, issn = {2049-2618}, mesh = {Bacteria/genetics ; *Microbiota ; Oceans and Seas ; Phylogeny ; *Seawater ; }, abstract = {BACKGROUND: Oceanic microbiomes play a pivotal role in the global carbon cycle and are central to the transformation and recycling of carbon and energy in the ocean's interior. SAR324 is a ubiquitous but poorly understood uncultivated clade of Deltaproteobacteria that inhabits the entire water column, from ocean surface waters to its deep interior. Although some progress has been made in elucidating potential metabolic traits of SAR324 in the dark ocean, very little is known about the ecology and the metabolic capabilities of this group in the euphotic and twilight zones. To investigate the comparative genomics, ecology, and physiological potential of the SAR324 clade, we examined the distribution and variability of key genomic features and metabolic pathways in this group from surface waters to the abyss in the North Pacific Subtropical Gyre, one of the largest biomes on Earth.

RESULTS: We leveraged a pangenomic ecological approach, combining spatio-temporally resolved single-amplified genome, metagenomic, and metatranscriptomic datasets. The data revealed substantial genomic diversity throughout the SAR324 clade, with distinct depth and temporal distributions that clearly differentiated ecotypes. Phylogenomic subclade delineation, environmental distributions, genomic feature similarities, and metabolic capacities revealed strong congruence. The four SAR324 ecotypes delineated in this study revealed striking divergence from one another with respect to their habitat-specific metabolic potentials. The ecotypes living in the dark or twilight oceans shared genomic features and metabolic capabilities consistent with a sulfur-based chemolithoautotrophic lifestyle. In contrast, those inhabiting the sunlit ocean displayed higher plasticity energy-related metabolic pathways, supporting a presumptive photoheterotrophic lifestyle. In epipelagic SAR324 ecotypes, we observed the presence of two types of proton-pumping rhodopsins, as well as genomic, transcriptomic, and ecological evidence for active photoheterotrophy, based on xanthorhodopsin-like light-harvesting proteins.

CONCLUSIONS: Combining pangenomic and both metagenomic and metatranscriptomic profiling revealed a striking divergence in the vertical distribution, genomic composition, metabolic potential, and predicted lifestyle strategies of geographically co-located members of the SAR324 bacterial clade. The results highlight the utility of metapangenomic approaches employed across environmental gradients, to decipher the properties and variation in function and ecological traits of specific phylogenetic clades within complex microbiomes. Video abstract.}, } @article {pmid34386003, year = {2021}, author = {Saco, A and Rey-Campos, M and Rosani, U and Novoa, B and Figueras, A}, title = {The Evolution and Diversity of Interleukin-17 Highlight an Expansion in Marine Invertebrates and Its Conserved Role in Mucosal Immunity.}, journal = {Frontiers in immunology}, volume = {12}, number = {}, pages = {692997}, pmid = {34386003}, issn = {1664-3224}, mesh = {Animals ; *Evolution, Molecular ; Host-Pathogen Interactions ; *Immunity, Mucosal ; Interleukin-17/genetics/*immunology/metabolism ; Mytilus/genetics/*immunology/metabolism ; Phylogeny ; Protein Isoforms ; Receptors, Interleukin-17/genetics/*immunology/metabolism ; Signal Transduction ; Species Specificity ; Vibrio/immunology/pathogenicity ; Vibrio Infections/immunology/metabolism/microbiology ; }, abstract = {The interleukin-17 (IL-17) family consists of proinflammatory cytokines conserved during evolution. A comparative genomics approach was applied to examine IL-17 throughout evolution from poriferans to higher vertebrates. Cnidaria was highlighted as the most ancient diverged phylum, and several evolutionary patterns were revealed. Large expansions of the IL-17 repertoire were observed in marine molluscs and echinoderm species. We further studied this expansion in filter-fed Mytilus galloprovincialis, which is a bivalve with a highly effective innate immune system supported by a variable pangenome. 
We recovered 379 unique IL-17 sequences and 96 receptors from individual genomes that were classified into 23 and 6 isoforms after phylogenetic analyses. Mussel IL-17 isoforms were conserved among individuals and shared between closely related Mytilidae species. Certain isoforms were specifically implicated in the response to a waterborne infection with Vibrio splendidus in mussel gills. The involvement of IL-17 in mucosal immune responses could be conserved in higher vertebrates from these ancestral lineages.}, } @article {pmid34384905, year = {2021}, author = {Zhang, X and Liu, T and Wang, J and Wang, P and Qiu, Y and Zhao, W and Pang, S and Li, X and Wang, H and Song, J and Zhang, W and Yang, W and Sun, Y and Li, X}, title = {Pan-genome of Raphanus highlights genetic variation and introgression among domesticated, wild, and weedy radishes.}, journal = {Molecular plant}, volume = {14}, number = {12}, pages = {2032-2055}, doi = {10.1016/j.molp.2021.08.005}, pmid = {34384905}, issn = {1752-9867}, mesh = {Crops, Agricultural ; *Domestication ; Evolution, Molecular ; *Gene Flow ; Genes, Plant ; *Genetic Variation ; *Phenotype ; Phylogeny ; Plant Weeds ; Polyploidy ; Raphanus/*genetics ; }, abstract = {Post-polyploid diploidization associated with descending dysploidy and interspecific introgression drives plant genome evolution by unclear mechanisms. Raphanus is an economically and ecologically important Brassiceae genus and model system for studying post-polyploidization genome evolution and introgression. Here, we report the de novo sequence assemblies for 11 genomes covering most of the typical sub-species and varieties of domesticated, wild and weedy radishes from East Asia, South Asia, Europe, and America. Divergence among the species, sub-species, and South/East Asian types coincided with Quaternary glaciations. 
A genus-level pan-genome was constructed with family-based, locus-based, and graph-based methods, and whole-genome comparisons revealed genetic variations ranging from single-nucleotide polymorphisms (SNPs) to inversions and translocations of whole ancestral karyotype (AK) blocks. Extensive gene flow occurred between wild, weedy, and domesticated radishes. High frequencies of genome reshuffling, biased retention, and large-fragment translocation have shaped the genomic diversity. Most variety-specific gene-rich blocks showed large structural variations. Extensive translocation and tandem duplication of dispensable genes were revealed in two large rearrangement-rich islands. Disease resistance genes mostly resided on specific and dispensable loci. Variations causing the loss of function of enzymes modulating gibberellin deactivation were identified and could play an important role in phenotype divergence and adaptive evolution. This study provides new insights into the genomic evolution underlying post-polyploid diploidization and lays the foundation for genetic improvement of radish crops, biological control of weeds, and protection of wild species' germplasms.}, } @article {pmid34378983, year = {2021}, author = {Baker, JL}, title = {Complete Genomes of Clade G6 Saccharibacteria Suggest a Divergent Ecological Niche and Lifestyle.}, journal = {mSphere}, volume = {6}, number = {4}, pages = {e0053021}, pmid = {34378983}, issn = {2379-5042}, support = {K99 DE029228/DE/NIDCR NIH HHS/United States ; }, mesh = {Acetobacteraceae/*classification/*genetics/metabolism ; *Genome, Bacterial ; Metabolic Networks and Pathways/genetics ; Microbiota ; Mouth/*microbiology ; *Phylogeny ; Sequence Analysis, DNA/methods ; }, abstract = {Saccharibacteria (formerly TM7) have reduced genomes and a small cell size and appear to have a parasitic lifestyle dependent on a bacterial host. 
Although there are at least 6 major clades of Saccharibacteria inhabiting the human oral cavity, complete genomes of oral Saccharibacteria were previously limited to the G1 clade. In this study, nanopore sequencing was used to obtain three complete genome sequences from clade G6. Phylogenetic analysis suggested the presence of at least 3 to 5 distinct species within G6, with two discrete taxa represented by the 3 complete genomes. G6 Saccharibacteria were highly divergent from the more-well-studied clade G1 and had the smallest genomes and lowest GC content of all Saccharibacteria. Pangenome analysis showed that although 97% of shared pan-Saccharibacteria core genes and 89% of G1-specific core genes had putative functions, only 50% of the 244 G6-specific core genes had putative functions, highlighting the novelty of this group. Compared to G1, G6 harbored divergent metabolic pathways. G6 genomes lacked an F1Fo ATPase, the pentose phosphate pathway, and several genes involved in nucleotide metabolism, which were all core genes for G1. G6 genomes were also unique compared to that of G1 in that they encoded d-lactate dehydrogenase, adenylate cyclase, limited glycerolipid metabolism, a homolog to a lipoarabinomannan biosynthesis enzyme, and the means to degrade starch. These differences at key metabolic steps suggest a distinct lifestyle and ecological niche for clade G6, possibly with alternative hosts and/or host dependencies, which would have significant ecological, evolutionary, and likely pathogenic implications. IMPORTANCE Saccharibacteria are ultrasmall parasitic bacteria that are common members of the oral microbiota and have been increasingly linked to disease and inflammation. However, the lifestyle and impact on human health of Saccharibacteria remain poorly understood, especially for the clades with no complete genomes (G2 to G6) or cultured isolates (G2 and G4 to G6). 
Obtaining complete genomes is of particular importance for Saccharibacteria, because they lack many of the "essential" core genes used for determining draft genome completeness, and few references exist outside clade G1. In this study, complete genomes of 3 G6 strains, representing two candidate species, were obtained and analyzed. The G6 genomes were highly divergent from that of G1 and enigmatic, with 50% of the G6 core genes having no putative functions. The significant difference in encoded functional pathways is suggestive of a distinct lifestyle and ecological niche, probably with alternative hosts and/or host dependencies, which would have major implications in ecology, evolution, and pathogenesis.}, } @article {pmid34377059, year = {2021}, author = {Shirasawa, K and Harada, D and Hirakawa, H and Isobe, S and Kole, C}, title = {Chromosome-level de novo genome assemblies of over 100 plant species.}, journal = {Breeding science}, volume = {71}, number = {2}, pages = {117-124}, pmid = {34377059}, issn = {1344-7610}, abstract = {Genome sequence analysis in higher plants began with the whole-genome sequencing of Arabidopsis thaliana. Owing to the great advances in sequencing technologies, also known as next-generation sequencing (NGS) technologies, genomes of more than 400 plant species have been sequenced to date. Long-read sequencing technologies, together with sequence scaffolding methods, have enabled the synthesis of chromosome-level de novo genome sequence assemblies, which has further allowed comparative analysis of the structural features of multiple plant genomes, thus elucidating the evolutionary history of plants. However, the quality of the assembled chromosome-level sequences varies among plant species. In this review, we summarize the status of chromosome-level assemblies of 114 plant species, with genome sizes ranging from 125 Mb to 16.9 Gb. 
While the average genome coverage of the assembled sequences reached up to 89.1%, the average coverage of chromosome-level pseudomolecules was 73.3%. Thus, further improvements in sequencing technologies and scaffolding, and data analysis methods, are required to establish gap-free telomere-to-telomere genome sequence assemblies. With the forthcoming new technologies, we are going to enter into a new genomics era where pan-genomics and the >1,000 or >1 million genomes' project will be routine in higher plants.}, } @article {pmid34374564, year = {2021}, author = {Gómez-Sanz, E and Haro-Moreno, JM and Jensen, SO and Roda-García, JJ and López-Pérez, M}, title = {The Resistome and Mobilome of Multidrug-Resistant Staphylococcus sciuri C2865 Unveil a Transferable Trimethoprim Resistance Gene, Designated dfrE, Spread Unnoticed.}, journal = {mSystems}, volume = {6}, number = {4}, pages = {e0051121}, pmid = {34374564}, issn = {2379-5077}, support = {BES-2014-067828//Spanish Ministerio de Economia y Competitividad/ ; 659314//EC | H2020 | H2020 Priority Excellent Science | H2020 Marie Skłodowska-Curie Actions (MSCA)/ ; SEED-01 18-1//ETH Zürich Foundation (ETH Zurich Foundation)/ ; 167090//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung (SNF)/ ; }, abstract = {Methicillin-resistant Staphylococcus sciuri (MRSS) strain C2865 from a stranded dog in Nigeria was trimethoprim (TMP) resistant but lacked formerly described staphylococcal TMP-resistant dihydrofolate reductase genes (dfr). Whole-genome sequencing, comparative genomics, and pan-genome analyses were pursued to unveil the molecular bases for TMP resistance via resistome and mobilome profiling. MRSS C2865 comprised a species subcluster and positioned just above the intraspecies boundary. Lack of species host tropism was observed. S. sciuri exhibited an open pan-genome, while MRSS C2865 harbored the highest number of unique genes (75% associated with mobilome). 
Within this fraction, we discovered a transferable TMP resistance gene, named dfrE, which confers high-level TMP resistance in Staphylococcus aureus and Escherichia coli. dfrE was located in a novel multidrug resistance mosaic plasmid (pUR2865-34) encompassing adaptive, mobilization, and segregational stability traits. dfrE was formerly denoted as dfr_like in Exiguobacterium spp. from fish farm sediment in China but escaped identification in one macrococcal and diverse staphylococcal genomes in different Asian countries. dfrE shares the highest identity with dfr of soil-related Paenibacillus anaericanus (68%). Data analysis discloses that dfrE has emerged from a single ancestor and places S. sciuri as a plausible donor. C2865 unique fraction additionally enclosed novel chromosomal mobile islands, including a multidrug-resistant pseudo-SCCmec cassette, three apparently functional prophages (Siphoviridae), and an SaPI4-related staphylococcal pathogenicity island. Since dfrE seems not yet common in staphylococcal clinical specimens, our data promote early surveillance and enable molecular diagnosis. We evidence the genome plasticity of S. sciuri and highlight its role as a resourceful reservoir for adaptive traits. IMPORTANCE The discovery and surveillance of antimicrobial resistance genes (AMRG) and their mobilization platforms are critical to understand the evolution of bacterial resistance and to restrain further expansion. Limited genomic data are available on Staphylococcus sciuri; regardless, it is considered a reservoir for critical AMRG and mobile elements. We uncover a transferable staphylococcal TMP resistance gene, named dfrE, in a novel mosaic plasmid harboring additional resistance, adaptive, and self-stabilization features. dfrE is present but evaded detection in diverse species from varied sources geographically distant. Our analyses evidence that the dfrE-carrying element has emerged from a single ancestor and position S. 
sciuri as the donor species for dfrE spread. We also identify novel mobilizable chromosomal islands encompassing AMRG and three unrelated prophages. We prove high intraspecies heterogenicity and genome plasticity for S. sciuri. This work highlights the importance of genome-wide ecological studies to facilitate identification, characterization, and evolution routes of bacteria adaptive features.}, } @article {pmid34370094, year = {2021}, author = {Hily, JM and Poulicard, N and Kubina, J and Reynard, JS and Spilmont, AS and Fuchs, M and Lemaire, O and Vigne, E}, title = {Metagenomic analysis of nepoviruses: diversity, evolution and identification of a genome region in members of subgroup A that appears to be important for host range.}, journal = {Archives of virology}, volume = {166}, number = {10}, pages = {2789-2801}, pmid = {34370094}, issn = {1432-8798}, support = {Vaccivine//PNDV/ ; GPGV//PNDV/ ; }, mesh = {Evolution, Molecular ; Genetic Variation ; Genome, Viral/*genetics ; Host Specificity/*genetics ; Metagenomics ; Nepovirus/classification/*genetics ; Open Reading Frames/genetics ; Phylogeny ; Phylogeography ; Plants/classification/virology ; RNA, Viral/genetics ; Recombination, Genetic ; }, abstract = {Data mining and metagenomic analysis of 277 open reading frame sequences of bipartite RNA viruses of the genus Nepovirus, family Secoviridae, were performed, documenting how challenging it can be to unequivocally assign a virus to a particular species, especially those in subgroups A and C, based on some of the currently adopted taxonomic demarcation criteria. This work suggests a possible need for their amendment to accommodate pangenome information. In addition, we revealed a host-dependent structure of arabis mosaic virus (ArMV) populations at a cladistic level and confirmed a phylogeographic structure of grapevine fanleaf virus (GFLV) populations. We also identified new putative recombination events in members of subgroups A, B and C. 
The evolutionary specificity of some capsid regions of ArMV and GFLV that were described previously and biologically validated as determinants of nematode transmission was circumscribed in silico. Furthermore, a C-terminal segment of the RNA-dependent RNA polymerase of members of subgroup A was predicted to be a putative host range determinant based on statistically supported higher π (substitutions per site) values for GFLV and ArMV isolates infecting Vitis spp. compared with non-Vitis-infecting ArMV isolates. This study illustrates how sequence information obtained via high-throughput sequencing can increase our understanding of mechanisms that modulate virus diversity and evolution and create new opportunities for advancing studies on the biology of economically important plant viruses.}, } @article {pmid34367194, year = {2021}, author = {Razzaq, A and Kaur, P and Akhter, N and Wani, SH and Saleem, F}, title = {Next-Generation Breeding Strategies for Climate-Ready Crops.}, journal = {Frontiers in plant science}, volume = {12}, number = {}, pages = {620420}, pmid = {34367194}, issn = {1664-462X}, abstract = {Climate change is a threat to global food security due to the reduction of crop productivity around the globe. Food security is a matter of concern for stakeholders and policymakers as the global population is predicted to bypass 10 billion in the coming years. Crop improvement via modern breeding techniques along with efficient agronomic practices innovations in microbiome applications, and exploiting the natural variations in underutilized crops is an excellent way forward to fulfill future food requirements. In this review, we describe the next-generation breeding tools that can be used to increase crop production by developing climate-resilient superior genotypes to cope with the future challenges of global food security. 
Recent innovations in genomic-assisted breeding (GAB) strategies allow the construction of highly annotated crop pan-genomes to give a snapshot of the full landscape of genetic diversity (GD) and recapture the lost gene repertoire of a species. Pan-genomes provide new platforms to exploit these unique genes or genetic variation for optimizing breeding programs. The advent of next-generation clustered regularly interspaced short palindromic repeat/CRISPR-associated (CRISPR/Cas) systems, such as prime editing, base editing, and de novo domestication, has institutionalized the idea that genome editing is revamped for crop improvement. Also, the availability of versatile Cas orthologs, including Cas9, Cas12, Cas13, and Cas14, improved the editing efficiency. Now, the CRISPR/Cas systems have numerous applications in crop research and successfully edit the major crop to develop resistance against abiotic and biotic stress. By adopting high-throughput phenotyping approaches and big data analytics tools like artificial intelligence (AI) and machine learning (ML), agriculture is heading toward automation or digitalization. The integration of speed breeding with genomic and phenomic tools can allow rapid gene identifications and ultimately accelerate crop improvement programs. 
In addition, the integration of next-generation multidisciplinary breeding platforms can open exciting avenues to develop climate-ready crops toward global food security.}, } @article {pmid34356076, year = {2021}, author = {Iqbal, S and Vollmers, J and Janjua, HA}, title = {Genome Mining and Comparative Genome Analysis Revealed Niche-Specific Genome Expansion in Antibacterial Bacillus pumilus Strain SF-4.}, journal = {Genes}, volume = {12}, number = {7}, pages = {}, pmid = {34356076}, issn = {2073-4425}, mesh = {Anti-Bacterial Agents/*metabolism ; Bacillus pumilus/*genetics ; Bacterial Proteins/genetics/*metabolism ; *Genome, Bacterial ; *Multigene Family ; Peptide Synthases/*genetics ; *Phylogeny ; }, abstract = {The present study reports the isolation of antibacterial exhibiting Bacillus pumilus (B. pumilus) SF-4 from soil field. The genome of this strain SF-4 was sequenced and analyzed to acquire in-depth genomic level insight related to functional diversity, evolutionary history, and biosynthetic potential. The genome of the strain SF-4 harbor 12 Biosynthetic Gene Clusters (BGCs) including four Non-ribosomal peptide synthetases (NRPSs), two terpenes, and one each of Type III polyketide synthases (PKSs), hybrid (NRPS/PKS), lipopeptide, β-lactone, and bacteriocin clusters. Plant growth-promoting genes associated with de-nitrification, iron acquisition, phosphate solubilization, and nitrogen metabolism were also observed in the genome. Furthermore, all the available complete genomes of B. pumilus strains were used to highlight species boundaries and diverse niche adaptation strategies. Phylogenetic analyses revealed local diversification and indicate that strain SF-4 is a sister group to SAFR-032 and 150a. Pan-genome analyses of 12 targeted strains showed regions of genome plasticity which regulate function of these strains and proposed direct strain adaptations to specific habitats. 
The unique genome pool carries genes mostly associated with "biosynthesis of secondary metabolites, transport, and catabolism" (Q), "replication, recombination and repair" (L), and "unknown function" (S) clusters of orthologous groups (COG) categories. Moreover, a total of 952 unique genes and 168 exclusively absent genes were prioritized across the 12 genomes. While newly sequenced B. pumilus SF-4 genome consists of 520 accessory, 59 unique, and seven exclusively absent genes. The current study demonstrates genomic differences among 12 B. pumilus strains and offers comprehensive knowledge of the respective genome architecture which may assist in the agronomic application of this strain in future.}, } @article {pmid34352473, year = {2021}, author = {Surachat, K and Deachamag, P and Kantachote, D and Wonglapsuwan, M and Jeenkeawpiam, K and Chukamnerd, A}, title = {In silico comparative genomics analysis of Lactiplantibacillus plantarum DW12, a potential gamma-aminobutyric acid (GABA)-producing strain.}, journal = {Microbiological research}, volume = {251}, number = {}, pages = {126833}, doi = {10.1016/j.micres.2021.126833}, pmid = {34352473}, issn = {1618-0623}, mesh = {Computer Simulation ; *Genome, Bacterial/genetics ; Genomics ; *Lactobacillaceae/genetics ; *gamma-Aminobutyric Acid/metabolism ; }, abstract = {Gamma-aminobutyric acid (GABA) is an amino acid that plays a major role as a neurotransmitter. It is commonly produced by lactic acid bacteria (LAB) naturally found in fermented food and fruit. Lactiplantibacillus plantarum DW12 is a high potential GABA-producing strain isolated from a fermented beverage. In this study, to highlight its ability to produce GABA, we sequenced the genome of L. plantarum DW12 and then performed comprehensive bioinformatics and meta-analysis to compare the genomic data of previously published genomes. Also, the evolutionary analysis among L. 
plantarum species was demonstrated using pan-genome analysis against 576 genomes from the database. As a result, the DW12 genome comprises one circular chromosome of 3,217,574 bp. It contains several genes that encode for the production of antimicrobial compounds including plantaricin A, E, F, J, K, and N. The glutamic acid decarboxylase (GAD) operon was found in the DW12 genome, suggesting a high potential of producing GABA in this strain. Therefore, L. plantarum DW12 could be a good candidate as a starter culture in the beverage and food industries due to its safety aspects and ability to produce GABA.}, } @article {pmid34349894, year = {2021}, author = {Shariq, OA and McKenzie, TJ}, title = {Adrenocortical carcinoma: current state of the art, ongoing controversies, and future directions in diagnosis and treatment.}, journal = {Therapeutic advances in chronic disease}, volume = {12}, number = {}, pages = {20406223211033103}, pmid = {34349894}, issn = {2040-6223}, abstract = {Adrenocortical carcinoma (ACC) is a rare, aggressive malignancy with an annual incidence of ~1 case per million population. Differentiating between ACC and benign adrenocortical tumors can be challenging in patients who present with an incidentally discovered adrenal mass, due to the limited specificity of standard diagnostic imaging. Recently, urine steroid metabolite profiling has been prospectively validated as a novel diagnostic tool for the detection of malignancy with improved accuracy over current modalities. Surgery represents the only curative treatment for ACC, although local recurrence and metastases are common, even after a margin-negative resection is performed. Unlike other intra-abdominal cancers, the role of minimally invasive surgery and lymphadenectomy in ACC is controversial. 
Adjuvant therapy with the adrenolytic drug mitotane is used to reduce the risk of recurrence after surgery, although evidence supporting its efficacy is limited; it is also currently unclear whether all patients or a subset with the highest risk of recurrence should receive this treatment. Large-scale pan-genomic studies have yielded insights into the pathogenesis of ACC and have defined distinct molecular signatures associated with clinical outcomes that may be used to improve prognostication. For patients with advanced ACC, palliative combination chemotherapy with mitotane is the current standard of care; however, this is associated with poor response rates (RR). Knowledge from molecular profiling studies has been used to guide the development of novel targeted therapies; however, these have shown limited efficacy in early phase trials. As a result, there is an urgent unmet need for more effective therapies for patients with this devastating disease.}, } @article {pmid34346542, year = {2021}, author = {Hufnagel, B and Soriano, A and Taylor, J and Divol, F and Kroc, M and Sanders, H and Yeheyis, L and Nelson, M and Péret, B}, title = {Pangenome of white lupin provides insights into the diversity of the species.}, journal = {Plant biotechnology journal}, volume = {19}, number = {12}, pages = {2532-2543}, pmid = {34346542}, issn = {1467-7652}, mesh = {Chromosome Mapping ; Domestication ; *Genome, Plant/genetics ; *Lupinus/genetics ; Plant Breeding ; }, abstract = {White lupin is an old crop with renewed interest due to its seed high protein content and high nutritional value. Despite a long domestication history in the Mediterranean basin, modern breeding efforts have been fairly scarce. Recent sequencing of its genome has provided tools for further description of genetic resources but detailed characterization of genomic diversity is still missing. Here, we report the genome sequencing of 39 accessions that were used to establish a white lupin pangenome. 
We defined 32 068 core genes that are present in all individuals and 14 822 that are absent in some and may represent a gene pool for breeding for improved productivity, grain quality, and stress adaptation. We used this new pangenome resource to identify candidate genes for alkaloid synthesis, a key grain quality trait. The white lupin pangenome provides a novel genetic resource to better understand how domestication has shaped the genomic variability within this crop. Thus, this pangenome resource is an important step towards the effective and efficient genetic improvement of white lupin to help meet the rapidly growing demand for plant protein sources for human and animal consumption.}, } @article {pmid34343716, year = {2021}, author = {Kim, E and Yang, SM and Kim, D and Kim, HY}, title = {Real-time PCR method for qualitative and quantitative detection of Lactobacillus sakei group species targeting novel markers based on bioinformatics analysis.}, journal = {International journal of food microbiology}, volume = {355}, number = {}, pages = {109335}, doi = {10.1016/j.ijfoodmicro.2021.109335}, pmid = {34343716}, issn = {1879-3460}, mesh = {*Computational Biology ; Fermented Foods/microbiology ; Genetic Markers/genetics ; *Latilactobacillus sakei/genetics ; RNA, Ribosomal, 16S/genetics ; *Real-Time Polymerase Chain Reaction ; }, abstract = {Latilactobacillus sakei group comprises four closely related species, making it difficult to accurately distinguish them with standard markers such as the 16S rRNA gene. The objective of our study was to mine novel markers for PCR detection and discrimination of L. sakei group species and L. sakei subspecies by comparative pan-genomic analysis. A total of 63 genome sequences of L. sakei group species consisted of 119,899 coding genes, yielding 5741 pan-genomes, 831 core-genomes, 3347 accessory-genomes, and 1563 unique-genomes. 
The accessory-genome was compared to extract unique candidate genes common only to genomes of the same species. The candidate genes were then aligned with the other bacterial genomes to select marker genes present in all genomes of a given species, but not in the genomes of other species. We identified the arginine/ornithine antiporter, putative cell surface protein precursor, sodium:solute symporter, PRD domain protein, PTS sugar transporter subunit IIC, and phosphoenolpyruvate-dependent sugar phosphotransferase system EIIC as marker genes for L. sakei, L. sakei subsp. sakei, L. sakei subsp. carnosus, L. curvatus, L. graminis, and L. fuchuensis, respectively. Primer pairs were designed for each marker and showed 100% specificity for 48 lactic acid bacterial reference strains. The PCR method developed in this study was used to evaluate 106 strains isolated from fermented foods to demonstrate that the marker genes provided a viable alternative to the 16S rRNA gene. We also applied the method to the monitoring of kimchi samples to quantify L. sakei group species or subspecies. Our PCR method based on novel markers can rapidly identify L. sakei group with high accuracy and high throughput.}, } @article {pmid34343181, year = {2021}, author = {Maarala, AI and Arasalo, O and Valenzuela, D and Mäkinen, V and Heljanko, K}, title = {Distributed hybrid-indexing of compressed pan-genomes for scalable and fast sequence alignment.}, journal = {PloS one}, volume = {16}, number = {8}, pages = {e0255260}, pmid = {34343181}, issn = {1932-6203}, mesh = {Base Sequence ; Data Compression ; Escherichia coli/*genetics ; *Genome, Bacterial ; Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; *Sequence Alignment ; }, abstract = {Computational pan-genomics utilizes information from multiple individual genomes in large-scale comparative analysis. 
Genetic variation between case-controls, ethnic groups, or species can be discovered thoroughly using pan-genomes of such subpopulations. Whole-genome sequencing (WGS) data volumes are growing rapidly, making genomic data compression and indexing methods very important. Despite current space-efficient repetitive sequence compression and indexing methods, the deployed compression methods are often sequential, computationally time-consuming, and do not provide efficient sequence alignment performance on vast collections of genomes such as pan-genomes. For performing rapid analytics with the ever-growing genomics data, data compression and indexing methods have to exploit distributed and parallel computing more efficiently. Instead of strict genome data compression methods, we will focus on the efficient construction of a compressed index for pan-genomes. Compressed hybrid-index enables fast sequence alignments to several genomes at once while shrinking the index size significantly compared to traditional indexes. We propose a scalable distributed compressed hybrid-indexing method for large genomic data sets enabling pan-genome-based sequence search and read alignment capabilities. We show the scalability of our tool, DHPGIndex, by executing experiments in a distributed Apache Spark-based computing cluster comprising 448 cores distributed over 26 nodes. The experiments have been performed both with human and bacterial genomes. DHPGIndex built a BLAST index for n = 250 human pan-genome with an 870:1 compression ratio (CR) in 342 minutes and a Bowtie2 index with 157:1 CR in 397 minutes. For n = 1,000 human pan-genome, the BLAST index was built in 1520 minutes with 532:1 CR and the Bowtie2 index in 1938 minutes with 76:1 CR. Bowtie2 aligned 14.6 GB of paired-end reads to the compressed (n = 1,000) index in 31.7 minutes on a single node. Compressing n = 13,375,031 (488 GB) GenBank database to BLAST index resulted in CR of 62:1 in 575 minutes. 
BLASTing 189,864 Crispr-Cas9 gRNA target sequences (23 MB in total) to the compressed index of human pan-genome (n = 1,000) finished in 45 minutes on a single node. 30 MB mixed bacterial sequences (n = 599) were blasted to the compressed index of 488 GB GenBank database (n = 13,375,031) in 26 minutes on 25 nodes. 78 MB mixed sequences (n = 4,167) were blasted to the compressed index of 18 GB E. coli sequence database (n = 745,409) in 5.4 minutes on a single node.}, } @article {pmid34340127, year = {2021}, author = {Ismail, S and Shahid, F and Khan, A and Bhatti, S and Ahmad, S and Naz, A and Almatroudi, A and Tahir Ul Qamar, M}, title = {Pan-vaccinomics approach towards a universal vaccine candidate against WHO priority pathogens to address growing global antibiotic resistance.}, journal = {Computers in biology and medicine}, volume = {136}, number = {}, pages = {104705}, doi = {10.1016/j.compbiomed.2021.104705}, pmid = {34340127}, issn = {1879-0534}, mesh = {*Computational Biology ; Drug Resistance, Microbial ; *Epitopes, T-Lymphocyte ; Humans ; Molecular Docking Simulation ; Vaccines, Subunit ; World Health Organization ; }, abstract = {Antimicrobial resistance (AMR) in bacterial pathogens is a major global distress. Due to the slow progress of antibiotics development and the fast pace of resistance acquisition, there is an urgent need for effective vaccines against such bacterial pathogens. In-silico approaches including pan-genomics, subtractive proteomics, reverse vaccinology, immunoinformatics, molecular docking, and dynamics simulation studies were applied in the current study to identify a universal potential vaccine candidate against the 18 multi-drug resistance (MDRs) bacterial pathogenic species from a WHO priority list. Ten non-redundant, non-homologous, virulent, and antigenic vaccine candidates were filtered against all targeted species. 
Nine B-cell-derived T-cell antigen epitopes which show a great affinity to the dominant HLA allele (DRB1*0101) in the human population were screened from selected vaccine candidates using immunoinformatics approaches. Screened epitopes were then used to design a multi-epitope peptide vaccine construct (MEPVC) along with β-defensin adjuvant to improve the immunogenic properties of the proposed vaccine construct. Molecular docking and MD simulation were carried out to study the binding affinity and molecular interaction of MEPVC with human immune receptors (TLR2, TLR3, TLR4, and TLR6). The final MEPVC construct was reverse translated and in-silico cloned in the pET28a(+) vector to ensure its effectiveness. This in silico construct is expected to be helpful for vaccinologists to assess its immune protection effectiveness in vivo and in vitro to counter rising antibiotic resistance worldwide.}, } @article {pmid34335123, year = {2021}, author = {Awan, F and Ali, MM and Dong, Y and Yu, Y and Zeng, Z and Liu, Y}, title = {In Silico Analysis of Potential Outer Membrane Beta-Barrel Proteins in Aeromonas hydrophila Pangenome.}, journal = {International journal of peptide research and therapeutics}, volume = {27}, number = {4}, pages = {2381-2389}, pmid = {34335123}, issn = {1573-3149}, abstract = {UNLABELLED: Outer membrane proteins (OMPs) of Aeromonas hydrophila have a variety of functional roles in virulence and pathogenesis and represent promising targets for vaccine development. The main objective of this study was to develop an in-silico model of beta-barrel OMP present among the valid A. hydrophila pangenomes (n = 22). With a program named the β-barrel Outer Membrane Protein Predictor (BOMP), total beta-barrel OMPs (n = 3127) were predicted across 22 genomes with the estimated median number of 64 per genome. In pangenome analysis, only 32 OMPs were found to be conserved. These beta-barrel OMPs also showed variations among source of isolation, COG and KEGG classes. 
Among 32 conserved OMPs, a highly antigenic protein was identified by utilizing Vaxijen. With B cell epitope predictions, two fragments of amino acid sequences i.e. GLTLGAQFTGNNDPQNADRSN (21 mer) and FKPSLAYLRTDVKDNARGIDDTATEY (26 mer) bearing B-cell binding sites were selected. Further, an epitope (12 amino acids: GLTLGAQFTGNN) that complexes to maximum MHC alleles with a higher antigenicity was determined. The analysis of evolutionary forces on the identified OMP sequence and epitope indicated that none of basic amino acid sites has shown significantly different substitution ratios. This conserved protein and epitope will be helpful in developing a vaccine that may be effective against all the A. hydrophila strains. Also, this study provides a theoretical basis for vaccine design against other pathogenic bacteria.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s10989-021-10259-z.}, } @article {pmid34329477, year = {2021}, author = {Wang, K and Hu, H and Tian, Y and Li, J and Scheben, A and Zhang, C and Li, Y and Wu, J and Yang, L and Fan, X and Sun, G and Li, D and Zhang, Y and Han, R and Jiang, R and Huang, H and Yan, F and Wang, Y and Li, Z and Li, G and Liu, X and Li, W and Edwards, D and Kang, X}, title = {The Chicken Pan-Genome Reveals Gene Content Variation and a Promoter Region Deletion in IGF2BP1 Affecting Body Size.}, journal = {Molecular biology and evolution}, volume = {38}, number = {11}, pages = {5066-5081}, pmid = {34329477}, issn = {1537-1719}, mesh = {Animals ; Body Size/genetics ; *Chickens/genetics ; *Genome-Wide Association Study ; Polymorphism, Single Nucleotide ; Promoter Regions, Genetic ; Quantitative Trait Loci ; }, abstract = {Domestication and breeding have reshaped the genomic architecture of chicken, but the retention and loss of genomic elements during these evolutionary processes remain unclear. We present the first chicken pan-genome constructed using 664 individuals, which identified an additional approximately 66.5-Mb sequences that are absent from the reference genome (GRCg6a). The constructed pan-genome encoded 20,491 predicated protein-coding genes, of which higher expression levels are observed in conserved genes relative to dispensable genes. Presence/absence variation (PAV) analyses demonstrated that gene PAV in chicken was shaped by selection, genetic drift, and hybridization. PAV-based genome-wide association studies identified numerous candidate mutations related to growth, carcass composition, meat quality, or physiological traits. Among them, a deletion in the promoter region of IGF2BP1 affecting chicken body size is reported, which is supported by functional studies and extra samples. 
This is the first time to report the causal variant of chicken body size quantitative trait locus located at chromosome 27 which was repeatedly reported. Therefore, the chicken pan-genome is a useful resource for biological discovery and breeding. It improves our understanding of chicken genome diversity and provides materials to unveil the evolution history of chicken domestication.}, } @article {pmid34328223, year = {2022}, author = {Hu, H and Scheben, A and Verpaalen, B and Tirnaz, S and Bayer, PE and Hodel, RGJ and Batley, J and Soltis, DE and Soltis, PS and Edwards, D}, title = {Amborella gene presence/absence variation is associated with abiotic stress responses that may contribute to environmental adaptation.}, journal = {The New phytologist}, volume = {233}, number = {4}, pages = {1548-1555}, pmid = {34328223}, issn = {1469-8137}, mesh = {*Genome, Plant ; *Magnoliopsida/genetics ; Stress, Physiological/genetics ; }, } @article {pmid34321532, year = {2021}, author = {Davidson, RM and Benoit, JB and Kammlade, SM and Hasan, NA and Epperson, LE and Smith, T and Vasireddy, S and Brown-Elliott, BA and Nick, JA and Olivier, KN and Zelazny, AM and Daley, CL and Strong, M and Wallace, RJ}, title = {Genomic characterization of sporadic isolates of the dominant clone of Mycobacterium abscessus subspecies massiliense.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {15336}, pmid = {34321532}, issn = {2045-2322}, support = {K01 AI125726/AI/NIAID NIH HHS/United States ; }, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Antitubercular Agents/pharmacology ; Child ; Clone Cells ; Cystic Fibrosis/complications/*diagnosis/microbiology/pathology ; DNA, Bacterial/*genetics ; Drug Resistance, Bacterial/genetics ; Genetic Variation ; *Genome, Bacterial ; Humans ; Middle Aged ; Mycobacterium Infections, Nontuberculous/complications/*diagnosis/microbiology/pathology ; Mycobacterium abscessus/classification/drug effects/*genetics/isolation & 
purification ; Phylogeny ; Polymorphism, Single Nucleotide ; United States/epidemiology ; }, abstract = {Recent studies have characterized a dominant clone (Clone 1) of Mycobacterium abscessus subspecies massiliense (M. massiliense) associated with high prevalence in cystic fibrosis (CF) patients, pulmonary outbreaks in the United States (US) and United Kingdom (UK), and a Brazilian epidemic of skin infections. The prevalence of Clone 1 in non-CF patients in the US and the relationship of sporadic US isolates to outbreak clones are not known. We surveyed a reference US Mycobacteria Laboratory and a US biorepository of CF-associated Mycobacteria isolates for Clone 1. We then compared genomic variation and antimicrobial resistance (AMR) mutations between sporadic non-CF, CF, and outbreak Clone 1 isolates. Among reference lab samples, 57/147 (39%) of patients with M. massiliense had Clone 1, including pulmonary and extrapulmonary infections, compared to 11/64 (17%) in the CF isolate biorepository. Core and pan genome analyses revealed that outbreak isolates had similar numbers of single nucleotide polymorphisms (SNPs) and accessory genes as sporadic US Clone 1 isolates. However, pulmonary outbreak isolates were more likely to have AMR mutations compared to sporadic isolates. 
Clone 1 isolates are present among non-CF and CF patients across the US, but additional studies will be needed to resolve potential routes of transmission and spread.}, } @article {pmid34310022, year = {2021}, author = {Bayer, PE and Scheben, A and Golicz, AA and Yuan, Y and Faure, S and Lee, H and Chawla, HS and Anderson, R and Bancroft, I and Raman, H and Lim, YP and Robbens, S and Jiang, L and Liu, S and Barker, MS and Schranz, ME and Wang, X and King, GJ and Pires, JC and Chalhoub, B and Snowdon, RJ and Batley, J and Edwards, D}, title = {Modelling of gene loss propensity in the pangenomes of three Brassica species suggests different mechanisms between polyploids and diploids.}, journal = {Plant biotechnology journal}, volume = {19}, number = {12}, pages = {2488-2500}, pmid = {34310022}, issn = {1467-7652}, mesh = {*Brassica/genetics ; *Brassica napus/genetics ; Diploidy ; Genome, Plant/genetics ; Polyploidy ; }, abstract = {Plant genomes demonstrate significant presence/absence variation (PAV) within a species; however, the factors that lead to this variation have not been studied systematically in Brassica across diploids and polyploids. Here, we developed pangenomes of polyploid Brassica napus and its two diploid progenitor genomes B. rapa and B. oleracea to infer how PAV may differ between diploids and polyploids. Modelling of gene loss suggests that loss propensity is primarily associated with transposable elements in the diploids while in B. napus, gene loss propensity is associated with homoeologous recombination. 
We use these results to gain insights into the different causes of gene loss, both in diploids and following polyploidization, and pave the way for the application of machine learning methods to understanding the underlying biological and physical causes of gene presence/absence.}, } @article {pmid34304696, year = {2021}, author = {Hernández-Juárez, LE and Camorlinga, M and Méndez-Tenorio, A and Calderón, JF and Huang, BC and Bandoy, DDR and Weimer, BC and Torres, J}, title = {Analyses of publicly available Hungatella hathewayi genomes revealed genetic distances indicating they belong to more than one species.}, journal = {Virulence}, volume = {12}, number = {1}, pages = {1950-1964}, pmid = {34304696}, issn = {2150-5608}, mesh = {*Clostridiaceae/genetics ; *Genome, Bacterial ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Hungatella hathewayi has been observed to be a member of the gut microbiome. Unfortunately, little is known about this organism in spite of being associated with human fatalities; it is important to understand virulence mechanisms and epidemiological prospective to cause disease. In this study, a patient with chronic neurologic symptoms presented to the clinic with subsequent isolation of a strain with phenotypic characteristics suggestive of Clostridium difficile. However, whole-genome sequence found the organism to be H. hathewayi. Analysis including publicly available Hungatella genomes found substantial genomic differences as compared to the type strain, indicating this isolate was not C. difficile. We examined the whole-genome of Hungatella species and related genera, using comparative genomics to fully examine species identification and toxin production. 
Orthogonal phylogenetic using the 16S rRNA gene and entire genome analyses that included genome distance analyses using Genome-to-Genome Distance (GGDC), Average Nucleotide Identity (ANI), and a pan-genome analysis with inclusion of available public genomes determined the speciation to be Hungatella. Two clearly differentiated groups were identified, one including a reference H. hathewayi genome (strain DSM-13,479) and a second group that was determined to be H. effluvii, which included our clinical isolate. Also, some genomes reported as H. hathewayi were found to belong to other genera, including Clostridium and Faecalicatena. We show that the Hungatella species have an open pan-genome reflecting high genomic diversity. This study highlights the importance of correctly assigning taxonomic identification, particularly in disease-associated strains, to better understand virulence and therapeutic options.}, } @article {pmid34299200, year = {2021}, author = {Romanet, P and Galluso, J and Kamenicky, P and Hage, M and Theodoropoulou, M and Roche, C and Graillon, T and Etchevers, HC and De Murat, D and Mougel, G and Figarella-Branger, D and Dufour, H and Cuny, T and Assié, G and Barlier, A}, title = {Somatotroph Tumors and the Epigenetic Status of the GNAS Locus.}, journal = {International journal of molecular sciences}, volume = {22}, number = {14}, pages = {}, pmid = {34299200}, issn = {1422-0067}, support = {GDR2031//Groupement de Recherche CREST-NET/ ; NA//Excellence Initiative of Aix Marseille University -A*Midex- a French "Investissement d'Avenir"/ ; NA//Institut National de lutte contre le Cancer (INCa)/ ; NA//MarMaRa Institute/ ; NA//French Ministry of Health/ ; }, mesh = {Adult ; Aged ; Aged, 80 and over ; Alleles ; Chromogranins/*genetics/metabolism ; DNA Methylation ; Epigenesis, Genetic ; Female ; GTP-Binding Protein alpha Subunits, Gs/*genetics/metabolism ; Gene Expression Regulation, Neoplastic ; Genomic Imprinting ; Humans ; Male ; Middle Aged ; Mutation 
; Pituitary Neoplasms/*genetics/metabolism/pathology ; Somatotrophs/*metabolism/pathology ; Young Adult ; }, abstract = {Forty percent of somatotroph tumors harbor recurrent activating GNAS mutations, historically called the gsp oncogene. In gsp-negative somatotroph tumors, GNAS expression itself is highly variable; those with GNAS overexpression most resemble phenotypically those carrying the gsp oncogene. GNAS is monoallelically expressed in the normal pituitary due to methylation-based imprinting. We hypothesize that changes in GNAS imprinting of gsp-negative tumors affect GNAS expression levels and tumorigenesis. We characterized the GNAS locus in two independent somatotroph tumor cohorts: one of 23 tumors previously published (PMID: 31883967) and classified by pan-genomic analysis, and a second with 82 tumors. Multi-omics analysis of the first cohort identified a significant difference between gsp-negative and gsp-positive tumors in the methylation index at the known differentially methylated region (DMR) of the GNAS A/B transcript promoter, which was confirmed in the larger series of 82 tumors. GNAS allelic expression was analyzed using a polymorphic Fok1 cleavage site in 32 heterozygous gsp-negative tumors. GNAS expression was significantly reduced in the 14 tumors with relaxed GNAS imprinting and biallelic expression, compared to 18 tumors with monoallelic expression. Tumors with relaxed GNAS imprinting showed significantly lower SSTR2 and AIP expression levels. Altered A/B DMR methylation was found exclusively in gsp-negative somatotroph tumors. 
43% of gsp-negative tumors showed GNAS imprinting relaxation, which correlated with lower GNAS, SSTR2 and AIP expression, indicating lower sensitivity to somatostatin analogues and potentially aggressive behavior.}, } @article {pmid34293910, year = {2022}, author = {Liu, Z and Zhao, Y and Sossah, FL and Okorley, BA and Amoako, DG and Liu, P and Sheng, H and Li, D and Li, Y}, title = {Characterization, Pathogenicity, Phylogeny, and Comparative Genomic Analysis of Pseudomonas tolaasii Strains Isolated from Various Mushrooms in China.}, journal = {Phytopathology}, volume = {112}, number = {3}, pages = {521-534}, doi = {10.1094/PHYTO-12-20-0550-R}, pmid = {34293910}, issn = {0031-949X}, mesh = {*Genomics ; Phylogeny ; *Plant Diseases ; Pseudomonas ; Virulence/genetics ; }, abstract = {Since 2016, devastating bacterial blotch affecting the fruiting bodies of Agaricus bisporus, Cordyceps militaris, Flammulina filiformis, and Pleurotus ostreatus in China has caused severe economic losses. We isolated 102 bacterial strains and characterized them polyphasically. We identified the causal agent as Pseudomonas tolaasii and confirmed the pathogenicity of the strains. A host range test further confirmed the pathogen's ability to infect multiple hosts. This is the first report in China of bacterial blotch in C. militaris caused by P. tolaasii. Whole-genome sequences were generated for three strains: Pt11 (6.48 Mb), Pt51 (6.63 Mb), and Pt53 (6.80 Mb), and pangenome analysis was performed with 13 other publicly accessible P. tolaasii genomes to determine their genetic diversity, virulence, antibiotic resistance, and mobile genetic elements. The pangenome of P. tolaasii is open, and many more gene families are likely to emerge with further genome sequencing. Multilocus sequence analysis using the sequences of four common housekeeping genes (glns, gyrB, rpoB, and rpoD) showed high genetic variability among the P. tolaasii strains, with 115 strains clustered into a monophyletic group. 
The P. tolaasii strains possess various genes for secretion systems, virulence factors, carbohydrate-active enzymes, toxins, secondary metabolites, and antimicrobial resistance genes that are associated with pathogenesis and adapted to different environments. The myriad of insertion sequences, integrons, prophages, and genome islands encoded in the strains may contribute to genome plasticity, virulence, and antibiotic resistance. These findings advance understanding of the determinants of virulence, which can be targeted for the effective control of bacterial blotch disease.}, } @article {pmid34288550, year = {2021}, author = {Bayer, PE and Petereit, J and Danilevicz, MF and Anderson, R and Batley, J and Edwards, D}, title = {The application of pangenomics and machine learning in genomic selection in plants.}, journal = {The plant genome}, volume = {14}, number = {3}, pages = {e20112}, doi = {10.1002/tpg2.20112}, pmid = {34288550}, issn = {1940-3372}, mesh = {*Genome, Plant ; *Genomics/methods ; Machine Learning ; Plant Breeding/methods ; }, abstract = {Genomic selection approaches have increased the speed of plant breeding, leading to growing crop yields over the last decade. However, climate change is impacting current and future yields, resulting in the need to further accelerate breeding efforts to cope with these changing conditions. Here we present approaches to accelerate plant breeding and incorporate nonadditive effects in genomic selection by applying state-of-the-art machine learning approaches. These approaches are made more powerful by the inclusion of pangenomes, which represent the entire genome content of a species. Understanding the strengths and limitations of machine learning methods, compared with more traditional genomic selection efforts, is paramount to the successful application of these methods in crop breeding. 
We describe examples of genomic selection and pangenome-based approaches in crop breeding, discuss machine learning-specific challenges, and highlight the potential for the application of machine learning in genomic selection. We believe that careful implementation of machine learning approaches will support crop improvement to help counter the adverse outcomes of climate change on crop production.}, } @article {pmid34287030, year = {2021}, author = {Fiedoruk, K and Drewnowska, JM and Mahillon, J and Zambrzycka, M and Swiecicka, I}, title = {Pan-Genome Portrait of Bacillus mycoides Provides Insights into the Species Ecology and Evolution.}, journal = {Microbiology spectrum}, volume = {9}, number = {1}, pages = {e0031121}, pmid = {34287030}, issn = {2165-0497}, mesh = {Anthropogenic Effects ; Bacillus/classification/*genetics/isolation & purification/*physiology ; *Biological Evolution ; DNA Transposable Elements ; *Ecology ; Genome, Bacterial ; Genomics ; High-Throughput Nucleotide Sequencing ; Phylogeny ; Plasmids/genetics ; Sigma Factor ; Soil ; Soil Microbiology ; Species Specificity ; }, abstract = {Bacillus mycoides is poorly known despite its frequent occurrence in a wide variety of environments. To provide direct insight into its ecology and evolutionary history, a comparative investigation of the species pan-genome and the functional gene categorization of 35 isolates obtained from soil samples from northeastern Poland was performed. The pan-genome of these isolates is composed of 20,175 genes and is characterized by a strong predominance of adaptive genes (∼83%), a significant amount of plasmid genes (∼37%), and a great contribution of prophages and insertion sequences. The pan-genome structure and phylodynamic studies had suggested a wide genomic diversity among the isolates, but no correlation between lineages and the bacillus origin was found. Nevertheless, the two B. 
mycoides populations, one from Białowieża National Park, the last European natural primeval forest with soil classified as organic, and the second from mineral soil samples taken in a farm in Jasienówka, a place with strong anthropogenic pressure, differ significantly in the frequency of genes encoding proteins enabling bacillus adaptation to specific stress conditions and production of a set of compounds, thus facilitating their colonization of various ecological niches. Furthermore, differences in the prevalence of essential stress sigma factors might be an important trail of this process. Due to these numerous adaptive genes, B. mycoides is able to quickly adapt to changing environmental conditions. IMPORTANCE This research allows deeper understanding of the genetic organization of natural bacterial populations, specifically, Bacillus mycoides, a psychrotrophic member of the Bacillus cereus group that is widely distributed worldwide, especially in areas with continental cold climates. These thorough analyses made it possible to describe, for the first time, the B. mycoides pan-genome, phylogenetic relationship within this species, and the mechanisms behind the species ecology and evolutionary history. Our study indicates a set of functional properties and adaptive genes, in particular, those encoding sigma factors, associated with B. mycoides acclimatization to specific ecological niches and changing environmental conditions.}, } @article {pmid34284718, year = {2021}, author = {Steidele, CE and Stam, R}, title = {Multi-omics approach highlights differences between RLP classes in Arabidopsis thaliana.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {557}, pmid = {34284718}, issn = {1471-2164}, mesh = {*Arabidopsis/genetics ; }, abstract = {BACKGROUND: The Leucine rich-repeat (LRR) receptor-like protein (RLP) family is a complex gene family with 57 members in Arabidopsis thaliana. 
Some members of the RLP family are known to be involved in basal developmental processes, whereas others are involved in defence responses. However, functional data is currently only available for a small subset of RLPs, leaving the remaining ones classified as RLPs of unknown function.

RESULTS: Using publicly available datasets, we annotated RLPs of unknown function as either likely defence-related or likely fulfilling a more basal function in plants. Then, using these categories, we can identify important characteristics that differ between the RLP subclasses. We found that the two classes differ in abundance on both transcriptome and proteome level, physical clustering in the genome and putative interaction partners. However, the classes do not differ in the genetic diversity of their individual members in accessible pan-genome data.

CONCLUSIONS: Our work has several implications for work related to functional studies on RLPs as well as for the understanding of RLP gene family evolution. Using our annotations, we can make suggestions on which RLPs can be identified as potential immune receptors using genetics tools and thereby complement disease studies. The lack of differences in nucleotide diversity between the two RLP subclasses further suggests that non-synonymous diversity of gene sequences alone cannot distinguish defence from developmental genes. By contrast, differences in transcript and protein abundance or clustering at genomic loci might also allow for functional annotations and characterisation in other plant species.}, } @article {pmid34284299, year = {2021}, author = {Wu, JJ and Chou, HP and Huang, JW and Deng, WL}, title = {Genomic and biochemical characterization of antifungal compounds produced by Bacillus subtilis PMB102 against Alternaria brassicicola.}, journal = {Microbiological research}, volume = {251}, number = {}, pages = {126815}, doi = {10.1016/j.micres.2021.126815}, pmid = {34284299}, issn = {1618-0623}, mesh = {*Alternaria/drug effects ; Antifungal Agents/chemistry/pharmacology ; *Bacillus subtilis/chemistry/genetics ; Chromatography, Liquid ; *Genome, Bacterial/genetics ; Genomics ; Tandem Mass Spectrometry ; }, abstract = {Bacillus subtilis is ubiquitous and capable of producing various metabolites, which make the bacterium a good candidate as a biocontrol agent for managing plant diseases. In this study, a phyllosphere bacterium B. subtilis PMB102 isolated from tomato leaf was found to inhibit the growth of Alternaria brassicicola ABA-31 on PDA and suppress Alternaria leaf spot on Chinese cabbage (Brassica rapa). The genome of PMB102 (Accession no. CP047645) was completely sequenced by Nanopore and Illumina technology to generate a circular chromosome of 4,103,088 bp encoding several gene clusters for synthesizing bioactive compounds. PMB102 and the other B. 
subtilis strains from different sources were compared in pangenome analysis to identify a suite of conserved genes involved in biocontrol and habitat adaptation. Two predicted gene products, surfactin and fengycin, were extracted from PMB102 culture filtrates and verified by LC-MS/MS. The antifungal activity of fengycin was tested on A. brassicicola ABA-31 in bioautography to inhibit hyphae growth, and in co-culturing assays to elicit the formation of swollen hyphae. Our data revealed that B. subtilis PMB102 suppresses Alternaria leaf spot by the production of antifungal metabolites, and fengycin plays an important role to inhibit the vegetative growth of A. brassicicola ABA-31.}, } @article {pmid34281179, year = {2021}, author = {Branford, I and Johnson, S and Chapwanya, A and Zayas, S and Boyen, F and Mielcarska, MB and Szulc-Dąbrowska, L and Butaye, P and Toka, FN}, title = {Comprehensive Molecular Dissection of Dermatophilus congolensis Genome and First Observation of tet(Z) Tetracycline Resistance.}, journal = {International journal of molecular sciences}, volume = {22}, number = {13}, pages = {}, pmid = {34281179}, issn = {1422-0067}, support = {41003-2019//One Health Center for Zoonoses and Tropical Veterinary Medicine/ ; AUGE/15/05 (G0H2516N)//Research Foundation Flanders/ ; }, mesh = {Actinobacteria/genetics ; Animals ; Anti-Bacterial Agents/pharmacology ; Cattle ; Cattle Diseases/metabolism ; Computational Biology/methods ; Dermatophilus/*drug effects/*genetics/metabolism ; Genome, Bacterial ; Gram-Positive Bacterial Infections/genetics/microbiology/veterinary ; Horse Diseases/microbiology ; Horses ; Tetracycline Resistance/genetics ; Whole Genome Sequencing/methods ; }, abstract = {Dermatophilus congolensis is a bacterial pathogen mostly of ruminant livestock in the tropics/subtropics and certain temperate climate areas. 
It causes dermatophilosis, a skin disease that threatens food security by lowering animal productivity and compromising animal health and welfare. Since it is a prevalent infection in ruminants, dermatophilosis warrants more research. There is limited understanding of its pathogenicity, and as such, there is no registered vaccine against D. congolensis. To better understand the genomics of D. congolensis, the primary aim of this work was to investigate this bacterium using whole-genome sequencing and bioinformatic analysis. D. congolensis is a high GC member of the Actinobacteria and encodes approximately 2527 genes. It has an open pan-genome, contains many potential virulence factors, secondary metabolites and encodes at least 23 housekeeping genes associated with antimicrobial susceptibility mechanisms and some isolates have an acquired antimicrobial resistance gene. Our isolates contain a single CRISPR array Cas type IE with classical 8 Cas genes. Although the isolates originate from the same geographical location there is some genomic diversity among them. In conclusion, we present the first detailed genomic study on D. 
congolensis, including the first observation of tet(Z), a tetracycline resistance-conferring gene.}, } @article {pmid34280580, year = {2021}, author = {Basharat, Z and Jahanzaib, M and Rahman, N}, title = {Therapeutic target identification via differential genome analysis of antibiotic resistant Shigella sonnei and inhibitor evaluation against a selected drug target.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {94}, number = {}, pages = {105004}, doi = {10.1016/j.meegid.2021.105004}, pmid = {34280580}, issn = {1567-7257}, mesh = {Anti-Bacterial Agents/*pharmacology ; Drug Discovery ; *Drug Resistance, Bacterial ; *Genome, Bacterial ; Shigella sonnei/*drug effects/genetics ; Whole Genome Sequencing ; }, abstract = {Shigella sonnei has been implicated in bloody diarrhea (accompanied by abdominal pain and fever) and is an emerging pathogen of concern, especially in developing countries. The major means of transmission is the fecal-oral route while sexual transmission has also been reported. In children, the impact might be stunted growth due to life-threatening illness. Resistance has been reported in this species for several types of antibiotics. In this study, we retrieved the antibiotic-resistant labeled whole genome sequences of the species from the PATRIC database and performed a pan-genome analysis to filter out core genes. Antibiotic resistance was studied in the core, accessory and unique genome. Core genes were utilized as seed substance for essentiality analysis and drug candidate assignment. Product of the gene aroG, i.e. chorismate biosynthetic process 3-deoxy-7-phosphoheptulonate synthase enzyme, responsible for aromatic amino acid family biosynthetic process, was taken for further downstream processing. 
Natural product libraries of flavonoids (n = 178), ZINC database derived inhibitor compounds of the 3-deoxy-7-phosphoheptulonate synthase enzyme (n = 112), and streptomycin compounds (n = 737) were docked to find out potent inhibitors, followed by dynamics simulation of 50 ns each for top compounds. Physicochemical and ADMET profiling of the top compounds was done to analyze their safety for consumption. We propose that the top compounds: Phytoene from Streptomycin library and ZINC000036444158 (synonym:1,16-bis[(dihydroxyphosphinyl)oxy]hexadecane) from 3-deoxy-7-phosphoheptulonate synthase inhibitor library of ZINC database (and used as a control in this study) should be tested in vitro against Shigella sonnei, to fully determine their efficacy. This could add to the drying pipeline of potent drug molecules against emerging pathogens.}, } @article {pmid34280193, year = {2021}, author = {Cruaud, A and Lehrter, V and Genson, G and Rasplus, JY and Depaquit, J}, title = {Evolution, systematics and historical biogeography of sand flies of the subgenus Paraphlebotomus (Diptera, Psychodidae, Phlebotomus) inferred using restriction-site associated DNA markers.}, journal = {PLoS neglected tropical diseases}, volume = {15}, number = {7}, pages = {e0009479}, pmid = {34280193}, issn = {1935-2735}, mesh = {Animal Distribution ; Animals ; *Evolution, Molecular ; Genetic Markers ; Humans ; Insect Vectors/anatomy & histology/classification/*genetics/physiology ; Leishmaniasis/epidemiology/transmission ; Middle East/epidemiology ; Phlebotomus/anatomy & histology/classification/*genetics/physiology ; Phylogeny ; }, abstract = {Phlebotomine sand flies are the main natural vectors of Leishmania, which cause visceral and tegumentary tropical diseases worldwide. However, their taxonomy and evolutionary history remain poorly studied. Indeed, as for many human disease vectors, their small size is a challenge for morphological and molecular works. 
Here, we successfully amplified unbiased copies of whole genome to sequence thousands of restriction-site associated DNA (RAD) markers from single specimens of phlebotomines. RAD markers were used to infer a fully resolved phylogeny of the subgenus Paraphlebotomus (11 species + 5 outgroups, 32 specimens). The subgenus was not recovered as monophyletic and we describe a new subgenus Artemievus subg. nov. Depaquit for Phlebotomus alexandri. We also confirm the validity of Ph. riouxi which is reinstated as a valid species. Our analyses suggest that Paraphlebotomus sensu nov. originated ca 12.9-8.5 Ma and was possibly largely distributed from peri-Mediterranean to Irano-Turanian regions. Its biogeographical history can be summarized into three phases: i) a first split between Ph. riouxi + Ph. chabaudi and other species that may have resulted from the rise of the Saharan belt ca 8.5 Ma; ii) a Messinian vicariant event (7.3-5.3 Ma) during which the prolonged drought could have resulted in the divergence of main lineages; iii) a recent radiation event (3-2 Ma) that corresponds to cycles of wet and dry periods in the Middle East and the East African subregions during the Pleistocene. Interestingly these cycles are also hypothetical drivers of the diversification of rodents, in the burrows of which Paraphlebotomus larvae develop. 
By meeting the challenge of sequencing pangenomics markers from single, minute phlebotomines, this work opens new avenues for improving our understanding of the epidemiology of leishmaniases and possibly other human diseases transmitted by arthropod vectors.}, } @article {pmid34275202, year = {2021}, author = {Bornowski, N and Michel, KJ and Hamilton, JP and Ou, S and Seetharam, AS and Jenkins, J and Grimwood, J and Plott, C and Shu, S and Talag, J and Kennedy, M and Hundley, H and Singan, VR and Barry, K and Daum, C and Yoshinaga, Y and Schmutz, J and Hirsch, CN and Hufford, MB and de Leon, N and Kaeppler, SM and Buell, CR}, title = {Genomic variation within the maize stiff-stalk heterotic germplasm pool.}, journal = {The plant genome}, volume = {14}, number = {3}, pages = {e20114}, doi = {10.1002/tpg2.20114}, pmid = {34275202}, issn = {1940-3372}, mesh = {Genomics ; Haplotypes ; Hybrid Vigor ; *Plant Breeding ; *Zea mays/genetics ; }, abstract = {The stiff-stalk heterotic group in Maize (Zea mays L.) is an important source of inbreds used in U.S. commercial hybrid production. Founder inbreds B14, B37, B73, and, to a lesser extent, B84, are found in the pedigrees of a majority of commercial seed parent inbred lines. We created high-quality genome assemblies of B84 and four expired Plant Variety Protection (ex-PVP) lines LH145 representing B14, NKH8431 of mixed descent, PHB47 representing B37, and PHJ40, which is a Pioneer Hi-Bred International (PHI) early stiff-stalk type. Sequence was generated using long-read sequencing achieving highly contiguous assemblies of 2.13-2.18 Gbp with N50 scaffold lengths >200 Mbp. Inbred-specific gene annotations were generated using a core five-tissue gene expression atlas, whereas transposable element (TE) annotation was conducted using de novo and homology-directed methodologies. 
Compared with the reference inbred B73, synteny analyses revealed extensive collinearity across the five stiff-stalk genomes, although unique components of the maize pangenome were detected. Comparison of this set of stiff-stalk inbreds with the original Iowa Stiff Stalk Synthetic breeding population revealed that these inbreds represent only a proportion of variation in the original stiff-stalk pool and there are highly conserved haplotypes in released public and ex-Plant Variety Protection inbreds. Despite the reduction in variation from the original stiff-stalk population, substantial genetic and genomic variation was identified supporting the potential for continued breeding success in this pool. The assemblies described here represent stiff-stalk inbreds that have historical and commercial relevance and provide further insight into the emerging maize pangenome.}, } @article {pmid34267741, year = {2021}, author = {Verma, DK and Chaudhary, C and Singh, L and Sidhu, C and Siddhardha, B and Prasad, SE and Thakur, KG}, title = {Corrigendum: Isolation and Taxonomic Characterization of Novel Haloarchaeal Isolates From Indian Solar Saltern: A Brief Review on Distribution of Bacteriorhodopsins and V-Type ATPases in Haloarchaea.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {713942}, doi = {10.3389/fmicb.2021.713942}, pmid = {34267741}, issn = {1664-302X}, abstract = {[This corrects the article DOI: 10.3389/fmicb.2020.554927.].}, } @article {pmid34267358, year = {2021}, author = {Liao, J and Guo, X and Weller, DL and Pollak, S and Buckley, DH and Wiedmann, M and Cordero, OX}, title = {Nationwide genomic atlas of soil-dwelling Listeria reveals effects of selection and population ecology on pangenome evolution.}, journal = {Nature microbiology}, volume = {6}, number = {8}, pages = {1021-1030}, pmid = {34267358}, issn = {2058-5276}, mesh = {Ecosystem ; Evolution, Molecular ; *Genome, Bacterial ; Listeria/classification/*genetics/isolation & 
purification ; Phylogeny ; Recombination, Genetic ; *Selection, Genetic ; *Soil Microbiology ; }, abstract = {Natural bacterial populations can display enormous genomic diversity, primarily in the form of gene content variation caused by the frequent exchange of DNA with the local environment. However, the ecological drivers of genomic variability and the role of selection remain controversial. Here, we address this gap by developing a nationwide atlas of 1,854 Listeria isolates, collected systematically from soils across the contiguous United States. We found that Listeria was present across a wide range of environmental parameters, being mainly controlled by soil moisture, molybdenum and salinity concentrations. Whole-genome data from 594 representative strains allowed us to decompose Listeria diversity into 12 phylogroups, each with large differences in habitat breadth and endemism. 'Cosmopolitan' phylogroups, prevalent across many different habitats, had more open pangenomes and displayed weaker linkage disequilibrium, reflecting higher rates of gene gain and loss, and allele exchange than phylogroups with narrow habitat ranges. Cosmopolitan phylogroups also had a large fraction of genes affected by positive selection. The effect of positive selection was more pronounced in the phylogroup-specific core genome, suggesting that lineage-specific core genes are important drivers of adaptation. 
These results indicate that genome flexibility and recombination are the consequence of selection to survive in variable environments.}, } @article {pmid34260702, year = {2021}, author = {Norri, T and Cazaux, B and Dönges, S and Valenzuela, D and Mäkinen, V}, title = {Founder reconstruction enables scalable and seamless pangenomic analysis.}, journal = {Bioinformatics (Oxford, England)}, volume = {37}, number = {24}, pages = {4611-4619}, pmid = {34260702}, issn = {1367-4811}, mesh = {*Software ; *Genomics ; Sequence Analysis, DNA ; Genome ; Workflow ; }, abstract = {MOTIVATION: Variant calling workflows that utilize a single reference sequence are the de facto standard elementary genomic analysis routine for resequencing projects. Various ways to enhance the reference with pangenomic information have been proposed, but scalability combined with seamless integration to existing workflows remains a challenge.

RESULTS: We present PanVC with founder sequences, a scalable and accurate variant calling workflow based on a multiple alignment of reference sequences. Scalability is achieved by removing duplicate parts up to a limit into a founder multiple alignment, that is then indexed using a hybrid scheme that exploits general purpose read aligners. Our implemented workflow uses GATK or BCFtools for variant calling, but the various steps of our workflow (e.g. vcf2multialign tool, founder reconstruction) can be of independent interest as a basis for creating novel pangenome analysis workflows beyond variant calling.

AVAILABILITY AND IMPLEMENTATION: Our open access tools and instructions on how to reproduce our experiments are available at the following address: https://github.com/algbio/panvc-founders.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid34253730, year = {2021}, author = {Lu, TY and {Human Genome Structural Variation Consortium} and Chaisson, MJP}, title = {Profiling variable-number tandem repeat variation across populations using repeat-pangenome graphs.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {4250}, pmid = {34253730}, issn = {2041-1723}, support = {R01 HG002898/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; R01 HG011649/HG/NHGRI NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; UM1 HG008901/HG/NHGRI NIH HHS/United States ; U24 HG007497/HG/NHGRI NIH HHS/United States ; }, mesh = {Chromosome Mapping ; Gene Expression Regulation ; Genetic Loci ; *Genetic Variation ; *Genetics, Population ; *Genome, Human ; Humans ; Minisatellite Repeats/*genetics ; Nucleotide Motifs/genetics ; Quantitative Trait Loci/genetics ; }, abstract = {Variable number tandem repeats (VNTRs) are composed of consecutive repetitive DNA with hypervariable repeat count and composition. They include protein coding sequences and associations with clinical disorders. It has been difficult to incorporate VNTR analysis in disease studies that use short-read sequencing because the traditional approach of mapping to the human reference is less effective for repetitive and divergent sequences. In this work, we solve VNTR mapping for short reads with a repeat-pangenome graph (RPGG), a data structure that encodes both the population diversity and repeat structure of VNTR loci from multiple haplotype-resolved assemblies. We develop software to build a RPGG, and use the RPGG to estimate VNTR composition with short reads. 
We use this to discover VNTRs with length stratified by continental population, and expression quantitative trait loci, indicating that RPGG analysis of VNTRs will be critical for future studies of diversity and disease.}, } @article {pmid34252945, year = {2021}, author = {Jain, C and Tavakoli, N and Aluru, S}, title = {A variant selection framework for genome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {37}, number = {Suppl_1}, pages = {i460-i467}, pmid = {34252945}, issn = {1367-4811}, support = {CCF-1816027//National Science Foundation/ ; //National Energy Research Scientific Computing Center/ ; //Office of Science/ ; DE-AC02-05CH11231//Department of Energy/ ; }, mesh = {Algorithms ; *Genome ; Genome, Human ; Humans ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; *Software ; }, abstract = {MOTIVATION: Variation graph representations are projected to either replace or supplement conventional single genome references due to their ability to capture population genetic diversity and reduce reference bias. Vast catalogues of genetic variants for many species now exist, and it is natural to ask which among these are crucial to circumvent reference bias during read mapping.

RESULTS: In this work, we propose a novel mathematical framework for variant selection, by casting it in terms of minimizing variation graph size subject to preserving paths of length α with at most δ differences. This framework leads to a rich set of problems based on the types of variants [e.g. single nucleotide polymorphisms (SNPs), indels or structural variants (SVs)], and whether the goal is to minimize the number of positions at which variants are listed or to minimize the total number of variants listed. We classify the computational complexity of these problems and provide efficient algorithms along with their software implementation when feasible. We empirically evaluate the magnitude of graph reduction achieved in human chromosome variation graphs using multiple α and δ parameter values corresponding to short and long-read resequencing characteristics. When our algorithm is run with parameter settings amenable to long-read mapping (α = 10 kbp, δ = 1000), 99.99% SNPs and 73% SVs can be safely excluded from human chromosome 1 variation graph. The graph size reduction can benefit downstream pan-genome analysis.

AVAILABILITY AND IMPLEMENTATION: https://github.com/AT-CG/VF.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid34252729, year = {2021}, author = {Pedrós-Alió, C}, title = {Time travel in microorganisms.}, journal = {Systematic and applied microbiology}, volume = {44}, number = {4}, pages = {126227}, doi = {10.1016/j.syapm.2021.126227}, pmid = {34252729}, issn = {1618-0984}, mesh = {*Ecosystem ; Phylogeny ; RNA, Ribosomal, 16S ; *Seawater ; }, } @article {pmid34252236, year = {2021}, author = {Nie, S and Wang, B and Ding, H and Lin, H and Zhang, L and Li, Q and Wang, Y and Zhang, B and Liang, A and Zheng, Q and Wang, H and Lv, H and Zhu, K and Jia, M and Wang, X and Du, J and Zhao, R and Jiang, Z and Xia, C and Qiao, Z and Li, X and Liu, B and Zhu, H and An, R and Li, Y and Jiang, Q and Chen, B and Zhang, H and Wang, D and Tang, C and Yuan, Y and Dai, J and Zhan, J and He, W and Wang, X and Shi, J and Wang, B and Gong, M and He, X and Li, P and Huang, L and Li, H and Pan, C and Huang, H and Yuan, G and Lan, H and Nie, Y and Li, X and Zhao, X and Zhang, X and Pan, G and Wu, Q and Xu, F and Zhang, Z}, title = {Genome assembly of the Chinese maize elite inbred line RP125 and its EMS mutant collection provide new resources for maize genetics research and crop improvement.}, journal = {The Plant journal : for cell and molecular biology}, volume = {108}, number = {1}, pages = {40-54}, doi = {10.1111/tpj.15421}, pmid = {34252236}, issn = {1365-313X}, mesh = {Crops, Agricultural ; Endosperm/genetics/metabolism ; Genome, Plant/*genetics ; Inbreeding ; Mutation ; Phenotype ; Plant Breeding ; Seed Bank ; Seeds/genetics/metabolism ; Starch/metabolism ; Zea mays/*genetics/metabolism ; }, abstract = {Maize is an important crop worldwide, as well as a valuable model with vast genetic diversity. Accurate genome and annotation information for a wide range of inbred lines would provide valuable resources for crop improvement and pan-genome characterization. 
In this study, we generated a high-quality de novo genome assembly (contig N50 of 15.43 Mb) of the Chinese elite inbred line RP125 using Nanopore long-read sequencing and Hi-C scaffolding, which yield highly contiguous, chromosome-length scaffolds. Global comparison of the RP125 genome with those of B73, W22, and Mo17 revealed a large number of structural variations. To create new germplasm for maize research and crop improvement, we carried out an EMS mutagenesis screen on RP125. In total, we obtained 5818 independent M2 families, with 946 mutants showing heritable phenotypes. Taking advantage of the high-quality RP125 genome, we successfully cloned 10 mutants from the EMS library, including the novel kernel mutant qk1 (quekou: "missing a small part" in Chinese), which exhibited partial loss of endosperm and a starch accumulation defect. QK1 encodes a predicted metal tolerance protein, which is specifically required for Fe transport. Increased accumulation of Fe and reactive oxygen species as well as ferroptosis-like cell death were detected in qk1 endosperm. Our study provides the community with a high-quality genome sequence and a large collection of mutant germplasm.}, } @article {pmid34252087, year = {2021}, author = {Noroy, C and Meyer, DF}, title = {The super repertoire of type IV effectors in the pangenome of Ehrlichia spp. provides insights into host-specificity and pathogenesis.}, journal = {PLoS computational biology}, volume = {17}, number = {7}, pages = {e1008788}, pmid = {34252087}, issn = {1553-7358}, mesh = {Animals ; Bacterial Proteins ; Computational Biology ; *Ehrlichia/genetics/pathogenicity ; Ehrlichiosis/microbiology ; Genome, Bacterial/*genetics ; Host Specificity/*genetics ; Humans ; Type IV Secretion Systems/*genetics ; Virulence/*genetics ; }, abstract = {The identification of bacterial effectors is essential to understand how obligatory intracellular bacteria such as Ehrlichia spp. manipulate the host cell for survival and replication. 
Infection of mammals-including humans-by the intracellular pathogenic bacteria Ehrlichia spp. depends largely on the injection of virulence proteins that hijack host cell processes. Several hypothetical virulence proteins have been identified in Ehrlichia spp., but one so far has been experimentally shown to translocate into host cells via the type IV secretion system. However, the current challenge is to identify most of the type IV effectors (T4Es) to fully understand their role in Ehrlichia spp. virulence and host adaptation. Here, we predict the T4E repertoires of four sequenced Ehrlichia spp. and four other Anaplasmataceae as comparative models (pathogenic Anaplasma spp. and Wolbachia endosymbiont) using previously developed S4TE 2.0 software. This analysis identified 579 predicted T4Es (228 pT4Es for Ehrlichia spp. only). The effector repertoires of Ehrlichia spp. overlapped, thereby defining a conserved core effectome of 92 predicted effectors shared by all strains. In addition, 69 species-specific T4Es were predicted with non-canonical GC% mostly in gene sparse regions of the genomes and we observed a bias in pT4Es according to host-specificity. We also identified new protein domain combinations, suggesting novel effector functions. This work presenting the predicted effector collection of Ehrlichia spp. can serve as a guide for future functional characterisation of effectors and design of alternative control strategies against these bacteria.}, } @article {pmid34248875, year = {2021}, author = {Cao, H and Xu, H and Ning, C and Xiang, L and Ren, Q and Zhang, T and Zhang, Y and Gao, R}, title = {Multi-Omics Approach Reveals the Potential Core Vaccine Targets for the Emerging Foodborne Pathogen Campylobacter jejuni.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {665858}, pmid = {34248875}, issn = {1664-302X}, abstract = {Campylobacter jejuni is a leading cause of bacterial gastroenteritis in humans around the world. 
The emergence of bacterial resistance is becoming more serious; therefore, development of new vaccines is considered to be an alternative strategy against drug-resistant pathogen. In this study, we investigated the pangenome of 173 C. jejuni strains and analyzed the phylogenesis and the virulence factor genes. In order to acquire a high-quality pangenome, genomic relatedness was firstly performed with average nucleotide identity (ANI) analyses, and an open pangenome of 8,041 gene families was obtained with the correct taxonomy genomes. Subsequently, the virulence property of the core genome was analyzed and 145 core virulence factor (VF) genes were obtained. Upon functional genomics and immunological analyses, five core VF proteins with high antigenicity were selected as potential core vaccine targets for humans. Furthermore, functional annotations indicated that these proteins are involved in important molecular functions and biological processes, such as adhesion, regulation, and secretion. In addition, transcriptome analysis in human cells and pig intestinal loop proved that these vaccine target genes are important in the virulence of C. jejuni in different hosts. Comprehensive pangenome and relevant animal experiments will facilitate discovering the potential core vaccine targets with improved efficiency in reverse vaccinology. Likewise, this study provided some insights into the genetic polymorphism and phylogeny of C. jejuni and discovered potential vaccine candidates for humans. Prospective development of new vaccines using the targets will be an alternative to the use of antibiotics and prevent the development of multidrug-resistant C. 
jejuni in humans and even other animals.}, } @article {pmid34248865, year = {2021}, author = {Banerjee, R and Chaudhari, NM and Lahiri, A and Gautam, A and Bhowmik, D and Dutta, C and Chattopadhyay, S and Huson, DH and Paul, S}, title = {Interplay of Various Evolutionary Modes in Genome Diversification and Adaptive Evolution of the Family Sulfolobaceae.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {639995}, pmid = {34248865}, issn = {1664-302X}, abstract = {Sulfolobaceae family, comprising diverse thermoacidophilic and aerobic sulfur-metabolizing Archaea from various geographical locations, offers an ideal opportunity to infer the evolutionary dynamics across the members of this family. Comparative pan-genomics coupled with evolutionary analyses has revealed asymmetric genome evolution within the Sulfolobaceae family. The trend of genome streamlining followed by periods of differential gene gains resulted in an overall genome expansion in some species of this family, whereas there was reduction in others. Among the core genes, both Sulfolobus islandicus and Saccharolobus solfataricus showed a considerable fraction of positively selected genes and also higher frequencies of gene acquisition. In contrast, Sulfolobus acidocaldarius genomes experienced substantial amount of gene loss and strong purifying selection as manifested by relatively lower genome size and higher genome conservation. Central carbohydrate metabolism and sulfur metabolism coevolved with the genome diversification pattern of this archaeal family. The autotrophic CO2 fixation with three significant positively selected enzymes from S. islandicus and S. solfataricus was found to be more imperative than heterotrophic CO2 fixation for Sulfolobaceae. 
Overall, our analysis provides an insight into the interplay of various genomic adaptation strategies including gene gain-loss, mutation, and selection influencing genome diversification of Sulfolobaceae at various taxonomic levels and geographical locations.}, } @article {pmid34247024, year = {2021}, author = {Begrem, S and Jérôme, M and Leroi, F and Delbarre-Ladrat, C and Grovel, O and Passerini, D}, title = {Genomic diversity of Serratia proteamaculans and Serratia liquefaciens predominant in seafood products and spoilage potential analyses.}, journal = {International journal of food microbiology}, volume = {354}, number = {}, pages = {109326}, doi = {10.1016/j.ijfoodmicro.2021.109326}, pmid = {34247024}, issn = {1879-3460}, mesh = {*Food Microbiology ; *Genetic Variation ; *Genome, Bacterial/genetics ; RNA, Ribosomal, 16S/genetics ; *Seafood/microbiology ; *Serratia/genetics ; *Serratia liquefaciens/genetics ; }, abstract = {Serratia sp. cause food losses and waste due to spoilage; it is noteworthy that they represent a dominant population in seafood. The main spoilage associated species comprise S. liquefaciens, S. grimesii, S. proteamaculans and S. quinivorans, also known as S. liquefaciens-like strains. These species are difficult to discriminate since classical 16S rRNA gene-based sequences do not possess sufficient resolution. In this study, a phylogeny based on the short-length luxS gene was able to speciate 47 Serratia isolates from seafood, with S. proteamaculans being the main species from fresh salmon and tuna, cold-smoked salmon, and cooked shrimp while S. liquefaciens was only found in cold-smoked salmon. The genome of the first S. proteamaculans strain isolated from the seafood matrix (CD3406 strain) was sequenced. Pangenome analyses of S. proteamaculans and S. liquefaciens indicated high adaptation potential. Biosynthetic pathways involved in antimicrobial compounds production and in the main seafood spoilage compounds were also identified. 
The genetic equipment highlighted in this study contributed to gain further insights into the predominance of Serratia in seafood products and their capacity to spoil.}, } @article {pmid34245190, year = {2022}, author = {Wang, S and Narsing Rao, MP and Wei, D and Sun, L and Fang, BZ and Li, WQ and Yu, LH and Li, WJ}, title = {Complete genome sequencing and comparative genome analysis of the extremely halophilic archaea, Haloterrigena daqingensis.}, journal = {Biotechnology and applied biochemistry}, volume = {69}, number = {4}, pages = {1482-1488}, doi = {10.1002/bab.2220}, pmid = {34245190}, issn = {1470-8744}, mesh = {DNA, Archaeal/genetics ; *Halobacteriaceae/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Whole Genome Sequencing ; }, abstract = {In the present study, we report the complete genome sequencing of Haloterrigena daqingensis species. The genome of H. daqingensis JX313[T] consisted of a circular chromosome with three plasmids. The genome size and G+C content were estimated to be 3835796 bp and 61.7%, respectively. A total of 4158 genes were predicted with six rRNAs and 45 tRNAs. Metabolic pathway analysis suggests that H. daqingensis JX313[T] codes for all the necessary genes responsible to sustain its life at saline environment. The pan-genome analysis suggests that the number of singleton-gene between H. daqingensis and other Haloterrigena species varied. The study not only helps us understand H. 
daqingensis strategy for dealing with high stress, but it also provides an overview of its genomic makeup.}, } @article {pmid34241588, year = {2021}, author = {Sanoussi, CN and Coscolla, M and Ofori-Anyinam, B and Otchere, ID and Antonio, M and Niemann, S and Parkhill, J and Harris, S and Yeboah-Manu, D and Gagneux, S and Rigouts, L and Affolabi, D and de Jong, BC and Meehan, CJ}, title = {Mycobacterium tuberculosis complex lineage 5 exhibits high levels of within-lineage genomic diversity and differing gene content compared to the type strain H37Rv.}, journal = {Microbial genomics}, volume = {7}, number = {7}, pages = {}, pmid = {34241588}, issn = {2057-5858}, mesh = {Chromosome Mapping ; Drug Resistance, Multiple, Bacterial/genetics ; Genetic Variation/*genetics ; Genome, Bacterial/*genetics ; Genotype ; High-Throughput Nucleotide Sequencing ; Humans ; Mycobacterium tuberculosis/classification/*genetics ; Sequence Analysis, DNA ; Species Specificity ; Tuberculosis/microbiology/transmission ; Whole Genome Sequencing ; }, abstract = {Pathogens of the Mycobacterium tuberculosis complex (MTBC) are considered to be monomorphic, with little gene content variation between strains. Nevertheless, several genotypic and phenotypic factors separate strains of the different MTBC lineages (L), especially L5 and L6 (traditionally termed Mycobacterium africanum) strains, from each other. However, this genome variability and gene content, especially of L5 strains, has not been fully explored and may be important for pathobiology and current approaches for genomic analysis of MTBC strains, including transmission studies. By comparing the genomes of 355 L5 clinical strains (including 3 complete genomes and 352 Illumina whole-genome sequenced isolates) to each other and to H37Rv, we identified multiple genes that were differentially present or absent between H37Rv and L5 strains. 
Additionally, considerable gene content variability was found across L5 strains, including a split in the L5.3 sub-lineage into L5.3.1 and L5.3.2. These gene content differences had a small knock-on effect on transmission cluster estimation, with clustering rates influenced by the selected reference genome, and with potential overestimation of recent transmission when using H37Rv as the reference genome. We conclude that full capture of the gene diversity, especially high-resolution outbreak analysis, requires a variation of the single H37Rv-centric reference genome mapping approach currently used in most whole-genome sequencing data analysis pipelines. Moreover, the high within-lineage gene content variability suggests that the pan-genome of M. tuberculosis is at least several kilobases larger than previously thought, implying that a concatenated or reference-free genome assembly (de novo) approach may be needed for particular questions.}, } @article {pmid34238216, year = {2021}, author = {Sinha, D and Sun, X and Khare, M and Drancourt, M and Raoult, D and Fournier, PE}, title = {Pangenome analysis and virulence profiling of Streptococcus intermedius.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {522}, pmid = {34238216}, issn = {1471-2164}, support = {ANR-10-IAHU-03//Agence Nationale de Recherche/ ; ANR-10-IAHU-03//Agence Nationale de Recherche/ ; ANR-10-IAHU-03//Agence Nationale de Recherche/ ; ANR-10-IAHU-03//Agence Nationale de Recherche/ ; ANR-10-IAHU-03//Agence Nationale de Recherche/ ; }, mesh = {Genome, Bacterial ; *Genomics ; Humans ; Phylogeny ; *Streptococcus intermedius/genetics ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: Streptococcus intermedius, a member of the S. anginosus group, is a commensal bacterium present in the normal microbiota of human mucosal surfaces of the oral, gastrointestinal, and urogenital tracts. 
However, it has been associated with various infections such as liver and brain abscesses, bacteremia, osteo-articular infections, and endocarditis. Since 2005, high throughput genome sequencing methods enabled understanding the genetic landscape and diversity of bacteria as well as their pathogenic role. Here, in order to determine whether specific virulence genes could be related to specific clinical manifestations, we compared the genomes from 27 S. intermedius strains isolated from patients with various types of infections, including 13 that were sequenced in our institute and 14 available in GenBank.

RESULTS: We estimated the theoretical pangenome size to be 4,020 genes, including 1,355 core genes, 1,054 strain-specific genes and 1,611 accessory genes shared by 2 or more strains. The pangenome analysis demonstrated that the genomic diversity of S. intermedius represents an "open" pangenome model. We identified a core virulome of 70 genes and 78 unique virulence markers. The phylogenetic clusters based upon core-genome sequences and SNPs were independent from disease types and sample sources. However, using Principal Component analysis based on presence/absence of virulence genes, we identified the sda histidine kinase, adhesion protein LAP and capsular polysaccharide biosynthesis protein cps4E as being associated with brain abscess or broncho-pulmonary infection. In contrast, liver and abdominal abscess were associated with the presence of the fibronectin binding protein fbp54 and capsular polysaccharide biosynthesis protein cap8D and cpsB.

CONCLUSIONS: Based on the virulence gene content of 27 S. intermedius strains causing various diseases, we identified putative disease-specific genetic profiles discriminating those causing brain abscess or broncho-pulmonary infection from those causing liver and abdominal abscess. These results provide an insight into S. intermedius pathogenesis and highlight putative targets in a diagnostic perspective.}, } @article {pmid34237377, year = {2021}, author = {Liu, C and Peng, P and Li, W and Ye, C and Zhang, S and Wang, R and Li, D and Guan, S and Zhang, L and Huang, X and Guo, Z and Guo, J and Long, Y and Li, L and Pan, G and Tian, B and Xiao, J}, title = {Deciphering variation of 239 elite japonica rice genomes for whole genome sequences-enabled breeding.}, journal = {Genomics}, volume = {113}, number = {5}, pages = {3083-3091}, doi = {10.1016/j.ygeno.2021.07.002}, pmid = {34237377}, issn = {1089-8646}, mesh = {Alleles ; Genetic Variation ; Genome, Plant ; *Oryza/genetics ; Plant Breeding ; Polymorphism, Single Nucleotide ; }, abstract = {Revealing genomic variation of representative and diverse germplasm is the cornerstone of deploying genomics information into genetic improvement programs of species of agricultural importance. Here we report the re-sequencing of 239 japonica rice elites representing the genetic diversity of japonica germplasm in China, Japan and Korea. A total of 4.8 million SNPs and PAV of 35,634 genes were identified. The elites from Japan and Korea are closely related and relatively less diverse than those from China. A japonica rice pan-genome was constructed, and 35 Mb non-redundant novel sequences were identified, from which 1131 novel genes were predicted. Strong selection signals of genomic regions were detected on most of the chromosomes. The heading date genes Hd1 and Hd3a have been artificially selected during the breeding process.
The results from this study lay the foundation for future whole genome sequences-enabled breeding in rice and provide a paradigm for other species.}, } @article {pmid34227250, year = {2022}, author = {Rijzaani, H and Bayer, PE and Rouard, M and Doležel, J and Batley, J and Edwards, D}, title = {The pangenome of banana highlights differences between genera and genomes.}, journal = {The plant genome}, volume = {15}, number = {1}, pages = {e20100}, doi = {10.1002/tpg2.20100}, pmid = {34227250}, issn = {1940-3372}, mesh = {Genome, Plant ; *Musa/genetics ; *Musaceae/genetics ; Plant Breeding ; Polyploidy ; }, abstract = {Banana (Musaceae family) has a complex genetic history and includes a genus Musa with a variety of cultivated clones with edible fruits, Ensete species that are grown for their edible corm, and monospecific Musella whose generic status has been questioned. The most commonly exported banana cultivars belong to Cavendish, a subgroup of Musa triploid cultivars, which is under threat by fungal pathogens, though there are also related species M. balbisiana Colla (B genome), M. textilis Née (T genome), and M. schizocarpa N. W. Simmonds (S genome), along with hybrids of these genomes, which potentially host genes of agronomic interest. Here we present the first cross-genus pangenome of banana, which contains representatives of the Musa and Ensete genera. Clusters based on gene presence-absence variation (PAV) clearly separate Musa and Ensete, while Musa is split further based on species. 
These results present the first pangenome study across genus boundaries and identify genes that differentiate between Musaceae species, information that may support breeding programs in these crops.}, } @article {pmid34226565, year = {2021}, author = {Lovell, JT and Bentley, NB and Bhattarai, G and Jenkins, JW and Sreedasyam, A and Alarcon, Y and Bock, C and Boston, LB and Carlson, J and Cervantes, K and Clermont, K and Duke, S and Krom, N and Kubenka, K and Mamidi, S and Mattison, CP and Monteros, MJ and Pisani, C and Plott, C and Rajasekar, S and Rhein, HS and Rohla, C and Song, M and Hilaire, RS and Shu, S and Wells, L and Webber, J and Heerema, RJ and Klein, PE and Conner, P and Wang, X and Grauke, LJ and Grimwood, J and Schmutz, J and Randall, JJ}, title = {Four chromosome scale genomes and a pan-genome annotation to accelerate pecan tree breeding.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {4125}, pmid = {34226565}, issn = {2041-1723}, mesh = {Carya/*genetics ; *Chromosomes ; Diploidy ; Disease Resistance/genetics ; Genetic Variation ; *Genome, Plant ; *Genomics ; Genotype ; Haplotypes ; Phenotype ; *Plant Breeding ; }, abstract = {Genome-enabled biotechnologies have the potential to accelerate breeding efforts in long-lived perennial crop species. Despite the transformative potential of molecular tools in pecan and other outcrossing tree species, highly heterozygous genomes, significant presence-absence gene content variation, and histories of interspecific hybridization have constrained breeding efforts. To overcome these challenges, here, we present diploid genome assemblies and annotations of four outbred pecan genotypes, including a PacBio HiFi chromosome-scale assembly of both haplotypes of the 'Pawnee' cultivar.
Comparative analysis and pan-genome integration reveal substantial and likely adaptive interspecific genomic introgressions, including an over-retained haplotype introgressed from bitternut hickory into pecan breeding pedigrees. Further, by leveraging our pan-genome presence-absence and functional annotation database among genomes and within the two outbred haplotypes of the 'Lakota' genome, we identify candidate genes for pest and pathogen resistance. Combined, these analyses and resources highlight significant progress towards functional and quantitative genomics in highly diverse and outbred crops.}, } @article {pmid34223115, year = {2021}, author = {Hendrickx, APA and Debast, S and Pérez-Vázquez, M and Schoffelen, AF and Notermans, DW and Landman, F and Wielders, CCH and Cañada Garcia, JE and Flipse, J and de Haan, A and Witteveen, S and van Santen-Verheuvel, M and de Greeff, SC and Kuijper, E and Schouls, LM}, title = {A genetic cluster of MDR Enterobacter cloacae complex ST78 harbouring a plasmid containing bla VIM-1 and mcr-9 in the Netherlands.}, journal = {JAC-antimicrobial resistance}, volume = {3}, number = {2}, pages = {dlab046}, pmid = {34223115}, issn = {2632-1823}, abstract = {BACKGROUND: Carbapenemases produced by Enterobacterales are often encoded by genes on transferable plasmids and represent a major healthcare problem, especially if the plasmids contain additional antibiotic resistance genes. As part of Dutch national surveillance, 50 medical microbiological laboratories submit their Enterobacterales isolates suspected of carbapenemase production to the National Institute for Public Health and the Environment for characterization. All isolates for which carbapenemase production is confirmed are subjected to next-generation sequencing.

OBJECTIVES: To study the molecular characteristics of a genetic cluster of Enterobacter cloacae complex isolates collected in Dutch national surveillance in the period 2015-20 in the Netherlands.

METHODS: Short- and long-read genome sequencing was used in combination with MLST and pan-genome MLST (pgMLST) analyses. Automated antimicrobial susceptibility testing (AST), the Etest for meropenem and the broth microdilution test for colistin were performed. The carbapenem inactivation method was used to assess carbapenemase production.

RESULTS: pgMLST revealed that nine E. cloacae complex isolates from three different hospitals in the Netherlands differed by <20 alleles and grouped in a genetic cluster termed EclCluster-013. Seven isolates were submitted by one hospital in 2016-20. EclCluster-013 isolates produced carbapenemase and were from ST78, a globally disseminated lineage. EclCluster-013 isolates harboured a 316 078 bp IncH12 plasmid carrying the bla VIM-1 carbapenemase and the novel mcr-9 colistin resistance gene along with genes encoding resistance to different antibiotic classes. AST showed that EclCluster-013 isolates were MDR, but susceptible to meropenem (<2 mg/L) and colistin (<2 mg/L).

CONCLUSIONS: The EclCluster-013 reported here represents an MDR E. cloacae complex ST78 strain containing an IncH12 plasmid carrying both the bla VIM-1 carbapenemase and the mcr-9 colistin resistance gene.}, } @article {pmid34222039, year = {2021}, author = {Cheng, C and Zhou, W and Dong, X and Zhang, P and Zhou, K and Zhou, D and Qian, C and Lin, X and Li, P and Li, K and Bao, Q and Xu, T and Lu, J and Ying, J}, title = {Genomic Analysis of Delftia tsuruhatensis Strain TR1180 Isolated From A Patient From China With In4-Like Integron-Associated Antimicrobial Resistance.}, journal = {Frontiers in cellular and infection microbiology}, volume = {11}, number = {}, pages = {663933}, pmid = {34222039}, issn = {2235-2988}, mesh = {*Anti-Bacterial Agents/pharmacology ; China ; Delftia ; Drug Resistance, Bacterial/genetics ; Female ; Genome, Bacterial ; Genomics ; Humans ; *Integrons ; Phylogeny ; }, abstract = {Delftia tsuruhatensis has become an emerging pathogen in humans. There is scant information on the genomic characteristics of this microorganism. In this study, we determined the complete genome sequence of a clinical D. tsuruhatensis strain, TR1180, isolated from a sputum specimen of a female patient in China in 2019. Phylogenetic and average nucleotide identity analysis demonstrated that TR1180 is a member of D. tsuruhatensis. TR1180 exhibited resistance to β-lactam, aminoglycoside, tetracycline and sulphonamide antibiotics, but was susceptible to phenicols, fluoroquinolones and macrolides. Its genome is a single, circular chromosome measuring 6,711,018 bp in size. Whole-genome analysis identified 17 antibiotic resistance-related genes, which match the antimicrobial susceptibility profile of this strain, as well as 24 potential virulence factors and a number of metal resistance genes. Our data showed that Delftia possessed an open pan-genome and the genes in the core genome contributed to the pathogenicity and resistance of Delftia strains. 
Comparative genomics analysis of TR1180 with other publicly available genomes of Delftia showed diverse genomic features among these strains. D. tsuruhatensis TR1180 harbored a unique 38-kb genomic island flanked by a pair of 29-bp direct repeats with the insertion of a novel In4-like integron containing most of the specific antibiotic resistance genes within the genome. This study reports the findings of a fully sequenced genome from clinical D. tsuruhatensis, which provide researchers and clinicians with valuable insights into this uncommon species.}, } @article {pmid34220930, year = {2021}, author = {Harrison, PW and Sokolov, A and Nayak, A and Fan, J and Zerbino, D and Cochrane, G and Flicek, P}, title = {The FAANG Data Portal: Global, Open-Access, "FAIR", and Richly Validated Genotype to Phenotype Data for High-Quality Functional Annotation of Animal Genomes.}, journal = {Frontiers in genetics}, volume = {12}, number = {}, pages = {639238}, pmid = {34220930}, issn = {1664-8021}, abstract = {The Functional Annotation of ANimal Genomes (FAANG) project is a worldwide coordinated action creating high-quality functional annotation of farmed and companion animal genomes. The generation of a rich genome-to-phenome resource and supporting informatic infrastructure advances the scope of comparative genomics and furthers the understanding of functional elements. The project also provides terrestrial and aquatic animal agriculture community powerful resources for supporting improvements to farmed animal production, disease resistance, and genetic diversity. The FAANG Data Portal (https://data.faang.org) ensures Findable, Accessible, Interoperable and Reusable (FAIR) open access to the wealth of sample, sequencing, and analysis data produced by an ever-growing number of FAANG consortia. It is developed and maintained by the FAANG Data Coordination Centre (DCC) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI). 
FAANG projects produce a standardised set of multi-omic assays with resulting data placed into a range of specialised open data archives. To ensure this data is easily findable and accessible by the community, the portal automatically identifies and collates all submitted FAANG data into a single easily searchable resource. The Data Portal supports direct download from the multiple underlying archives to enable seamless access to all FAANG data from within the portal itself. The portal provides a range of predefined filters, powerful predictive search, and a catalogue of sampling and analysis protocols and automatically identifies publications associated with any dataset. To ensure all FAANG data submissions are high-quality, the portal includes powerful contextual metadata validation and data submissions brokering to the underlying EMBL-EBI archives. The portal will incorporate extensive new technical infrastructure to effectively deliver and standardise FAANG's shift to single-cellomics, cell atlases, pangenomes, and novel phenotypic prediction models. The Data Portal plays a key role for FAANG by supporting high-quality functional annotation of animal genomes, through open FAIR sharing of data, complete with standardised rich metadata. 
Future Data Portal features developed by the DCC will support new technological developments for continued improvement for FAANG projects.}, } @article {pmid34220764, year = {2021}, author = {Koeksoy, E and Bezuidt, OM and Bayer, T and Chan, CS and Emerson, D}, title = {Zetaproteobacteria Pan-Genome Reveals Candidate Gene Cluster for Twisted Stalk Biosynthesis and Export.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {679409}, pmid = {34220764}, issn = {1664-302X}, abstract = {Twisted stalks are morphologically unique bacterial extracellular organo-metallic structures containing Fe(III) oxyhydroxides that are produced by microaerophilic Fe(II)-oxidizers belonging to the Betaproteobacteria and Zetaproteobacteria. Understanding the underlying genetic and physiological mechanisms of stalk formation is of great interest based on their potential as novel biogenic nanomaterials and their relevance as putative biomarkers for microbial Fe(II) oxidation on ancient Earth. Despite the recognition of these special biominerals for over 150 years, the genetic foundation for the stalk phenotype has remained unresolved. Here we present a candidate gene cluster for the biosynthesis and secretion of the stalk organic matrix that we identified with a trait-based analyses of a pan-genome comprising 16 Zetaproteobacteria isolate genomes. The "stalk formation in Zetaproteobacteria" (sfz) cluster comprises six genes (sfz1-sfz6), of which sfz1 and sfz2 were predicted with functions in exopolysaccharide synthesis, regulation, and export, sfz4 and sfz6 with functions in cell wall synthesis manipulation and carbohydrate hydrolysis, and sfz3 and sfz5 with unknown functions. 
The stalk-forming Betaproteobacteria Ferriphaselus R-1 and OYT-1, as well as dread-forming Zetaproteobacteria Mariprofundus aestuarium CP-5 and Mariprofundus ferrinatatus CP-8 contain distant sfz gene homologs, whereas stalk-less Zetaproteobacteria and Betaproteobacteria lack the entire gene cluster. Our pan-genome analysis further revealed a significant enrichment of clusters of orthologous groups (COGs) across all Zetaproteobacteria isolate genomes that are associated with the regulation of a switch between sessile and motile growth controlled by the intracellular signaling molecule c-di-GMP. Potential interactions between stalk-former unique transcription factor genes, sfz genes, and c-di-GMP point toward a c-di-GMP regulated surface attachment function of stalks during sessile growth.}, } @article {pmid34220118, year = {2021}, author = {Farace, PD and Irazoqui, JM and Morsella, CG and García, JA and Méndez, MA and Paolicchi, FA and Amadio, AF and Gioffré, AK}, title = {Phylogenomic analysis for Campylobacter fetus ocurring in Argentina.}, journal = {Veterinary world}, volume = {14}, number = {5}, pages = {1165-1179}, pmid = {34220118}, issn = {0972-8988}, abstract = {BACKGROUND AND AIM: Campylobacter fetus is one of the most important pathogens that severely affects livestock industry worldwide. C. fetus mediated bovine genital campylobacteriosis infection in cattle has been associated with significant economic losses in livestock production in the Pampas region, the most productive area of Argentina. The present study aimed to establish the genomic relationships between C. fetus strains, isolated from the Pampas region, at local and global levels. The study also explored the utility of multi-locus sequence typing (MLST) as a typing technique for C. fetus.

MATERIALS AND METHODS: For pangenome and phylogenetic analysis, whole genome sequences for 34 C. fetus strains, isolated from cattle in Argentina were downloaded from GenBank. A local maximum likelihood (ML) tree was constructed and linked to a Microreact project. In silico analysis based on MLST was used to obtain information regarding sequence type (ST) for each strain. For global phylogenetic analysis, a core genome ML-tree was constructed using genomic dataset for 265 C. fetus strains, isolated from various sources obtained from 20 countries.

RESULTS: The local core genome phylogenetic tree analysis described the presence of two major clusters (A and B) and one minor cluster (C). The occurrence of 82% of the strains in these three clusters suggested a clonal population structure for C. fetus. The MLST analysis for the local strains revealed that 31 strains were ST4 type and one strain was ST5 type. In addition, a new variant was identified that was assigned a novel ST, ST70. In the present case, ST4 was homogenously distributed across all the regions and clusters. The global analysis showed that most of the local strains clustered in the phylogenetic groups that comprised exclusively of the strains isolated from Argentina. Interestingly, three strains showed a close genetic relationship with bovine strains obtained from Uruguay and Brazil. The ST5 strain grouped in a distant cluster, with strains obtained from different sources from various geographic locations worldwide. Two local strains clustered in a phylogenetic group comprising intercontinental Campylobacter fetus venerealis strains.

CONCLUSION: The results of the study suggested active movement of animals, probably due to economic trade between different regions of the country as well as with neighboring countries. MLST results were partially concordant with phylogenetic analysis. Thus, this method did not qualify as a reliable subtyping method to assess C. fetus diversity in Argentina. The present study provided a basic platform to conduct future research on C. fetus, both at local and international levels.}, } @article {pmid34216519, year = {2022}, author = {Carpi, FM and Coman, MM and Silvi, S and Picciolini, M and Verdenelli, MC and Napolioni, V}, title = {Comprehensive pan-genome analysis of Lactiplantibacillus plantarum complete genomes.}, journal = {Journal of applied microbiology}, volume = {132}, number = {1}, pages = {592-604}, pmid = {34216519}, issn = {1365-2672}, mesh = {Genome, Bacterial ; Genomics ; Lactobacillaceae ; *Lactobacillus plantarum/genetics ; *Probiotics ; Sequence Analysis, DNA ; }, abstract = {AIMS: The aim of this work was to refine the taxonomy and the functional characterization of publicly available Lactiplantibacillus plantarum complete genomes through a pan-genome analysis. Particular attention was paid in depicting the probiotic potential of each strain.

METHODS AND RESULTS: Complete genome sequences of 127 L. plantarum strains, without detected anomalies, were downloaded from NCBI. Roary analysis of L. plantarum pan-genome identified 1436 core, 414 soft core, 1858 shell and 13,203 cloud genes, highlighting the 'open' nature of L. plantarum pan-genome. Identification and characterization of plasmid content, mobile genetic elements, adaptive immune system and probiotic marker genes (PMGs) revealed unique features across all the L. plantarum strains included in the present study. Considering our updated list of PMGs, we determined that approximately 70% of the PMGs belong to the core/soft-core genome.

CONCLUSIONS: The comparative genomic analysis conducted in this study provides new insights into the genomic content and variability of L. plantarum.

This study provides a comprehensive pan-genome analysis of L. plantarum, including the largest number (N = 127) of complete L. plantarum genomes retrieved from publicly available repositories. Our effort aimed to determine a solid reference panel for the future characterization of newly sequenced L. plantarum strains useful as probiotic supplements.}, } @article {pmid34215422, year = {2021}, author = {Kothe, CI and Bolotin, A and Kraïem, BF and Dridi, B and Renault, P}, title = {Unraveling the world of halophilic and halotolerant bacteria in cheese by combining cultural, genomic and metagenomic approaches.}, journal = {International journal of food microbiology}, volume = {358}, number = {}, pages = {109312}, doi = {10.1016/j.ijfoodmicro.2021.109312}, pmid = {34215422}, issn = {1879-3460}, mesh = {Animals ; Bacteria/genetics ; Brevibacterium ; Cattle ; *Cheese/analysis ; Metagenome ; Metagenomics ; *Microbiota ; RNA, Ribosomal, 16S/genetics ; Staphylococcus ; }, abstract = {Halophilic/halotolerant bacteria are generally assumed to live in natural environments, although they may also be found in foods such as cheese and seafood. These salt-loving bacteria have been occasionally characterized in cheese, and studies on their ecological and technological functions are still scarce. We therefore selected 13 traditional cheeses to systematically characterize these microorganisms in their rinds via cultural, genomic and metagenomic methods. Using different salt-based media, we identified 35 strains with unique 16S rRNA and rpoB gene sequences, whose whole genome was sequenced. Twenty are Gram-positive species including notably Brevibacterium aurantiacum (6) and Staphylococcus equorum (3), which are also frequently added as starters. ANI and pan-genomic analyses confirm the high genetic diversity of B. aurantiacum and reveal the presence of two subspecies in S. equorum, as well as the genetic proximity of several cheese strains to bovine isolates.
Additionally, we isolated 15 Gram-negative strains, potentially defining ten new species of halophilic/halotolerant cheese bacteria, in particular for the genera Halomonas and Psychrobacter. The use of all the genomes sequenced in this study as a reference to complement those existing in the databases allowed us to study the representativeness of 66 species of halophilic/halotolerant bacteria in 74 cheese rind metagenomes. While Gram-positive strains may flourish in the different types of technologies, Gram-negative species are particularly abundant in cheeses with high moisture, such as washed-rind cheeses. Finally, analyses of co-occurrences reveal assemblies, including the frequent coexistence of several species of the same genus, forming moderately complex ecosystems with functional redundancies that probably ensure stable cheese development.}, } @article {pmid34213964, year = {2021}, author = {Ge, T and Jiang, H and Tan, EH and Johnson, SB and Larkin, RP and Charkowski, AO and Secor, G and Hao, J}, title = {Pangenomic Analysis of Dickeya dianthicola Strains Related to the Outbreak of Blackleg and Soft Rot of Potato in the United States.}, journal = {Plant disease}, volume = {105}, number = {12}, pages = {3946-3955}, doi = {10.1094/PDIS-03-21-0587-RE}, pmid = {34213964}, issn = {0191-2917}, mesh = {Dickeya ; Disease Outbreaks ; Plant Diseases ; *Solanum tuberosum ; United States ; }, abstract = {Dickeya dianthicola has caused an outbreak of blackleg and soft rot of potato in the eastern half of the United States since 2015. To investigate genetic diversity of the pathogen, a comparative analysis was conducted on genomes of D. dianthicola strains. Whole genomes of 16 strains from the United States outbreak were assembled and compared with 16 previously sequenced genomes of D. dianthicola isolated from potato or carnation. Among the 32 strains, eight distinct clades were distinguished based on phylogenomic analysis. 
The outbreak strains were grouped into three clades, with the majority of the strains in clade I. Clade I strains were unique and homogeneous, suggesting a recent incursion of this strain into potato production from alternative hosts or environmental sources. The pangenome of the 32 strains contained 6,693 genes, 3,377 of which were core genes. By screening primary protein subunits associated with virulence from all U.S. strains, we found that many virulence-related gene clusters, such as plant cell wall degrading enzyme genes, flagellar and chemotaxis related genes, two-component regulatory genes, and type I/II/III secretion system genes, were highly conserved but that type IV and type VI secretion system genes varied. The clade I strains encoded two clusters of type IV secretion systems, whereas the clade II and III strains encoded only one cluster. Clade I and II strains encoded one more VgrG/PAAR spike protein than did clade III. Thus, we predicted that the presence of additional virulence-related genes may have enabled the unique clade I strain to become predominant in the U.S. outbreak.}, } @article {pmid34202389, year = {2021}, author = {Pintado, A and Pérez-Martínez, I and Aragón, IM and Gutiérrez-Barranquero, JA and de Vicente, A and Cazorla, FM and Ramos, C}, title = {The Rhizobacterium Pseudomonas alcaligenes AVO110 Induces the Expression of Biofilm-Related Genes in Response to Rosellinia necatrix Exudates.}, journal = {Microorganisms}, volume = {9}, number = {7}, pages = {}, pmid = {34202389}, issn = {2076-2607}, support = {AGL2017-83368-C2-1-R//Ministerio de Ciencia, Innovación y Universidades/ ; AGL2017-83368-C2-1-R//European Regional Development Fund/ ; UMA18-FEDERJA-046//Programa Operativo FEDER Andalucía 2014-2020/ ; }, abstract = {The rhizobacterium Pseudomonas alcaligenes AVO110 exhibits antagonism toward the phytopathogenic fungus Rosellinia necatrix. This strain efficiently colonizes R. necatrix hyphae and is able to feed on their exudates. 
Here, we report the complete genome sequence of P. alcaligenes AVO110. The phylogeny of all available P. alcaligenes genomes separates environmental isolates, including AVO110, from those obtained from infected human blood and oyster tissues, which cluster together with Pseudomonas otitidis. Core and pan-genome analyses showed that P. alcaligenes strains encode highly heterogenic gene pools, with the AVO110 genome encoding the largest and most exclusive variable region (~1.6 Mb, 1795 genes). The AVO110 singletons include a wide repertoire of genes related to biofilm formation, several of which are transcriptionally modulated by R. necatrix exudates. One of these genes (cmpA) encodes a GGDEF/EAL domain protein specific to Pseudomonas spp. strains isolated primarily from the rhizosphere of diverse plants, but also from soil and water samples. We also show that CmpA has a role in biofilm formation and that the integrity of its EAL domain is involved in this function. This study contributes to a better understanding of the niche-specific adaptations and lifestyles of P. 
alcaligenes, including the mycophagous behavior of strain AVO110.}, } @article {pmid34200775, year = {2021}, author = {Alouane, T and Rimbert, H and Bormann, J and González-Montiel, GA and Loesgen, S and Schäfer, W and Freitag, M and Langin, T and Bonhomme, L}, title = {Comparative Genomics of Eight Fusarium graminearum Strains with Contrasting Aggressiveness Reveals an Expanded Open Pangenome and Extended Effector Content Signatures.}, journal = {International journal of molecular sciences}, volume = {22}, number = {12}, pages = {}, pmid = {34200775}, issn = {1422-0067}, support = {ANR-15-CE21-0010//Agence Nationale de la Recherche/ ; 16-IDEX-0001 CAP 20-25//Agence Nationale de la Recherche/ ; MCB1818006//US National Science Foundation: NSF/ ; CH1808717//US National Science Foundation: NSF/ ; }, mesh = {Biological Evolution ; Computational Biology ; Fusarium/*genetics/pathogenicity ; *Genome, Fungal ; Genomics/*methods ; *Host-Pathogen Interactions ; Plant Diseases/*genetics/immunology/microbiology ; *Polymorphism, Single Nucleotide ; Quantitative Trait Loci ; Triticum/*microbiology ; }, abstract = {Fusarium graminearum, the primary cause of Fusarium head blight (FHB) in small-grain cereals, demonstrates remarkably variable levels of aggressiveness in its host, producing different infection dynamics and contrasted symptom severity. While the secreted proteins, including effectors, are thought to be one of the essential components of aggressiveness, our knowledge of the intra-species genomic diversity of F. graminearum is still limited. In this work, we sequenced eight European F. graminearum strains of contrasting aggressiveness to characterize their respective genome structure, their gene content and to delineate their specificities. By combining the available sequences of 12 other F. 
graminearum strains, we outlined a reference pangenome that expands the repertoire of the known genes in the reference PH-1 genome by 32%, including nearly 21,000 non-redundant sequences and gathering a common base of 9250 conserved core-genes. More than 1000 genes with high non-synonymous mutation rates may be under diverse selection, especially regarding the trichothecene biosynthesis gene cluster. About 900 secreted protein clusters (SPCs) have been described. Mostly localized in the fast sub-genome of F. graminearum supposed to evolve rapidly to promote adaptation and rapid responses to the host's infection, these SPCs gather a range of putative proteinaceous effectors systematically found in the core secretome, with the chloroplast and the plant nucleus as the main predicted targets in the host cell. This work describes new knowledge on the intra-species diversity in F. graminearum and emphasizes putative determinants of aggressiveness, providing a wealth of new candidate genes potentially involved in the Fusarium head blight disease.}, } @article {pmid34196733, year = {2021}, author = {Alseekh, S and Kostova, D and Bulut, M and Fernie, AR}, title = {Genome-wide association studies: assessing trait characteristics in model and crop plants.}, journal = {Cellular and molecular life sciences : CMLS}, volume = {78}, number = {15}, pages = {5743-5754}, pmid = {34196733}, issn = {1420-9071}, mesh = {Crops, Agricultural/*genetics ; Genetic Association Studies/methods ; Genome, Plant/*genetics ; Genome-Wide Association Study/methods ; Haplotypes/genetics ; Humans ; Linkage Disequilibrium/genetics ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {GWAS involves testing genetic variants across the genomes of many individuals of a population to identify genotype-phenotype association. It was initially developed and has proven highly successful in human disease genetics. 
In plants genome-wide association studies (GWAS) initially focused on single feature polymorphism and recombination and linkage disequilibrium but has now been embraced by a plethora of different disciplines with several thousand studies being published in model and crop species within the last decade or so. Here we will provide a comprehensive review of these studies providing cases studies on biotic resistance, abiotic tolerance, yield associated traits, and metabolic composition. We also detail current strategies of candidate gene validation as well as the functional study of haplotypes. Furthermore, we provide a critical evaluation of the GWAS strategy and its alternatives as well as future perspectives that are emerging with the emergence of pan-genomic datasets.}, } @article {pmid34195571, year = {2021}, author = {Ahmed, O and Rossi, M and Kovaka, S and Schatz, MC and Gagie, T and Boucher, C and Langmead, B}, title = {Pan-genomic matching statistics for targeted nanopore sequencing.}, journal = {iScience}, volume = {24}, number = {6}, pages = {102696}, pmid = {34195571}, issn = {2589-0042}, support = {R01 AI141810/AI/NIAID NIH HHS/United States ; R01 HG011392/HG/NHGRI NIH HHS/United States ; }, abstract = {Nanopore sequencing is an increasingly powerful tool for genomics. Recently, computational advances have allowed nanopores to sequence in a targeted fashion; as the sequencer emits data, software can analyze the data in real time and signal the sequencer to eject "nontarget" DNA molecules. We present a novel method called SPUMONI, which enables rapid and accurate targeted sequencing using efficient pan-genome indexes. SPUMONI uses a compressed index to rapidly generate exact or approximate matching statistics in a streaming fashion. When used to target a specific strain in a mock community, SPUMONI has similar accuracy as minimap2 when both are run against an index containing many strains per species. However SPUMONI is 12 times faster than minimap2. 
SPUMONI's index and peak memory footprint are also 16 to 4 times smaller than those of minimap2, respectively. This could enable accurate targeted sequencing even when the targeted strains have not necessarily been sequenced or assembled previously.}, } @article {pmid34194415, year = {2021}, author = {Li, Y and Wang, M and Sun, ZZ and Xie, BB}, title = {Comparative Genomic Insights Into the Taxonomic Classification, Diversity, and Secondary Metabolic Potentials of Kitasatospora, a Genus Closely Related to Streptomyces.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {683814}, pmid = {34194415}, issn = {1664-302X}, abstract = {While the genus Streptomyces (family Streptomycetaceae) has been studied as a model for bacterial secondary metabolism and genetics, its close relatives have been less studied. The genus Kitasatospora is the second largest genus in the family Streptomycetaceae. However, its taxonomic position within the family remains under debate and the secondary metabolic potential remains largely unclear. Here, we performed systematic comparative genomic and phylogenomic analyses of Kitasatospora. Firstly, the three genera within the family Streptomycetaceae (Kitasatospora, Streptomyces, and Streptacidiphilus) showed common genomic features, including high G + C contents, high secondary metabolic potentials, and high recombination frequencies. Secondly, phylogenomic and comparative genomic analyses revealed phylogenetic distinctions and genome content differences among these three genera, supporting Kitasatospora as a separate genus within the family. Lastly, the pan-genome analysis revealed extensive genetic diversity within the genus Kitasatospora, while functional annotation and genome content comparison suggested genomic differentiation among lineages. 
This study provided new insights into genomic characteristics of the genus Kitasatospora, and also uncovered its previously underestimated and complex secondary metabolism.}, } @article {pmid34188040, year = {2021}, author = {Köstlbacher, S and Collingro, A and Halter, T and Schulz, F and Jungbluth, SP and Horn, M}, title = {Pangenomics reveals alternative environmental lifestyles among chlamydiae.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {4021}, pmid = {34188040}, issn = {2041-1723}, mesh = {Acanthamoeba/microbiology ; Animals ; Chlamydia/classification/*genetics/isolation & purification ; Citric Acid Cycle/*genetics ; Ecosystem ; Fishes/parasitology ; Genome, Bacterial/*genetics ; Gills/parasitology ; Hydrolases/metabolism ; Metagenome/*genetics ; Phylogeny ; Pyruvic Acid/metabolism ; Whole Genome Sequencing ; }, abstract = {Chlamydiae are highly successful strictly intracellular bacteria associated with diverse eukaryotic hosts. Here we analyzed metagenome-assembled genomes of the "Genomes from Earth's Microbiomes" initiative from diverse environmental samples, which almost double the known phylogenetic diversity of the phylum and facilitate a highly resolved view at the chlamydial pangenome. Chlamydiae are defined by a relatively large core genome indicative of an intracellular lifestyle, and a highly dynamic accessory genome of environmental lineages. We observe chlamydial lineages that encode enzymes of the reductive tricarboxylic acid cycle and for light-driven ATP synthesis. We show a widespread potential for anaerobic energy generation through pyruvate fermentation or the arginine deiminase pathway, and we add lineages capable of molecular hydrogen production. Genome-informed analysis of environmental distribution revealed lineage-specific niches and a high abundance of chlamydiae in some habitats. 
Together, our data provide an extended perspective of the variability of chlamydial biology and the ecology of this phylum of intracellular microbes.}, } @article {pmid34183185, year = {2021}, author = {Gupta, PK}, title = {Quantitative genetics: pan-genomes, SVs, and k-mers for GWAS.}, journal = {Trends in genetics : TIG}, volume = {37}, number = {10}, pages = {868-871}, doi = {10.1016/j.tig.2021.05.006}, pmid = {34183185}, issn = {0168-9525}, mesh = {Animals ; Genome/genetics ; Genome-Wide Association Study/*methods/*standards ; Humans ; Phenotype ; Plants/genetics ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {For identification of marker-trait associations (MTAs) for complex traits in animals and plants, thousands of genome-wide association studies (GWAS) were conducted during the past two decades. This involved regular improvement in methodology. Initially, a reference genome and SNPs were used; more recently pan-genomes and the markers structural variations (SVs)/k-mers are also being used.}, } @article {pmid34181213, year = {2021}, author = {Zhou, Q and Mai, K and Yang, D and Liu, J and Yan, Z and Luo, C and Tan, Y and Cao, S and Zhou, Q and Chen, L and Chen, F}, title = {Comparative genomic analysis of Mycoplasma anatis strains.}, journal = {Genes & genomics}, volume = {43}, number = {11}, pages = {1327-1337}, pmid = {34181213}, issn = {2092-9293}, mesh = {Base Sequence ; China ; *Comparative Genomic Hybridization ; *Genome, Bacterial ; Molecular Sequence Annotation ; Mycoplasma/classification/*genetics ; Phylogeny ; Prophages/genetics ; Sequence Analysis, DNA ; Vaccine Development ; Virulence ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {BACKGROUND: The Gram-negative intracellular bacterium Mycoplasma anatis is a pathogen of respiratory infectious diseases in ducks and has caused significant economic losses in the poultry industry.

OBJECTIVE: This study, as the first report of the structure and function of the pan-genome of Mycoplasma anatis, may provide a valuable genetic basis for many aspects of future research on the pathogens of waterfowl.

METHODS: We sequenced the whole genomes of 15 Mycoplasma anatis isolated from ducks in China. Draft genome sequencing was carried out and whole-genome sequencing was performed by the sequencers of the PacBio Sequel and an IonTorrent Personal Genome Machine (PGM). Then the common genic elements of protein-coding genes, tRNAs, and rRNAs of Mycoplasma anatis genomes were predicted by using the pipeline Prokka v1.13.7. To investigate homologous protein clusters across Mycoplasma anatis genomes, we adopted Roary v3.13.0 to cluster orthologous genes (OGs) based on the following criteria.

RESULTS: We obtained one complete genome and 14 genome sketches. Microbial mobile genetic element analysis revealed the distribution of insertion sequences (IS30, IS3, and IS1634), prophage regions, and CRISPR arrays in the genome of Mycoplasma anatis. Comparative genomic analysis decoded the genetic components and functional classification of the pan-genome of Mycoplasma anatis that comprised 646 core genes and 231 dispensable genes, among which 110 were strain-specific. Virulence-related gene profiles of Mycoplasma anatis were systematically identified, and the products of these genes included bacterial ABC transporter systems, iron transport proteins, toxins, and secretion systems.

CONCLUSION: A complete virulence-related gene profile of Mycoplasma anatis has been identified; most of the genes are highly conserved in all strains. Sequencing results are relevant to the molecular mechanisms of drug resistance, adaptive evolution of pathogens, population structure, and vaccine development.}, } @article {pmid34177846, year = {2021}, author = {Tláskal, V and Pylro, VS and Žifčáková, L and Baldrian, P}, title = {Ecological Divergence Within the Enterobacterial Genus Sodalis: From Insect Symbionts to Inhabitants of Decomposing Deadwood.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {668644}, pmid = {34177846}, issn = {1664-302X}, abstract = {The bacterial genus Sodalis is represented by insect endosymbionts as well as free-living species. While the former have been studied frequently, the distribution of the latter is not yet clear. Here, we present a description of a free-living strain, Sodalis ligni sp. nov., originating from decomposing deadwood. The favored occurrence of S. ligni in deadwood is confirmed by both 16S rRNA gene distribution and metagenome data. Pangenome analysis of available Sodalis genomes shows at least three groups within the Sodalis genus: deadwood-associated strains, tsetse fly endosymbionts and endosymbionts of other insects. This differentiation is consistent in terms of the gene frequency level, genome similarity and carbohydrate-active enzyme composition of the genomes. Deadwood-associated strains contain genes for active decomposition of biopolymers of plant and fungal origin and can utilize more diverse carbon sources than their symbiotic relatives. Deadwood-associated strains, but not other Sodalis strains, have the genetic potential to fix N2, and the corresponding genes are expressed in deadwood. Nitrogenase genes are located within the genomes of Sodalis, including S. ligni, at multiple loci represented by more gene variants. 
We show decomposing wood to be a previously undescribed habitat of the genus Sodalis that appears to show striking ecological divergence.}, } @article {pmid34170219, year = {2021}, author = {Zhao, Y and Chen, X and Hu, X and Shi, Y and Zhao, X and Xu, J and Ding, H and Wu, R and Huang, J and Zhao, Z}, title = {Characterization of a carbapenem-resistant Citrobacter amalonaticus coharbouring bla IMP-4 and qnrs1 genes.}, journal = {Journal of medical microbiology}, volume = {70}, number = {6}, pages = {}, doi = {10.1099/jmm.0.001364}, pmid = {34170219}, issn = {1473-5644}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/*genetics ; Carbapenem-Resistant Enterobacteriaceae/classification/drug effects/*genetics/isolation & purification ; Citrobacter/classification/drug effects/*genetics/isolation & purification ; Conjugation, Genetic ; DNA, Bacterial/genetics ; Drug Resistance, Multiple/drug effects/*genetics ; Enterobacteriaceae Infections/microbiology ; Genome, Bacterial/genetics ; Humans ; Microbial Sensitivity Tests ; Phylogeny ; Plasmids/genetics ; RNA, Ribosomal, 16S/genetics ; beta-Lactamases/genetics ; }, abstract = {Introduction. Members of the genus Citrobacter are facultative anaerobic Gram-negative bacilli belonging to the Enterobacterales [Janda J Clin Microbiol 1994; 32(8):1850-1854; Arens Clin Microbiol Infect 1997;3(1):53-57]. Formerly, Citrobacter species were occasionally reported as nosocomial pathogens with low virulence [Pepperell Antimicrob Agents Chemother 2002;46(11):3555-60]. Now, they are consistently reported to cause nosocomial infections of the urinary tract, respiratory tract, bone, peritoneum, endocardium, meninges, intestines, bloodstream and central nervous system. Among Citrobacter species, the most common isolates are C. koseri and C. freundii, while C. amalonaticus has seldom been isolated [Janda J Clin Microbiol 1994; 32(8):1850-1854; Marak Infect Dis (Lond) 2017;49(7):532-9]. Further, Citrobacter spp. 
are usually susceptible to carbapenems, aminoglycosides, tetracyclines and colistin [Marak Infect Dis (Lond) 2017;49(7):532-9].Hypothesis/Gap Statement. As C. amalonaticus is rare, only one clinical isolate, coharbouring carbapenem resistance gene bla IMP-4 and quinolone resistance gene qnrs1, has been reported.Aim. To characterize a carbapenem-resistant C. amalonaticus strain from PR China coharbouring bla IMP-4 and qnrs1.Methodology. Three hundred and forty nonrepetitive carbapenem-resistant Enterobacterales (CRE) strains were collected during 2011-2018. A carbapenem-resistant C. amalonaticus strain was detected and confirmed using a VITEK mass spectrometry-based microbial identification system and 16S rRNA sequencing. Minimum inhibitory concentrations (MICs) for clinical antimicrobials were obtained by the broth microdilution method. Whole-genome sequencing (WGS) was performed for antibiotic resistance gene analysis, and a phylogenetic tree of C. amalonaticus strains was constructed using the Bacterial Pan Genome Analysis (BPGA) tool. The transferability of the resistance plasmid was verified by conjugal transfer.Results. A rare carbapenem-resistant C. amalonaticus strain (CA71) was recovered from a patient with cerebral obstruction and the sequences of 16S rRNA gene shared more than 99 % similarity with C. amalonaticus CITRO86, FDAARGOS 165. CA71 is resistant to β-lactam, quinolone and aminoglycoside antibiotics, and even imipenem and meropenem (MICs of 2 and 4 mg l[-1] respectively), and is only sensitive to polymyxin B and tigecycline. Six antibiotic resistance genes were detected via WGS, including the β-lactam genes bla IMP-4, bla CTX-M-18 and bla Sed1, the quinolone gene qnrs1, and the aminoglycoside genes AAC(3)-VIIIa, AadA24. Interestingly, bla IMP-4 and qnrs1 coexist on an IncN1-type plasmid (pCA71-IMP) and successfully transferred to Escherichia coli J53 via conjugal transfer. Phylogenetic analysis showed that CA71 is most similar to C. 
amalonaticus strain CJ25 and belongs to the same evolutionary cluster along with seven other strains.Conclusion. To the best of our knowledge, this is the first report of a carbapenem-resistant C. amalonaticus isolate coharbouring bla IMP-4 and qnrs1.}, } @article {pmid34169673, year = {2022}, author = {Bayer, PE and Valliyodan, B and Hu, H and Marsh, JI and Yuan, Y and Vuong, TD and Patil, G and Song, Q and Batley, J and Varshney, RK and Lam, HM and Edwards, D and Nguyen, HT}, title = {Sequencing the USDA core soybean collection reveals gene loss during domestication and breeding.}, journal = {The plant genome}, volume = {15}, number = {1}, pages = {e20109}, doi = {10.1002/tpg2.20109}, pmid = {34169673}, issn = {1940-3372}, mesh = {*Domestication ; *Fabaceae/genetics ; Genome, Plant ; Plant Breeding ; Soybeans/genetics ; United States ; United States Department of Agriculture ; }, abstract = {The gene content of plants varies between individuals of the same species due to gene presence/absence variation, and selection can alter the frequency of specific genes in a population. Selection during domestication and breeding will modify the genomic landscape, though the nature of these modifications is only understood for specific genes or on a more general level (e.g., by a loss of genetic diversity). Here we have assembled and analyzed a soybean (Glycine spp.) pangenome representing more than 1,000 soybean accessions derived from the USDA Soybean Germplasm Collection, including both wild and cultivated lineages, to assess genomewide changes in gene and allele frequency during domestication and breeding. We identified 3,765 genes that are absent from the Lee reference genome assembly and assessed the presence/absence of all genes across this population. 
In addition to a loss of genetic diversity, we found a significant reduction in the average number of protein-coding genes per individual during domestication and subsequent breeding, though with some genes and allelic variants increasing in frequency associated with selection for agronomic traits. This analysis provides a genomic perspective of domestication and breeding in this important oilseed crop.}, } @article {pmid34168196, year = {2021}, author = {Shahid, F and Zaheer, T and Ashraf, ST and Shehroz, M and Anwer, F and Naz, A and Ali, A}, title = {Chimeric vaccine designs against Acinetobacter baumannii using pan genome and reverse vaccinology approaches.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {13213}, pmid = {34168196}, issn = {2045-2322}, mesh = {Acinetobacter Infections/immunology ; Acinetobacter baumannii/*immunology ; Amino Acid Sequence ; Anti-Bacterial Agents/immunology ; Bacterial Vaccines/*immunology ; Computational Biology/methods ; Cross Infection/immunology ; Epitopes/immunology ; Genome, Bacterial/immunology ; Genomics/methods ; Proteomics/methods ; Vaccinology/methods ; }, abstract = {Acinetobacter baumannii (A. baumannii), an opportunistic, gram-negative pathogen, has evoked the interest of the medical community throughout the world because of its ability to cause nosocomial infections, majorly infecting those in intensive care units. It has also drawn the attention of researchers due to its evolving immune evasion strategies and increased drug resistance. The emergence of multi-drug-resistant-strains has urged the need to explore novel therapeutic options as an alternative to antibiotics. Due to the upsurge in antibiotic resistance mechanisms exhibited by A. baumannii, the current therapeutic strategies are rendered less effective. The aim of this study is to explore novel therapeutic alternatives against A. baumannii to control the ailed infection. 
In this study, a computational framework is employed involving, pan genomics, subtractive proteomics and reverse vaccinology strategies to identify core promiscuous vaccine candidates. Two chimeric vaccine constructs having B-cell derived T-cell epitopes from prioritized vaccine candidates; APN, AdeK and AdeI have been designed and checked for their possible interactions with host BCR, TLRs and HLA Class I and II Superfamily alleles. These vaccine candidates can be experimentally validated and thus contribute to vaccine development against A. baumannii infections.}, } @article {pmid34163450, year = {2021}, author = {Tenea, GN and Hurtado, P}, title = {Next-Generation Sequencing for Whole-Genome Characterization of Weissella cibaria UTNGt21O Strain Originated From Wild Solanum quitoense Lam. Fruits: An Atlas of Metabolites With Biotechnological Significance.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {675002}, pmid = {34163450}, issn = {1664-302X}, abstract = {The whole genome of Weissella cibaria strain UTNGt21O isolated from wild fruits of Solanum quitoense (naranjilla) shrub was sequenced and annotated. The similarity proportions based on the genus level, as a result of the best hits for the entire contig, were 54.84% with Weissella, 6.45% with Leuconostoc, 3.23% with Lactococcus, and 35.48% no match. The closest genome was W. cibaria SP7 (GCF_004521965.1) with 86.21% average nucleotide identity (ANI) and 3.2% alignment coverage. The genome contains 1,867 protein-coding genes, among which 1,620 were assigned with the EggNOG database. On the basis of the results, 438 proteins were classified with unknown function from which 247 new hypothetical proteins have no match in the nucleotide Basic Local Alignment Search Tool (BLASTN) database. It also contains 78 tRNAs, six copies of 5S rRNA, one copy of 16S rRNA, one copy of 23S rRNA, and one copy of tmRNA. The W. 
cibaria UTNGt21O strain harbors several genes responsible for carbohydrate metabolism, cellular process, general stress responses, cofactors, and vitamins, conferring probiotic features. A pangenome analysis indicated the presence of various strain-specific genes encoded for proteins responsible for the defense mechanisms as well as gene encoded for enzymes with biotechnological value, such as penicillin acylase and folates; thus, W. cibaria exhibited high genetic diversity. The genome characterization indicated the presence of a putative CRISPR-Cas array and five prophage regions and the absence of acquired antibiotic resistance genes, virulence, and pathogenic factors; thus, UTNGt21O might be considered a safe strain. Besides, the interaction between the peptide extracts from UTNGt21O and Staphylococcus aureus results in cell death caused by the target cell integrity loss and the release of aromatic molecules from the cytoplasm. The results indicated that W. cibaria UTNGt21O can be considered a beneficial strain to be further exploited for developing novel antimicrobials and probiotic products with improved technological characteristics.}, } @article {pmid34163443, year = {2021}, author = {Lawal, OU and Barata, M and Fraqueza, MJ and Worning, P and Bartels, MD and Goncalves, L and Paixão, P and Goncalves, E and Toscano, C and Empel, J and Urbaś, M and Domínguez, MA and Westh, H and de Lencastre, H and Miragaia, M}, title = {Staphylococcus saprophyticus From Clinical and Environmental Origins Have Distinct Biofilm Composition.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {663768}, pmid = {34163443}, issn = {1664-302X}, abstract = {Biofilm formation has been shown to be critical to the success of uropathogens. Although Staphylococcus saprophyticus is a common cause of urinary tract infections, its biofilm production capacity, composition, genetic basis, and origin are poorly understood. 
We investigated biofilm formation in a large and diverse collection of S. saprophyticus (n = 422). Biofilm matrix composition was assessed in representative strains (n = 63) belonging to two main S. saprophyticus lineages (G and S) recovered from human infection, colonization, and food-related environment using biofilm detachment approach. To identify factors that could be associated with biofilm formation and structure variation, we used a pangenome-wide association study approach. Almost all the isolates (91%; n = 384/422) produced biofilm. Among the 63 representative strains, we identified eight biofilm matrix phenotypes, but the most common were composed of protein or protein-extracellular DNA (eDNA)-polysaccharides (38%, 24/63 each). Biofilms containing protein-eDNA-polysaccharides were linked to lineage G and environmental isolates, whereas protein-based biofilms were produced by lineage S and infection isolates (p < 0.05). Putative biofilm-associated genes, namely, aas, atl, ebpS, uafA, sasF, sasD, sdrH, splE, sdrE, sdrC, sraP, and ica genes, were found with different frequencies (3-100%), but there was no correlation between their presence and biofilm production or matrix types. Notably, icaC_1 was ubiquitous in the collection, while icaR was lineage G-associated, and only four strains carried a complete ica gene cluster (icaADBCR) except one that was without icaR. We provided evidence, using a comparative genomic approach, that the complete icaADBCR cluster was acquired multiple times by S. saprophyticus and originated from other coagulase-negative staphylococci. Overall, the composition of S. saprophyticus biofilms was distinct in environmental and clinical isolates, suggesting that modulation of biofilm structure could be a key step in the pathogenicity of these bacteria. Moreover, biofilm production in S. 
saprophyticus is ica-independent, and the complete icaADBCR was acquired from other staphylococci.}, } @article {pmid34161571, year = {2021}, author = {Guignon, V and Toure, A and Droc, G and Dufayard, JF and Conte, M and Rouard, M}, title = {Correction to 'GreenPhylDB v5: a comparative pangenomic database for plant genomes'.}, journal = {Nucleic acids research}, volume = {49}, number = {12}, pages = {7203}, pmid = {34161571}, issn = {1362-4962}, } @article {pmid34159268, year = {2021}, author = {Zhang, S and Amanze, C and Sun, C and Zou, K and Fu, S and Deng, Y and Liu, X and Liang, Y}, title = {Evolutionary, genomic, and biogeographic characterization of two novel xenobiotics-degrading strains affiliated with Dechloromonas.}, journal = {Heliyon}, volume = {7}, number = {6}, pages = {e07181}, pmid = {34159268}, issn = {2405-8440}, abstract = {Xenobiotics are generally known as man-made refractory organic pollutants widely distributed in various environments. For exploring the bioremediation possibility of xenobiotics, two novel xenobiotics-degrading strains affiliated with Azonexaceae were isolated. We report here the phylogenetics, genome, and geo-distribution of a novel and ubiquitous Azonexaceae species that primarily joins in the cometabolic process of some xenobiotics in natural communities. Strains s22 and t15 could be proposed as a novel species within Dechloromonas based on genomic and multi-phylogenetic analysis. Pan-genome analysis showed that the 63 core genes in Dechloromonas include genes for dozens of metabolisms such as nitrogen fixation protein (nifU), nitrogen regulatory protein (glnK), dCTP deaminase, C4-dicarboxylate transporter, and fructose-bisphosphate aldolase. Strains s22 and t15 have the ability to metabolize nitrogen, including nitrogen fixation, NirS-dependent denitrification, and dissimilatory nitrate reduction. 
Moreover, the novel species possesses the EnvZ-OmpR two-component system for controlling osmotic stress and QseC-QseB system for quorum sensing to rapidly sense environmental changes. It is intriguing that this new species has a series of genes for the biodegradation of some xenobiotics such as azathioprine, 6-Mercaptopurine, trinitrotoluene, chloroalkane, and chloroalkene. Specifically, glutathione S-transferase (GST) and 4-oxalocrotonate tautomerase (praC) in this novel species play important roles in the detoxification metabolism of some xenobiotics like dioxin, trichloroethene, chloroacetyl chloride, benzo[a]pyrene, and aflatoxin B1. Using data from GenBank, DDBJ and EMBL databases, we also demonstrated that members of this novel species were found globally in plants (e.g. rice), guts (e.g. insect), pristine and contaminated regions. Given these data, Dechloromonas sp. strains s22 and t15 take part in the biodegradation of some xenobiotics through key enzymes.}, } @article {pmid34153410, year = {2021}, author = {da Silva, JGV and Vieira, AT and Sousa, TJ and Viana, MVC and Parise, D and Sampaio, B and da Silva, AL and de Jesus, LCL and de Carvalho, PKRML and de Castro Oliveira, L and Aburjaile, FF and Martins, FS and Nicoli, JR and Ghosh, P and Brenig, B and Azevedo, V and Gomide, ACP}, title = {Comparative genomics and in silico gene evaluation involved in the probiotic potential of Bifidobacterium longum 5[1A].}, journal = {Gene}, volume = {795}, number = {}, pages = {145781}, doi = {10.1016/j.gene.2021.145781}, pmid = {34153410}, issn = {1879-0038}, mesh = {Acetates/*metabolism ; Base Sequence ; Bifidobacterium longum/classification/*genetics/*metabolism ; Carbohydrate Metabolism/*genetics ; Child ; Computer Simulation ; Feces/microbiology ; *Genes, Bacterial ; Genomics ; High-Throughput Nucleotide Sequencing ; Humans ; Phylogeny ; Probiotics/*metabolism ; Sequence Analysis, DNA ; }, abstract = {The Bifidobacterium longum 5[1A] strain, isolated from feces 
of a healthy child, has demonstrated probiotic properties by in vivo and in vitro studies, which may be assigned to its production of metabolites such as acetate. Thus, through the study of comparative genomics, the present work sought to identify unique genes that might be related to the production of acetate. To perform the study, the DNA strain was sequenced using Illumina HiSeq technology, followed by assembly and manual curation of coding sequences. Comparative analysis was performed including 19 complete B. longum genomes available in Genbank/NCBI. In the phylogenetic analysis, the CECT 7210 and 157F strains of B. longum subsp. infantis aggregated within the subsp. longum cluster, suggesting that their taxonomic classification should be reviewed. The strain 5[1A] of B. longum has 26 unique genes, six of which are possibly related to carbohydrate metabolism and acetate production. The phosphoketolase pathway from B. longum 5[1A] showed a difference in acetyl-phosphate production. This result seems to corroborate the analysis of their unique genes, whose presence suggests the strain may use different sources of carbohydrates that allow a greater production of acetate and consequently offer benefits to the host health.}, } @article {pmid34149639, year = {2021}, author = {Sahmi-Bounsiar, D and Rolland, C and Aherfi, S and Boudjemaa, H and Levasseur, A and La Scola, B and Colson, P}, title = {Marseilleviruses: An Update in 2021.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {648731}, pmid = {34149639}, issn = {1664-302X}, abstract = {The family Marseilleviridae was the second family of giant viruses that was described in 2013, after the family Mimiviridae. Marseillevirus marseillevirus, isolated in 2007 by coculture on Acanthamoeba polyphaga, is the prototype member of this family. Afterward, the worldwide distribution of marseilleviruses was revealed through their isolation from samples of various types and sources. 
Thus, 62 were isolated from environmental water, one from soil, one from a dipteran, one from mussels, and two from asymptomatic humans, which led to the description of 67 marseillevirus isolates, including 21 by the IHU Méditerranée Infection in France. Recently, five marseillevirus genomes were assembled from deep sea sediment in Norway. Isolated marseilleviruses have ≈250 nm long icosahedral capsids and 348-404 kilobase long mosaic genomes that encode 386-545 predicted proteins. Comparative genomic analyses indicate that the family Marseilleviridae includes five lineages and possesses a pangenome composed of 3,082 clusters of genes. The detection of marseilleviruses in both symptomatic and asymptomatic humans in stool, blood, and lymph nodes, and an up-to-30-day persistence of marseillevirus in rats and mice, raise questions concerning their possible clinical significance that are still under investigation.}, } @article {pmid34147652, year = {2021}, author = {Silvestre, I and Nunes, A and Borges, V and Isidro, J and Silva, C and Vieira, L and Gomes, JP and Borrego, MJ}, title = {Genomic insights on DNase production in Streptococcus agalactiae ST17 and ST19 strains.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {93}, number = {}, pages = {104969}, doi = {10.1016/j.meegid.2021.104969}, pmid = {34147652}, issn = {1567-7257}, mesh = {Bacterial Proteins/*genetics/metabolism ; Deoxyribonucleases/*genetics/metabolism ; Genome, Bacterial ; Streptococcus agalactiae/enzymology/*genetics ; }, abstract = {Streptococcus agalactiae evasion from the human defense mechanisms has been linked to the production of DNases. These were proposed to contribute to the hypervirulence of S. agalactiae ST17/capsular-type III strains, mostly associated with neonatal meningitis. 
We performed a comparative genomic analysis between ST17 and ST19 human strains with different cell tropism and distinct DNase production phenotypes. All S. agalactiae ST17 strains, with the exception of 2211-04, were found to display DNase activity, while the opposite scenario was observed for ST19, where 1203-05 was the only DNase(+) strain. The analysis of the genetic variability of the seven genes putatively encoding secreted DNases in S. agalactiae revealed an exclusive amino acid change in the predicted signal peptide of GBS0661 (NucA) of the ST17 DNase(-), and an exclusive amino acid change alteration in GBS0609 of the ST19 DNase(+) strain. Further core-genome analysis identified some specificities (SNVs or indels) differentiating the DNase(-) ST17 2211-04 and the DNase(+) ST19 1203-05 from the remaining strains of each ST. The pan-genomic analysis evidenced an intact phage without homology in S. agalactiae and a transposon homologous to TnGBS2.3 in ST17 DNase(-) 2211-04; the transposon was also found in one ST17 DNase(+) strain, yet with a different site of insertion. A group of nine accessory genes were identified among all ST17 DNase(+) strains, including the Eco47II family restriction endonuclease and the C-5 cytosine-specific DNA methylase. None of these loci was found in any DNase(-) strain, which may suggest that these proteins might contribute to the lack of DNase activity. In summary, we provide novel insights on the genetic diversity between DNase(+) and DNase(-) strains, and identified genetic traits, namely specific mutations affecting predicted DNases (NucA and GBS0609) and differences in the accessory genome, that need further investigation as they may justify distinct DNase-related virulence phenotypes in S. 
agalactiae.}, } @article {pmid34140962, year = {2021}, author = {Ruperao, P and Thirunavukkarasu, N and Gandham, P and Selvanayagam, S and Govindaraj, M and Nebie, B and Manyasa, E and Gupta, R and Das, RR and Odeny, DA and Gandhi, H and Edwards, D and Deshpande, SP and Rathore, A}, title = {Sorghum Pan-Genome Explores the Functional Utility for Genomic-Assisted Breeding to Accelerate the Genetic Gain.}, journal = {Frontiers in plant science}, volume = {12}, number = {}, pages = {666342}, pmid = {34140962}, issn = {1664-462X}, abstract = {Sorghum (Sorghum bicolor L.) is a staple food crop in the arid and rainfed production ecologies. Sorghum plays a critical role in resilient farming and is projected as a smart crop to overcome the food and nutritional insecurity in the developing world. The development and characterisation of the sorghum pan-genome will provide insight into genome diversity and functionality, supporting sorghum improvement. We built a sorghum pan-genome using reference genomes as well as 354 genetically diverse sorghum accessions belonging to different races. We explored the structural and functional characteristics of the pan-genome and explain its utility in supporting genetic gain. The newly-developed pan-genome has a total of 35,719 genes, a core genome of 16,821 genes and an average of 32,795 genes in each cultivar. The variable genes are enriched with environment responsive genes and classify the sorghum accessions according to their race. We show that 53% of genes display presence-absence variation, and some of these variable genes are predicted to be functionally associated with drought adaptation traits. Using more than two million SNPs from the pan-genome, association analysis identified 398 SNPs significantly associated with important agronomic traits, of which, 92 were in genes. 
Drought gene expression analysis identified 1,788 genes that are functionally linked to different conditions, of which 79 were absent from the reference genome assembly. This study provides comprehensive genomic diversity resources in sorghum which can be used in genome assisted crop improvement.}, } @article {pmid34137092, year = {2021}, author = {Vekemans, X and Castric, V and Hipperson, H and Müller, NA and Westerdahl, H and Cronk, Q}, title = {Whole-genome sequencing and genome regions of special interest: Lessons from major histocompatibility complex, sex determination, and plant self-incompatibility.}, journal = {Molecular ecology}, volume = {30}, number = {23}, pages = {6072-6086}, pmid = {34137092}, issn = {1365-294X}, support = {ANR-18-CE02-0020-01//Agence Nationale de la Recherche/ ; DFG MU 4357/1-1//German Research Foundation/ ; RGPIN-2014-05820//Natural Sciences and Engineering Research Council of Canada (NSERC) Discovery Grants Program/ ; 648321 & 679799/ERC_/European Research Council/International ; }, mesh = {Genomics ; *High-Throughput Nucleotide Sequencing ; *Major Histocompatibility Complex/genetics ; Sequence Analysis, DNA ; Whole Genome Sequencing ; }, abstract = {Whole-genome sequencing of non-model organisms is now widely accessible and has allowed a range of questions in the field of molecular ecology to be investigated with greater power. However, some genomic regions that are of high biological interest remain problematic for assembly and data-handling. Three such regions are the major histocompatibility complex (MHC), sex-determining regions (SDRs) and the plant self-incompatibility locus (S-locus). Using these as examples, we illustrate the challenges of both assembling and resequencing these highly polymorphic regions and how bioinformatic and technological developments are enabling new approaches to their study. 
Mapping short-read sequences against multiple alternative references improves genotyping comprehensiveness at the S-locus thereby contributing to more accurate assessments of allelic frequencies. Long-read sequencing, producing reads of several tens to hundreds of kilobase pairs in length, facilitates the assembly of such regions as single sequences can span the multiple duplicated gene copies of the MHC region, and sequence through repetitive stretches and translocations in SDRs and S-locus haplotypes. These advances are adding value to short-read genome resequencing approaches by allowing, for example, more accurate haplotype phasing across longer regions. Finally, we assessed further technical improvements, such as nanopore adaptive sequencing and bioinformatic tools using pangenomes, which have the potential to further expand our knowledge of a number of genomic regions that remain challenging to study with classical resequencing approaches.}, } @article {pmid34136552, year = {2021}, author = {Zheng, L and Zhu, LW and Jing, J and Guan, JY and Lu, GJ and Xie, LH and Ji, X and Chu, D and Sun, Y and Chen, P and Guo, XJ}, title = {Pan-Genome Analysis of Vibrio cholerae and Vibrio metschnikovii Strains Isolated From Migratory Birds at Dali Nouer Lake in Chifeng, China.}, journal = {Frontiers in veterinary science}, volume = {8}, number = {}, pages = {638820}, pmid = {34136552}, issn = {2297-1769}, abstract = {Migratory birds are recently recognized as Vibrio disease vectors, but may be widespread transporters of Vibrio strains. We isolated Vibrio cholerae (V. cholerae) and Vibrio metschnikovii (V. metschnikovii) strains from migratory bird epidemic samples from 2017 to 2018 and isolated V. metschnikovii from migratory bird feces in 2019 from bird samples taken from the Inner Mongolia autonomous region of China. To investigate the evolution of these two Vibrio species, we sequenced the genomes of 40 V. cholerae strains and 34 V. 
metschnikovii strains isolated from the bird samples and compared these genomes with reference strain genomes. The pan-genome of all V. cholerae and V. metschnikovii genomes was large, with strains exhibiting considerable individual differences. A total of 2,130 and 1,352 core genes were identified in the V. cholerae and V. metschnikovii genomes, respectively, while dispensable genes accounted for 16,180 and 9,178 of all genes for the two strains, respectively. All V. cholerae strains isolated from the migratory birds that encoded T6SS and hlyA were non-O1/O139 serotypes without the ability to produce CTX. These strains also lacked the ability to produce the TCP fimbriae or the extracellular matrix protein RbmA and could not metabolize trimethylamine oxide (TMAO). Thus, these characteristics render them unlikely to be pandemic-inducing strains. However, a V. metschnikovii isolate encoding the complete T6SS system was isolated for the first time. These data provide new molecular insights into the diversity of V. cholerae and V. metschnikovii isolates recovered from migratory birds.}, } @article {pmid34132784, year = {2021}, author = {N'Guessan, A and Brito, IL and Serohijos, AWR and Shapiro, BJ}, title = {Mobile Gene Sequence Evolution within Individual Human Gut Microbiomes Is Better Explained by Gene-Specific Than Host-Specific Selective Pressures.}, journal = {Genome biology and evolution}, volume = {13}, number = {8}, pages = {}, pmid = {34132784}, issn = {1759-6653}, mesh = {Evolution, Molecular ; *Gastrointestinal Microbiome/genetics ; Humans ; Metagenome ; Metagenomics/methods ; *Microbiota/genetics ; }, abstract = {Pangenomes-the cumulative set of genes encoded by a population or species-arise from the interplay of horizontal gene transfer, drift, and selection. 
The balance of these forces in shaping pangenomes has been debated, and studies to date focused on ancient evolutionary time scales have suggested that pangenomes generally confer niche adaptation to their bacterial hosts. To shed light on pangenome evolution on shorter evolutionary time scales, we inferred the selective pressures acting on mobile genes within individual human microbiomes from 176 Fiji islanders. We mapped metagenomic sequence reads to a set of known mobile genes to identify single nucleotide variants (SNVs) and calculated population genetic metrics to infer deviations from a neutral evolutionary model. We found that mobile gene sequence evolution varied more by gene family than by human social attributes, such as household or village. Patterns of mobile gene sequence evolution could be qualitatively recapitulated with a simple evolutionary simulation without the need to invoke the adaptive value of mobile genes to either bacterial or human hosts. These results stand in contrast with the apparent adaptive value of pangenomes over longer evolutionary time scales. In general, the most highly mobile genes (i.e., those present in more distinct bacterial host genomes) tend to have higher metagenomic read coverage and an excess of low-frequency SNVs, consistent with their rapid spread across multiple bacterial species in the gut. However, a subset of mobile genes-including those involved in defense mechanisms and secondary metabolism-showed a contrasting signature of intermediate-frequency SNVs, indicating species-specific selective pressures or negative frequency-dependent selection on these genes. 
Together, our evolutionary models and population genetic data show that gene-specific selective pressures predominate over human or bacterial host-specific pressures during the relatively short time scales of a human lifetime.}, } @article {pmid34132752, year = {2021}, author = {Rempel, A and Wittler, R}, title = {SANS serif: alignment-free, whole-genome-based phylogenetic reconstruction.}, journal = {Bioinformatics (Oxford, England)}, volume = {37}, number = {24}, pages = {4868-4870}, pmid = {34132752}, issn = {1367-4811}, mesh = {Phylogeny ; *Software ; *Genome ; }, abstract = {SUMMARY: SANS serif is a novel software for alignment-free, whole-genome-based phylogeny estimation that follows a pangenomic approach to efficiently calculate a set of splits in a phylogenetic tree or network.

AVAILABILITY AND IMPLEMENTATION: Implemented in C++ and supported on Linux, MacOS and Windows. The source code is freely available for download at https://gitlab.ub.uni-bielefeld.de/gi/sans.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid34119106, year = {2021}, author = {Huang, X and Yang, X and Shi, X and Erickson, DL and Nagaraja, TG and Meng, J}, title = {Whole-genome sequencing analysis of uncommon Shiga toxin-producing Escherichia coli from cattle: Virulence gene profiles, antimicrobial resistance predictions, and identification of novel O-serogroups.}, journal = {Food microbiology}, volume = {99}, number = {}, pages = {103821}, doi = {10.1016/j.fm.2021.103821}, pmid = {34119106}, issn = {1095-9998}, support = {U01 FD001418/FD/FDA HHS/United States ; }, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Cattle ; Cattle Diseases/*microbiology ; *Drug Resistance, Bacterial ; Escherichia coli Infections/microbiology/*veterinary ; Escherichia coli Proteins/genetics/metabolism ; Feces/microbiology ; Phylogeny ; Serogroup ; Shiga-Toxigenic Escherichia coli/drug effects/genetics/*isolation & purification/pathogenicity ; Virulence ; Whole Genome Sequencing ; }, abstract = {Shiga toxin-producing E. coli (STEC) are major foodborne pathogens. While many studies have focused on the "top-7 STEC", little is known for minor serogroups. A total of 284 non-top-7 STEC strains isolated from cattle feces were subjected to whole-genome sequencing (WGS) to determine the serotypes, the presence of virulence genes and antimicrobial resistance (AMR) determinants. Nineteen typeable and three non-typeable serotypes with novel O-antigen loci were identified. Twenty-one AMR genes and point mutations in another six genes that conferred resistance to 10 antimicrobial classes were detected, as well as 46 virulence genes. The distribution of 33 virulence genes and 15 AMR determinants exhibited significant differences among serotypes (p < 0.05). Among all strains, 81.7% (n = 232) and 14.1% (n = 40) carried stx2 and stx1 only, respectively; only 4.2% (n = 12) carried both. 
Subtypes stx1a, stx1c, stx2a, stx2c, stx2d, and stx2g were identified. Forty-six strains carried eae and stx2a and therefore had the potential to cause severe diseases; 47 strains were genetically related to human clinical strains inferred from a pan-genome phylogenetic tree. We were able to demonstrate the utility of WGS as a surveillance tool to characterize the novel serotypes, as well as AMR and virulence profiles of uncommon STEC that could potentially cause human illness.}, } @article {pmid34114561, year = {2021}, author = {Knight, DR and Imwattana, K and Kullin, B and Guerrero-Araya, E and Paredes-Sabja, D and Didelot, X and Dingle, KE and Eyre, DW and Rodríguez, C and Riley, TV}, title = {Major genetic discontinuity and novel toxigenic species in Clostridioides difficile taxonomy.}, journal = {eLife}, volume = {10}, number = {}, pages = {}, pmid = {34114561}, issn = {2050-084X}, mesh = {Bacterial Toxins/*genetics ; Bayes Theorem ; Clostridioides/genetics ; Clostridioides difficile/*classification/*genetics ; Clostridium Infections/epidemiology/*genetics ; Genome, Bacterial ; Humans ; Phylogeny ; }, abstract = {Clostridioides difficile infection (CDI) remains an urgent global One Health threat. The genetic heterogeneity seen across C. difficile underscores its wide ecological versatility and has driven the significant changes in CDI epidemiology seen in the last 20 years. We analysed an international collection of over 12,000 C. difficile genomes spanning the eight currently defined phylogenetic clades. Through whole-genome average nucleotide identity, and pangenomic and Bayesian analyses, we identified major taxonomic incoherence with clear species boundaries for each of the recently described cryptic clades CI-III. The emergence of these three novel genomospecies predates clades C1-5 by millions of years, rewriting the global population structure of C. difficile specifically and taxonomy of the Peptostreptococcaceae in general. 
These genomospecies all show unique and highly divergent toxin gene architecture, advancing our understanding of the evolution of C. difficile and close relatives. Beyond the taxonomic ramifications, this work may impact the diagnosis of CDI.}, } @article {pmid34111524, year = {2021}, author = {Panibe, JP and Wang, L and Li, J and Li, MY and Lee, YC and Wang, CS and Ku, MSB and Lu, MJ and Li, WH}, title = {Chromosomal-level genome assembly of the semi-dwarf rice Taichung Native 1, an initiator of Green Revolution.}, journal = {Genomics}, volume = {113}, number = {4}, pages = {2656-2674}, doi = {10.1016/j.ygeno.2021.06.006}, pmid = {34111524}, issn = {1089-8646}, mesh = {Chromosomes ; Genome ; Genomics ; *Oryza/genetics ; Plant Breeding ; }, abstract = {Here we report the 409.5 Mb chromosome-level assembly of the first bred semi-dwarf rice, the Taichung Native 1 (TN1), which served as the template for the development of the Green Revolution (GR) cultivar IR8 "miracle rice". We sequenced the TN1 genome utilizing multiple platforms and produced PacBio long reads, Illumina paired-end reads, Illumina mate-pair reads and 10x Genomics linked reads. We used a hybrid approach to assemble the 226× coverage of sequences by a combination of de novo and reference-guided approaches. The assembled TN1 genome has an N50 scaffold size of 33.1 Mb with the longest measuring 45.5 Mb. We annotated 37,526 genes, in which 24,102 (64.23%) were assigned Blast2GO annotations. The genome has 4672 or 95.4% complete BUSCOs and a repeat content of 51.52%. We developed our own method of creating a GR pangenome using the orthologous relationships of the proteins of TN1, IR8, MH63 and IR64, identifying 16,999 core orthologue groups of Green Revolution. From the pangenome, we identified a set of shared and unique gene ontology terms for the accessory clusters, characterizing TN1, IR8, MH63 and IR64. 
This TN1 genome assembly and GR pangenome will be a resource for new genomic discoveries about Green Revolution, and for improving the disease and insect resistances and the yield of rice.}, } @article {pmid34110280, year = {2021}, author = {Sserwadda, I and Mboowa, G}, title = {rMAP: the Rapid Microbial Analysis Pipeline for ESKAPE bacterial group whole-genome sequence data.}, journal = {Microbial genomics}, volume = {7}, number = {6}, pages = {}, pmid = {34110280}, issn = {2057-5858}, support = {/WT_/Wellcome Trust/United Kingdom ; U2R TW010672/TW/FIC NIH HHS/United States ; }, mesh = {Acinetobacter baumannii/genetics ; Animals ; Anti-Bacterial Agents/pharmacology ; Drug Resistance, Multiple, Bacterial/*genetics ; Enterobacter/genetics ; Enterococcus faecium/genetics ; *Genome, Bacterial ; *Genomics ; Humans ; Klebsiella pneumoniae/genetics ; Multilocus Sequence Typing ; Phylogeny ; Plasmids ; Staphylococcus aureus/genetics ; Whole Genome Sequencing/*methods ; }, abstract = {The recent re-emergence of multidrug-resistant pathogens has exacerbated their threat to worldwide public health. The evolution of the genomics era has led to the generation of huge volumes of sequencing data at an unprecedented rate due to the ever-reducing costs of whole-genome sequencing (WGS). We have developed the Rapid Microbial Analysis Pipeline (rMAP), a user-friendly pipeline capable of profiling the resistomes of ESKAPE pathogens (Enterococcus faecium, Staphylococcus aureus, Klebsiella pneumoniae, Acinetobacter baumannii, Pseudomonas aeruginosa and Enterobacter species) using WGS data generated from Illumina's sequencing platforms. 
rMAP is designed for individuals with little bioinformatics expertise, and automates the steps required for WGS analysis directly from the raw genomic sequence data, including adapter and low-quality sequence read trimming, de novo genome assembly, genome annotation, single-nucleotide polymorphism (SNP) variant calling, phylogenetic inference by maximum likelihood, antimicrobial resistance (AMR) profiling, plasmid profiling, virulence factor determination, multi-locus sequence typing (MLST), pangenome analysis and insertion sequence characterization (IS). Once the analysis is finished, rMAP generates an interactive web-like html report. rMAP installation is very simple, it can be run using very simple commands. It represents a rapid and easy way to perform comprehensive bacterial WGS analysis using a personal laptop in low-income settings where high-performance computing infrastructure is limited.}, } @article {pmid34099713, year = {2021}, author = {Zhang, J and Hewitt, TC and Boshoff, WHP and Dundas, I and Upadhyaya, N and Li, J and Patpour, M and Chandramohan, S and Pretorius, ZA and Hovmøller, M and Schnippenkoetter, W and Park, RF and Mago, R and Periyannan, S and Bhatt, D and Hoxha, S and Chakraborty, S and Luo, M and Dodds, P and Steuernagel, B and Wulff, BBH and Ayliffe, M and McIntosh, RA and Zhang, P and Lagudah, ES}, title = {A recombined Sr26 and Sr61 disease resistance gene stack in wheat encodes unrelated NLR genes.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {3378}, pmid = {34099713}, issn = {2041-1723}, mesh = {Chromosomes, Plant/genetics ; Disease Resistance/*genetics ; Genes, Plant ; Genetic Engineering ; Genetic Markers ; NLR Proteins/*genetics ; Plant Breeding/methods ; Plant Diseases/genetics/microbiology ; Plant Proteins/genetics ; Plant Stems/microbiology ; Plants, Genetically Modified/genetics/*microbiology ; Puccinia/isolation & purification/*pathogenicity ; Triticum/genetics/*microbiology ; }, abstract = {The 
re-emergence of stem rust on wheat in Europe and Africa is reinforcing the ongoing need for durable resistance gene deployment. Here, we isolate from wheat, Sr26 and Sr61, with both genes independently introduced as alien chromosome introgressions from tall wheat grass (Thinopyrum ponticum). Mutational genomics and targeted exome capture identify Sr26 and Sr61 as separate single genes that encode unrelated (34.8%) nucleotide binding site leucine rich repeat proteins. Sr26 and Sr61 are each validated by transgenic complementation using endogenous and/or heterologous promoter sequences. Sr61 orthologs are absent from current Thinopyrum elongatum and wheat pan genome sequences, contrasting with Sr26 where homologues are present. Using gene-specific markers, we validate the presence of both genes on a single recombinant alien segment developed in wheat. The co-location of these genes on a small non-recombinogenic segment simplifies their deployment as a gene stack and potentially enhances their resistance durability.}, } @article {pmid34089057, year = {2021}, author = {Brown, SDM}, title = {Advances in mouse genetics for the study of human disease.}, journal = {Human molecular genetics}, volume = {30}, number = {R2}, pages = {R274-R284}, pmid = {34089057}, issn = {1460-2083}, support = {/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Alleles ; Animals ; Disease Models, Animal ; Drug Discovery ; Gene Expression Regulation ; *Genetic Association Studies/methods ; Genetic Engineering ; *Genetic Predisposition to Disease ; *Genome ; Genome-Wide Association Study ; *Genomics/methods ; High-Throughput Screening Assays ; Humans ; Mice ; Mutagenesis ; Mutation ; Phenomics/methods ; Phenotype ; Precision Medicine ; Signal Transduction ; Translational Research, Biomedical ; }, abstract = {The mouse is the pre-eminent model organism for studies of mammalian gene function and has provided an extraordinarily rich range of insights into basic genetic mechanisms and 
biological systems. Over several decades, the characterization of mouse mutants has illuminated the relationship between gene and phenotype, providing transformational insights into the genetic bases of disease. However, if we are to deliver the promise of genomic and precision medicine, we must develop a comprehensive catalogue of mammalian gene function that uncovers the dark genome and elucidates pleiotropy. Advances in large-scale mouse mutagenesis programmes allied to high-throughput mouse phenomics are now addressing this challenge and systematically revealing novel gene function and multi-morbidities. Alongside the development of these pan-genomic mutational resources, mouse genetics is employing a range of diversity resources to delineate gene-gene and gene-environment interactions and to explore genetic context. Critically, mouse genetics is a powerful tool for assessing the functional impact of human genetic variation and determining the causal relationship between variant and disease. Together these approaches provide unique opportunities to dissect in vivo mechanisms and systems to understand pathophysiology and disease. 
Moreover, the provision and utility of mouse models of disease has flourished and engages cumulatively at numerous points across the translational spectrum from basic mechanistic studies to pre-clinical studies, target discovery and therapeutic development.}, } @article {pmid34076044, year = {2021}, author = {Dall'Agnol, B and Webster, A and Souza, UA and Barbieri, A and Mayer, FQ and Cardoso, GA and Torres, TT and Machado, RZ and Ferreira, CAS and Reck, J}, title = {Genomic analysis on Brazilian strains of Anaplasma marginale.}, journal = {Revista brasileira de parasitologia veterinaria = Brazilian journal of veterinary parasitology : Orgao Oficial do Colegio Brasileiro de Parasitologia Veterinaria}, volume = {30}, number = {2}, pages = {e000421}, doi = {10.1590/S1984-29612021043}, pmid = {34076044}, issn = {1984-2961}, mesh = {Amino Acid Sequence ; *Anaplasma marginale/genetics ; *Anaplasmosis ; Animals ; Brazil ; Cattle ; *Cattle Diseases ; Genomics ; Phylogeny ; }, abstract = {Anaplasma marginale is a vector-borne pathogen that causes a disease known as anaplasmosis. No sequenced genomes of Brazilian strains are yet available. The aim of this work was to compare whole genomes of Brazilian strains of A. marginale (Palmeira and Jaboticabal) with genomes of strains from other regions (USA and Australia strains). Genome sequencing of Brazilian strains was performed by means of next-generation sequencing. Reads were mapped using the genome of the Florida strain of A. marginale as a reference sequence. Single nucleotide polymorphisms (SNPs) and insertions/deletions (INDELs) were identified. The data showed that two Brazilian strains grouped together in one particular clade, which grouped in a larger American group together with North American strains. Moreover, some important differences in surface proteins between the two Brazilian isolates can be discerned. These results shed light on the evolutionary history of A. 
marginale and provide the first genome information on South American isolates. Assessing the genome sequences of strains from different regions is essential for increasing knowledge of the pan-genome of this bacteria.}, } @article {pmid34072447, year = {2021}, author = {Zenda, T and Liu, S and Dong, A and Duan, H}, title = {Advances in Cereal Crop Genomics for Resilience under Climate Change.}, journal = {Life (Basel, Switzerland)}, volume = {11}, number = {6}, pages = {}, pmid = {34072447}, issn = {2075-1729}, support = {32071936//National Natural Science Foundation of China (Function and Molecular Mechanism of ZMDNAJ Regulating Drought Resistance in Maize)/ ; }, abstract = {Adapting to climate change, providing sufficient human food and nutritional needs, and securing sufficient energy supplies will call for a radical transformation from the current conventional adaptation approaches to more broad-based and transformative alternatives. This entails diversifying the agricultural system and boosting productivity of major cereal crops through development of climate-resilient cultivars that can sustainably maintain higher yields under climate change conditions, expanding our focus to crop wild relatives, and better exploitation of underutilized crop species. This is facilitated by the recent developments in plant genomics, such as advances in genome sequencing, assembly, and annotation, as well as gene editing technologies, which have increased the availability of high-quality reference genomes for various model and non-model plant species. This has necessitated genomics-assisted breeding of crops, including underutilized species, consequently broadening genetic variation of the available germplasm; improving the discovery of novel alleles controlling important agronomic traits; and enhancing creation of new crop cultivars with improved tolerance to biotic and abiotic stresses and superior nutritive quality. 
Here, therefore, we summarize these recent developments in plant genomics and their application, with particular reference to cereal crops (including underutilized species). Particularly, we discuss genome sequencing approaches, quantitative trait loci (QTL) mapping and genome-wide association (GWAS) studies, directed mutagenesis, plant non-coding RNAs, precise gene editing technologies such as CRISPR-Cas9, and complementation of crop genotyping by crop phenotyping. We then conclude by providing an outlook that, as we step into the future, high-throughput phenotyping, pan-genomics, transposable elements analysis, and machine learning hold much promise for crop improvements related to climate resilience and nutritional superiority.}, } @article {pmid34071208, year = {2021}, author = {Xiao, Y and Wang, C and Zhao, J and Zhang, H and Chen, W and Zhai, Q}, title = {Quantitative Detection of Bifidobacterium longum Strains in Feces Using Strain-Specific Primers.}, journal = {Microorganisms}, volume = {9}, number = {6}, pages = {}, pmid = {34071208}, issn = {2076-2607}, support = {No. 31820103010 and No. 31871773//the National Natural Science Foundation of China Program/ ; 2018DB002//Projects of Innovation and Development Pillar Program for Key Industries in Southern Xinjiang of Xinjiang Production and Construction Corps/ ; No. 2018YFC1604206//National Key Research and Development Project/ ; JUFSTR20180102//National First-Class Discipline Program of Food Science and Technology/ ; the BBSRC Newton Fund Joint Centre Award//the BBSRC Newton Fund Joint Centre Award/ ; Collaborative Innovation Center of Food Safety and Quality Control in Jiangsu Province//Collaborative Innovation Center of Food Safety and Quality Control in Jiangsu Province/ ; }, abstract = {We adopted a bioinformatics-based technique to identify strain-specific markers, which were then used to quantify the abundances of three distinct B. longum subsp. longum strains in fecal samples of humans and mice. 
A pangenome analysis of 205 B. longum subsp. longum genomes revealed the accumulation of considerable strain-specific genes within this species; specifically, 28.7% of the total identified genes were strain-specific. We identified 32, 14, and 49 genes specific to B. longum subsp. longum RG4-1, B. longum subsp. longum M1-20-R01-3, and B. longum subsp. longum FGSZY6M4, respectively. After performing an in silico validation of these strain-specific markers using a nucleotide BLAST against both the B. longum subsp. longum genome database and an NR/NT database, RG4-1_01874 (1331 bp), M1-20-R01-3_00324 (1745 bp), and FGSZY6M4_01477 (1691 bp) were chosen as target genes for strain-specific quantification. The specificities of the qPCR primers were validated against 47 non-target microorganisms and fecal baseline microbiota to ensure that they produced no PCR amplification products. The performance of the qPCR primer-based analysis was further assessed using fecal samples. After oral administration, the target B. longum strains appeared to efficiently colonize both the human and mouse guts, with average population levels of >10[8] CFU/g feces. The bioinformatics pipeline proposed here can be applied to the quantification of various bacterial species.}, } @article {pmid34069870, year = {2021}, author = {Rodrigues, DLN and Morais-Rodrigues, F and Hurtado, R and Dos Santos, RG and Costa, DC and Barh, D and Ghosh, P and Alzahrani, KJ and Soares, SC and Ramos, R and Góes-Neto, A and Azevedo, V and Aburjaile, FF}, title = {Pan-Resistome Insights into the Multidrug Resistance of Acinetobacter baumannii.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {10}, number = {5}, pages = {}, pmid = {34069870}, issn = {2079-6382}, abstract = {Acinetobacter baumannii is an important Gram-negative opportunistic pathogen that is responsible for many nosocomial infections. 
This etiologic agent has acquired, over the years, multiple mechanisms of resistance to a wide range of antimicrobials and the ability to survive in different environments. In this context, our study aims to elucidate the resistome from the A. baumannii strains based on phylogenetic, phylogenomic, and comparative genomics analyses. In silico analysis of the complete genomes of A. baumannii strains was carried out to identify genes involved in the resistance mechanisms and the phylogenetic relationships and grouping of the strains based on the sequence type. The presence of genomic islands containing most of the resistance gene repertoire indicated high genomic plasticity, which probably enabled the acquisition of resistance genes and the formation of a robust resistome. A. baumannii displayed an open pan-genome and revealed a still constant genetic permutation among their strains. Furthermore, the resistance genes suggest a specific profile within the species throughout its evolutionary history. Moreover, the current study performed screening and characterization of the main genes present in the resistome, which can be used in applied research to develop new therapeutic methods to control this important bacterial pathogen.}, } @article {pmid34067853, year = {2021}, author = {Reyes-Cortes, JL and Azaola-Espinosa, A and Lozano-Aguirre, L and Ponce-Alquicira, E}, title = {Physiological and Genomic Analysis of Bacillus pumilus UAMX Isolated from the Gastrointestinal Tract of Overweight Individuals.}, journal = {Microorganisms}, volume = {9}, number = {5}, pages = {}, pmid = {34067853}, issn = {2076-2607}, abstract = {The study aimed to evaluate the metabolism and resistance to the gastrointestinal tract conditions of Bacillus pumilus UAMX (BP-UAMX) isolated from overweight individuals using genomic tools. Specifically, we assessed its ability to metabolize various carbon sources, its resistance to low pH exposure, and its growth in the presence of bile salts. 
The genomic and bioinformatic analyses included the prediction of gene and protein metabolic functions, a pan-genome and phylogenomic analysis. BP-UAMX survived at pH 3, while bile salts (0.2-0.3% w/v) increased its growth rate. Moreover, it showed the ability to metabolize simple and complex carbon sources (glucose, starch, carboxymethyl-cellulose, inulin, and tributyrin), showing a differentiated electrophoretic profile. Genome was assembled into a single contig, with a high percentage of genes and proteins associated with the metabolism of amino acids, carbohydrates, and lipids. Antibiotic resistance genes were detected, but only one beta-Lactam resistance protein related to the inhibition of peptidoglycan biosynthesis was identified. The pan-genome of BP-UAMX is still open with phylogenetic similarities with other Bacillus of human origin. Therefore, BP-UAMX seems to be adapted to the intestinal environment, with physiological and genomic analyses demonstrating the ability to metabolize complex carbon sources, the strain has an open pan-genome with continuous evolution and adaptation.}, } @article {pmid34067383, year = {2021}, author = {Lee, HH and Park, J and Jung, H and Seo, YS}, title = {Pan-Genome Analysis Reveals Host-Specific Functional Divergences in Burkholderia gladioli.}, journal = {Microorganisms}, volume = {9}, number = {6}, pages = {}, pmid = {34067383}, issn = {2076-2607}, support = {NRF-2019R1A2C2006779//National Research Foundation of Korea/ ; no. 918019-04//Ministry of Agriculture, Food and Rural Affairs/ ; }, abstract = {Burkholderia gladioli has high versatility and adaptability to various ecological niches. Here, we constructed a pan-genome using 14 genome sequences of B. gladioli, which originate from different niches, including gladiolus, rice, humans, and nature. Functional roles of core and niche-associated genomes were investigated by pathway enrichment analyses. 
Consequently, we inferred the uniquely important role of niche-associated genomes in (1) selenium availability during competition with gladiolus host; (2) aromatic compound degradation in seed-borne and crude oil-accumulated environments, and (3) stress-induced DNA repair system/recombination in the cystic fibrosis-niche. We also identified the conservation of the rhizomide biosynthetic gene cluster in all the B. gladioli strains and the concentrated distribution of this cluster in human isolates. It was confirmed the absence of complete CRISPR/Cas system in both plant and human pathogenic B. gladioli and the presence of the system in B. gladioli living in nature, possibly reflecting the inverse relationship between CRISPR/Cas system and virulence.}, } @article {pmid34065739, year = {2021}, author = {Muslu, T and Biyiklioglu-Kaya, S and Akpinar, BA and Yuce, M and Budak, H}, title = {Pan-Genome miRNomics in Brachypodium.}, journal = {Plants (Basel, Switzerland)}, volume = {10}, number = {5}, pages = {}, pmid = {34065739}, issn = {2223-7747}, abstract = {Pan-genomes are efficient tools for the identification of conserved and varying genomic sequences within lineages of a species. Investigating genetic variations might lead to the discovery of genes present in a subset of lineages, which might contribute into beneficial agronomic traits such as stress resistance or yield. The content of varying genomic regions in the pan-genome could include protein-coding genes as well as microRNA(miRNAs), small non-coding RNAs playing key roles in the regulation of gene expression. In this study, we performed in silico miRNA identification from the genomic sequences of 54 lineages of Brachypodium distachyon, aiming to explore varying miRNA contents and their functional interactions. A total of 115 miRNA families were identified in 54 lineages, 56 of which were found to be present in all lineages. 
The miRNA families were classified based on their conservation among lineages and potential mRNA targets were identified. Obtaining information about regulatory mechanisms stemming from these miRNAs offers strong potential to provide a better insight into the complex traits that were potentially present in some lineages. Future work could lead us to introduce these traits to different lineages or other economically important plant species in order to promote their survival in different environmental conditions.}, } @article {pmid34059118, year = {2021}, author = {Cai, X and Chang, L and Zhang, T and Chen, H and Zhang, L and Lin, R and Liang, J and Wu, J and Freeling, M and Wang, X}, title = {Impacts of allopolyploidization and structural variation on intraspecific diversification in Brassica rapa.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {166}, pmid = {34059118}, issn = {1474-760X}, mesh = {Base Sequence ; Brassica rapa/*genetics ; Domestication ; Genome, Plant ; *Genomic Structural Variation ; Molecular Sequence Annotation ; Phylogeny ; *Polyploidy ; Species Specificity ; Synteny/genetics ; }, abstract = {BACKGROUND: Despite the prevalence and recurrence of polyploidization in the speciation of flowering plants, its impacts on crop intraspecific genome diversification are largely unknown. Brassica rapa is a mesopolyploid species that is domesticated into many subspecies with distinctive morphotypes.

RESULTS: Herein, we report the consequences of the whole-genome triplication (WGT) on intraspecific diversification using a pan-genome analysis of 16 de novo assembled and two reported genomes. Among the genes that derive from WGT, 13.42% of polyploidy-derived genes accumulate more transposable elements and non-synonymous mutations than other genes during individual genome evolution. We denote such genes as being "flexible." We construct the Brassica rapa ancestral genome and observe the continuing influence of the dominant subgenome on intraspecific diversification in B. rapa. The gene flexibility is biased to the more fractionated subgenomes (MFs), in contrast to the more intact gene content of the dominant LF (least fractionated) subgenome. Furthermore, polyploidy-derived flexible syntenic genes are implicated in the response to stimulus and the phytohormone auxin; this may reflect adaptation to the environment. Using an integrated graph-based genome, we investigate the structural variation (SV) landscapes in 524 B. rapa genomes. We observe that SVs track morphotype domestication. Four out of 266 candidate genes for Chinese cabbage domestication are speculated to be involved in the leafy head formation.

CONCLUSIONS: This pan-genome uncovers the possible contributions of allopolyploidization on intraspecific diversification and the possible and underexplored role of SVs in favorable trait domestication. Collectively, our work serves as a rich resource for genome-based B. rapa improvement.}, } @article {pmid34056597, year = {2021}, author = {Lomsadze, A and Bonny, C and Strozzi, F and Borodovsky, M}, title = {GeneMark-HM: improving gene prediction in DNA sequences of human microbiome.}, journal = {NAR genomics and bioinformatics}, volume = {3}, number = {2}, pages = {lqab047}, pmid = {34056597}, issn = {2631-9268}, abstract = {Computational reconstruction of nearly complete genomes from metagenomic reads may identify thousands of new uncultured candidate bacterial species. We have shown that reconstructed prokaryotic genomes along with genomes of sequenced microbial isolates can be used to support more accurate gene prediction in novel metagenomic sequences. We have proposed an approach that used three types of gene prediction algorithms and found for all contigs in a metagenome nearly optimal models of protein-coding regions either in libraries of pre-computed models or constructed de novo. The model selection process and gene annotation were done by the new GeneMark-HM pipeline. We have created a database of the species level pan-genomes for the human microbiome. To create a library of models representing each pan-genome we used a self-training algorithm GeneMarkS-2. Genes initially predicted in each contig served as queries for a fast similarity search through the pan-genome database. The best matches led to selection of the model for gene prediction. Contigs not assigned to pan-genomes were analyzed by crude, but still accurate models designed for sequences with particular GC compositions. 
Tests of GeneMark-HM on simulated metagenomes demonstrated improvement in gene annotation of human metagenomic sequences in comparison with the current state-of-the-art gene prediction tools.}, } @article {pmid34055480, year = {2021}, author = {Pavlovikj, N and Gomes-Neto, JC and Deogun, JS and Benson, AK}, title = {ProkEvo: an automated, reproducible, and scalable framework for high-throughput bacterial population genomics analyses.}, journal = {PeerJ}, volume = {9}, number = {}, pages = {e11376}, pmid = {34055480}, issn = {2167-8359}, abstract = {Whole Genome Sequence (WGS) data from bacterial species is used for a variety of applications ranging from basic microbiological research, diagnostics, and epidemiological surveillance. The availability of WGS data from hundreds of thousands of individual isolates of individual microbial species poses a tremendous opportunity for discovery and hypothesis-generating research into ecology and evolution of these microorganisms. Flexibility, scalability, and user-friendliness of existing pipelines for population-scale inquiry, however, limit applications of systematic, population-scale approaches. Here, we present ProkEvo, an automated, scalable, reproducible, and open-source framework for bacterial population genomics analyses using WGS data. 
ProkEvo was specifically developed to achieve the following goals: (1) Automation and scaling of complex combinations of computational analyses for many thousands of bacterial genomes from inputs of raw Illumina paired-end sequence reads; (2) Use of workflow management systems (WMS) such as Pegasus WMS to ensure reproducibility, scalability, modularity, fault-tolerance, and robust file management throughout the process; (3) Use of high-performance and high-throughput computational platforms; (4) Generation of hierarchical-based population structure analysis based on combinations of multi-locus and Bayesian statistical approaches for classification for ecological and epidemiological inquiries; (5) Association of antimicrobial resistance (AMR) genes, putative virulence factors, and plasmids from curated databases with the hierarchically-related genotypic classifications; and (6) Production of pan-genome annotations and data compilation that can be utilized for downstream analysis such as identification of population-specific genomic signatures. The scalability of ProkEvo was measured with two datasets comprising significantly different numbers of input genomes (one with ~2,400 genomes, and the second with ~23,000 genomes). Depending on the dataset and the computational platform used, the running time of ProkEvo varied from ~3-26 days. ProkEvo can be used with virtually any bacterial species, and the Pegasus WMS uniquely facilitates addition or removal of programs from the workflow or modification of options within them. To demonstrate versatility of the ProkEvo platform, we performed a hierarchical-based population structure analyses from available genomes of three distinct pathogenic bacterial species as individual case studies. The specific case studies illustrate how hierarchical analyses of population structures, genotype frequencies, and distribution of specific gene functions can be integrated into an analysis. 
Collectively, our study shows that ProkEvo presents a practical viable option for scalable, automated analyses of bacterial populations with direct applications for basic microbiology research, clinical microbiological diagnostics, and epidemiological surveillance.}, } @article {pmid34053518, year = {2021}, author = {Franciosa, I and Ferrocino, I and Giordano, M and Mounier, J and Rantsiou, K and Cocolin, L}, title = {Specific metagenomic asset drives the spontaneous fermentation of Italian sausages.}, journal = {Food research international (Ottawa, Ont.)}, volume = {144}, number = {}, pages = {110379}, doi = {10.1016/j.foodres.2021.110379}, pmid = {34053518}, issn = {1873-7145}, mesh = {Fermentation ; Food Microbiology ; Italy ; *Lactobacillus ; *Metagenomics ; }, abstract = {Metagenomics is a powerful tool to study and understand the microbial dynamics that occur during food fermentation and allows to close the link between microbial diversity and final sensory characteristics. Each food matrix can be colonized by different microbes, but also by different strains of the same species. In this study, using an innovative integrated approach combining culture-dependent method with a shotgun sequencing, we were able to show how strain-level biodiversity could influence the quality characteristics of the final product. The attention was placed on a model food fermentation process: Salame Piemonte, a Protected Geographical Indication (PGI) Italian fermented sausage. Three independent batches produced in February, March and May 2018 were analysed. The sausages were manufactured, following the production specification, in a local meat factory in the area of Turin (Italy) without the use of starter cultures. A pangenomic approach was applied in order to identify and evaluate the lactic acid bacteria (LAB) population driving the fermentation process. 
It was observed that all batches were characterized by the presence of few LAB species, namely Pediococcus pentosaceus, Latilactobacillus curvatus and Latilactobacillus sakei. Sausages from the different batches were different when the volatilome was taken into consideration, and a strong association between quality attributes and strains present was determined. In particular, different strains of L. sakei, showing heterogeneity at genomic level, colonized the meat at the beginning of each production and deeply influenced the fermentation process by distinctive metabolic pathways that affected the fermentation process and the final sensory aspects.}, } @article {pmid34051138, year = {2021}, author = {Qin, P and Lu, H and Du, H and Wang, H and Chen, W and Chen, Z and He, Q and Ou, S and Zhang, H and Li, X and Li, X and Li, Y and Liao, Y and Gao, Q and Tu, B and Yuan, H and Ma, B and Wang, Y and Qian, Y and Fan, S and Li, W and Wang, J and He, M and Yin, J and Li, T and Jiang, N and Chen, X and Liang, C and Li, S}, title = {Pan-genome analysis of 33 genetically diverse rice accessions reveals hidden genomic variations.}, journal = {Cell}, volume = {184}, number = {13}, pages = {3542-3558.e16}, doi = {10.1016/j.cell.2021.04.046}, pmid = {34051138}, issn = {1097-4172}, mesh = {Adaptation, Physiological/genetics ; Agriculture ; Domestication ; *Ecotype ; Gene Expression Profiling ; Gene Expression Regulation, Plant ; Genes, Plant ; *Genetic Variation ; *Genome, Plant ; Genomic Structural Variation ; Molecular Sequence Annotation ; Oryza/*genetics ; Phenotype ; }, abstract = {Structural variations (SVs) and gene copy number variations (gCNVs) have contributed to crop evolution, domestication, and improvement. Here, we assembled 31 high-quality genomes of genetically diverse rice accessions. Coupling with two existing assemblies, we developed pan-genome-scale genomic resources including a graph-based genome, providing access to rice genomic variations. 
Specifically, we discovered 171,072 SVs and 25,549 gCNVs and used an Oryza glaberrima assembly to infer the derived states of SVs in the Oryza sativa population. Our analyses of SV formation mechanisms, impacts on gene expression, and distributions among subpopulations illustrate the utility of these resources for understanding how SVs and gCNVs shaped rice environmental adaptation and domestication. Our graph-based genome enabled genome-wide association study (GWAS)-based identification of phenotype-associated genetic variations undetectable when using only SNPs and a single reference assembly. Our work provides rich population-scale resources paired with easy-to-access tools to facilitate rice breeding as well as plant functional genomics and evolutionary biology research.}, } @article {pmid34048479, year = {2021}, author = {Silva de Oliveira, M and Thyeska Castro Alves, J and Henrique Caracciolo Gomes de Sá, P and Veras, AAO}, title = {PAN2HGENE-tool for comparative analysis and identifying new gene products.}, journal = {PloS one}, volume = {16}, number = {5}, pages = {e0252414}, pmid = {34048479}, issn = {1932-6203}, mesh = {Computational Biology/*methods ; Genomics/methods ; High-Throughput Nucleotide Sequencing ; Metagenomics ; *Software ; Transcriptome ; }, abstract = {Advances in next-generation sequencing (NGS) platforms have had a positive impact on biological research, leading to the development of numerous omics approaches, including genomics, transcriptomics, metagenomics, and pangenomics. These analyses provide insights into the gene contents of various organisms. However, to understand the evolutionary processes of these genes, comparative analysis, which is an important tool for annotation, is required. Using comparative analysis, it is possible to infer the functions of gene contents and identify orthologs and paralogous genes via their homology. 
Although several comparative analysis tools currently exist, most of them are limited to complete genomes. PAN2HGENE, a computational tool that allows identification of gene products missing from the original genome sequence, with automated comparative analysis for both complete and draft genomes, can be used to address this limitation. In this study, PAN2HGENE was used to identify new products, resulting in altering the alpha value behavior in the pangenome without altering the original genomic sequence. Our findings indicate that this tool represents an efficient alternative for comparative analysis, with a simple and intuitive graphical interface. The PAN2HGENE have been uploaded to SourceForge and are available via: https://sourceforge.net/projects/pan2hgene-software.}, } @article {pmid34045706, year = {2021}, author = {He, Z and Ji, R and Havlickova, L and Wang, L and Li, Y and Lee, HT and Song, J and Koh, C and Yang, J and Zhang, M and Parkin, IAP and Wang, X and Edwards, D and King, GJ and Zou, J and Liu, K and Snowdon, RJ and Banga, SS and Machackova, I and Bancroft, I}, title = {Genome structural evolution in Brassica crops.}, journal = {Nature plants}, volume = {7}, number = {6}, pages = {757-765}, pmid = {34045706}, issn = {2055-0278}, support = {BB/L002124/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R019819/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Biological Evolution ; Brassica/*genetics ; Crops, Agricultural/*genetics ; Genes, Plant ; Genetic Introgression ; *Genome, Plant ; Polyploidy ; }, abstract = {The cultivated Brassica species include numerous vegetable and oil crops of global importance. Three genomes (designated A, B and C) share mesohexapolyploid ancestry and occur both singly and in each pairwise combination to define the Brassica species. 
With organizational errors (such as misplaced genome segments) corrected, we showed that the fundamental structure of each of the genomes is the same, irrespective of the species in which it occurs. This enabled us to clarify genome evolutionary pathways, including updating the Ancestral Crucifer Karyotype (ACK) block organization and providing support for the Brassica mesohexaploidy having occurred via a two-step process. We then constructed genus-wide pan-genomes, drawing from genes present in any species in which the respective genome occurs, which enabled us to provide a global gene nomenclature system for the cultivated Brassica species and develop a methodology to cost-effectively elucidate the genomic impacts of alien introgressions. Our advances not only underpin knowledge-based approaches to the more efficient breeding of Brassica crops but also provide an exemplar for the study of other polyploids.}, } @article {pmid34045649, year = {2021}, author = {Dar, HA and Ismail, S and Waheed, Y and Ahmad, S and Jamil, Z and Aziz, H and Hetta, HF and Muhammad, K}, title = {Designing a multi-epitope vaccine against Mycobacteroides abscessus by pangenome-reverse vaccinology.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {11197}, pmid = {34045649}, issn = {2045-2322}, mesh = {Bacterial Vaccines/*therapeutic use ; Epitopes, B-Lymphocyte/immunology ; Epitopes, T-Lymphocyte/immunology ; Genome, Bacterial ; Humans ; Mycobacterium Infections, Nontuberculous/*prevention & control ; Mycobacterium abscessus/genetics/*immunology ; Vaccines, Subunit/*therapeutic use ; Vaccinology/*methods ; }, abstract = {Mycobacteroides abscessus (Previously Mycobacterium abscessus) is an emerging microorganism of the newly defined genera Mycobacteroides that causes mainly skin and tissue diseases in humans. 
The recent availability of total 34 fully sequenced genomes of different strains belonging to this species has provided an opportunity to utilize this genomics data to gain novel insights and guide the development of specific antimicrobial therapies. In the present study, we collected collectively 34 complete genome sequences of M. abscessus from the NCBI GenBank database. Pangenome analysis was conducted on these genomes to understand the genetic diversity and to obtain proteins associated with its core genome. These core proteins were then subjected to various subtractive filters to identify potential antigenic targets that were subjected to multi-epitope vaccine design. Our analysis projected the open pangenome of M. abscessus containing 3443 core genes. After applying various stepwise filtration steps on the core proteins, a total of four potential antigenic targets were identified. Utilizing their constituent CD4 and CD8 T-cell epitopes, a multi-epitope based subunit vaccine was computationally designed. Sequence-based analysis as well as structural characterization revealed the immunological effectiveness of this designed vaccine. Further molecular docking, molecular dynamics simulation and binding free energy estimation with Toll-like receptor 2 indicated strong structural associations of the vaccine with the immune receptor. 
The promising results are encouraging and need to be validated by additional wet laboratory studies for confirmation.}, } @article {pmid34044757, year = {2021}, author = {Guo, J and Pang, E and Song, H and Lin, K}, title = {A tri-tuple coordinate system derived for fast and accurate analysis of the colored de Bruijn graph-based pangenomes.}, journal = {BMC bioinformatics}, volume = {22}, number = {1}, pages = {282}, pmid = {34044757}, issn = {1471-2105}, mesh = {*Algorithms ; Genome ; *Genomics ; Metagenomics ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: With the rapid development of accurate sequencing and assembly technologies, an increasing number of high-quality chromosome-level and haplotype-resolved assemblies of genomic sequences have been derived, from which there will be great opportunities for computational pangenomics. Although genome graphs are among the most useful models for pangenome representation, their structural complexity makes it difficult to present genome information intuitively, such as the linear reference genome. Thus, efficiently and accurately analyzing the genome graph spatial structure and coordinating the information remains a substantial challenge.

RESULTS: We developed a new method, a colored superbubble (cSupB), that can overcome the complexity of graphs and organize a set of species- or population-specific haplotype sequences of interest. Based on this model, we propose a tri-tuple coordinate system that combines an offset value, topological structure and sample information. Additionally, cSupB provides a novel method that utilizes complete topological information and efficiently detects small indels (< 50 bp) for highly similar samples, which can be validated by simulated datasets. Moreover, we demonstrated that cSupB can adapt to the complex cycle structure.

CONCLUSIONS: Although the solution is made suitable for increasingly complex genome graphs by relaxing the constraint, the directed acyclic graph, the motif cSupB and the cSupB method can be extended to any colored directed acyclic graph. We anticipate that our method will facilitate the analysis of individual haplotype variants and population genomic diversity. We have developed a C++ program for implementing our method that is available at https://github.com/eggleader/cSupB .}, } @article {pmid34039880, year = {2021}, author = {Simar, SR and Hanson, BM and Arias, CA}, title = {Techniques in bacterial strain typing: past, present, and future.}, journal = {Current opinion in infectious diseases}, volume = {34}, number = {4}, pages = {339-345}, pmid = {34039880}, issn = {1473-6527}, support = {P01 AI152999/AI/NIAID NIH HHS/United States ; R21 AI143229/AI/NIAID NIH HHS/United States ; R01 AI134637/AI/NIAID NIH HHS/United States ; T32 AI055449/AI/NIAID NIH HHS/United States ; K24 AI121296/AI/NIAID NIH HHS/United States ; K01 AI148593/AI/NIAID NIH HHS/United States ; R01 AI148342/AI/NIAID NIH HHS/United States ; }, mesh = {*Anti-Bacterial Agents ; Bacterial Typing Techniques ; *Disease Outbreaks ; Electrophoresis, Gel, Pulsed-Field ; Genome, Bacterial/genetics ; Humans ; Molecular Epidemiology ; Multilocus Sequence Typing ; }, abstract = {PURPOSE OF REVIEW: The advancement of molecular techniques such as whole-genome sequencing (WGS) has revolutionized the field of bacterial strain typing, with important implications for epidemiological surveillance and outbreak investigations. This review summarizes state-of-the-art techniques in strain typing and examines barriers faced by clinical and public health laboratories in implementing these new methodologies.

RECENT FINDINGS: WGS-based methodologies are on track to become the new 'gold standards' in bacterial strain typing, replacing traditional methods like pulsed-field gel electrophoresis and multilocus sequence typing. These new techniques have an improved ability to identify genetic relationships among organisms of interest. Further, advances in long-read sequencing approaches will likely provide a highly discriminatory tool to perform pangenome analyses and characterize relevant accessory genome elements, including mobile genetic elements carrying antibiotic resistance determinants in real time. Barriers to widespread integration of these approaches include a lack of standardized workflows and technical training.

SUMMARY: Genomic bacterial strain typing has facilitated a paradigm shift in clinical and molecular epidemiology. The increased resolution that these new techniques provide, along with epidemiological data, will facilitate the rapid identification of transmission routes with high confidence, leading to timely and effective deployment of infection control and public health interventions in outbreak settings.}, } @article {pmid34038628, year = {2021}, author = {Schörner, MA and Passarelli-Araujo, H and Scheffer, MC and Hartmann Barazzetti, F and Motta Martins, J and de Melo Machado, H and Palmeiro, JK and Bazzo, ML}, title = {Genomic analysis of Neisseria elongata isolate from a patient with infective endocarditis.}, journal = {FEBS open bio}, volume = {11}, number = {7}, pages = {1987-1996}, pmid = {34038628}, issn = {2211-5463}, mesh = {*Endocarditis/complications/genetics ; *Endocarditis, Bacterial/genetics ; Genomics ; Humans ; Neisseria/genetics ; *Neisseria elongata ; }, abstract = {Neisseria elongata is part of the commensal microbiota of the oropharynx. Although it is not considered pathogenic to humans, N. elongata has been implicated in several cases of infective endocarditis (IE). Here, we report a case of IE caused by N. elongata subsp. nitroreducens (Nel_M001) and compare its genome with 17 N. elongata genomes available in GenBank. We also evaluated resistance and virulence profiles with Comprehensive Antibiotic Resistance and Virulence Finder databases. The results showed a wide diversity among N. elongata isolates. Based on the pangenome cumulative curve, we demonstrate that N. elongata has an open pangenome. We found several different resistance genes, mainly associated with antibiotic efflux pumps. A wide range of virulence genes was observed, predominantly pilus formation genes. Nel_M001 was the only isolate to present two copies of some pilus genes and not present nspA gene. 
Together, our results provide insights into how this commensal microorganism can cause IE and may assist further biological investigations on nonpathogenic Neisseria spp. Case reporting and pangenome analyses are critical for enhancing our understanding of IE pathogenesis, as well as for alerting physicians and microbiologists to enable rapid identification and treatment to avoid unfavorable outcomes.}, } @article {pmid34034667, year = {2021}, author = {Bertazzoni, S and Jones, DAB and Phan, HT and Tan, KC and Hane, JK}, title = {Chromosome-level genome assembly and manually-curated proteome of model necrotroph Parastagonospora nodorum Sn15 reveals a genome-wide trove of candidate effector homologs, and redundancy of virulence-related functions within an accessory chromosome.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {382}, pmid = {34034667}, issn = {1471-2164}, support = {CUR00023//Grains Research And Development Corporation/ ; y95//Pawsey Supercomputing Centre and National Computational Infrastructure/ ; }, mesh = {Ascomycota ; Australia ; Chromosomes ; *Plant Diseases/genetics ; *Proteome ; Virulence/genetics ; }, abstract = {BACKGROUND: The fungus Parastagonospora nodorum causes septoria nodorum blotch (SNB) of wheat (Triticum aestivum) and is a model species for necrotrophic plant pathogens. The genome assembly of reference isolate Sn15 was first reported in 2007. P. nodorum infection is promoted by its production of proteinaceous necrotrophic effectors, three of which are characterised - ToxA, Tox1 and Tox3.

RESULTS: A chromosome-scale genome assembly of P. nodorum Australian reference isolate Sn15, which combined long read sequencing, optical mapping and manual curation, produced 23 chromosomes with 21 chromosomes possessing both telomeres. New transcriptome data were combined with fungal-specific gene prediction techniques and manual curation to produce a high-quality predicted gene annotation dataset, which comprises 13,869 high confidence genes, and an additional 2534 lower confidence genes retained to assist pathogenicity effector discovery. Comparison to a panel of 31 internationally-sourced isolates identified multiple hotspots within the Sn15 genome for mutation or presence-absence variation, which was used to enhance subsequent effector prediction. Effector prediction resulted in 257 candidates, of which 98 higher-ranked candidates were selected for in-depth analysis and revealed a wealth of functions related to pathogenicity. Additionally, 11 out of the 98 candidates also exhibited orthology conservation patterns that suggested lateral gene transfer with other cereal-pathogenic fungal species. Analysis of the pan-genome indicated the smallest chromosome of 0.4 Mbp length to be an accessory chromosome (AC23). AC23 was notably absent from an avirulent isolate and is predominated by mutation hotspots with an increase in non-synonymous mutations relative to other chromosomes. Surprisingly, AC23 was deficient in effector candidates, but contained several predicted genes with redundant pathogenicity-related functions.

CONCLUSIONS: We present an updated series of genomic resources for P. nodorum Sn15 - an important reference isolate and model necrotroph - with a comprehensive survey of its predicted pathogenicity content.}, } @article {pmid34025805, year = {2021}, author = {Maguvu, TE and Oladipo, AO and Bezuidenhout, CC}, title = {Analysis of Genome Sequences of Coagulase-Negative Staphylococci Isolates from South Africa and Nigeria Highlighted Environmentally Driven Heterogeneity.}, journal = {Journal of genomics}, volume = {9}, number = {}, pages = {26--37}, pmid = {34025805}, issn = {1839-9940}, abstract = {Here, we report high-quality annotated draft genomes of eight coagulase-negative staphylococci (CoNS) isolates obtained from South Africa and Nigeria. We explored the prevalence of antibiotic resistance and virulence genes, their association with mobile genetic elements. The pan-genomic analysis highlighted the environmentally driven heterogeneity of the isolates. Isolates from Nigeria had at least one gene for cadmium resistance/tolerance, these genes were not detected in isolates from South Africa. In contrast, isolates from South Africa had a tetM gene, which was not detected among the isolates from Nigeria. The observed genomic heterogeneity correlates with anthropogenic activities in the area where the isolates were collected. 
Moreover, the isolates used in this study possess an open pan-genome, which could easily explain the environmentally driven heterogeneity.}, } @article {pmid34025591, year = {2021}, author = {Fu, X and Gong, L and Liu, Y and Lai, Q and Li, G and Shao, Z}, title = {Bacillus pumilus Group Comparative Genomics: Toward Pangenome Features, Diversity, and Marine Environmental Adaptation.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {571212}, pmid = {34025591}, issn = {1664-302X}, abstract = {BACKGROUND: Members of the Bacillus pumilus group (abbreviated as the Bp group) are quite diverse and ubiquitous in marine environments, but little is known about correlation with their terrestrial counterparts. In this study, 16 marine strains that we had isolated before were sequenced and comparative genome analyses were performed with a total of 52 Bp group strains. The analyses included 20 marine isolates (which included the 16 new strains) and 32 terrestrial isolates, and their evolutionary relationships, differentiation, and environmental adaptation.

RESULTS: Phylogenomic analysis revealed that the marine Bp group strains were grouped into three species: B. pumilus, B. altitudinis and B. safensis. All the three share a common ancestor. However, members of B. altitudinis were observed to cluster independently, separating from the other two, thus diverging from the others. Consistent with the universal nature of genes involved in the functioning of the translational machinery, the genes related to translation were enriched in the core genome. Functional genomic analyses revealed that the marine-derived and the terrestrial strains showed differences in certain hypothetical proteins, transcriptional regulators, K+ transporter (TrK) and ABC transporters. However, species differences showed the precedence of environmental adaptation discrepancies. In each species, land specific genes were found with possible functions that likely facilitate survival in diverse terrestrial niches, while marine bacteria were enriched with genes of unknown functions and those related to transcription, phage defense, DNA recombination and repair.

CONCLUSION: Our results indicated that the Bp isolates show distinct genomic features even as they share a common core. The marine and land isolates did not evolve independently; the transition between marine and non-marine habitats might have occurred multiple times. The lineage exhibited a priority effect over the niche in driving their dispersal. Certain intra-species niche specific genes could be related to a strain's adaptation to its respective marine or terrestrial environment(s). In summary, this report describes the systematic evolution of 52 Bp group strains and will facilitate future studies toward understanding their ecological role and adaptation to marine and/or terrestrial environments.}, } @article {pmid34023905, year = {2021}, author = {Geoffroy, V and Guignard, T and Kress, A and Gaillard, JB and Solli-Nowlan, T and Schalk, A and Gatinois, V and Dollfus, H and Scheidecker, S and Muller, J}, title = {AnnotSV and knotAnnotSV: a web server for human structural variations annotations, ranking and analysis.}, journal = {Nucleic acids research}, volume = {49}, number = {W1}, pages = {W21--W28}, pmid = {34023905}, issn = {1362-4962}, mesh = {Genome, Human ; *Genomic Structural Variation ; Genomics ; Humans ; Internet ; Molecular Sequence Annotation ; Phenotype ; Polymorphism, Single Nucleotide ; *Software ; }, abstract = {With the dramatic increase of pangenomic analysis, Human geneticists have generated large amount of genomic data including millions of small variants (SNV/indel) but also thousands of structural variations (SV) mainly from next-generation sequencing and array-based techniques. While the identification of the complete SV repertoire of a patient is getting possible, the interpretation of each SV remains challenging. 
To help identifying human pathogenic SV, we have developed a web server dedicated to their annotation and ranking (AnnotSV) as well as their visualization and interpretation (knotAnnotSV) freely available at the following address: https://www.lbgi.fr/AnnotSV/. A large amount of annotations from >20 sources is integrated in our web server including among others genes, haploinsufficiency, triplosensitivity, regulatory elements, known pathogenic or benign genomic regions, phenotypic data. An ACMG/ClinGen compliant prioritization module allows the scoring and the ranking of SV into 5 SV classes from pathogenic to benign. Finally, the visualization interface displays the annotated SV in an interactive way including popups, search fields, filtering options, advanced colouring to highlight pathogenic SV and hyperlinks to the UCSC genome browser or other public databases. This web server is designed for diagnostic and research analysis by providing important resources to the user.}, } @article {pmid34022616, year = {2021}, author = {Rani, A and Ravindran, VB and Surapaneni, A and Mantri, N and Ball, AS}, title = {Review: Trends in point-of-care diagnosis for Escherichia coli O157:H7 in food and water.}, journal = {International journal of food microbiology}, volume = {349}, number = {}, pages = {109233}, doi = {10.1016/j.ijfoodmicro.2021.109233}, pmid = {34022616}, issn = {1879-3460}, mesh = {*Bacteriological Techniques ; Biosensing Techniques ; CRISPR-Cas Systems ; Escherichia coli Infections/microbiology/prevention & control ; Escherichia coli O157/*isolation & purification ; *Food Microbiology ; Humans ; *Point-of-Care Systems ; Polymerase Chain Reaction ; *Water Microbiology ; }, abstract = {Escherichia coli O157:H7, a Shiga-producing E. coli is a major pathogenic E. coli strain which since the early 1980s has become a crucial food and water-borne pathogen. 
Several management strategies can be applied to control the spread of infection; however early diagnosis represents the optimum preventive strategy to minimize the infection. Therefore, it is crucial to detect this pathogen in a fast and efficient manner in order to reduce the morbidity and mortality. Currently used gold standard tests rely on culture and pre-enrichment of E. coli O157:H7 from the contaminated source; they are time consuming and laborious. Molecular methods such as polymerase chain reaction are sensitive; however, they require expensive instrumentation. Therefore, there is a requirement for Accurate, Sensitive, Specific, User friendly, Rapid, Equipment free and Deliverable (ASSURED) detection methods for use in the laboratory and in the field. Emerging technologies such as isothermal amplification methods, biosensors, surface enhanced Raman Spectroscopy, paper-based diagnostics and smartphone-based digital methods are recognized as new approaches in the field of E. coli O157:H7 diagnostics and are discussed in this review. Mobile PCR and CRISPR-Cas diagnostic platforms have been identified as new tools in E. coli O157:H7 POC diagnostics with the potential for implementation by industry. This review describes advances and progress in the field of E. coli O157:H7 diagnosis in the context of food and water industry. The focus is on emerging high throughput point-of-care (POC) E. 
coli O157:H7 diagnostics and the requirement for the transformation to service routine diagnostics in the food and water industry.}, } @article {pmid34022437, year = {2021}, author = {Nasim, F and Dey, A and Qureshi, IA}, title = {Comparative genome analysis of Corynebacterium species: The underestimated pathogens with high virulence potential.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {93}, number = {}, pages = {104928}, doi = {10.1016/j.meegid.2021.104928}, pmid = {34022437}, issn = {1567-7257}, mesh = {Corynebacterium/classification/*genetics/*pathogenicity ; Corynebacterium Infections/*microbiology ; *Genome, Bacterial ; Humans ; Virulence ; Virulence Factors/*genetics ; }, abstract = {Non-diphtherial Corynebacterium species or diphtheroids were previously considered as the mere contaminants of clinical samples. Of late, they have been reckoned as the formidable infection causing agents of various diseases. While the scientific database is filled with articles that document whole genome analysis of individual isolates, a comprehensive comparative genomic analysis of diphtheroids alongside Corynebacterium diphtheriae is expected to enable us in understanding their genomic as well as evolutionary divergence. Here, we have analysed the whole genome sequences of forty strains that were selected from a range of eleven Corynebacterium species (pathogenic and non-pathogenic). A statistical analysis of the pan and core genomes revealed that even though the core genome is saturated, the pan genome is yet open rendering scope for newer gene families to be accumulated in the course of evolution that might further change the pathogenic behavior of these species. Every strain had bacteriophage components integrated in its genome and some of them were intact and consisted of toxins. 
The presence of diversified genomic islands was observed across the dataset and most of them consisted of genes for virulence and multidrug resistance. Moreover, the phylogenetic analysis showed that a diphtheroid is the last common ancestor of all the Corynebacterium species. The current study is a compilation of genomic features of pathogenic as well as non-pathogenic Corynebacterium species which provides insights into their virulence potential in the times to come.}, } @article {pmid34017083, year = {2021}, author = {Tao, Y and Luo, H and Xu, J and Cruickshank, A and Zhao, X and Teng, F and Hathorn, A and Wu, X and Liu, Y and Shatte, T and Jordan, D and Jing, H and Mace, E}, title = {Extensive variation within the pan-genome of cultivated and wild sorghum.}, journal = {Nature plants}, volume = {7}, number = {6}, pages = {766--773}, pmid = {34017083}, issn = {2055-0278}, mesh = {Crops, Agricultural/*genetics ; Domestication ; *Genetic Variation ; Genome Size ; *Genome, Plant ; Multigene Family ; Phylogeny ; Pigmentation/genetics ; Plant Proteins/*genetics ; Polymorphism, Single Nucleotide ; Seeds/genetics ; Sorghum/*genetics ; }, abstract = {Sorghum is a drought-tolerant staple crop for half a billion people in Africa and Asia, an important source of animal feed throughout the world and a biofuel feedstock of growing importance. Cultivated sorghum and its inter-fertile wild relatives constitute the primary gene pool for sorghum. Understanding and characterizing the diversity within this valuable resource is fundamental for its effective utilization in crop improvement. Here, we report analysis of a sorghum pan-genome to explore genetic diversity within the sorghum primary gene pool. We assembled 13 genomes representing cultivated sorghum and its wild relatives, and integrated them with 3 other published genomes to generate a pan-genome of 44,079 gene families with 222.6 Mb of new sequence identified. 
The pan-genome displays substantial gene-content variation, with 64% of gene families showing presence/absence variation among genomes. Comparisons between core genes and dispensable genes suggest that dispensable genes are important for sorghum adaptation. Extensive genetic variation was uncovered within the pan-genome, and the distribution of these variations was influenced by variation of recombination rate and transposable element content across the genome. We identified presence/absence variants that were under selection during sorghum domestication and improvement, and demonstrated that such variation had important phenotypic outcomes that could contribute to crop improvement. The constructed sorghum pan-genome represents an important resource for sorghum improvement and gene discovery.}, } @article {pmid34016748, year = {2021}, author = {Drott, MT and Rush, TA and Satterlee, TR and Giannone, RJ and Abraham, PE and Greco, C and Venkatesh, N and Skerker, JM and Glass, NL and Labbé, JL and Milgroom, MG and Keller, NP}, title = {Microevolution in the pansecondary metabolome of Aspergillus flavus and its potential macroevolutionary implications for filamentous fungi.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {118}, number = {21}, pages = {}, pmid = {34016748}, issn = {1091-6490}, mesh = {Aspergillus/classification/genetics/*metabolism ; Aspergillus flavus/classification/genetics/*metabolism ; Fungal Proteins/genetics/metabolism ; Genetic Speciation ; *Genome, Fungal ; Genomics ; *Metabolome ; Metagenomics ; Multigene Family ; Phylogeny ; Secondary Metabolism/*genetics ; United States ; }, abstract = {Fungi produce a wealth of pharmacologically bioactive secondary metabolites (SMs) from biosynthetic gene clusters (BGCs). It is common practice for drug discovery efforts to treat species' secondary metabolomes as being well represented by a single or a small number of representative genomes. 
However, this approach misses the possibility that intraspecific population dynamics, such as adaptation to environmental conditions or local microbiomes, may harbor novel BGCs that contribute to the overall niche breadth of species. Using 94 isolates of Aspergillus flavus, a cosmopolitan model fungus, sampled from seven states in the United States, we dereplicate 7,821 BGCs into 92 unique BGCs. We find that more than 25% of pangenomic BGCs show population-specific patterns of presence/absence or protein divergence. Population-specific BGCs make up most of the accessory-genome BGCs, suggesting that different ecological forces that maintain accessory genomes may be partially mediated by population-specific differences in secondary metabolism. We use ultra-high-performance high-resolution mass spectrometry to confirm that these genetic differences in BGCs also result in chemotypic differences in SM production in different populations, which could mediate ecological interactions and be acted on by selection. Thus, our results suggest a paradigm shift that previously unrealized population-level reservoirs of SM diversity may be of significant evolutionary, ecological, and pharmacological importance. Last, we find that several population-specific BGCs from A. 
flavus are present in Aspergillus parasiticus and Aspergillus minisclerotigenes and discuss how the microevolutionary patterns we uncover inform macroevolutionary inferences and help to align fungal secondary metabolism with existing evolutionary theory.}, } @article {pmid34014569, year = {2021}, author = {Gobin-Limballe, S and Ottolenghi, C and Reyal, F and Arnoux, JB and Magen, M and Simon, M and Brassier, A and Jabot-Hanin, F and Lonlay, P and Pontoizeau, C and Guirat, M and Rio, M and Gesny, R and Gigarel, N and Royer, G and Steffann, J and Munnich, A and Bonnefont, JP}, title = {OTC deficiency in females: Phenotype-genotype correlation based on a 130-family cohort.}, journal = {Journal of inherited metabolic disease}, volume = {44}, number = {5}, pages = {1235--1247}, doi = {10.1002/jimd.12404}, pmid = {34014569}, issn = {1573-2665}, mesh = {Family ; Female ; Genetic Association Studies ; Heterozygote ; Humans ; Liver/enzymology ; Male ; *Mutation ; Ornithine Carbamoyltransferase/*genetics ; Ornithine Carbamoyltransferase Deficiency Disease/*mortality ; }, abstract = {OTC deficiency, an inherited urea cycle disorder, is caused by mutations in the X-linked OTC gene. Phenotype-genotype correlations are well understood in males but still poorly known in females. Taking advantage of a cohort of 130 families (289 females), we assessed the relative contribution of OTC enzyme activity, X chromosome inactivation, and OTC gene sequencing to genetic counseling in heterozygous females. Twenty two percent of the heterozygous females were clinically affected, with episodic (11%), chronic (7.5%), or neonatal forms of the disease (3.5%). Overall mortality rate was 4%. OTC activity, ranging from 0% to 60%, did not correlate with phenotype at the individual level. 
Analysis of multiple samples from 4 mutant livers showed intra-hepatic variability of OTC activity and X inactivation profile (range of variability: 30% and 20%, respectively) without correlation between both parameters for 3 of the 4 livers. Ninety disease-causing variants were found, 27 of which were novel. Mutations were classified as "mild" or "severe," based on male phenotypes and/or in silico prediction. In our cohort, a serious disease occurred in 32% of females with a severe mutation, compared to 4% in females with a mild mutation (odds ratio = 1.365; P = 1.6e-06). These data should help prenatal diagnosis for heterozygous females and genetic counseling after fortuitous findings of OTC variants in pangenomic sequencing.}, } @article {pmid34008105, year = {2021}, author = {Feng, Z and Liu, X and Wang, M and Nie, Y and Wu, XL}, title = {A novel temperate phage, vB_PstS-pAN, induced from the naphthalene-degrading bacterium Pseudomonas stutzeri AN10.}, journal = {Archives of virology}, volume = {166}, number = {8}, pages = {2267--2272}, pmid = {34008105}, issn = {1432-8798}, support = {2018YFA0902100//National Key R&D Program of China/ ; 2018YFA0902103//National Key R&D Program of China/ ; 91951204//National Natural Science Foundation of China/ ; }, mesh = {Base Composition ; Genome Size ; Genome, Viral ; Mitomycin/pharmacology ; Phylogeny ; Pseudomonas stutzeri/genetics/*virology ; Siphoviridae/*classification/drug effects/isolation & purification/ultrastructure ; Virus Integration ; Virus Replication ; Whole Genome Sequencing/*methods ; }, abstract = {A novel temperate phage named vB_PstS-pAN was induced by mitomycin C treatment from the naphthalene-degrading bacterium Pseudomonas stutzeri AN10. The phage particles have icosahedral heads and long non-contractile tails, and vB_PstS-pAN can therefore be morphologically classified as a member of the family Siphoviridae. 
The whole genome of vB_PstS-pAN is 39,466 bp in length, with an 11-nt 3' overhang cohesive end. There are 53 genes in the vB_PstS-pAN genome, including genes responsible for phage integration, replication, morphogenesis, and bacterial lysis. The vB_PstS-pAN genome has low similarity to other phage genomes in the GenBank database, suggesting that vB_PstS-pAN is a novel member of the family Siphoviridae.}, } @article {pmid34007059, year = {2021}, author = {Moya-Beltrán, A and Beard, S and Rojas-Villalobos, C and Issotta, F and Gallardo, Y and Ulloa, R and Giaveno, A and Degli Esposti, M and Johnson, DB and Quatrini, R}, title = {Genomic evolution of the class Acidithiobacillia: deep-branching Proteobacteria living in extreme acidic conditions.}, journal = {The ISME journal}, volume = {15}, number = {11}, pages = {3221--3238}, pmid = {34007059}, issn = {1751-7370}, mesh = {Evolution, Molecular ; Genome, Bacterial/genetics ; *Genomics ; Phylogeny ; *Proteobacteria/genetics ; }, abstract = {Members of the genus Acidithiobacillus, now ranked within the class Acidithiobacillia, are model bacteria for the study of chemolithotrophic energy conversion under extreme conditions. Knowledge of the genomic and taxonomic diversity of Acidithiobacillia is still limited. Here, we present a systematic analysis of nearly 100 genomes from the class sampled from a wide range of habitats. Some of these genomes are new and others have been reclassified on the basis of advanced genomic analysis, thus defining 19 Acidithiobacillia lineages ranking at different taxonomic levels. This work provides the most comprehensive classification and pangenomic analysis of this deep-branching class of Proteobacteria to date. 
The phylogenomic framework obtained illuminates not only the evolutionary past of this lineage, but also the molecular evolution of relevant aerobic respiratory proteins, namely the cytochrome bo3 ubiquinol oxidases.}, } @article {pmid33990913, year = {2021}, author = {Li, Z and Song, Q and Wang, M and Ren, J and Liu, S and Zhao, S}, title = {Comparative genomics analysis of Pediococcus acidilactici species.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {59}, number = {6}, pages = {573--583}, pmid = {33990913}, issn = {1976-3794}, mesh = {Anti-Bacterial Agents/biosynthesis ; Bacteriocins/biosynthesis ; *Genome, Bacterial ; Genomics ; Pediococcus acidilactici/classification/*genetics/metabolism ; }, abstract = {Pediococcus acidilactici is a reliable bacteriocin producer and a promising probiotic species with wide application in the food and health industry. However, the underlying genetic features of this species have not been analyzed. In this study, we performed a comprehensive comparative genomic analysis of 41 P. acidilactici strains from various ecological niches. The bacteriocin production of 41 strains were predicted and three kinds of bacteriocin encoding genes were identified in 11 P. acidilactici strains, namely pediocin PA-1, enterolysin A, and colicin-B. Moreover, whole-genome analysis showed a high genetic diversity within the population, mainly related to a large proportion of variable genomes, mobile elements, and hypothetical genes obtained through horizontal gene transfer. In addition, comparative genomics also facilitated the genetic explanation of the adaptation for host environment, which specify the protection mechanism against the invasion of foreign DNA (i.e. CRISPR/Cas locus), as well as carbohydrate fermentation. The 41 strains of P. acidilactici can metabolize a variety of carbon sources, which enhances the adaptability of this species and survival in different environments. 
This study evaluated the antibacterial ability, genome evolution, and ecological flexibility of P. acidilactici from the perspective of genetics and provides strong supporting evidence for its industrial development and application.}, } @article {pmid33990699, year = {2021}, author = {Rasmussen, JA and Villumsen, KR and Duchêne, DA and Puetz, LC and Delmont, TO and Sveier, H and Jørgensen, LVG and Præbel, K and Martin, MD and Bojesen, AM and Gilbert, MTP and Kristiansen, K and Limborg, MT}, title = {Genome-resolved metagenomics suggests a mutualistic relationship between Mycoplasma and salmonid hosts.}, journal = {Communications biology}, volume = {4}, number = {1}, pages = {579}, pmid = {33990699}, issn = {2399-3642}, mesh = {Animals ; Gastrointestinal Microbiome/*genetics ; *Genome, Bacterial ; *Metagenome ; Mycoplasma/*genetics ; Phylogeny ; Salmonidae/*microbiology ; Sequence Analysis, DNA ; *Symbiosis ; }, abstract = {Salmonids are important sources of protein for a large proportion of the human population. Mycoplasma species are a major constituent of the gut microbiota of salmonids, often representing the majority of microbiota. Despite the frequent reported dominance of salmonid-related Mycoplasma species, little is known about the phylogenomic placement, functions and potential evolutionary relationships with their salmonid hosts. In this study, we utilise 2.9 billion metagenomic reads generated from 12 samples from three different salmonid host species to I) characterise and curate the first metagenome-assembled genomes (MAGs) of Mycoplasma dominating the intestines of three different salmonid species, II) establish the phylogeny of these salmonid candidate Mycoplasma species, III) perform a comprehensive pangenomic analysis of Mycoplasma, IV) decipher the putative functionalities of the salmonid MAGs and reveal specific functions expected to benefit the host. 
Our data provide a basis for future studies examining the composition and function of the salmonid microbiota.}, } @article {pmid33988716, year = {2021}, author = {Dieckmann, MA and Beyvers, S and Nkouamedjo-Fankep, RC and Hanel, PHG and Jelonek, L and Blom, J and Goesmann, A}, title = {EDGAR3.0: comparative genomics and phylogenomics on a scalable infrastructure.}, journal = {Nucleic acids research}, volume = {49}, number = {W1}, pages = {W185--W192}, pmid = {33988716}, issn = {1362-4962}, mesh = {Databases, Genetic ; *Genome, Microbial ; Genomics/*methods ; *Phylogeny ; *Software ; }, abstract = {The EDGAR platform, a web server providing databases of precomputed orthology data for thousands of microbial genomes, is one of the most established tools in the field of comparative genomics and phylogenomics. Based on precomputed gene alignments, EDGAR allows quick identification of the differential gene content, i.e. the pan genome, the core genome, or singleton genes. Furthermore, EDGAR features a wide range of analyses and visualizations like Venn diagrams, synteny plots, phylogenetic trees, as well as Amino Acid Identity (AAI) and Average Nucleotide Identity (ANI) matrices. During the last few years, the average number of genomes analyzed in an EDGAR project increased by two orders of magnitude. To handle this massive increase, a completely new technical backend infrastructure for the EDGAR platform was designed and launched as EDGAR3.0. For the calculation of new EDGAR3.0 projects, we are now using a scalable Kubernetes cluster running in a cloud environment. A new storage infrastructure was developed using a file-based high-performance storage backend which ensures timely data handling and efficient access. The new data backend guarantees a memory efficient calculation of orthologs, and parallelization has led to drastically reduced processing times. 
Based on the advanced technical infrastructure new analysis features could be implemented including POCP and FastANI genomes similarity indices, UpSet intersecting set visualization, and circular genome plots. Also the public database section of EDGAR was largely updated and now offers access to 24,317 genomes in 749 free-to-use projects. In summary, EDGAR 3.0 provides a new, scalable infrastructure for comprehensive microbial comparative gene content analysis. The web server is accessible at http://edgar3.computational.bio.}, } @article {pmid33987687, year = {2022}, author = {Figueiredo, G and Gomes, M and Covas, C and Mendo, S and Caetano, T}, title = {The Unexplored Wealth of Microbial Secondary Metabolites: the Sphingobacteriaceae Case Study.}, journal = {Microbial ecology}, volume = {83}, number = {2}, pages = {470--481}, pmid = {33987687}, issn = {1432-184X}, support = {SFRH/BD/98446/2013//Fundação para a Ciência e a Tecnologia/ ; CEECIND/01463/2017//Fundação para a Ciência e a Tecnologia/ ; UIDP/50017/2020+UIDB/50017/2020//Fundação para a Ciência e a Tecnologia/ ; }, mesh = {*Actinobacteria/genetics ; *Bacteroidetes/genetics ; Computational Biology ; Genomics/methods ; Humans ; Multigene Family ; }, abstract = {Research on secondary metabolites (SMs) has been mostly focused on Gram-positive bacteria, especially Actinobacteria. The association of genomics with robust bioinformatics tools revealed the neglected potential of Gram-negative bacteria as promising sources of new SMs. The family Sphingobacteriaceae belongs to the phylum Bacteroidetes having representatives in practically all environments including humans, rhizosphere, soils, wastewaters, among others. Some genera of this family have demonstrated great potential as plant growth promoters, bioremediators and producers of some value-added compounds such as carotenoids and antimicrobials. 
However, to date, Sphingobacteriaceae's SMs are still poorly characterized, and likewise, little is known about their chemistry. This study revealed that Sphingobacteriaceae pangenome encodes a total of 446 biosynthetic gene clusters (BGCs), which are distributed across 85 strains, highlighting the great potential of this bacterial family to produce SMs. Pedobacter, Mucilaginibacter and Sphingobacterium were the genera with the highest number of BGCs, especially those encoding the biosynthesis of ribosomally synthesized and post-translationally modified peptides (RiPPs), terpenes, polyketides and nonribosomal peptides (NRPs). In Mucilaginibacter and Sphingobacterium genera, M. lappiensis ATCC BAA-1855, Mucilaginibacter sp. OK098 (both with 11 BGCs) and Sphingobacterium sp. 21 (6 BGCs) are the strains with the highest number of BGCs. Most of the BGCs found in these two genera did not have significant hits with the MIBiG database. These results strongly suggest that the bioactivities and environmental functions of these compounds, especially RiPPs, PKs and NRPs, are still unknown. Among RiPPs, two genera encoded the production of class I and class III lanthipeptides. The last are associated with LanKC proteins bearing uncommon lyase domains, whose dehydration mechanism deserves further investigation. 
This study translated genomics into functional information that unveils the enormous potential of environmental Gram-negative bacteria to produce metabolites with unknown chemistries, bioactivities and, more importantly, unknown ecological roles.}, } @article {pmid33981291, year = {2021}, author = {Maturana, JL and Cárdenas, JP}, title = {Insights on the Evolutionary Genomics of the Blautia Genus: Potential New Species and Genetic Content Among Lineages.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {660920}, pmid = {33981291}, issn = {1664-302X}, abstract = {Blautia, a genus established in 2008, is a relevantly abundant taxonomic group present in the microbiome of human and other mammalian gastrointestinal (GI) tracts. Several described (or proposed) Blautia species are available at this date. However, despite the increasing level of knowledge about Blautia, its diversity is still poorly understood. The increasing availability of Blautia genomic sequences in the public databases opens the possibility to study this genus from a genomic perspective. Here we report the pangenome analysis and the phylogenomic study of 225 Blautia genomes available in RefSeq. We found 33 different potential species at the genomic level, 17 of them previously undescribed; we also confirmed by genomic standards the status of 4 previously proposed new Blautia species. Comparative genomic analyses suggest that the Blautia pangenome is open, with a relatively small core genome (∼ 700-800 gene families). Utilizing a set of representative genomes, we performed a gene family gain/loss model for the genus, showing that despite terminal nodes suffered more massive gene gain events than internal nodes (i.e., predicted ancestors), some ancestors were predicted to have gained an important number of gene families, some of them associated with the possible acquisition of metabolic abilities. Gene loss events remained lower than gain events in most cases. 
General aspects regarding pangenome composition and gene gain/loss events are discussed, as well as the proposition of changes in the taxonomic assignment of B. coccoides [TY] and the proposition of a new species, "B. pseudococcoides".}, } @article {pmid33979679, year = {2021}, author = {Almeida, OGG and Furlan, JPR and Stehling, EG and De Martinis, ECP}, title = {Comparative phylo-pangenomics reveals generalist lifestyles in representative Acinetobacter species and proposes candidate gene markers for species identification.}, journal = {Gene}, volume = {791}, number = {}, pages = {145707}, doi = {10.1016/j.gene.2021.145707}, pmid = {33979679}, issn = {1879-0038}, mesh = {Acinetobacter/classification/*genetics/isolation & purification ; Biomarkers ; DNA, Bacterial/genetics ; *Phylogeny ; Sequence Analysis, DNA ; *Species Specificity ; Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization/methods ; }, abstract = {Acinetobacter species have the potential to invade and colonize immunocompromised patients, therefore being well-known as opportunistic pathogens. Among these bacteria, the species of the Acinetobacter calcoaceticus-Acinetobacter baumannii "complex" (Acb members) emerge as the main often isolated bacteria in clinical specimens. The unequivocal taxonomy is crucial to correctly identify these species and associated with comparative genomic analyses aids to understand their life-styles as well. In this study, all publicly available Acinetobacter species at the date of this study preparation were analyzed. The results revealed that the Acb members are in fact a complex when phenotypic methods are confronted, while for comparative and phylogenomics analyses this term is misleading, since they composed a monophyletic group instead. Nine best gene markers (response regulator, recJ, recG, phosphomannomutase, pepSY, monovalent cation/H+ antiporter subunit D, mnmE, glnE, and bamA) were selected for identification of Acinetobacter species. 
Moreover, representative strains of each species were split according their isolation sources in the categories: environmental, human, insect and non-human vertebrate. Neither niche-specific genome signature nor niche-associated functional and pathogenic potential were associated with their isolation source, meaning it is not the main force acting on Acinetobacter adaptation in a given niche and corroborating that their ubiquitous distribution is a reflex of their generalist life-styles.}, } @article {pmid33972446, year = {2021}, author = {Crysnanto, D and Leonard, AS and Fang, ZH and Pausch, H}, title = {Novel functional sequences uncovered through a bovine multiassembly graph.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {118}, number = {20}, pages = {}, pmid = {33972446}, issn = {1091-6490}, mesh = {Animals ; Cattle/*genetics ; Female ; Male ; Whole Genome Sequencing ; }, abstract = {Many genomic analyses start by aligning sequencing reads to a linear reference genome. However, linear reference genomes are imperfect, lacking millions of bases of unknown relevance and are unable to reflect the genetic diversity of populations. This makes reference-guided methods susceptible to reference-allele bias. To overcome such limitations, we build a pangenome from six reference-quality assemblies from taurine and indicine cattle as well as yak. The pangenome contains an additional 70,329,827 bases compared to the Bos taurus reference genome. Our multiassembly approach reveals 30 and 10.1 million bases private to yak and indicine cattle, respectively, and between 3.3 and 4.4 million bases unique to each taurine assembly. Utilizing transcriptomes from 56 cattle, we show that these nonreference sequences encode transcripts that hitherto remained undetected from the B. taurus reference genome. 
We uncover genes, primarily encoding proteins contributing to immune response and pathogen-mediated immunomodulation, differentially expressed between Mycobacterium bovis-infected and noninfected cattle that are also undetectable in the B. taurus reference genome. Using whole-genome sequencing data of cattle from five breeds, we show that reads which were previously misaligned against the Bos taurus reference genome now align accurately to the pangenome sequences. This enables us to discover 83,250 polymorphic sites that segregate within and between breeds of cattle and capture genetic differentiation across breeds. Our work makes a so-far unused source of variation amenable to genetic investigations and provides methods and a framework for establishing and exploiting a more diverse reference genome.}, } @article {pmid33967985, year = {2021}, author = {Yang, S and Xie, X and Ma, J and He, X and Li, Y and Du, M and Li, L and Yang, L and Wu, Q and Chen, W and Zhang, J}, title = {Selective Isolation of Bifidobacterium From Human Faeces Using Pangenomics, Metagenomics, and Enzymology.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {649698}, pmid = {33967985}, issn = {1664-302X}, abstract = {Bifidobacterium, an important genus for human health, is difficult to isolate. We applied metagenomics, pangenomics, and enzymology to determine the dominant glycoside hydrolase (GH) families of Bifidobacterium and designed selective medium for Bifidobacterium isolation. Pangenomics results showed that the GH13, GH3, GH42, and GH43 families were highly conserved in Bifidobacterium. Metagenomic analysis of GH families in human faecal samples was performed. The results indicated that Bifidobacterium contains core GHs for utilizing raffinose, D-trehalose anhydrous, D(+)-cellobiose, melibiose, lactulose, lactose, D(+)-sucrose, resistant starch, pullulan, xylan, and glucan. 
These carbohydrates as the main carbon sources were applied for selective media, which were more conducive to the growth of bifidobacteria. In the medium with lactose, raffinose and xylan as the main carbon sources, the ratio of cultivable bifidobacteria to cultivable microorganisms were 89.39% ± 2.50%, 71.45% ± 0.99%, and 53.95% ± 1.22%, respectively, whereas the ratio in the ordinary Gifu anaerobic medium was only 17.90% ± 0.58%. Furthermore, the species significantly (p < 0.05) varied among samples from different individuals. Results suggested that xylan might be a prebiotic that benefits host health, and it is feasible to screen and isolate bifidobacteria using the oligosaccharides corresponding to the specific GHs of bifidobacteria as the carbon sources of the selective media.}, } @article {pmid33964091, year = {2021}, author = {Barchi, L and Rabanus-Wallace, MT and Prohens, J and Toppino, L and Padmarasu, S and Portis, E and Rotino, GL and Stein, N and Lanteri, S and Giuliano, G}, title = {Improved genome assembly and pan-genome provide key insights into eggplant domestication and breeding.}, journal = {The Plant journal : for cell and molecular biology}, volume = {107}, number = {2}, pages = {579-596}, pmid = {33964091}, issn = {1365-313X}, mesh = {Chromosome Mapping ; *Domestication ; Genes, Plant/genetics ; Genetic Variation ; Genome, Plant/*genetics ; *Plant Breeding ; Polymorphism, Single Nucleotide/genetics ; Quantitative Trait, Heritable ; Solanum melongena/*genetics/growth & development ; Whole Genome Sequencing ; }, abstract = {Eggplant (Solanum melongena L.) is an important horticultural crop and one of the most widely grown vegetables from the Solanaceae family. It was domesticated from a wild, prickly progenitor carrying small, round, non-anthocyanic fruits. We obtained a novel, highly contiguous genome assembly of the eggplant '67/3' reference line, by Hi-C retrofitting of a previously released short read- and optical mapping-based assembly. 
The sizes of the 12 chromosomes and the fraction of anchored genes in the improved assembly were comparable to those of a chromosome-level assembly. We resequenced 23 accessions of S. melongena representative of the worldwide phenotypic, geographic, and genetic diversity of the species, and one each from the closely related species Solanum insanum and Solanum incanum. The eggplant pan-genome contained approximately 51.5 additional megabases and 816 additional genes compared with the reference genome, while the pan-plastome showed little genetic variation. We identified 53 selective sweeps related to fruit color, prickliness, and fruit shape in the nuclear genome, highlighting selection leading to the emergence of present-day S. melongena cultivars from its wild ancestors. Candidate genes underlying the selective sweeps included a MYBL1 repressor and CHALCONE ISOMERASE (for fruit color), homologs of Arabidopsis GLABRA1 and GLABROUS INFLORESCENCE STEMS2 (for prickliness), and orthologs of tomato FW2.2, OVATE, LOCULE NUMBER/WUSCHEL, SUPPRESSOR OF OVATE, and CELL SIZE REGULATOR (for fruit size/shape), further suggesting that selection for the latter trait relied on a common set of orthologous genes in tomato and eggplant.}, } @article {pmid33963386, year = {2021}, author = {Whelan, FJ and Hall, RJ and McInerney, JO}, title = {Evidence for Selection in the Abundant Accessory Gene Content of a Prokaryote Pangenome.}, journal = {Molecular biology and evolution}, volume = {38}, number = {9}, pages = {3697-3708}, pmid = {33963386}, issn = {1537-1719}, support = {BB/N018044/1//BBSRC/ ; }, mesh = {*Genome ; Phylogeny ; *Prokaryotic Cells ; }, abstract = {A pangenome is the complete set of genes (core and accessory) present in a phylogenetic clade. We hypothesize that a pangenome's accessory gene content is structured and maintained by selection. 
To test this hypothesis, we interrogated the genomes of 40 Pseudomonas species for statistically significant coincident (i.e., co-occurring/avoiding) gene patterns. We found that 86.7% of common accessory genes are involved in ≥1 coincident relationship. Further, genes that co-occur and/or avoid each other—but are not vertically inherited—are more likely to share functional categories, are more likely to be simultaneously transcribed, and are more likely to produce interacting proteins, than would be expected by chance. These results are not due to coincident genes being adjacent to one another on the chromosome. Together, these findings suggest that the accessory genome is structured into sets of genes that function together within a given strain. Given the similarity of the Pseudomonas pangenome with open pangenomes of other prokaryotic species, we speculate that these results are generalizable.}, } @article {pmid33961980, year = {2021}, author = {Buzzanca, D and Botta, C and Ferrocino, I and Alessandria, V and Houf, K and Rantsiou, K}, title = {Functional pangenome analysis reveals high virulence plasticity of Aliarcobacter butzleri and affinity to human mucus.}, journal = {Genomics}, volume = {113}, number = {4}, pages = {2065-2076}, doi = {10.1016/j.ygeno.2021.05.001}, pmid = {33961980}, issn = {1089-8646}, mesh = {Animals ; *Arcobacter/genetics ; Genome, Bacterial ; Genomics ; Humans ; Mucus ; Phylogeny ; Swine ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Aliarcobacter butzleri is an emerging pathogen that may cause enteritis in humans; however, the incidence of disease caused by this member of the Campylobacteriaceae family is still underestimated. Furthermore, little is known about the precise virulence mechanism and behavior during infection. Therefore, in the present study, through complementary use of comparative genomics and physiological tests on human gut models, we sought to elucidate the genetic background of a set of 32 A. 
butzleri strains of diverse origin and to explore the correlation with the ability to colonize and invade human intestinal cells in vitro. The simulated infection of human intestinal models showed a higher colonization rate in presence of mucus-producing cells. For some strains, human mucus significantly improved the resistance to physical removal from the in vitro mucosa, while short time-frame growth was even observed. Pangenome analysis highlighted a hypervariable accessory genome, not strictly correlated to the isolation source. Likewise, the strain phylogeny was unrelated to their shared origin, despite a certain degree of segregation was observed among strains isolated from different segments of the intestinal tract of pigs. The putative virulence genes detected in all strains were mostly encompassed in the accessory fraction of the pangenome. The LPS biosynthesis and in particular the chain glycosylation of the O-antigen is harbored in a region of high plasticity of the pangenome, which would indicate frequent horizontal gene transfer phenomena, as well as the involvement of this hypervariable structure in the adaptive behavior and sympatric evolution of A. butzleri. Results of the present study deepen the current knowledge on A. 
butzleri pangenome by extending the pool of genes regarded as virulence markers and provide bases to develop new diagnostic approaches for the detection of those strains with a higher virulence potential.}, } @article {pmid33961228, year = {2021}, author = {Fagorzi, C and Checcucci, A}, title = {A Compendium of Bioinformatic Tools for Bacterial Pangenomics to Be Used by Wet-Lab Scientists.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2242}, number = {}, pages = {233-243}, pmid = {33961228}, issn = {1940-6029}, mesh = {Bacteria/*genetics ; *Computational Biology ; DNA, Bacterial/*genetics ; *Genome, Bacterial ; *Genomics ; Software ; }, abstract = {Making use of mathematics and statistics, bioinformatics helps biologists to quickly obtain information from a huge amount of experimental data. Nowadays, a large number of web- and computer-based tools are available, allowing more unskilled scientists to be familiar with data analysis techniques. The present chapter gives an overview of the most easy-to-use tools and software packages for bacterial genes and genome analysis present on the Web, with the aim to mainly help wet-lab researcher at undergraduate and postgraduate levels to introduce them to bioinformatics analysis of biological data.}, } @article {pmid33961221, year = {2021}, author = {Vanni, C}, title = {Accurate Annotation of Microbial Metagenomic Genes and Identification of Core Sets.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2242}, number = {}, pages = {115-138}, pmid = {33961221}, issn = {1940-6029}, mesh = {Databases, Genetic ; *Genome, Microbial ; *Metagenome ; *Metagenomics ; Multigene Family ; Phylogeny ; Research Design ; Workflow ; }, abstract = {In the past decade, metagenomics studies of microbial communities have added billions of sequences to the databases. 
This extensive amount of data and information has the potential to widen our understanding of the functioning of microbial communities and their roles in the environment. A fundamental step in this process is the functional and taxonomic profiling of the metagenomes, through an accurate gene annotation. This gene-level information can then be placed in the genomic context of metagenome-assembled genomes. Then, on a broader level, we can place this combined data into the context of a pangenome and start characterizing core and accessory gene sets. In this chapter, we provide a workflow to create an annotated gene catalog and to identify core sets of genes in the context of a pangenome. The first section will focus on the methods to provide metagenomic genes with accurate annotations. The second part will describe how to combine the gene catalog information with metagenome-assembled genomes and how to use both to build and investigate a pangenome.}, } @article {pmid33961220, year = {2021}, author = {Zoledowska, S and Motyka-Pomagruk, A and Misztak, A and Lojkowska, E}, title = {Comparative Genomics, from the Annotated Genome to Valuable Biological Information: A Case Study.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2242}, number = {}, pages = {91-112}, pmid = {33961220}, issn = {1940-6029}, mesh = {DNA, Bacterial/*genetics ; Databases, Genetic ; Dickeya/*genetics ; *Genome, Bacterial ; *Genomics ; *High-Throughput Nucleotide Sequencing ; Pectobacterium/*genetics ; Research Design ; *Sequence Analysis, DNA ; Software Design ; Workflow ; }, abstract = {High availability of fast, cheap, and high-throughput next generation sequencing techniques resulted in acquisition of numerous de novo sequenced and assembled bacterial genomes. It rapidly became clear that digging out useful biological information from such a huge amount of data presents a considerable challenge. 
In this chapter we share our experience with utilization of several handy open source comparative genomic tools. All of them were applied in the studies focused on revealing inter- and intraspecies variation in pectinolytic plant pathogenic bacteria classified to Dickeya solani and Pectobacterium parmentieri. As the described software performed well on the species within the Pectobacteriaceae family, it presumably may be readily utilized on some closely related taxa from the Enterobacteriaceae family. First of all, implementation of various annotation software is discussed and compared. Then, tools computing whole genome comparisons including generation of circular juxtapositions of multiple sequences, revealing the order of synteny blocks or calculation of ANI or Tetra values are presented. Besides, web servers intended either for functional annotation of the genes of interest or for detection of genomic islands, plasmids, prophages, CRISPR/Cas are described. Last but not least, utilization of the software designed for pangenome studies and the further downstream analyses is explained. 
The presented work not only summarizes broad possibilities assured by the comparative genomic approach but also provides a user-friendly guide that might be easily followed by nonbioinformaticians interested in undertaking similar studies.}, } @article {pmid33957463, year = {2021}, author = {Nguyen, TL and Pham Thi, HH}, title = {Genome-wide comparison of coronaviruses derived from veterinary animals: A canine and feline perspective.}, journal = {Comparative immunology, microbiology and infectious diseases}, volume = {76}, number = {}, pages = {101654}, doi = {10.1016/j.cimid.2021.101654}, pmid = {33957463}, issn = {1878-1667}, mesh = {Animals ; *Cat Diseases ; Cats ; *Coronavirus, Canine/genetics ; *Coronavirus, Feline/genetics ; *Dog Diseases ; Dogs ; Genome, Viral ; Phylogeny ; }, abstract = {Feline- and canine-derived coronaviruses (FCoVs and CCoVs) are widespread among dog and cat populations. This study was to understand the route of disease origin and viral transmission in veterinary animals and in human through comparative pan-genomic analysis of coronavirus sequences, especially retrieved from genomes of FCoV and CCoV. Average nucleotide identity based on complete genomes might clustered CoV strains according to their infected host, with an exception of type II of CCoV (accession number KC175339) that was clustered closely to virulent FCoVs. In contrast, the hierarchical clustering based on gene repertories retrieved from pan-genome analysis might divided the examined coronaviruses into host-independent clusters, and formed obviously the cluster of Alphacoronaviruses into sub-clusters of feline-canine, only feline, feline-canine-human coronavirus. 
Also, functional analysis of genomic subsets might help to divide FCoV and CCoV pan-genomes into (i) clusters of core genes encoding spike, membrane, nucleocapsid proteins, and ORF1ab polyprotein; (ii) clusters of core-like genes encoding nonstructural proteins; (iii) clusters of accessory genes encoding the ORF1A; and (iv) two singleton genes encoding nonstructural protein and polyprotein 1ab. Seven clusters of gene repertories were categorized as common to the FCoV and/or CCoV genomes including pantropic and high virulent strains, illustrating that distinct core-like genes/accessory genes concerning to their pathogenicity should be exploited in further biotype analysis of new isolate. In conclusion, the phylogenomic analyses have allowed the identification of trends in the viral genomic data, especially in developing a specific control measures against coronavirus disease, such as the selection of good markers for differentiating new species from common and/or pantropic isolates.}, } @article {pmid33952861, year = {2021}, author = {Prondzinsky, P and Berkemer, SJ and Ward, LM and McGlynn, SE}, title = {The Thermosynechococcus Genus: Wide Environmental Distribution, but a Highly Conserved Genomic Core.}, journal = {Microbes and environments}, volume = {36}, number = {2}, pages = {}, pmid = {33952861}, issn = {1347-4405}, mesh = {Adaptation, Physiological ; Ecosystem ; *Genome, Bacterial ; Genomics ; Hot Springs/microbiology ; Japan ; Phylogeny ; Thermosynechococcus/classification/*genetics/*isolation & purification/physiology ; }, abstract = {Cyanobacteria thrive in diverse environments. However, questions remain about possible growth limitations in ancient environmental conditions. As a single genus, the Thermosynechococcus are cosmopolitan and live in chemically diverse habitats. To understand the genetic basis for this, we compared the protein coding component of Thermosynechococcus genomes. 
Supplementing the known genetic diversity of Thermosynechococcus, we report draft metagenome-assembled genomes of two Thermosynechococcus recovered from ferrous carbonate hot springs in Japan. We find that as a genus, Thermosynechococcus is genomically conserved, having a small pan-genome with few accessory genes per individual strain as well as few genes that are unique to the genus. Furthermore, by comparing orthologous protein groups, including an analysis of genes encoding proteins with an iron related function (uptake, storage or utilization), no clear differences in genetic content, or adaptive mechanisms could be detected between genus members, despite the range of environments they inhabit. Overall, our results highlight a seemingly innate ability for Thermosynechococcus to inhabit diverse habitats without having undergone substantial genomic adaptation to accommodate this. The finding of Thermosynechococcus in both hot and high iron environments without adaptation recognizable from the perspective of the proteome has implications for understanding the basis of thermophily within this clade, and also for understanding the possible genetic basis for high iron tolerance in cyanobacteria on early Earth. 
The conserved core genome may be indicative of an allopatric lifestyle—or reduced genetic complexity of hot spring habitats relative to other environments.}, } @article {pmid33942475, year = {2021}, author = {Torkamaneh, D and Lemay, MA and Belzile, F}, title = {The pan-genome of the cultivated soybean (PanSoy) reveals an extraordinarily conserved gene content.}, journal = {Plant biotechnology journal}, volume = {19}, number = {9}, pages = {1852-1862}, pmid = {33942475}, issn = {1467-7652}, mesh = {*Fabaceae ; Genome, Plant/genetics ; Genomics ; Plant Breeding ; *Soybeans/genetics ; }, abstract = {Studies on structural variation in plants have revealed the inadequacy of a single reference genome for an entire species and suggest that it is necessary to build a species-representative genome called a pan-genome to better capture the extent of both structural and nucleotide variation. Here, we present a pan-genome of cultivated soybean (Glycine max), termed PanSoy, constructed using the de novo genome assembly of 204 phylogenetically and geographically representative improved accessions selected from the larger GmHapMap collection. PanSoy uncovers 108 Mb (∼11%) of novel nonreference sequences encompassing 3621 protein-coding genes (including 1659 novel genes) absent from the soybean 'Williams 82' reference genome. Nonetheless, the core genome represents an exceptionally large proportion of the genome, with >90.6% of genes being shared by >99% of the accessions. A majority of PAVs encompassing genes could be confirmed with long-read sequencing on a subset of accessions. 
The PanSoy is a major step towards capturing the extent of genetic variation in cultivated soybean and provides a resource for soybean genomics research and breeding.}, } @article {pmid33941519, year = {2021}, author = {Lawal, OU and Fraqueza, MJ and Worning, P and Bouchami, O and Bartels, MD and Goncalves, L and Paixão, P and Goncalves, E and Toscano, C and Empel, J and Urbaś, M and Domínguez, MA and Westh, H and de Lencastre, H and Miragaia, M}, title = {Staphylococcus saprophyticus Causing Infections in Humans Is Associated with High Resistance to Heavy Metals.}, journal = {Antimicrobial agents and chemotherapy}, volume = {65}, number = {7}, pages = {e0268520}, pmid = {33941519}, issn = {1098-6596}, mesh = {Animals ; *Arsenic ; Cadmium ; Copper ; Humans ; *Metals, Heavy ; Microbial Sensitivity Tests ; Staphylococcus saprophyticus ; }, abstract = {Staphylococcus saprophyticus is a common pathogen of the urinary tract, a heavy metal-rich environment, but information regarding its heavy metal resistance is unknown. We investigated 422 S. saprophyticus isolates from human infection and colonization/contamination, animals, and environmental sources for resistance to copper, zinc, arsenic, and cadmium using the agar dilution method. To identify the genes associated with metal resistance and assess possible links to pathogenicity, we accessed the whole-genome sequence of all isolates and used in silico and pangenome-wide association approaches. The MIC values for copper and zinc were uniformly high (1,600 mg/liter). Genes encoding copper efflux pumps (copA, copB, copZ, mco, and csoR) and zinc transporters (zinT, czrAB, znuBC, and zur) were abundant in the population (20 to 100%). Arsenic and cadmium showed various susceptibility levels. Genes encoding the ars operon (arsRDABC), an ABC transporter and a two-component permease, were linked to resistance to arsenic (MICs ≥ 1,600 mg/liter; 14% [58/422]; P < 0.05). 
At least three cad genes (cadA or cadC and cadD-cadX or czrC) and genes encoding multidrug efflux pumps and hyperosmoregulation in acidified conditions were associated with resistance to cadmium (MICs ≥ 200 mg/liter; 20% [85/422]; P < 0.05). These resistance genes were frequently carried by mobile genetic elements. Resistance to arsenic and cadmium were linked to human infection and a clonal lineage originating in animals (P < 0.05). Altogether, S. saprophyticus was highly resistant to heavy metals and accumulated multiple metal resistance determinants. The highest arsenic and cadmium resistance levels were associated with infection, suggesting resistance to these metals is relevant for S. saprophyticus pathogenicity.}, } @article {pmid33936009, year = {2021}, author = {Mavrodi, OV and McWilliams, JR and Peter, JO and Berim, A and Hassan, KA and Elbourne, LDH and LeTourneau, MK and Gang, DR and Paulsen, IT and Weller, DM and Thomashow, LS and Flynt, AS and Mavrodi, DV}, title = {Root Exudates Alter the Expression of Diverse Metabolic, Transport, Regulatory, and Stress Response Genes in Rhizosphere Pseudomonas.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {651282}, pmid = {33936009}, issn = {1664-302X}, support = {P20 GM103476/GM/NIGMS NIH HHS/United States ; }, abstract = {Plants live in association with microorganisms that positively influence plant development, vigor, and fitness in response to pathogens and abiotic stressors. The bulk of the plant microbiome is concentrated belowground at the plant root-soil interface. Plant roots secrete carbon-rich rhizodeposits containing primary and secondary low molecular weight metabolites, lysates, and mucilages. These exudates provide nutrients for soil microorganisms and modulate their affinity to host plants, but molecular details of this process are largely unresolved. 
We addressed this gap by focusing on the molecular dialog between eight well-characterized beneficial strains of the Pseudomonas fluorescens group and Brachypodium distachyon, a model for economically important food, feed, forage, and biomass crops of the grass family. We collected and analyzed root exudates of B. distachyon and demonstrated the presence of multiple carbohydrates, amino acids, organic acids, and phenolic compounds. The subsequent screening of bacteria by Biolog Phenotype MicroArrays revealed that many of these metabolites provide carbon and energy for the Pseudomonas strains. RNA-seq profiling of bacterial cultures amended with root exudates revealed changes in the expression of genes encoding numerous catabolic and anabolic enzymes, transporters, transcriptional regulators, stress response, and conserved hypothetical proteins. Almost half of the differentially expressed genes mapped to the variable part of the strains' pangenome, reflecting the importance of the variable gene content in the adaptation of P. fluorescens to the rhizosphere lifestyle. Our results collectively reveal the diversity of cellular pathways and physiological responses underlying the establishment of mutualistic interactions between these beneficial rhizobacteria and their plant hosts.}, } @article {pmid33934838, year = {2021}, author = {Sharma, P and Sharma, BS and Verma, RJ}, title = {CRISPR-based genome editing of zebrafish.}, journal = {Progress in molecular biology and translational science}, volume = {180}, number = {}, pages = {69-84}, doi = {10.1016/bs.pmbts.2021.01.005}, pmid = {33934838}, issn = {1878-0814}, mesh = {Animals ; CRISPR-Cas Systems/genetics ; *Gene Editing ; Genetic Engineering ; Genetic Therapy ; *Zebrafish/genetics ; }, abstract = {CRISPR/Cas9, once discovered as an adaptive immune system in bacteria, has emerged as a disruptive technology in the field of genetic engineering. 
Technological advancements in the recent past have enhanced the applicability of the CRISPR/Cas9 tool for gene editing, gene therapies, developmental studies and mutational analysis in various model organisms. Zebrafish, one of the excellent animal models, is preferred for conducting CRISPR/Cas9 studies to assess the functional implication of specific genes of interest. CRISPR/Cas9 mediated gene editing techniques, such as knock-out and knock-in approaches, provide evidence to identify the role of different genes through loss-of-function studies. Also, CRISPR/Cas9 has been proved to be an efficient tool for designing disease models for gene expression studies based on phenotypic screening. The present chapter provides an overview of CRISPR/Cas9 mechanism, different strategies for DNA modifications and gene function analysis, highlighting the translational applications for future prospects, such as screening of drug toxicity and efficacy.}, } @article {pmid33929893, year = {2021}, author = {Miga, KH and Wang, T}, title = {The Need for a Human Pangenome Reference Sequence.}, journal = {Annual review of genomics and human genetics}, volume = {22}, number = {}, pages = {81-102}, pmid = {33929893}, issn = {1545-293X}, support = {U01 HG009391/HG/NHGRI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; R01 HG007175/HG/NHGRI NIH HHS/United States ; R01 HG011274/HG/NHGRI NIH HHS/United States ; U01 CA200060/CA/NCI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; UM1 HG011585/HG/NHGRI NIH HHS/United States ; R25 DA027995/DA/NIDA NIH HHS/United States ; U24 ES026699/ES/NIEHS NIH HHS/United States ; }, mesh = {*Genome, Human ; *Genomics ; Humans ; }, abstract = {The reference human genome sequence is inarguably the most important and widely used resource in the fields of human genetics and genomics. It has transformed the conduct of biomedical sciences and brought invaluable benefits to the understanding and improvement of human health. 
However, the commonly used reference sequence has profound limitations, because across much of its span, it represents the sequence of just one human haplotype. This single, monoploid reference structure presents a critical barrier to representing the broad genomic diversity in the human population. In this review, we discuss the modernization of the reference human genome sequence to a more complete reference of human genomic diversity, known as a human pangenome.}, } @article {pmid33929503, year = {2021}, author = {Chambers, J and Sparks, N and Sydney, N and Livingstone, PG and Cookson, AR and Whitworth, DE}, title = {Corrigendum to "Comparative genomics and pan-genomics of the Myxococcaceae, including a description of five novel species: Myxococcus eversor sp. nov., Myxococcus llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogochensis sp. nov., Myxococcus vastator sp. nov., Pyxidicoccus caerfyrddinensis sp. nov. and Pyxidicoccus trucidator sp. nov." [Genome Biol. Evol. 12(12) (2020) 2289-2302].}, journal = {Genome biology and evolution}, volume = {13}, number = {4}, pages = {}, doi = {10.1093/gbe/evab040}, pmid = {33929503}, issn = {1759-6653}, } @article {pmid33927459, year = {2021}, author = {Puri, A and Bajaj, A and Lal, S and Singh, Y and Lal, R}, title = {Phylogenomic Framework for Taxonomic Delineation of Paracoccus spp. and Exploration of Core-Pan Genome.}, journal = {Indian journal of microbiology}, volume = {61}, number = {2}, pages = {180-194}, pmid = {33927459}, issn = {0046-8991}, abstract = {The taxonomic classification of metabolically versatile Paracoccus spp. has been so far performed using polyphasic approach. The topology of single gene phylogenies, however, has highlighted ambiguous species assignments. In the present study, genome based multi-gene phylogenies and overall genome related index were used for species threshold assessment. 
Comprehensive phylogenomic analysis of Paracoccus genomes (n = 103) showed concordant clustering of strains across multi-gene marker set phylogenies (nMC = 0.08-0.14); as compared to 16S rDNA phylogeny (nMC = 0.37-0.42) suggesting robustness of multi gene phylogenies in drawing phylogenetic inferences. Functional gene content distribution across the genus showed that only 1.7% gene content constitutes the core genome highlighting the significance of extensive genomic variability in the evolution of Paracoccus spp. Further, genome metrics were used to validate characterized strains, identifying classification anomalies (n = 13), and based on this, genome derived taxonomic amendments were notified in present study. Conclusively, validated metric tools can be employed on whole genome sequences, including draft assemblies, for the assessment and assignment of uncharacterized strains and species level ascription of newly isolated Paracoccus strains in future.}, } @article {pmid33926017, year = {2021}, author = {Grassi, F and De Lorenzis, G}, title = {Back to the Origins: Background and Perspectives of Grapevine Domestication.}, journal = {International journal of molecular sciences}, volume = {22}, number = {9}, pages = {}, pmid = {33926017}, issn = {1422-0067}, mesh = {Crops, Agricultural/genetics ; *Domestication ; Fruit ; Genomics ; Humans ; Phylogeny ; Plant Breeding/methods ; Vitis/*genetics/*metabolism ; }, abstract = {Domestication is a process of selection driven by humans, transforming wild progenitors into domesticated crops. The grapevine (Vitis vinifera L.), besides being one of the most extensively cultivated fruit trees in the world, is also a fascinating subject for evolutionary studies. The domestication process started in the Near East and the varieties obtained were successively spread and cultivated in different areas. 
Whether the domestication occurred only once, or whether successive domestication events occurred independently, is a highly debated mystery. Moreover, introgression events, breeding and intense trade in the Mediterranean basin have followed, in the last thousands of years, obfuscating the genetic relationships. Although a succession of studies has been carried out to explore grapevine origin and different evolution models are proposed, an overview of the topic remains pending. We review here the findings obtained in the main phylogenetic and genomic studies proposed in the last two decades, to clarify the fundamental questions regarding where, when and how many times grapevine domestication took place. Finally, we argue that the realization of the pan-genome of grapes could be a useful resource to discover and track the changes which have occurred in the genomes and to improve our understanding about the domestication.}, } @article {pmid33925684, year = {2021}, author = {Parker, CT and Huynh, S and Alexander, A and Oliver, AS and Cooper, KK}, title = {Genomic Characterization of Salmonella typhimurium DT104 Strains Associated with Cattle and Beef Products.}, journal = {Pathogens (Basel, Switzerland)}, volume = {10}, number = {5}, pages = {}, pmid = {33925684}, issn = {2076-0817}, support = {2030-42000- 644 051-00D//Agricultural Research Service CRIS project/ ; }, abstract = {Salmonella enterica subsp. enterica serovar Typhimurium DT104, a multidrug-resistant phage type, has emerged globally as a major cause of foodborne outbreaks particularly associated with contaminated beef products. In this study, we sequenced three S. Typhimurium DT104 strains associated with a 2009 outbreak caused by ground beef, including the outbreak source strain and two clinical strains. The goal of the study was to gain a stronger understanding of the genomics and genomic epidemiology of highly clonal S. typhimurium DT104 strains associated with bovine sources. 
Our study found no single nucleotide polymorphisms (SNPs) between the ground beef source strain and the clinical isolates from the 2009 outbreak. SNP analysis including twelve other S. typhimurium strains from bovine and clinical sources, including both DT104 and non-DT104, determined DT104 strains averaged 55.0 SNPs between strains compared to 474.5 SNPs among non-DT104 strains. Phylogenetic analysis separated the DT104 strains from the non-DT104 strains, but strains did not cluster together based on source of isolation even within the DT104 phage type. Pangenome analysis of the strains confirmed previous studies showing that DT104 strains are missing the genes for the allantoin utilization pathway, but this study confirmed that the genes were part of a deletion event and not substituted or disrupted by the insertion of another genomic element. Additionally, cgMLST analysis revealed that DT104 strains with cattle as the source of isolation were quite diverse as a group and did not cluster together, even among strains from the same country. Expansion of the analysis to 775 S. typhimurium ST19 strains associated with cattle from North America revealed diversity between strains, not limited to just among DT104 strains, which suggests that the cattle environment is favorable for a diverse group of S. typhimurium strains and not just DT104 strains.}, } @article {pmid33924811, year = {2021}, author = {Zakham, F and Sironen, T and Vapalahti, O and Kant, R}, title = {Pan and Core Genome Analysis of 183 Mycobacterium tuberculosis Strains Revealed a High Inter-Species Diversity among the Human Adapted Strains.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {10}, number = {5}, pages = {}, pmid = {33924811}, issn = {2079-6382}, abstract = {Tuberculosis (TB) is an airborne communicable disease with high morbidity and mortality rates, especially in developing countries. 
The causal agents of TB belong to the complex Mycobacterium tuberculosis (MTBc), which is composed of different human and animal TB associated species. Some animal associated species have zoonotic potential and add to the burden of TB management. The BCG ("Bacillus Calmette-Guérin") vaccine is widely used for the prevention against TB, but its use is limited in immunocompromised patients and animals due to the adverse effects and disseminated life-threatening complications. In this study, we aimed to carry out a comparative genome analysis between the human adapted species including BCG vaccine strains to identify and pinpoint the conserved genes related to the virulence across all the species, which could add a new value for vaccine development. For this purpose, the sequences of 183 Mycobacterium tuberculosis (MTB) strains were retrieved from the freely available WGS dataset at NCBI. The species included: 168 sensu stricto MTB species with other human MTB complex associated strains: M. tuberculosis var. africanum (3), M. tuberculosis var. bovis (2 draft genomes) and 10 BCG species, which enabled the analysis of core genome which contains the conserved genes and some virulence factor determinants. Further, a phylogenetic tree was constructed including the genomes of human (183); animals MTB adapted strains (6) and the environmental Mycobacterium strain "M. canettii". Our results showed that the core genome consists of 1166 conserved genes among these species, which represents a small portion of the pangenome (7036 genes). The remaining genes in the pangenome (5870) are accessory genes, adding a high inter-species diversity. Further, the core genome includes several virulence-associated genes and this could explain the rare infectiousness potential of some attenuated vaccine strains in some patients. 
This study reveals that low number of conserved genes in human adapted MTBc species and high inter-species diversity of the pan-genome could be considered for vaccine candidate development.}, } @article {pmid33917427, year = {2021}, author = {Gustaw, K and Koper, P and Polak-Berecka, M and Rachwał, K and Skrzypczak, K and Waśko, A}, title = {Genome and Pangenome Analysis of Lactobacillus hilgardii FLUB—A New Strain Isolated from Mead.}, journal = {International journal of molecular sciences}, volume = {22}, number = {7}, pages = {}, pmid = {33917427}, issn = {1422-0067}, support = {2018/29/N/NZ9/00985 Preludium 15 Program//Narodowe Centrum Nauki/ ; }, mesh = {*Genome, Bacterial ; Honey/*microbiology ; Lactobacillus/*genetics/isolation & purification ; *Phylogeny ; Whole Genome Sequencing ; }, abstract = {The production of mead holds great value for the Polish liquor industry, which is why the bacterium that spoils mead has become an object of concern and scientific interest. This article describes, for the first time, Lactobacillus hilgardii FLUB newly isolated from mead, as a mead spoilage bacteria. Whole genome sequencing of L. hilgardii FLUB revealed a 3 Mbp chromosome and five plasmids, which is the largest reported genome of this species. An extensive phylogenetic analysis and digital DNA-DNA hybridization confirmed the membership of the strain in the L. hilgardii species. The genome of L. hilgardii FLUB encodes 3043 genes, 2871 of which are protein coding sequences, 79 code for RNA, and 93 are pseudogenes. L. hilgardii FLUB possesses three clustered regularly interspaced short palindromic repeats (CRISPR), eight genomic islands (44,155 bp to 6345 bp), and three (two intact and one incomplete) prophage regions. For the first time, the characteristics of the genome of this species were described and a pangenomic analysis was performed. 
The concept of the pangenome was used not only to establish the genetic repertoire of this species, but primarily to highlight the unique characteristics of L. hilgardii FLUB. The core of the genome of L. hilgardii is centered around genes related to the storage and processing of genetic information, as well as to carbohydrate and amino acid metabolism. Strains with such a genetic constitution can effectively adapt to environmental changes. L. hilgardii FLUB is distinguished by an extensive cluster of metabolic genes, arsenic detoxification genes, and unique surface layer proteins. Variants of MRS broth with ethanol (10-20%), glucose (2-25%), and fructose (2-24%) were prepared to test the strain's growth preferences using Bioscreen C and the PYTHON script. L. hilgardii FLUB was found to be more resistant than a reference strain to high concentrations of alcohol (18%) and sugars (25%). It exhibited greater preference for fructose than glucose, which suggests it has a fructophilic nature. Comparative genomic analysis supported by experimental research imitating the conditions of alcoholic beverages confirmed the niche specialization of L. hilgardii FLUB to the mead environment.}, } @article {pmid33913068, year = {2021}, author = {Darji, H and Verma, N and Lugani, Y and Mehrotra, P and Sindhu, DK and Vemuluri, VR}, title = {Polyphasic characterization of and genomic insights into a haloalkali-tolerant Saccharibacillus alkalitolerans sp. 
nov., that produces three cellulase isozymes and several antimicrobial compounds.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {7}, pages = {1043-1057}, pmid = {33913068}, issn = {1572-9699}, support = {OLP-805//CSIR-IMTECH/ ; }, mesh = {*Anti-Infective Agents ; Bacillales ; Bacterial Typing Techniques ; Base Composition ; *Cellulases ; DNA, Bacterial/genetics ; Fatty Acids/analysis ; Genomics ; Isoenzymes ; Nucleic Acid Hybridization ; Phospholipids ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {A cellulase producing novel bacterial strain VR-M41[T] was isolated from an open-air vegetable and fruit market. Cells are found to be rod-shaped, endospore forming, positive for Gram's stain and negative for catalase, oxidase and urease. Strain VR-M41[T] was halotolerant (upto 8.0% NaCl, w/v), motile and facultative anaerobe. It grew at wide range of pH (6.0-10.0) and temperatures (20-40 °C). Strain VR-M41[T] produced three isozymes of Carboxymethylcellulase. The 16S rRNA gene sequence of strain VR-M41[T] was 97.3% similar to both Saccharibacillus kuerlensis DSM 22868[T] and Saccharibacillus sacchari DSM 19268[T], and less than 96.4% with the rest of the valid species of the genus Saccharibacillus. Whole-genome ANI, dDDH and genome phylogenetic tree analysis revealed that strain VR-M41[T] significantly differed from Saccharibacillus kuerlensis DSM 22868[T] and Saccharibacillus sacchari DSM 19268[T] (ANI 79.6-79.7% and dDDH 23.1%). The strain comprised of MK-7 and anteiso-C15:0 (42.2%) as predominant isoprenoid quinone and fatty acid respectively. Major polar lipids were found to be diphosphatidylglycerol, phosphatidylglycerol and phosphatidylethanolamine. The draft genome of strain VR-M41[T] consisted of 5,386,426 base pairs with 5103 annotated genes, out of which 2147 corresponded to hypothetical proteins and 2956 with functional assignments. 
Pan-genome analysis revealed the presence of 2998 core genes, 828 accessory genes, and 1131 unique genes of Saccharibacillus. Strain VR-M41[T] produced antimicrobials against Staphylococcus aureus, Streptococcus pneumoniae, Micrococcus luteus and Shigella flexneri. Significant phenotypic and genotypic differentiating characteristics from closely related species, indicated that strain VR-M41[T] is a novel species of the genus Saccharibacillus, for which the name Saccharibacillus alkalitolerans sp. nov., is proposed. The type strain is VR-M41[T] (= KCTC 43183[T]=NBRC 114337[T]).}, } @article {pmid33912144, year = {2021}, author = {Wolter, LA and Mitulla, M and Kalem, J and Daniel, R and Simon, M and Wietz, M}, title = {CAZymes in Maribacter dokdonensis 62-1 From the Patagonian Shelf: Genomics and Physiology Compared to Related Flavobacteria and a Co-occurring Alteromonas Strain.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {628055}, pmid = {33912144}, issn = {1664-302X}, abstract = {Carbohydrate-active enzymes (CAZymes) are an important feature of bacteria in productive marine systems such as continental shelves, where phytoplankton and macroalgae produce diverse polysaccharides. We herein describe Maribacter dokdonensis 62-1, a novel strain of this flavobacterial species, isolated from alginate-supplemented seawater collected at the Patagonian continental shelf. M. dokdonensis 62-1 harbors a diverse array of CAZymes in multiple polysaccharide utilization loci (PUL). Two PUL encoding polysaccharide lyases from families 6, 7, 12, and 17 allow substantial growth with alginate as sole carbon source, with simultaneous utilization of mannuronate and guluronate as demonstrated by HPLC. Furthermore, strain 62-1 harbors a mixed-feature PUL encoding both ulvan- and fucoidan-targeting CAZymes. 
Core-genome phylogeny and pangenome analysis revealed variable occurrence of these PUL in related Maribacter and Zobellia strains, indicating specialization to certain "polysaccharide niches." Furthermore, lineage- and strain-specific genomic signatures for exopolysaccharide synthesis possibly mediate distinct strategies for surface attachment and host interaction. The wide detection of CAZyme homologs in algae-derived metagenomes suggests global occurrence in algal holobionts, supported by sharing multiple adaptive features with the hydrolytic model flavobacterium Zobellia galactanivorans. Comparison with Alteromonas sp. 76-1 isolated from the same seawater sample revealed that these co-occurring strains target similar polysaccharides but with different genomic repertoires, coincident with differing growth behavior on alginate that might mediate ecological specialization. Altogether, our study contributes to the perception of Maribacter as versatile flavobacterial polysaccharide degrader, with implications for biogeochemical cycles, niche specialization and bacteria-algae interactions in the oceans.}, } @article {pmid33900330, year = {2021}, author = {Zhang, C and Zhao, Y and Jiang, J and Yu, L and Tian, F and Zhao, J and Zhang, H and Chen, W and Zhai, Q}, title = {Identification of the key characteristics of Bifidobacterium longum strains for the alleviation of ulcerative colitis.}, journal = {Food \& function}, volume = {12}, number = {8}, pages = {3476-3492}, doi = {10.1039/d1fo00017a}, pmid = {33900330}, issn = {2042-650X}, mesh = {Animals ; Anti-Inflammatory Agents ; Bifidobacterium longum/classification/genetics/*physiology ; Colitis, Ulcerative/immunology/pathology/*therapy ; Colon/immunology/pathology ; Dextran Sulfate ; Disease Models, Animal ; Gastrointestinal Microbiome ; Inflammation/prevention & control ; Male ; Mice ; Mice, Inbred BALB C ; NF-kappa B/metabolism ; Phylogeny ; }, abstract = {Bifidobacterium longum (B. 
longum) species are widely used to prevent and treat ulcerative colitis (UC). In this study, phylogenetic and pan-genomic characterization of 122 B. longum strains was performed on the basis of 936 core genes; among these, four strains from different branches of the phylogenetic tree were selected for an evaluation of anti-inflammatory and immune modulatory activities in a DSS-induced colitis mouse model. Among the tested B. longum strains (B. longum FBJ20M1, B. longum FGDLZ8M1, B. longum FGSZY16M3, and B. longum FJSWXJ2M1), B. longum FGDLZ8M1 was found to most effectively alleviate colitis by reducing the expression of pro-inflammatory cytokines, restoring the colon length, and maintaining the mucosal integrity. The anti-inflammatory mechanisms of B. longum FGDLZ8M1 were related to the inhibition of NF-κB signaling. Genomic analysis indicated that these protective effects of B. longum FGDLZ8M1 may be related to specific genes associated with carbohydrate transport and metabolism and defense mechanisms (e.g., tolerance to bile salts and acids). Correlation analysis indicated that gastrointestinal transit tolerance was the most strongly associated factor. Our findings may contribute to the rapid screening of lactic acid bacterial strains with UC-alleviating effects.}, } @article {pmid33897640, year = {2021}, author = {Lyu, N and Feng, Y and Pan, Y and Huang, H and Liu, Y and Xue, C and Zhu, B and Hu, Y}, title = {Genomic Characterization of Salmonella enterica Isolates From Retail Meat in Beijing, China.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {636332}, pmid = {33897640}, issn = {1664-302X}, abstract = {Salmonella enterica remains one of the leading causes of foodborne bacterial disease. Retail meat is a major source of human salmonellosis. However, comparative genomic analyses of S. enterica isolates from retail meat from different sources in China are lacking. A total of 341 S. 
enterica strains were isolated from retail meat in sixteen districts of Beijing, China, at three different time points (January 1st, May 1st, and October 1st) in 2017. Comparative genomics was performed to investigate the genetic diversity, virulence and antimicrobial resistance gene (ARG) profiles of these isolates. The most common serotype was S. Enteritidis (203/341, 59.5%), which dominated among isolates from three different time points during the year. Laboratory retesting confirmed the accuracy of the serotyping results predicted by the Salmonella In Silico Typing Resource (SISTR) (96.5%). The pangenome of the 341 S. enterica isolates contained 13,931 genes, and the core genome contained 3,635 genes. Higher Salmonella phage 118970 sal3 (219/341, 64.2%) and Gifsy-2 (206/341, 60.4%) prevalence contributed to the diversity of the accessory genes, especially those with unknown functions. IncFII(S), IncX1, and IncFIB(S) plasmid replicons were more common in these isolates and were major sources of horizontally acquired foreign genes. The virulence gene profile showed fewer virulence genes associated with type III secretion systems in certain isolates from chicken. A total of 88 different ARGs were found in the 341 isolates. Three beta-lactamases, namely, blaCTX-M-55 (n = 15), blaCTX-M-14 (n = 11), and blaCTX-M-65 (n = 11), were more prevalent in retail meats. The emergence of qnrE1 and blaCTX-M-123 indicated a potential increase in the prevalence of retail meats. After the prohibition of colistin in China, three and four isolates were positive for the colistin resistance genes mcr-1.1 and mcr-9, respectively. Thus, we explored the evolution and genomic features of S. enterica isolates from retail meats in Beijing, China. 
The diverse ARGs of these isolates compromise food security and are a clinical threat.}, } @article {pmid33896630, year = {2021}, author = {Ma, S and Cao, J and Liliu, R and Li, N and Zhao, J and Zhang, H and Chen, W and Zhai, Q}, title = {Effects of Bacillus coagulans as an adjunct starter culture on yogurt quality and storage.}, journal = {Journal of dairy science}, volume = {104}, number = {7}, pages = {7466-7479}, doi = {10.3168/jds.2020-19876}, pmid = {33896630}, issn = {1525-3198}, mesh = {Animals ; *Bacillus coagulans ; China ; Fermentation ; Milk ; Phylogeny ; *Probiotics ; Yogurt ; }, abstract = {Bacillus coagulans has been widely studied for its probiotic properties. Therefore, identifying a strain that can be used as an adjunct starter culture for yogurt production would have commercial value. In this study, 30 B. coagulans strains were isolated from vegetable samples from 11 provinces or autonomous regions in China, and their pan-genomic and phylogenetic characteristics were analyzed. Phylogenetic analysis categorized 30 strains into 4 different subphylotypes, including subtype I (11 isolates), subtype II (7 isolates), subtype III (11 isolates), and subtype IV (1 isolate). Four B. coagulans strains (B. coagulans-70, B. coagulans-78, B. coagulans-79, and B. coagulans-100) were randomly selected from each subphylotype of the phylogenetic tree as adjunct starter cultures. Compared with the other tested strains, B. coagulans-70 showed the highest count in yogurt at the end of the manufacturing period. Comparative genome analysis indicated that the different bacterial levels of B. coagulans strains in yogurt may be associated with the abundance of genes related to carbohydrate transport and metabolism (e.g., sucrose utilization). Finally, differences in texture and volatile flavor compound profiles were observed between the yogurt samples. Compared with the other groups, the addition of B. 
coagulans-70 exerted a positive effect on the appearance and texture of yogurt products. Volatile analysis showed increased quantities of 2-heptanone, 2-nonanone, amyl alcohol, and 2-hydroxy-3-pentanone in the B. coagulans-70 group compared with control yogurts. These results above combined with the results of a sensory evaluation indicated that B. coagulans-70 is the most suitable strain for further use in functional dairy product development.}, } @article {pmid33892774, year = {2021}, author = {Li, J and Yuan, D and Wang, P and Wang, Q and Sun, M and Liu, Z and Si, H and Xu, Z and Ma, Y and Zhang, B and Pei, L and Tu, L and Zhu, L and Chen, LL and Lindsey, K and Zhang, X and Jin, S and Wang, M}, title = {Cotton pan-genome retrieves the lost sequences and genes during domestication and selection.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {119}, pmid = {33892774}, issn = {1474-760X}, mesh = {DNA Copy Number Variations ; *Domestication ; *Genes, Plant ; Genetic Variation ; Genetics, Population ; *Genome, Plant ; Genome-Wide Association Study ; *Genomics/methods ; Gossypium/*genetics ; INDEL Mutation ; Phenotype ; Plant Breeding ; Polymorphism, Single Nucleotide ; *Selection, Genetic ; }, abstract = {BACKGROUND: Millennia of directional human selection has reshaped the genomic architecture of cultivated cotton relative to wild counterparts, but we have limited understanding of the selective retention and fractionation of genomic components.

RESULTS: We construct a comprehensive genomic variome based on 1961 cottons and identify 456 Mb and 357 Mb of sequence with domestication and improvement selection signals and 162 loci, 84 of which are novel, including 47 loci associated with 16 agronomic traits. Using pan-genome analyses, we identify 32,569 and 8851 non-reference genes lost from Gossypium hirsutum and Gossypium barbadense reference genomes respectively, of which 38.2% (39,278) and 14.2% (11,359) of genes exhibit presence/absence variation (PAV). We document the landscape of PAV selection accompanied by asymmetric gene gain and loss and identify 124 PAVs linked to favorable fiber quality and yield loci.

CONCLUSIONS: This variation repertoire points to genomic divergence during cotton domestication and improvement, which informs the characterization of favorable gene alleles for improved breeding practice using a pan-genome-based approach.}, } @article {pmid33888092, year = {2021}, author = {Ramaprasad, A and Klaus, S and Douvropoulou, O and Culleton, R and Pain, A}, title = {Plasmodium vinckei genomes provide insights into the pan-genome and evolution of rodent malaria parasites.}, journal = {BMC biology}, volume = {19}, number = {1}, pages = {69}, pmid = {33888092}, issn = {1741-7007}, support = {JP16K21233//Japan Society for the Promotion of Science/ ; BAS/1/1020-01-01//King Abdullah University of Science and Technology/ ; URF/1/2267-01-01//Global Collaborative Research, King Abdullah University of Science and Technology/ ; }, mesh = {Animals ; Democratic Republic of the Congo ; *Genome ; *Malaria/genetics ; Mice ; Phylogeny ; *Plasmodium/genetics ; Rats ; }, abstract = {BACKGROUND: Rodent malaria parasites (RMPs) serve as tractable tools to study malaria parasite biology and host-parasite-vector interactions. Among the four RMPs originally collected from wild thicket rats in sub-Saharan Central Africa and adapted to laboratory mice, Plasmodium vinckei is the most geographically widespread with isolates collected from five separate locations. However, there is a lack of extensive phenotype and genotype data associated with this species, thus hindering its use in experimental studies.

RESULTS: We have generated a comprehensive genetic resource for P. vinckei comprising of five reference-quality genomes, one for each of its subspecies, blood-stage RNA sequencing data for five P. vinckei isolates, and genotypes and growth phenotypes for ten isolates. Additionally, we sequenced seven isolates of the RMP species Plasmodium chabaudi and Plasmodium yoelii, thus extending genotypic information for four additional subspecies enabling a re-evaluation of the genotypic diversity and evolutionary history of RMPs. The five subspecies of P. vinckei have diverged widely from their common ancestor and have undergone large-scale genome rearrangements. Comparing P. vinckei genotypes reveals region-specific selection pressures particularly on genes involved in mosquito transmission. Using phylogenetic analyses, we show that RMP multigene families have evolved differently across the vinckei and berghei groups of RMPs and that family-specific expansions in P. chabaudi and P. vinckei occurred in the common vinckei group ancestor prior to speciation. The erythrocyte membrane antigen 1 and fam-c families in particular show considerable expansions among the lowland forest-dwelling P. vinckei parasites. The subspecies from the highland forests of Katanga, P. v. vinckei, has a uniquely smaller genome, a reduced multigene family repertoire and is also amenable to transfection making it an ideal parasite for reverse genetics. We also show that P. vinckei parasites are amenable to genetic crosses.

CONCLUSIONS: Plasmodium vinckei isolates display a large degree of phenotypic and genotypic diversity and could serve as a resource to study parasite virulence and immunogenicity. Inclusion of P. vinckei genomes provide new insights into the evolution of RMPs and their multigene families. Amenability to genetic crossing and transfection make them also suitable for classical and functional genetics to study Plasmodium biology.}, } @article {pmid33875001, year = {2021}, author = {Lau, BT and Pavlichin, D and Hooker, AC and Almeda, A and Shin, G and Chen, J and Sahoo, MK and Huang, CH and Pinsky, BA and Lee, HJ and Ji, HP}, title = {Profiling SARS-CoV-2 mutation fingerprints that range from the viral pangenome to individual infection quasispecies.}, journal = {Genome medicine}, volume = {13}, number = {1}, pages = {62}, pmid = {33875001}, issn = {1756-994X}, support = {R01 HG006137/HG/NHGRI NIH HHS/United States ; 2R01HG006137-04/GF/NIH HHS/United States ; U01 HG010963/HG/NHGRI NIH HHS/United States ; R35HG011292//National Institutes of Health (US)/ ; U01HG010963/HG/NHGRI NIH HHS/United States ; R35 HG011292/HG/NHGRI NIH HHS/United States ; }, mesh = {Base Sequence ; COVID-19/*virology ; Conserved Sequence ; DNA Fingerprinting ; *Genome, Viral ; Humans ; *Mutation ; RNA, Viral ; SARS-CoV-2/*genetics ; Sequence Analysis, RNA ; }, abstract = {BACKGROUND: The genome of SARS-CoV-2 is susceptible to mutations during viral replication due to the errors generated by RNA-dependent RNA polymerases. These mutations enable the SARS-CoV-2 to evolve into new strains. Viral quasispecies emerge from de novo mutations that occur in individual patients. In combination, these sets of viral mutations provide distinct genetic fingerprints that reveal the patterns of transmission and have utility in contact tracing.

METHODS: Leveraging thousands of sequenced SARS-CoV-2 genomes, we performed a viral pangenome analysis to identify conserved genomic sequences. We used a rapid and highly efficient computational approach that relies on k-mers, short tracts of sequence, instead of conventional sequence alignment. Using this method, we annotated viral mutation signatures that were associated with specific strains. Based on these highly conserved viral sequences, we developed a rapid and highly scalable targeted sequencing assay to identify mutations, detect quasispecies variants, and identify mutation signatures from patients. These results were compared to the pangenome genetic fingerprints.

RESULTS: We built a k-mer index for thousands of SARS-CoV-2 genomes and identified conserved genomics regions and landscape of mutations across thousands of virus genomes. We delineated mutation profiles spanning common genetic fingerprints (the combination of mutations in a viral assembly) and a combination of mutations that appear in only a small number of patients. We developed a targeted sequencing assay by selecting primers from the conserved viral genome regions to flank frequent mutations. Using a cohort of 100 SARS-CoV-2 clinical samples, we identified genetic fingerprints consisting of strain-specific mutations seen across populations and de novo quasispecies mutations localized to individual infections. We compared the mutation profiles of viral samples undergoing analysis with the features of the pangenome.

CONCLUSIONS: We conducted an analysis for viral mutation profiles that provide the basis of genetic fingerprints. Our study linked pangenome analysis with targeted deep sequenced SARS-CoV-2 clinical samples. We identified quasispecies mutations occurring within individual patients and determined their general prevalence when compared to over 70,000 other strains. Analysis of these genetic fingerprints may provide a way of conducting molecular contact tracing.}, } @article {pmid33866091, year = {2021}, author = {Shen, C and Ma, F and Deng, S and Zhong, LL and El-Sayed Ahmed, MAE and Zhang, G and Yan, B and Dai, M and Yang, F and Xia, Y and Tian, GB}, title = {Prevalence, genomic characteristics, and transmission dynamics of mcr-1-positive Salmonella enterica Typhimurium from patients with infectious diarrhea.}, journal = {International journal of medical microbiology : IJMM}, volume = {311}, number = {4}, pages = {151501}, doi = {10.1016/j.ijmm.2021.151501}, pmid = {33866091}, issn = {1618-0607}, mesh = {Animals ; *Anti-Bacterial Agents/pharmacology ; China/epidemiology ; Diarrhea/epidemiology ; Genomics ; Humans ; Plasmids/genetics ; Prevalence ; *Salmonella typhimurium/genetics ; Swine ; }, abstract = {BACKGROUND: Previous studies reported the prevalence of mcr-1 among clinical infected Salmonella isolates in China. However, the transmission dynamics of mcr-1 in different ecological niches were not well investigated. Our objective is to exhibit the transmission dynamics of mcr-1 in Salmonella.

METHODS: 598 Salmonella isolates were recovered from ten hospitals; besides 936 pig feces and 167 pork samples were collected from January 2015 to December 2017 in Guangzhou, China. PCR and sequencing were used to identify mcr-1-positive Salmonella. Antimicrobial susceptibility testing was performed with 16 antimicrobials. Conjugation, S1-PFGE, and Southern blot were used to determine the transferability and location of mcr-1. Whole-genome sequencing was used to investigate pangenome, phylogeny, plasmid, and transposon.

RESULTS: Eleven mcr-1-positive Salmonella isolates were identified from patients with infectious diarrhea. Five pig fecal samples and three pork samples contained mcr-1-positive Salmonella isolates. All isolates were multi-drug resistant. The mcr-1 genes were located on ∼210-250 kb IncHI2-pST3 plasmids, and 12 mcr-1 genes were transferable. All isolates were assigned to ST34 or its genetically closed STs. The distribution of the core-genome network was significantly correlated with source distributions. The accessory genes-based network demonstrated that the diverse clonal complexes could share highly similar accessory genomes.

CONCLUSIONS: The prevalence of mcr-1-positive Salmonella among different sources was low. Clonal transmission could not be the main reason for the expansion of mcr-1-positive Salmonella, but be attributed to the horizontal transfer of IncHI2-pST3 plasmid. Continuous surveillance on Salmonella should be performed to investigate the response of colistin banning in food-producing animals by mcr-1-positive Salmonella populations.}, } @article {pmid33864546, year = {2021}, author = {Hwang, CY and Cho, ES and Yoon, DJ and Seo, MJ}, title = {Halobellus ruber sp. nov., a deep red-pigmented extremely halophilic archaeon isolated from a Korean solar saltern.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {7}, pages = {997-1011}, pmid = {33864546}, issn = {1572-9699}, support = {2018//Incheon National University Research Grant/ ; }, mesh = {Base Composition ; China ; DNA, Archaeal/genetics ; *Halobacteriaceae/genetics ; Phospholipids ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Republic of Korea ; Sequence Analysis, DNA ; }, abstract = {A novel halophilic archaeon, strain MBLA0160[T], was isolated from a solar saltern in Sorae, Republic of Korea. The cells are deep-red pigmented, Gram-negative, rod shaped, motile, and lysed in distilled water. The strain MBLA0160[T] grew at 25-45 °C (optimum 37 °C), in 15-30% (w/v) NaCl (optimum 20%) and 0.1-1.0 M MgCl2 (optimum 0.3-0.5 M) at pH 5.0-9.0 (optimum 7.0). Phylogenetic analysis based on the 16S rRNA sequence showed that this strain was related to two species within the genus Halobellus (Hbs.), with 98.4% and 95.8% similarity to Hbs. salinus CSW2.24.4[T] and Hbs. clavatus TNN18[T], respectively. The major polar lipids of the strain MBLA0160[T] were phosphatidylglycerol, phosphatidylglycerol sulfate, and phosphatidylglycerol phosphate methyl ester. The genome size, G + C content, and N50 value of MBLA0160[T] were 3.49 Mb, 66.5 mol%, and 620,127 bp, respectively. 
According to predicted functional proteins of strain MBLA0160[T], the highest category was amino acid transport and metabolism. Genome rapid annotation showed that amino acid and derivatives was the most subsystem feature counts. Pan-genomic analysis showed that strain MBLA0160[T] had 97 annotated unique KEGG, which were mainly included metabolism and environmental information processing. Ortholog average nucleotide identities (OrthoANI) and in silico DNA-DNA hybridization (isDDH) values between the strain MBLA0160[T] and other strains of the genus Halobellus were under 84.4% and 28.1%, respectively. The genome of strain MBLA0160[T] also contains the biosynthetic gene cluster for C50 carotenoid as secondary metabolite. Based on the phylogenetic, phenotypic, chemotaxonomic properties, and comparative genomic analyses, strain MBLA0160[T] is considered to represent a novel species of the genus Halobellus, for which the name Halobellus ruber sp. nov. is proposed. The type strain is MBLA0160[T] (= KCTC 4291[T] = JCM 34172[T]).}, } @article {pmid33850043, year = {2021}, author = {Cui, WJ and Zhang, B and Zhao, R and Liu, LX and Jiao, J and Zhang, Z and Tian, CF}, title = {Lineage-Specific Rewiring of Core Pathways Predating Innovation of Legume Nodules Shapes Symbiotic Efficiency.}, journal = {mSystems}, volume = {6}, number = {2}, pages = {}, pmid = {33850043}, issn = {2379-5077}, abstract = {The interkingdom coevolution innovated the rhizobium-legume symbiosis. The application of this nitrogen-fixing system in sustainable agriculture is usually impeded by incompatible interactions between partners. However, the progressive evolution of rhizobium-legume compatibility remains elusive. In this work, deletions of rhcV encoding a structural component of the type three secretion system allow related Sinorhizobium strains to nodulate a previously incompatible soybean cultivar (Glycine max). 
These rhcV mutants show low to medium to high symbiotic efficiency on the same cultivated soybean while being indistinguishable on wild soybean plants (Glycine soja). The dual pantranscriptomics reveals nodule-specific activation of core symbiosis genes of Sinorhizobium and Glycine genes associated with genome duplication events along the chronogram. Unexpectedly, symbiotic efficiency is in line with lineage-dependent transcriptional profiles of core pathways which predate the diversification of Fabaceae and Sinorhizobium. This is supported by further physiological and biochemical experiments. Particularly, low-efficiency nodules show disordered antioxidant activity and low-energy status, which restrict nitrogen fixation activity. Collectively, the ancient core pathways play a crucial role in optimizing the function of later-evolved mutualistic arsenals in the rhizobium-legume coevolution. IMPORTANCE: Significant roles of complex extracellular microbiota in environmental adaptation of eukaryotes in ever-changing circumstances have been revealed. Given the intracellular infection ability, facultative endosymbionts can be considered pioneers within complex extracellular microbiota and are ideal organisms for understanding the early stage of interkingdom adaptation. This work reveals that the later innovation of key symbiotic arsenals and the lineage-specific network rewiring in ancient core pathways, predating the divergence of legumes and rhizobia, underline the progressive evolution of rhizobium-legume compatibility. 
This insight not only is significant for improving the application benefits of rhizobial inoculants in sustainable agriculture but also advances our general understanding of the interkingdom coevolution which is theoretically explored by all host-microbiota interactions.}, } @article {pmid33849459, year = {2021}, author = {Jonkheer, EM and Brankovics, B and Houwers, IM and van der Wolf, JM and Bonants, PJM and Vreeburg, RAM and Bollema, R and de Haan, JR and Berke, L and Smit, S and de Ridder, D and van der Lee, TAJ}, title = {The Pectobacterium pangenome, with a focus on Pectobacterium brasiliense, shows a robust core and extensive exchange of genes from a shared gene pool.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {265}, pmid = {33849459}, issn = {1471-2164}, mesh = {Europe ; Gene Pool ; *Pectobacterium/genetics ; Phylogeny ; Plant Diseases ; *Solanum tuberosum/genetics ; }, abstract = {BACKGROUND: Bacterial plant pathogens of the Pectobacterium genus are responsible for a wide spectrum of diseases in plants, including important crops such as potato, tomato, lettuce, and banana. Investigation of the genetic diversity underlying virulence and host specificity can be performed at genome level by using a comprehensive comparative approach called pangenomics. A pangenomic approach, using newly developed functionalities in PanTools, was applied to analyze the complex phylogeny of the Pectobacterium genus. We specifically used the pangenome to investigate genetic differences between virulent and avirulent strains of P. brasiliense, a potato blackleg causing species dominantly present in Western Europe.

RESULTS: Here we generated a multilevel pangenome for Pectobacterium, comprising 197 strains across 19 species, including type strains, with a focus on P. brasiliense. The extensive phylogenetic analysis of the Pectobacterium genus showed robust distinct clades, with most detail provided by 452,388 parsimony-informative single-nucleotide polymorphisms identified in single-copy orthologs. The average Pectobacterium genome consists of 47% core genes, 1% unique genes, and 52% accessory genes. Using the pangenome, we zoomed in on differences between virulent and avirulent P. brasiliense strains and identified 86 genes associated to virulent strains. We found that the organization of genes is highly structured and linked with gene conservation, function, and transcriptional orientation.

CONCLUSION: The pangenome analysis demonstrates that evolution in Pectobacteria is a highly dynamic process, including gene acquisitions partly in clusters, genome rearrangements, and loss of genes. Pectobacterium species are typically not characterized by a set of species-specific genes, but instead present themselves using new gene combinations from the shared gene pool. A multilevel pangenomic approach, fusing DNA, protein, biological function, taxonomic group, and phenotypes, facilitates studies in a flexible taxonomic context.}, } @article {pmid33848586, year = {2021}, author = {Machimbirike, VI and Uthaipaisanwong, P and Khunrae, P and Dong, HT and Senapin, S and Rattanarojpong, T and Sutheeworapong, S}, title = {Comparative genomics of Edwardsiella ictaluri revealed four distinct host-specific genotypes and thirteen potential vaccine candidates.}, journal = {Genomics}, volume = {113}, number = {4}, pages = {1976-1987}, doi = {10.1016/j.ygeno.2021.04.016}, pmid = {33848586}, issn = {1089-8646}, mesh = {Animals ; *Enterobacteriaceae Infections/veterinary ; *Fish Diseases/prevention & control ; Genomics ; Genotype ; Multilocus Sequence Typing ; Phylogeny ; *Vaccines ; }, abstract = {Edwardsiella ictaluri has been considered an important threat for catfish aquaculture industry for more than 4 decades and an emerging pathogen of farmed tilapia but only 9 sequenced genomes were publicly available. We hereby report two new complete genomes of E. ictaluri originated from diseased hybrid red tilapia (Oreochromis sp.) and striped catfish (Pangasianodon hypophthalmus) in Southeast Asia. E. ictaluri species has an open pan-genome consisting of 2615 core genes and 5592 pan genes. Phylogenetic analysis using core genome MLST (cgMLST) and ANI values consistently placed E. ictaluri isolates into 4 host-specific genotypes. Presence of unique genes and absence of certain genes from each genotype provided potential biomarkers for further development of genotyping scheme. 
Vaccine candidates with high antigenic, solubility and secretion probabilities were identified in silico from the core genes. Microevolution within the species is brought about by bacteriophages and insertion elements and possibly drive host adaptation.}, } @article {pmid33848428, year = {2021}, author = {Lei, L and Goltsman, E and Goodstein, D and Wu, GA and Rokhsar, DS and Vogel, JP}, title = {Plant Pan-Genomics Comes of Age.}, journal = {Annual review of plant biology}, volume = {72}, number = {}, pages = {411-435}, doi = {10.1146/annurev-arplant-080720-105454}, pmid = {33848428}, issn = {1545-2123}, mesh = {*Genome, Plant ; *Genomics ; }, abstract = {A pan-genome is the nonredundant collection of genes and/or DNA sequences in a species. Numerous studies have shown that plant pan-genomes are typically much larger than the genome of any individual and that a sizable fraction of the genes in any individual are present in only some genomes. The construction and interpretation of plant pan-genomes are challenging due to the large size and repetitive content of plant genomes. Most pan-genomes are largely focused on nontransposable element protein coding genes because they are more easily analyzed and defined than noncoding and repetitive sequences. Nevertheless, noncoding and repetitive DNA play important roles in determining the phenotype and genome evolution. Fortunately, it is now feasible to make multiple high-quality genomes that can be used to construct high-resolution pan-genomes that capture all the variation. 
However, assembling, displaying, and interacting with such high-resolution pan-genomes will require the development of new tools.}, } @article {pmid33844295, year = {2021}, author = {Tang, WT and Hao, TW and Chen, GH}, title = {Comparative metabolic modeling of multiple sulfate-reducing prokaryotes reveals versatile energy conservation mechanisms.}, journal = {Biotechnology and bioengineering}, volume = {118}, number = {7}, pages = {2676-2693}, doi = {10.1002/bit.27787}, pmid = {33844295}, issn = {1097-0290}, support = {RES002426//CIHR/Canada ; }, mesh = {Desulfovibrio vulgaris/genetics/*metabolism ; *Energy Metabolism ; *Models, Biological ; Sulfates/*metabolism ; }, abstract = {Sulfate-reducing prokaryotes (SRPs) are crucial participants in the cycling of sulfur, carbon, and various metals in the natural environment and in engineered systems. Despite recent advances in genetics and molecular biology bringing a huge amount of information about the energy metabolism of SRPs, little effort has been made to link this important information with their biotechnological studies. This study aims to construct multiple metabolic models of SRPs that systematically compile genomic, genetic, biochemical, and molecular information about SRPs to study their energy metabolism. Pan-genome analysis was conducted to compare the genomes of SRPs, from which a list of orthologous genes related to central and energy metabolism was obtained. Twenty-four SRP metabolic models via the inference of pan-genome analysis were efficiently constructed. The metabolic model of the well-studied model SRP Desulfovibrio vulgaris Hildenborough (DvH) was validated via flux balance analysis (FBA). The DvH model predictions matched reported experimental growth and energy yields, which demonstrated that the core metabolic model worked successfully. 
Further, steady-state simulation of SRP metabolic models under different growth conditions showed how the use of different electron transfer pathways leads to energy generation. Three energy conservation mechanisms were identified, including menaquinone-based redox loop, hydrogen cycling, and proton pumping. Flavin-based electron bifurcation (FBEB) was also demonstrated to be an essential mechanism for supporting energy conservation. The developed models can be easily extended to other species of SRPs not examined in this study. More importantly, the present work develops an accurate and efficient approach for constructing metabolic models of multiple organisms, which can be applied to other critical microbes in environmental and industrial systems, thereby enabling the quantitative prediction of their metabolic behaviors to benefit relevant applications.}, } @article {pmid33841754, year = {2021}, author = {Zhong, C and Chen, C and Wang, L and Ning, K}, title = {Integrating pan-genome with metagenome for microbial community profiling.}, journal = {Computational and structural biotechnology journal}, volume = {19}, number = {}, pages = {1458-1466}, pmid = {33841754}, issn = {2001-0370}, abstract = {Advances in sequencing technology have led to the increased availability of genomes and metagenomes, which has greatly facilitated microbial pan-genome and metagenome analysis in the community. In line with this trend, studies on microbial genomes and phenotypes have gradually shifted from individuals to environmental communities. Pan-genomics and metagenomics are powerful strategies for in-depth profiling study of microbial communities. Pan-genomics focuses on genetic diversity, dynamics, and phylogeny at the multi-genome level, while metagenomics profiles the distribution and function of culture-free microbial communities in special environments. 
Combining pan-genome and metagenome analysis can reveal the microbial complicated connections from an individual complete genome to a mixture of genomes, thereby extending the catalog of traditional individual genomic profile to community microbial profile. Therefore, the combination of pan-genome and metagenome approaches has become a promising method to track the sources of various microbes and decipher the population-level evolution and ecosystem functions. This review summarized the pan-genome and metagenome approaches, the combined strategies of pan-genome and metagenome, and applications of these combined strategies in studies of microbial dynamics, evolution, and function in communities. We discussed emerging strategies for the study of microbial communities that integrate information in both pan-genome and metagenome. We emphasized studies in which the integrating pan-genome with metagenome approach improved the understanding of models of microbial community profiles, both structural and functional. Finally, we illustrated future perspectives of microbial community profile: more advanced analytical techniques, including big-data based artificial intelligence, will lead to an even better understanding of the patterns of microbial communities.}, } @article {pmid33841489, year = {2021}, author = {Qureshi, NA and Bakhtiar, SM and Faheem, M and Shah, M and Bari, A and Mahmood, HM and Sohaib, M and Mothana, RA and Ullah, R and Jamal, SB}, title = {Genome-Based Drug Target Identification in Human Pathogen Streptococcus gallolyticus.}, journal = {Frontiers in genetics}, volume = {12}, number = {}, pages = {564056}, pmid = {33841489}, issn = {1664-8021}, abstract = {Streptococcus gallolyticus (Sg) is an opportunistic Gram-positive, non-motile bacterium, which causes infective endocarditis, an inflammation of the inner lining of the heart. 
As Sg has acquired resistance with the available antibiotics, therefore, there is a dire need to find new therapeutic targets and potent drugs to prevent and treat this disease. In the current study, an in silico approach is utilized to link genomic data of Sg species with its proteome to identify putative therapeutic targets. A total of 1,138 core proteins have been identified using pan genomic approach. Further, using subtractive proteomic analysis, a set of 18 proteins, essential for bacteria and non-homologous to host (human), is identified. Out of these 18 proteins, 12 cytoplasmic proteins were selected as potential drug targets. These selected proteins were subjected to molecular docking against drug-like compounds retrieved from ZINC database. Furthermore, the top docked compounds with lower binding energy were identified. In this work, we have identified novel drug and vaccine targets against Sg, of which some have already been reported and validated in other species. Owing to the experimental validation, we believe our methodology and result are significant contribution for drug/vaccine target identification against Sg-caused infective endocarditis.}, } @article {pmid33841383, year = {2021}, author = {Gómez, P and Ruiz-Ripa, L and Fernández-Fernández, R and Gharsa, H and Ben Slama, K and Höfle, U and Zarazaga, M and Holmes, MA and Torres, C}, title = {Genomic Analysis of Staphylococcus aureus of the Lineage CC130, Including mecC-Carrying MRSA and MSSA Isolates Recovered of Animal, Human, and Environmental Origins.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {655994}, pmid = {33841383}, issn = {1664-302X}, abstract = {Most methicillin resistant Staphylococcus aureus (MRSA) isolates harboring mecC gene belong to clonal complex CC130. This lineage has traditionally been regarded as animal-associated as it lacks the human specific immune evasion cluster (IEC), and has been recovered from a broad range of animal hosts. 
Nevertheless, sporadic mecC-MRSA human infections have been reported, with evidence of zoonotic transmission in some cases. The objective of this study was to investigate the whole-genome sequences of 18 S. aureus CC130 isolates [13 methicillin-resistant (mecC-MRSA) and five methicillin-susceptible (MSSA)] from different sequence types, obtained from a variety of host species and origins (human, livestock, wild birds and mammals, and water), and from different geographic locations, in order to identify characteristic markers and genomic features. Antibiotic resistance genes found among MRSA-CC130 were those associated with the SCCmecXI element. Most MRSA-CC130 strains carried a similar virulence gene profile. Additionally, six MRSA-CC130 possessed scn-sak and one MSSA-ST130 had lukMF'. The MSSA-ST700 strains were most divergent in their resistance and virulence genes. The pan-genome analysis showed that 29 genes were present solely in MRSA-CC130 (associated with SCCmecXI) and 21 among MSSA-CC130 isolates (associated with phages). The SCCmecXI, PBP3, GdpP, and AcrB were identical at the amino acid level in all strains, but some differences were found in PBP1, PBP2, PBP4, and YjbH proteins. An examination of the host markers showed that the 3' region of the bacteriophage φ3 was nearly identical to the reference sequence. Truncated hlb gene was also found in scn-negative strains (two of them carrying sak-type gene). The dtlB gene of wild rabbit isolates included novel mutations. The vwbp gene was found in the three MSSA-ST700 strains from small ruminants and in one MSSA-ST130 from a red deer; these strains also carried a scn-type gene, different from the human and equine variants. Finally, a phylogenetic analysis showed that the three MSSA-ST700 strains and the two MSSA-ST130 strains cluster separately from the remaining MRSA-CC130 strains with the etD2 gene as marker for the main lineage. 
The presence of the human IEC cluster in some mecC-MRSA-CC130 strains suggests that these isolates may have had a human origin.}, } @article {pmid33839269, year = {2021}, author = {de Almeida, OGG and Vitulo, N and De Martinis, ECP and Felis, GE}, title = {Pangenome analyses of LuxS-coding genes and enzymatic repertoires in cocoa-related lactic acid bacteria.}, journal = {Genomics}, volume = {113}, number = {4}, pages = {1659-1670}, doi = {10.1016/j.ygeno.2021.04.010}, pmid = {33839269}, issn = {1089-8646}, mesh = {Bacteria/genetics ; *Cacao/genetics/microbiology ; Fermentation ; *Lactobacillales/genetics ; *Limosilactobacillus fermentum ; }, abstract = {Lactobacillaceae presents potential for interspecific Quorum Sensing (QS) in spontaneous cocoa fermentation, correlated with high abundance of luxS. Three Brazilian isolates from cocoa fermentation were characterized by Whole Genome Sequencing and luxS gene was surveyed in their genomes, in comparison with public databases. They were classified as Lactiplantibacillus plantarum, Limosilactobacillus fermentum and Pediococcus acidilactici. LuxS genes were conserved in core genomes of the novel isolates, but in some non-cocoa related Lactic Acid Bacteria (LAB) it was accessory and plasmid-borne. The conservation and horizontal acquisition of luxS reinforces that QS is determinant for bacterial adaptation in several environments, especially taking into account the luxS has been correlated with modulation of bacteriocin production, stress tolerance and biofilm formation. 
Therefore, in this paper, new clade and species-specific primers were designed for future application for screening of luxS gene in LAB to evaluate the adaptive potential to diverse food fermentations.}, } @article {pmid33837077, year = {2021}, author = {Shaw, LP and Chau, KK and Kavanagh, J and AbuOun, M and Stubberfield, E and Gweon, HS and Barker, L and Rodger, G and Bowes, MJ and Hubbard, ATM and Pickford, H and Swann, J and Gilson, D and Smith, RP and Hoosdally, SJ and Sebra, R and Brett, H and Peto, TEA and Bailey, MJ and Crook, DW and Read, DS and Anjum, MF and Walker, AS and Stoesser, N}, title = {Niche and local geography shape the pangenome of wastewater- and livestock-associated Enterobacteriaceae.}, journal = {Science advances}, volume = {7}, number = {15}, pages = {}, pmid = {33837077}, issn = {2375-2548}, support = {/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Escherichia coli and other Enterobacteriaceae are diverse species with "open" pangenomes, where genes move intra- and interspecies via horizontal gene transfer. However, most analyses focus on clinical isolates. The pangenome dynamics of natural populations remain understudied, despite their suggested role as reservoirs for antimicrobial resistance (AMR) genes. Here, we analyze near-complete genomes for 827 Enterobacteriaceae (553 Escherichia and 274 non-Escherichia spp.) with 2292 circularized plasmids in total, collected from 19 locations (livestock farms and wastewater treatment works in the United Kingdom) within a 30-km radius at three time points over a year. We find different dynamics for chromosomal and plasmid-borne genes. Plasmids have a higher burden of AMR genes and insertion sequences, and AMR-gene-carrying plasmids show evidence of being under stronger selective pressure. Environmental niche and local geography both play a role in shaping plasmid dynamics. 
Our results highlight the importance of local strategies for controlling the spread of AMR.}, } @article {pmid33828821, year = {2021}, author = {Min, H and Baek, K and Lee, A and Seok, YJ and Choi, Y}, title = {Genomic characterization of four Escherichia coli strains isolated from oral lichen planus biopsies.}, journal = {Journal of oral microbiology}, volume = {13}, number = {1}, pages = {1905958}, pmid = {33828821}, issn = {2000-2297}, abstract = {Oral lichen planus (OLP) is a chronic T cell-mediated inflammatory disease that affects the mucus membrane of the oral cavity. We previously proposed a potential role of intracellular bacteria detected within OLP lesions in the pathogenesis of OLP and isolated four Escherichia coli strains from OLP tissues that were phylogenetically close to K-12 MG1655 strain. We sequenced the genomes of the four OLP-isolated E. coli strains and generated 6.71 Gbp of Illumina MiSeq data (166-195x coverage per strain). The size of the assembled draft genomes was 4.69 Mbp, with a GC content of 50.7%, in which 4360 to 4367 protein-coding sequences per strain were annotated. We also identified 368 virulence factors and 53 antibiotic resistance genes. Comparative genomics revealed that the OLP-isolated strains shared more pangenome orthologous groups with pathogenic strains than did the K-12 MG1655 strain, a derivative of K-12 strain isolated from human feces. Although the OLP-isolated strains did not have the major virulence factors (VFs) of the pathogenic strains, a number of VFs involved in adherence/invasion, colonization, or systemic infection were identified. The genomic characteristics of E. 
coli first isolated from the oral cavity would benefit future investigations on the pathogenic potential of these bacteria.}, } @article {pmid33827048, year = {2021}, author = {Stenman, A and Backman, S and Johansson, K and Paulsson, JO and Stålberg, P and Zedenius, J and Juhlin, CC}, title = {Pan-genomic characterization of high-risk pediatric papillary thyroid carcinoma.}, journal = {Endocrine-related cancer}, volume = {28}, number = {5}, pages = {337-351}, pmid = {33827048}, issn = {1479-6821}, mesh = {Child ; Genomics ; Humans ; Mutation ; Neoplasm Recurrence, Local ; Proto-Oncogene Proteins B-raf/genetics ; Thyroid Cancer, Papillary/genetics/pathology ; *Thyroid Neoplasms/genetics/pathology ; }, abstract = {Pediatric papillary thyroid carcinomas (pPTCs) are often indolent tumors with excellent long-term outcome, although subsets of cases are clinically troublesome and recur. Although it is generally thought to exhibit similar molecular aberrancies as their counterpart tumors in adults, the pan-genomic landscape of clinically aggressive pPTCs has not been previously described. In this study, five pairs of primary and synchronously metastatic pPTC from patients with high-risk phenotypes were characterized using parallel whole-genome and -transcriptome sequencing. Primary tumors and their metastatic components displayed an exceedingly low number of coding somatic mutations and gross chromosomal alterations overall, with surprisingly few shared mutational events. Two cases exhibited one established gene fusion event each (SQSTM1-NTRK3 and NCOA4-RET) in both primary and metastatic tissues, and one case each was positive for a BRAF V600E mutation and a germline truncating CHEK2 mutation, respectively. One single case was without apparent driver events and was considered as a genetic orphan. Non-coding mutations in cancer-associated regions were generally not present. 
By expressional analyses, fusion-driven primary and metastatic pPTC clustered separately from the mutation-driven cases and the sole genetic orphan. We conclude that pPTCs are genetically indolent tumors with exceedingly stable genomes. Several mutations found exclusively in the metastatic samples which may represent novel genetic events that drive the metastatic behavior, and the differences in mutational compositions suggest early clonal divergence between primary tumors and metastases. Moreover, an overrepresentation of mutational and expressional dysregulation of immune regulatory pathways was noted among fusion-positive pPTC metastases, suggesting that these tumors might facilitate spread through immune evasive mechanisms.}, } @article {pmid33823902, year = {2021}, author = {Břinda, K and Baym, M and Kucherov, G}, title = {Simplitigs as an efficient and scalable representation of de Bruijn graphs.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {96}, pmid = {33823902}, issn = {1474-760X}, support = {R35 GM133700/GM/NIGMS NIH HHS/United States ; R35GM133700/GM/NIGMS NIH HHS/United States ; }, mesh = {*Algorithms ; Computational Biology/*methods ; Genomics/methods ; Sequence Analysis, DNA/*methods ; *Software ; }, abstract = {de Bruijn graphs play an essential role in bioinformatics, yet they lack a universal scalable representation. Here, we introduce simplitigs as a compact, efficient, and scalable representation, and ProphAsm, a fast algorithm for their computation. For the example of assemblies of model organisms and two bacterial pan-genomes, we compare simplitigs to unitigs, the best existing representation, and demonstrate that simplitigs provide a substantial improvement in the cumulative sequence length and their number. 
When combined with the commonly used Burrows-Wheeler Transform index, simplitigs reduce memory, and index loading and query times, as demonstrated with large-scale examples of GenBank bacterial pan-genomes.}, } @article {pmid33823816, year = {2021}, author = {Tateishi, Y and Ozeki, Y and Nishiyama, A and Miki, M and Maekura, R and Fukushima, Y and Nakajima, C and Suzuki, Y and Matsumoto, S}, title = {Comparative genomic analysis of Mycobacterium intracellulare: implications for clinical taxonomic classification in pulmonary Mycobacterium avium-intracellulare complex disease.}, journal = {BMC microbiology}, volume = {21}, number = {1}, pages = {103}, pmid = {33823816}, issn = {1471-2180}, support = {2018YFC1002103//National Key Research and Development Project/ ; WJ2021M110//Health Commission of Hubei Province Scientific Research Project/ ; 16020520668//Chinese Medical Association/ ; 2017CFB752//Natural Science Foundation of Hubei Province/ ; }, mesh = {Genes, Bacterial/genetics ; Genetic Variation ; *Genome, Bacterial/genetics ; Genomics ; Humans ; *Mycobacterium avium Complex/classification/genetics/pathogenicity ; *Mycobacterium avium-intracellulare Infection/microbiology ; *Phylogeny ; Plasmids/genetics ; Virulence/genetics ; }, abstract = {BACKGROUND: Mycobacterium intracellulare is a representative etiological agent of emerging pulmonary M. avium-intracellulare complex disease in the industrialized countries worldwide. The recent genome sequencing of clinical strains isolated from pulmonary M. avium-intracellulare complex disease has provided insight into the genomic characteristics of pathogenic mycobacteria, especially for M. avium; however, the genomic characteristics of M. intracellulare remain to be elucidated.

RESULTS: In this study, we performed comparative genomic analysis of 55 M. intracellulare and related strains such as M. paraintracellulare (MP), M. indicus pranii (MIP) and M. yonogonense. Based on the average nucleotide identity, the clinical M. intracellulare strains were phylogenetically grouped in two clusters: (1) the typical M. intracellulare (TMI) group, including ATCC13950 and virulent M.i.27 and M.i.198 that we previously reported, and (2) the MP-MIP group. The alignment of the genomic regions was mostly preserved between groups. Plasmids were identified between groups and subgroups, including a plasmid common among some strains of the M.i.27 subgroup. Several genomic regions including those encoding factors involved in lipid metabolism (e.g., fadE3, fadE33), transporters (e.g., mce3), and type VII secretion system (genes of ESX-2 system) were shown to be hypermutated in the clinical strains. M. intracellulare was shown to be pan-genomic at the species and subspecies levels. The mce genes were specific to particular subspecies, suggesting that these genes may be helpful in discriminating virulence phenotypes between subspecies.

CONCLUSIONS: Our data suggest that genomic diversity among M. intracellulare, M. paraintracellulare, M. indicus pranii and M. yonogonense remains at the subspecies or genovar levels and does not reach the species level. Genetic components such as mce genes revealed by the comparative genomic analysis could be the novel focus for further insight into the mechanism of human pathogenesis for M. intracellulare and related strains.}, } @article {pmid33815306, year = {2021}, author = {Ye, Q and Shang, Y and Chen, M and Pang, R and Li, F and Xiang, X and Wang, C and Zhou, B and Zhang, S and Zhang, J and Yang, X and Xue, L and Ding, Y and Wu, Q}, title = {Identification of Novel Sensitive and Reliable Serovar-Specific Targets for PCR Detection of Salmonella Serovars Hadar and Albany by Pan-Genome Analysis.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {605984}, pmid = {33815306}, issn = {1664-302X}, abstract = {The accurate and rapid classification of Salmonella serovars is an essential focus for the identification of isolates involved in disease in humans and animals. The purpose of current research was to identify novel sensitive and reliable serovar-specific targets and to develop PCR method for Salmonella C2 serogroups (O:8 epitopes) in food samples to facilitate timely treatment. A total of 575 genomic sequences of 16 target serovars belonging to serogroup C2 and 150 genomic sequences of non-target serovars were analysed by pan-genome analysis. As a result, four and three specific genes were found for serovars Albany and Hadar, respectively. Primer sets for PCR targeting these serovar-specific genes were designed and evaluated based on their specificity; the results showed high specificity (100%). 
The sensitivity of the specific PCR was 2.8 × 10[1]-10[3] CFU/mL and 2.3 × 10[3]-10[4] CFU/mL for serovars Albany and Hadar, respectively, and the detection limits were 1.04 × 10[3]-10[4] CFU/g and 1.16 × 10[4]-10[5] CFU/g in artificially contaminated raw pork samples. Furthermore, the potential functions of these serovar-specific genes were analysed; all of the genes were functionally unknown, except for one specific serovar Albany gene known to be a encoded secreted protein and one specific gene for serovars Hadar and Albany that is a encoded membrane protein. Thus, these findings demonstrate that pan-genome analysis is a precious method for mining new high-quality serovar-targets for PCR assays or other molecular methods that are highly sensitive and can be used for rapid detection of Salmonella serovars.}, } @article {pmid33813894, year = {2021}, author = {Calcino, AD and Kenny, NJ and Gerdol, M}, title = {Single individual structural variant detection uncovers widespread hemizygosity in molluscs.}, journal = {Philosophical transactions of the Royal Society of London. Series B, Biological sciences}, volume = {376}, number = {1825}, pages = {20200153}, pmid = {33813894}, issn = {1471-2970}, mesh = {Adaptation, Biological ; Animals ; Evolution, Molecular ; *Genetic Variation ; *Genome ; Mollusca/*genetics ; Selection, Genetic ; }, abstract = {The advent of complete genomic sequencing has opened a window into genomic phenomena obscured by fragmented assemblies. A good example of these is the existence of hemizygous regions of autosomal chromosomes, which can result in marked differences in gene content between individuals within species. While these hemizygous regions, and presence/absence variation of genes that can result, are well known in plants, firm evidence has only recently emerged for their existence in metazoans. 
Here, we use recently published, complete genomes from wild-caught molluscs to investigate the prevalence of hemizygosity across a well-known and ecologically important clade. We show that hemizygous regions are widespread in mollusc genomes, not clustered in individual chromosomes, and often contain genes linked to transposition, DNA repair and stress response. With targeted investigations of HSP70-12 and C1qDC, we also show how individual gene families are distributed within pan-genomes. This work suggests that extensive pan-genomes are widespread across the conchiferan Mollusca, and represent useful tools for genomic evolution, allowing the maintenance of additional genetic diversity within the population. As genomic sequencing and re-sequencing becomes more routine, the prevalence of hemizygosity, and its impact on selection and adaptation, are key targets for research across the tree of life. This article is part of the Theo Murphy meeting issue 'Molluscan genomics: broad insights and future directions for a neglected phylum'.}, } @article {pmid33812777, year = {2021}, author = {Nousias, O and Montesanto, F}, title = {Metagenomic profiling of host-associated bacteria from 8 datasets of the red alga Porphyra purpurea with MetaPhlAn3.}, journal = {Marine genomics}, volume = {59}, number = {}, pages = {100866}, doi = {10.1016/j.margen.2021.100866}, pmid = {33812777}, issn = {1876-7478}, mesh = {Bacteria/genetics ; Metagenome ; Metagenomics ; *Porphyra/genetics ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Microbial communities play fundamental roles in association with marine algae; in fact, they are recognized to be actively involved in growth and morphogenesis of the algae. Porphyra purpurea is a red alga commonly found in the intertidal zone with a high economic value, however little is known about the bacterial species associated with this genus. Here we report the bacterial-associated diversity of P. 
purpurea in four different localities (Ireland, Italy United Kingdom and the USA) from analyzing eight publicly available metagenomic datasets. These were analyzed with Methaplan3 to identify the putative bacterial taxonomies and their relative abundances. Furthermore, we compared these results to the 16S rRNA metagenomic analysis pipeline of the MGnify database to evaluate both methods. Kraken2 was used to verify and support the results, as a complementary classification method to Metaphlan3. This approach highlighted the different taxonomic resolution of a 16S rRNA OTU-based method compared to the pan-genome approach deployed by Metaphlan3 and complemented by Kraken2. The results presented here provide valuable preliminary data on the putative host-associated bacterial species of P. purpurea.}, } @article {pmid33800844, year = {2021}, author = {Rubio, A and Pérez-Pulido, AJ}, title = {Protein-Coding Genes of Helicobacter pylori Predominantly Present Purifying Selection though Many Membrane Proteins Suffer from Selection Pressure: A Proposal to Analyze Bacterial Pangenomes.}, journal = {Genes}, volume = {12}, number = {3}, pages = {}, pmid = {33800844}, issn = {2073-4425}, mesh = {Bacterial Proteins/genetics ; Computational Biology/*methods ; Evolution, Molecular ; Helicobacter pylori/classification/*genetics ; Membrane Proteins/*genetics ; Molecular Sequence Annotation ; Selection, Genetic ; Sequence Analysis, DNA ; Whole Genome Sequencing ; }, abstract = {The current availability of complete genome sequences has allowed knowing that bacterial genomes can bear genes not present in the genome of all the strains from a specific species. So, the genes shared by all the strains comprise the core of the species, but the pangenome can be much greater and usually includes genes appearing in one only strain. 
Once the pangenome of a species is estimated, other studies can be undertaken to generate new knowledge, such as the study of the evolutionary selection for protein-coding genes. Most of the genes of a pangenome are expected to be subject to purifying selection that assures the conservation of function, especially those in the core group. However, some genes can be subject to selection pressure, such as genes involved in virulence that need to escape to the host immune system, which is more common in the accessory group of the pangenome. We analyzed 180 strains of Helicobacter pylori, a bacterium that colonizes the gastric mucosa of half the world population and presents a low number of genes (around 1500 in a strain and 3000 in the pangenome). After the estimation of the pangenome, the evolutionary selection for each gene has been calculated, and we found that 85% of them are subject to purifying selection and the remaining genes present some grade of selection pressure. As expected, the latter group is enriched with genes encoding for membrane proteins putatively involved in interaction to host tissues. In addition, this group also presents a high number of uncharacterized genes and genes encoding for putative spurious proteins. It suggests that they could be false positives from the gene finders used for identifying them. 
All these results propose that this kind of analyses can be useful to validate gene predictions and functionally characterize proteins in complete genomes.}, } @article {pmid33794293, year = {2021}, author = {Jayaram, A and Wingate, A and Wetterskog, D and Wheeler, G and Sternberg, CN and Jones, R and Berruti, A and Lefresne, F and Lahaye, M and Thomas, S and Gormley, M and Meacham, F and Garg, K and Lim, LP and Merseburger, AS and Tombal, B and Ricci, D and Attard, G}, title = {Plasma tumor gene conversions after one cycle abiraterone acetate for metastatic castration-resistant prostate cancer: a biomarker analysis of a multicenter international trial.}, journal = {Annals of oncology : official journal of the European Society for Medical Oncology}, volume = {32}, number = {6}, pages = {726--735}, doi = {10.1016/j.annonc.2021.03.196}, pmid = {33794293}, issn = {1569-8041}, support = {MR/P002072/2/MRC_/Medical Research Council/United Kingdom ; MR/P002072/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Abiraterone Acetate ; Biomarkers, Tumor/genetics ; Gene Conversion ; Humans ; Male ; *Prostatic Neoplasms, Castration-Resistant/drug therapy/genetics ; Receptors, Androgen/genetics ; Treatment Outcome ; }, abstract = {BACKGROUND: Plasma tumor DNA fraction is prognostic in metastatic cancers. This could improve risk stratification before commencing a new treatment. We hypothesized that a second sample collected after one cycle of treatment could refine outcome prediction of patients identified as poor prognosis based on plasma DNA collected pre-treatment.

PATIENTS AND METHODS: Plasma DNA [128 pre-treatment, 134 cycle 2 day 1 (C2D1), and 49 progression] from 151 chemotherapy-naive metastatic castration-resistant prostate cancer (mCRPC) patients in a phase II study of abiraterone acetate (NCT01867710) were subjected to custom targeted next-generation sequencing covering exons of these genes: TP53, AR, RB1, PTEN, PIK3CA, BRCA1, BRCA2, ATM, CDK12, CHEK2, FANCA HDAC2 and PALB2. We also captured 1500 pan-genome regions enriched for single nucleotide polymorphisms to allow detection of tumor DNA using the rolling B-allele method. We tested associations with overall survival (OS) and progression-free survival (PFS).

RESULTS: Plasma tumor DNA detection was associated with shorter OS [hazard ratio (HR): 2.89, 95% confidence intervals (CI): 1.77-4.73, P ≤ 0.0001] and PFS (HR: 2.05; 95% CI: 1.36-3.11, P < 0.001). Using a multivariable model including plasma tumor DNA, patients who had a TP53, RB1 or PTEN gene alteration pre-treatment and at C2D1 had a significantly shorter OS than patients with no alteration at either time point (TP53: HR 7.13, 95% CI 2.37-21.47, P < 0.001; RB1: HR 6.24, 95% CI 1.97-19.73, P = 0.002; PTEN: HR 11.9, 95% CI 3.6-39.34, P < 0.001). Patients who were positive pre-treatment and converted to undetectable had no evidence of a difference in survival compared with those who were undetectable pre-treatment (P = 0.48, P = 0.43, P = 0.5, respectively). Progression samples harbored AR gain in all patients who had gain pre-treatment (9/49) and de novo AR somatic point mutations were detected in 8/49 patients.

CONCLUSIONS: Plasma gene testing after one cycle treatment refines prognostication and could provide an early indication of treatment benefit.}, } @article {pmid33793812, year = {2021}, author = {Barragan, AC and Weigel, D}, title = {Plant NLR diversity: the known unknowns of pan-NLRomes.}, journal = {The Plant cell}, volume = {33}, number = {4}, pages = {814--831}, pmid = {33793812}, issn = {1532-298X}, mesh = {Disease Resistance/genetics ; *Genome, Plant ; Haplotypes ; Multigene Family ; NLR Proteins/*genetics/metabolism ; Plant Diseases/immunology ; Plant Immunity/*physiology ; Plant Proteins/*genetics/metabolism ; Plants/genetics/immunology ; }, abstract = {Plants and pathogens constantly adapt to each other. As a consequence, many members of the plant immune system, and especially the intracellular nucleotide-binding site leucine-rich repeat receptors, also known as NOD-like receptors (NLRs), are highly diversified, both among family members in the same genome, and between individuals in the same species. While this diversity has long been appreciated, its true extent has remained unknown. With pan-genome and pan-NLRome studies becoming more and more comprehensive, our knowledge of NLR sequence diversity is growing rapidly, and pan-NLRomes provide powerful platforms for assigning function to NLRs. 
These efforts are an important step toward the goal of comprehensively predicting from sequence alone whether an NLR provides disease resistance, and if so, to which pathogens.}, } @article {pmid33789061, year = {2021}, author = {Yu, D and Banting, G and Neumann, NF}, title = {A review of the taxonomy, genetics, and biology of the genus Escherichia and the type species Escherichia coli.}, journal = {Canadian journal of microbiology}, volume = {67}, number = {8}, pages = {553--571}, doi = {10.1139/cjm-2020-0508}, pmid = {33789061}, issn = {1480-3275}, mesh = {*Escherichia coli/genetics ; *Genomics ; Genotype ; Phenotype ; Phylogeny ; }, abstract = {Historically, bacteriologists have relied heavily on biochemical and structural phenotypes for bacterial taxonomic classification. However, advances in comparative genomics have led to greater insights into the remarkable genetic diversity within the microbial world, and even within well-accepted species such as Escherichia coli. The extraordinary genetic diversity in E. coli recapitulates the evolutionary radiation of this species in exploiting a wide range of niches (i.e., ecotypes), including the gastrointestinal system of diverse vertebrate hosts as well as non-host natural environments (soil, natural waters, wastewater), which drives the adaptation, natural selection, and evolution of intragenotypic conspecific specialism as a strategy for survival. Over the last few years, there has been increasing evidence that many E. coli strains are very host (or niche)-specific. While biochemical and phylogenetic evidence support the classification of E. coli as a distinct species, the vast genomic (diverse pan-genome and intragenotypic variability), phenotypic (e.g., metabolic pathways), and ecotypic (host-/niche-specificity) diversity, comparable to the diversity observed in known species complexes, suggest that E. coli is better represented as a complex. 
Herein we review the taxonomic classification of the genus Escherichia and discuss how phenotype, genotype, and ecotype recapitulate our understanding of the biology of this remarkable bacterium.}, } @article {pmid33786847, year = {2021}, author = {Wang, M and Fan, Y and Liu, P and Liu, Y and Zhang, J and Jiang, Y and Zhou, C and Yang, L and Wang, C and Qian, C and Yuan, C and Zhang, S and Zhang, X and Yin, Z and Mu, H and Du, Y}, title = {Genomic insights into evolution of pathogenicity and resistance of multidrug-resistant Raoultella ornithinolytica WM1.}, journal = {Annals of the New York Academy of Sciences}, volume = {1497}, number = {1}, pages = {74--90}, doi = {10.1111/nyas.14595}, pmid = {33786847}, issn = {1749-6632}, support = {OPP1176128//Bill and Melinda Gates Foundation/ ; }, mesh = {Anti-Bacterial Agents/pharmacology/therapeutic use ; *Drug Resistance, Multiple, Bacterial ; Enterobacteriaceae/*drug effects/*genetics ; Enterobacteriaceae Infections/drug therapy/*microbiology ; Gene Expression Regulation, Bacterial ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics ; Humans ; Plasmids/genetics ; }, abstract = {Raoultella ornithinolytica is a poorly understood opportunistic pathogen, and the underlying mechanisms of its multidrug resistance and pathogenicity have not yet been comprehensively investigated. The multidrug-resistant (MDR) strain WM1 was isolated from the blood of a male patient in Tianjin, China, in 2018. Here, we describe the complete genome and provide a genomic analysis of R. ornithinolytica WM1. The isolate was resistant to all tested antimicrobials except amikacin, tobramycin, and tigecycline. Two plasmids, pWM1-1 (IncHI5) and pWM1-2 (IncR), carried multidrug-resistance regions. A large antimicrobial resistance island region resided on pWM1-1 and exhibited mosaic structures resulting from the acquisition of complex integrations of variable regions, including genes conferring resistance to multiple classes of antimicrobials. 
Moreover, WM1 possessed virulence-related elements that encode several virulence factors, including type I fimbriae, Escherichia coli common pilus, type II and VI secretion systems, yersiniabactin, enterobactin, and surface polysaccharide, indicating pathogenic potential. Furthermore, the core genome phylogeny and pan-genome analyses revealed extensive genetic diversity. Our analysis indicates the need for stringent infection control, antimicrobial stewardship, periodic resistance monitoring, and rational medication to address potential threats posed by MDR R. ornithinolytica strains.}, } @article {pmid33771633, year = {2021}, author = {Panthee, S and Paudel, A and Hamamoto, H and Ogasawara, AA and Iwasa, T and Blom, J and Sekimizu, K}, title = {Complete genome sequence and comparative genomic analysis of Enterococcus faecalis EF-2001, a probiotic bacterium.}, journal = {Genomics}, volume = {113}, number = {3}, pages = {1534--1542}, doi = {10.1016/j.ygeno.2021.03.021}, pmid = {33771633}, issn = {1089-8646}, mesh = {*Enterococcus faecalis/genetics ; Genome, Bacterial ; Genomics ; Humans ; *Probiotics ; Virulence Factors/genetics ; }, abstract = {Enterococcus faecalis is a common human gut commensal bacterium. While some E. faecalis strains are probiotic, others are known to cause opportunistic infections, and clear distinction between these strains is difficult using traditional taxonomic approaches. In this study, we completed the genome sequencing of EF-2001, a probiotic strain, using our in-house hybrid assembly approach. Comparative analysis showed that EF-2001 was devoid of cytolysins, major factors associated with pathogenesis, and was phylogenetically distant from pathogenic E. faecalis V583. Genomic analysis of strains with a publicly available complete genome sequence predicted that drug-resistance genes- dfrE, efrA, efrB, emeA, and lsaA were present in all strains, and EF-2001 lacked additional drug-resistance genes. 
Core- and pan-genome analyses revealed a higher degree of genomic fluidity. We found 49 genes specific to EF-2001, further characterization of which may provide insights into its diverse biological activities. Our comparative genomic analysis approach could help predict the pathogenic or probiotic potential of E. faecalis leading to an early distinction based on genome sequences.}, } @article {pmid33767684, year = {2021}, author = {Kim, S and Chung, HY and Kwon, JG and Choi, SH and Lee, JH}, title = {Fresh Crab Plays an Important Role as a Nutrient Reservoir for the Rapid Propagation of Vibrio vulnificus.}, journal = {Frontiers in microbiology}, volume = {12}, number = {}, pages = {645860}, pmid = {33767684}, issn = {1664-302X}, abstract = {Vibrio vulnificus is a well-known opportunistic pathogen causing food-borne illnesses by ingestion of contaminated seafood. A new strain of V. vulnificus FORC_016 was isolated from a patient's blood sample in South Korea. The genome consists of two circular DNA chromosomes: chromosome I (3,234,424 bp with a G + C contents of 46.60% containing 2,889 ORFs, 106 tRNA genes, and 31 rRNA genes) and chromosome II (1,837,945 bp with a GC content of 47.00% containing 1,572 ORFs, 13 tRNA genes, and 3 rRNA genes). In addition, chromosome I has a super integron (SI) containing 209 ORFs, which is probably associated with various additional functions including antibiotic resistance and pathogenicity. Pan-genome analysis with other V. vulnificus genomes revealed that core genome regions contain most of the important virulence factors. However, accessory genome regions are located in the SI region and contain unique genes regarding cell wall biosynthesis and generation of host cell protecting capsule, suggesting possible resistance ability against environmental stresses. 
Comparative RNA-Seq analysis of samples between contact and no contact to the crab conditions showed that expressions of amino acid/peptide and carbohydrate transport and utilization genes were down-regulated, but expressions of cell division and growth-related genes were up-regulated, suggesting that the crab may be a nutrition reservoir for rapid propagation of V. vulnificus. Therefore, consumption of the contaminated fresh crab would provide a large number of V. vulnificus to humans, which may be more dangerous. Consequently, biocontrol of V. vulnificus may be critical to ensure the safety in seafood consumption.}, } @article {pmid33759550, year = {2021}, author = {Hu, M and Li, C and Xue, Y and Hu, A and Chen, S and Chen, Y and Lu, G and Zhou, X and Zhou, J}, title = {Isolation, Characterization, and Genomic Investigation of a Phytopathogenic Strain of Stenotrophomonas maltophilia.}, journal = {Phytopathology}, volume = {111}, number = {11}, pages = {2088--2099}, doi = {10.1094/PHYTO-11-20-0501-R}, pmid = {33759550}, issn = {0031-949X}, mesh = {Genomics ; Humans ; *Oryza ; Plant Diseases ; *Stenotrophomonas maltophilia/genetics ; Virulence ; }, abstract = {Stenotrophomonas maltophilia is ubiquitous in diverse environmental habitats. It merits significant concern because of its increasing incidence of nosocomial and community-acquired infection in immunocompromised patients and multiple drug resistance. It is rarely reported as a phytopathogen except in causing white stripe disease of rice in India and postharvest fruit rot of Lanzhou lily. For this study, Dickeya zeae and S. maltophilia strains were simultaneously isolated from soft rot leaves of Clivia miniata in Guangzhou, China, and were both demonstrated to be pathogenic to the host. Compared with the D. zeae strains, S. 
maltophilia strains propagated faster for greater growth in lysogeny broth medium and produced no cellulases or polygalacturonases, but did produce more proteases and fewer extracellular polysaccharides. Furthermore, S. maltophilia strains swam and swarmed dramatically less on semisolid media, but formed a great many more biofilms. Both D. zeae and S. maltophilia strains isolated from clivia caused rot symptoms on other monocot hosts, but not on dicots. Similar to previously reported S. maltophilia strains isolated from other sources, the strain JZL8 survived under many antibiotic stresses. The complete genome sequence of S. maltophilia strain JZL8 consists of a chromosome of 4,635,432 bp without a plasmid. Pan-genome analysis of JZL8 and 180 other S. maltophilia strains identified 50 genes that are unique to JZL8, seven of which implicate JZL8 as the potential pathogen contributor in plants. JZL8 also contains three copies of Type I Secretion System machinery; this is likely responsible for its greater production of proteases. Findings from this study extend our knowledge on the host range of S. maltophilia and provide insight into the phenotypic and genetic features underlying the plant pathogenicity of JZL8.}, } @article {pmid33749576, year = {2021}, author = {Lorenzi, JN and Lespinet, O and Leblond, P and Thibessard, A}, title = {Subtelomeres are fast-evolving regions of the Streptomyces linear chromosome.}, journal = {Microbial genomics}, volume = {7}, number = {6}, pages = {}, pmid = {33749576}, issn = {2057-5858}, abstract = {Streptomyces possess a large linear chromosome (6-12 Mb) consisting of a conserved central region flanked by variable arms covering several megabases. In order to study the evolution of the chromosome across evolutionary times, a representative panel of Streptomyces strains and species (125) whose chromosomes are completely sequenced and assembled was selected. 
The pan-genome of the genus was modelled and shown to be open with a core-genome reaching 1018 genes. The evolution of Streptomyces chromosome was analysed by carrying out pairwise comparisons, and by monitoring indexes measuring the conservation of genes (presence/absence) and their synteny along the chromosome. Using the phylogenetic depth offered by the chosen panel, it was possible to infer that within the central region of the chromosome, the core-genes form a highly conserved organization, which can reveal the existence of an ancestral chromosomal skeleton. Conversely, the chromosomal arms, enriched in variable genes evolved faster than the central region under the combined effect of rearrangements and addition of new information from horizontal gene transfer. The genes hosted in these regions may be localized there because of the adaptive advantage that their rapid evolution may confer. We speculate that (i) within a bacterial population, the variability of these genes may contribute to the establishment of social characters by the production of 'public goods' (ii) at the evolutionary scale, this variability contributes to the diversification of the genetic pool of the bacteria.}, } @article {pmid33742357, year = {2021}, author = {Zhou, X and Zhang, XA and Jiang, ZW and Yang, X and Zhang, XL and Yang, Q}, title = {Combined characterization of a new member of Marivita cryptomonadis strain LZ-15-2 isolated from cultivable phycosphere microbiota of highly toxic HAB dinoflagellate Alexandrium catenella LZT09.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {52}, number = {2}, pages = {739--748}, pmid = {33742357}, issn = {1678-4405}, mesh = {Bacterial Typing Techniques ; Dinoflagellida/growth & development/*microbiology ; Genome, Bacterial ; Harmful Algal Bloom ; *Microbiota ; Phylogeny ; Rhodobacteraceae/classification/genetics/growth & development/*isolation & purification ; }, abstract = 
{During our conveying the microbial structures of phycosphere microbiota (PM) derived from diverse marine harmful algal bloom (HAB) dinoflagellates, a new rod-sharped, white-colored cultivable bacterial strain, designated as LZ-15-2, was isolated from the PM of highly toxic Alexandrium catenella LZT09. Phylogenetic analysis of 16S rRNA gene sequence indicated that strain LZ-15-2 belonged to the genus Marivita within the family Rhodobacteraceae, and demonstrated the highest gene similarity of 99.2% to M. cryptomonadis CL-SK44[T], and less than 98.65% with other type strains of Marivita. Phylogenomic calculations on average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values between the new isolate and M. cryptomonadis CL-SK44[T] were 99.86% and 99.88%, respectively. Genomic comparison of strain LZ-15-2 with available genomes of Marivita species further verified its taxonomic position within the genus of Marivita. Moreover, comparative genomics analysis showed a proximal similarity of strain LZ-15-2 with M. cryptomonadis CL-SK44[T], and it also revealed an open pan-genome status based on constructed gene accumulation curves among Marivita members with 9,361 and 1,712 genes for the pan- and core-genome analysis, respectively. Based on combined polyphasic taxonomic characteristics, strain LZ-15-2 represents a new member of M. 
cryptomonadis, and proposed as a potential candidate for further exploration of the detailed mechanisms governing the dynamic cross-kingdom algae-bacteria interactions (ABI) between PM and their algal host LZT09.}, } @article {pmid33742098, year = {2021}, author = {Abrouk, M and Athiyannan, N and Müller, T and Pailles, Y and Stritt, C and Roulin, AC and Chu, C and Liu, S and Morita, T and Handa, H and Poland, J and Keller, B and Krattinger, SG}, title = {Population genomics and haplotype analysis in spelt and bread wheat identifies a gene regulating glume color.}, journal = {Communications biology}, volume = {4}, number = {1}, pages = {375}, pmid = {33742098}, issn = {2399-3642}, mesh = {Chromosomes, Plant ; *Color ; DNA Copy Number Variations ; Gene Dosage ; Gene Expression Regulation, Plant ; *Genetic Variation ; *Genome, Plant ; Genome-Wide Association Study ; *Haplotypes ; High-Throughput Nucleotide Sequencing ; *Metagenomics ; Phenotype ; Plant Proteins/*genetics/metabolism ; Polymorphism, Single Nucleotide ; Transcription Factors/*genetics/metabolism ; Triticum/*genetics/metabolism ; }, abstract = {The cloning of agriculturally important genes is often complicated by haplotype variation across crop cultivars. Access to pan-genome information greatly facilitates the assessment of structural variations and rapid candidate gene identification. Here, we identified the red glume 1 (Rg-B1) gene using association genetics and haplotype analyses in ten reference grade wheat genomes. Glume color is an important trait to characterize wheat cultivars. Red glumes are frequent among Central European spelt, a dominant wheat subspecies in Europe before the 20[th] century. We used genotyping-by-sequencing to characterize a global diversity panel of 267 spelt accessions, which provided evidence for two independent introductions of spelt into Europe. A single region at the Rg-B1 locus on chromosome 1BS was associated with glume color in the diversity panel. 
Haplotype comparisons across ten high-quality wheat genomes revealed a MYB transcription factor as candidate gene. We found extensive haplotype variation across the ten cultivars, with a particular group of MYB alleles that was conserved in red glume wheat cultivars. Genetic mapping and transient infiltration experiments allowed us to validate this particular MYB transcription factor variants. Our study demonstrates the value of multiple high-quality genomes to rapidly resolve copy number and haplotype variations in regions controlling agriculturally important traits.}, } @article {pmid33738531, year = {2021}, author = {Sarjit, A and Ravensdale, JT and Coorey, R and Fegan, N and Dykes, GA}, title = {Survival of Salmonella Under Heat Stress is Associated with the Presence/Absence of CRISPR Cas Genes and Iron Levels.}, journal = {Current microbiology}, volume = {78}, number = {5}, pages = {1741--1751}, pmid = {33738531}, issn = {1432-0991}, mesh = {*Clustered Regularly Interspaced Short Palindromic Repeats ; *Genome, Bacterial ; Heat-Shock Response ; Iron ; Salmonella/genetics ; }, abstract = {Clustered regularly interspaced short palindromic repeats (CRISPR) cas genes have been linked to stress response in Salmonella. Our aim was to identify the presence of CRISPR cas in Salmonella and its response to heat in the presence of iron. Whole genomes of Salmonella (n = 50) of seven serovars were compared to identify the presence of CRISPR cas genes, direct-repeats and spacers. All Salmonella genomes had all cas genes present except S. Newport 2393 which lacked these genes. Gene-specific primers were used to confirm the absence of these genes in S. Newport 2393. The presence/absence of CRISPR cas genes was further investigated among 469 S. Newport genomes from PATRIC with 283 genomes selected for pan-genome analysis. The response of eleven Salmonella strains of various serovars to gradual heat in ferrous and ferric forms of iron was investigated. A total of 32/283 S. 
Newport genomes that lacked all CRISPR cas genes clustered together. S. Newport 2393 was the most heat-sensitive strain at higher iron levels (200 and 220 ppm) in ferrous and ferric forms of iron. The absence of CRISPR cas genes in S. Newport 2393 may contribute to its increase in heat sensitivity and iron may play a role in this. The high reduction in numbers of most Salmonella strains exposed to heat makes it unfeasible to extract RNA and conduct transcription studies. Further studies should be conducted to validate the survival of Salmonella when exposed to heat in the presence/absence of CRISPR cas genes and different iron levels.}, } @article {pmid33736686, year = {2021}, author = {Lim, J and Park, HT and Ko, S and Park, HE and Lee, G and Kim, S and Shin, MK and Yoo, HS and Kim, D}, title = {Genomic diversity of Mycobacterium avium subsp. paratuberculosis: pangenomic approach for highlighting unique genomic features with newly constructed complete genomes.}, journal = {Veterinary research}, volume = {52}, number = {1}, pages = {46}, pmid = {33736686}, issn = {1297-9716}, support = {iPET918020-4//Ministry of Agriculture, Food and Rural Affairs/ ; }, mesh = {*Genetic Variation ; *Genome, Bacterial ; Genomics ; Mycobacterium avium subsp. paratuberculosis/classification/*genetics ; Phylogeny ; Polymorphism, Single Nucleotide ; Republic of Korea ; }, abstract = {Mycobacterium avium subsp. paratuberculosis (MAP) is a causative agent of Johne's disease, which is a chronic granulomatous enteropathy in ruminants. Determining the genetic diversity of MAP is necessary to understand the epidemiology and biology of MAP, as well as establishing disease control strategies. In the present study, whole genome-based alignment and comparative analysis were performed using 40 publicly available MAP genomes, including newly sequenced Korean isolates. First, whole genome-based alignment was employed to identify new genomic structures in MAP genomes. 
Second, the genomic diversity of the MAP population was described by pangenome analysis. A phylogenetic tree based on the core genome and pangenome showed that the MAP was differentiated into two major types (C- and S-type), which was in keeping with the findings of previous studies. However, B-type strains were discriminated from C-type strains. Finally, functional analysis of the pangenome was performed using three virulence factor databases (i.e., PATRIC, VFDB, and Victors) to predict the phenotypic diversity of MAP in terms of pathogenicity. Based on the results of the pangenome analysis, we developed a real-time PCR technique to distinguish among S-, B- and C-type strains. In conclusion, the results of our study suggest that the phenotypic differences between MAP strains can be explained by their genetic polymorphisms. These results may help to elucidate the diversity of MAP, extending from genomic features to phenotypic traits.}, } @article {pmid33726677, year = {2021}, author = {Edwards, RJ and Field, MA and Ferguson, JM and Dudchenko, O and Keilwagen, J and Rosen, BD and Johnson, GS and Rice, ES and Hillier, D and Hammond, JM and Towarnicki, SG and Omer, A and Khan, R and Skvortsova, K and Bogdanovic, O and Zammit, RA and Aiden, EL and Warren, WC and Ballard, JWO}, title = {Chromosome-length genome assembly and structural variations of the primal Basenji dog (Canis lupus familiaris) genome.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {188}, pmid = {33726677}, issn = {1471-2164}, support = {UM1 HG009375/HG/NHGRI NIH HHS/United States ; UM1HG009375/NH/NIH HHS/United States ; }, mesh = {Animals ; China ; Chromosomes ; Dogs ; Female ; Genome ; Genomics ; Male ; *Wolves/genetics ; }, abstract = {BACKGROUND: Basenjis are considered an ancient dog breed of central African origins that still live and hunt with tribesmen in the African Congo. 
Nicknamed the barkless dog, Basenjis possess unique phylogeny, geographical origins and traits, making their genome structure of great interest. The increasing number of available canid reference genomes allows us to examine the impact the choice of reference genome makes with regard to reference genome quality and breed relatedness.

RESULTS: Here, we report two high quality de novo Basenji genome assemblies: a female, China (CanFam_Bas), and a male, Wags. We conduct pairwise comparisons and report structural variations between assembled genomes of three dog breeds: Basenji (CanFam_Bas), Boxer (CanFam3.1) and German Shepherd Dog (GSD) (CanFam_GSD). CanFam_Bas is superior to CanFam3.1 in terms of genome contiguity and comparable overall to the high quality CanFam_GSD assembly. By aligning short read data from 58 representative dog breeds to three reference genomes, we demonstrate how the choice of reference genome significantly impacts both read mapping and variant detection.

CONCLUSIONS: The growing number of high-quality canid reference genomes means the choice of reference genome is an increasingly critical decision in subsequent canid variant analyses. The basal position of the Basenji makes it suitable for variant analysis for targeted applications of specific dog breeds. However, we believe more comprehensive analyses across the entire family of canids is more suited to a pangenome approach. Collectively this work highlights the importance the choice of reference genome makes in all variation studies.}, } @article {pmid33720317, year = {2021}, author = {Shirasawa, K and Sasaki, K and Hirakawa, H and Isobe, S}, title = {Genomic region associated with pod color variation in pea (Pisum sativum).}, journal = {G3 (Bethesda, Md.)}, volume = {11}, number = {5}, pages = {}, pmid = {33720317}, issn = {2160-1836}, mesh = {Flowers ; *Genomics ; *Peas/genetics ; Phenotype ; }, abstract = {Pea (Pisum sativum) was chosen as the research material by Gregor Mendel to discover the laws of inheritance. Out of seven traits studied by Mendel, genes controlling three traits including pod shape, pod color, and flower position have not been identified to date. With the aim of identifying the genomic region controlling pod color, we determined the genome sequence of a pea line with yellow pods. Genome sequence reads obtained using a Nanopore sequencing technology were assembled into 117,981 contigs (3.3 Gb), with an N50 value of 51.2 kb. A total of 531,242 potential protein-coding genes were predicted, of which 519,349 (97.8%) were located within repetitive sequences (2.8 Gb). The assembled sequences were ordered using a reference as a guide to build pseudomolecules. Subsequent genetic and association analyses led to the identification of a genomic region that controls pea pod color. 
DNA sequences at this genomic location and transcriptome profiles of green and yellow pod lines were analyzed, and genes encoding 3' exoribonucleases were selected as potential candidates controlling pod color. The results presented in this study are expected to accelerate pan-genome studies in pea and facilitate the identification of the gene controlling one of the traits studied by Mendel.}, } @article {pmid33719856, year = {2022}, author = {Felice, AG and Alves, LG and Freitas, ASF and Rodrigues, TCV and Jaiswal, AK and Tiwari, S and Gomes, LGR and Miranda, FM and Ramos, RTJ and Azevedo, V and Oliveira, LC and Oliveira, CJ and Soares, SDC and Benevides, LJ}, title = {Pan-genomic analyses of 47 complete genomes of the Rickettsia genus and prediction of new vaccine targets and virulence factors of the species.}, journal = {Journal of biomolecular structure & dynamics}, volume = {40}, number = {16}, pages = {7496-7510}, doi = {10.1080/07391102.2021.1898473}, pmid = {33719856}, issn = {1538-0254}, mesh = {Animals ; Genome, Bacterial/genetics ; Genomics ; Humans ; *Rickettsia/genetics ; *Vaccines ; Virulence Factors/genetics ; }, abstract = {The genus Rickettsia belongs to the Proteobacteria phylum and these bacteria infect animals and humans causing a range of diseases worldwide. The genus is divided into 4 groups and despite the public health threat and the knowledge accumulated so far, the mandatory intracellular bacteria behaviour and limitation for in vitro culture makes it difficult to create new vaccines and drug targets to these bacteria. In an attempt to overcome these limitations, pan-genomic approaches has used 47 genomes of the genus Rickettsia, in order to describe species similarities and genomics islands. Moreover, we conducted reverse vaccinology and docking analysis aiming the identification of proteins that have great potential to become vaccine and drug targets. 
We found out that the bacteria of the four Rickettsia groups have a high similarity with each other, with about 90 to 100% of identity. A pathogenicity island and a resistance island were predicted. In addition, 8 proteins were also predicted as strong candidates for vaccine and 9 as candidates for drug targets. The prediction of the proteins leads us to believe in a possibility of prospecting potential drugs or creating a polyvalent vaccine, which could reach most strains of this large group of bacteria. Communicated by Ramaswamy H. Sarma.}, } @article {pmid33716184, year = {2021}, author = {Chen, Z and Erickson, DL and Meng, J}, title = {Polishing the Oxford Nanopore long-read assemblies of bacterial pathogens with Illumina short reads to improve genomic analyses.}, journal = {Genomics}, volume = {113}, number = {3}, pages = {1366-1377}, doi = {10.1016/j.ygeno.2021.03.018}, pmid = {33716184}, issn = {1089-8646}, support = {U01 FD001418/FD/FDA HHS/United States ; }, mesh = {Escherichia coli ; Genome, Bacterial ; Genomics/methods ; High-Throughput Nucleotide Sequencing/methods ; *Nanopores ; Phylogeny ; Sequence Analysis, DNA/methods ; Staphylococcus aureus ; }, abstract = {Oxford Nanopore sequencing has been widely used to achieve complete genomes of bacterial pathogens. However, the error rates of Oxford Nanopore long reads are high. Various polishing algorithms using Illumina short reads to correct the errors in Oxford Nanopore long-read assemblies have been developed. The impact of polishing the Oxford Nanopore long-read assemblies of bacterial pathogens with Illumina short reads on improving genomic analyses was evaluated using both simulated and real reads. Ten species (10 strains) were selected for simulated reads, while real reads were tested on 11 species (11 strains). 
Oxford Nanopore long reads were assembled with Unicycler to produce a draft assembly, followed by three rounds of polishing with Illumina short reads using two polishing tools, Pilon and NextPolish. One round of NextPolish polishing generated genome completeness and accuracy parameters similar to the reference genomes, whereas two or three rounds of Pilon polishing were needed, though contiguity remained unchanged after polishing. The polished assemblies of Escherichia coli O157:H7, Salmonella Typhimurium, and Cronobacter sakazakii with simulated reads did not provide accurate plasmid identifications. One round of NextPolish polishing was needed for accurately identifying plasmids in Staphylococcus aureus and E. coli O26:H11 with real reads, whereas one and two rounds of Pilon polishing were necessary for these two strains, respectively. Polishing failed to provide an accurate antimicrobial resistance (AMR) genotype for S. aureus with real reads. One round of polishing recovered an accurate AMR genotype for Klebsiella pneumoniae with real reads. The reference genome and draft assembly of Citrobacter braakii with real reads differed, which carried blaCMY-83 and fosA6, respectively, while both genes were present after one round of polishing. However, polishing did not improve the assembly of E. coli O26:H11 with real reads to achieve numbers of virulence genes similar to the reference genome. The draft and polished assemblies showed a phylogenetic tree topology comparable with the reference genomes. For multilocus sequence typing and pan-genome analyses, one round of NextPolish polishing was sufficient to obtain accurate results, while two or three rounds of Pilon polishing were needed. 
Overall, NextPolish outperformed Pilon for polishing the Oxford Nanopore long-read assemblies of bacterial pathogens, though both polishing strategies improved genomic analyses compared to the draft assemblies.}, } @article {pmid33712682, year = {2021}, author = {Majda, S and Beisser, D and Boenigk, J}, title = {Nutrient-driven genome evolution revealed by comparative genomics of chrysomonad flagellates.}, journal = {Communications biology}, volume = {4}, number = {1}, pages = {328}, pmid = {33712682}, issn = {2399-3642}, mesh = {Autotrophic Processes/genetics ; Base Composition ; Carbon/*metabolism ; *Evolution, Molecular ; *Genome ; *Genomics ; Heterotrophic Processes/*genetics ; Photosynthesis/*genetics ; Phylogeny ; Ploidies ; Stramenopiles/*genetics/metabolism ; }, abstract = {Phototrophic eukaryotes have evolved mainly by the primary or secondary uptake of photosynthetic organisms. A return to heterotrophy occurred multiple times in various protistan groups such as Chrysophyceae, despite the expected advantage of autotrophy. It is assumed that the evolutionary shift to mixotrophy and further to heterotrophy is triggered by a differential importance of nutrient and carbon limitation. We sequenced the genomes of 16 chrysophyte strains and compared them in terms of size, function, and sequence characteristics in relation to photo-, mixo- and heterotrophic nutrition. All strains were sequenced with Illumina and partly with PacBio. Heterotrophic taxa have reduced genomes and a higher GC content of up to 59% as compared to phototrophic taxa. Heterotrophs have a large pan genome, but a small core genome, indicating a differential specialization of the distinct lineages. The pan genome of mixotrophs and heterotrophs taken together but not the pan genome of the mixotrophs alone covers the complete functionality of the phototrophic strains indicating a random reduction of genes. 
The observed ploidy ranges from di- to tetraploidy and was found to be independent of taxonomy or trophic mode. Our results substantiate an evolution driven by nutrient and carbon limitation.}, } @article {pmid33710295, year = {2021}, author = {Mascher, M and Wicker, T and Jenkins, J and Plott, C and Lux, T and Koh, CS and Ens, J and Gundlach, H and Boston, LB and Tulpová, Z and Holden, S and Hernández-Pinzón, I and Scholz, U and Mayer, KFX and Spannagl, M and Pozniak, CJ and Sharpe, AG and Šimková, H and Moscou, MJ and Grimwood, J and Schmutz, J and Stein, N}, title = {Long-read sequence assembly: a technical evaluation in barley.}, journal = {The Plant cell}, volume = {33}, number = {6}, pages = {1888-1906}, pmid = {33710295}, issn = {1532-298X}, mesh = {Computational Biology/methods ; DNA, Intergenic ; Genome, Plant ; Genomics/*methods ; High-Throughput Nucleotide Sequencing/*methods ; Hordeum/*genetics ; Molecular Sequence Annotation ; Retroelements ; Sequence Analysis, DNA ; Terminal Repeat Sequences ; }, abstract = {Sequence assembly of large and repeat-rich plant genomes has been challenging, requiring substantial computational resources and often several complementary sequence assembly and genome mapping approaches. The recent development of fast and accurate long-read sequencing by circular consensus sequencing (CCS) on the PacBio platform may greatly increase the scope of plant pan-genome projects. Here, we compare current long-read sequencing platforms regarding their ability to rapidly generate contiguous sequence assemblies in pan-genome studies of barley (Hordeum vulgare). Most long-read assemblies are clearly superior to the current barley reference sequence based on short-reads. Assemblies derived from accurate long reads excel in most metrics, but the CCS approach was the most cost-effective strategy for assembling tens of barley genomes. 
A downsampling analysis indicated that 20-fold CCS coverage can yield very good sequence assemblies, while even five-fold CCS data may capture the complete sequence of most genes. We present an updated reference genome assembly for barley with near-complete representation of the repeat-rich intergenic space. Long-read assembly can underpin the construction of accurate and complete sequences of multiple genomes of a species to build pan-genome infrastructures in Triticeae crops and their wild relatives.}, } @article {pmid33690659, year = {2021}, author = {Truccollo, B and Whyte, P and Burgess, C and Bolton, D}, title = {Genetic characterisation of a subset of Campylobacter jejuni isolates from clinical and poultry sources in Ireland.}, journal = {PloS one}, volume = {16}, number = {3}, pages = {e0246843}, pmid = {33690659}, issn = {1932-6203}, mesh = {Animals ; Campylobacter jejuni/*genetics/*isolation & purification/pathogenicity ; Humans ; Ireland ; Multilocus Sequence Typing ; Poultry/*microbiology ; Virulence ; }, abstract = {Campylobacter spp. is a significant and prevalent public health hazard globally. Campylobacter jejuni is the most frequently recovered species from human cases and poultry are considered the most important reservoir for its transmission to humans. In this study, 30 Campylobacter jejuni isolates were selected from clinical (n = 15) and broiler (n = 15) sources from a larger cohort, based on source, virulence, and antimicrobial resistance profiles. The objective of this study was to further characterise the genomes of these isolates including MLST types, population structure, pan-genome, as well as virulence and antimicrobial resistance determinants. A total of 18 sequence types and 12 clonal complexes were identified. The most common clonal complex was ST-45, which was found in both clinical and broiler samples. We characterised the biological functions that were associated with the core and accessory genomes of the isolates in this study. 
No significant difference in the prevalence of virulence or antimicrobial resistance determinants was observed between clinical and broiler isolates, although genes associated with severe illness such as neuABC, wlaN and cstIII were only detected in clinical isolates. The ubiquity of virulence factors associated with motility, invasion and cytolethal distending toxin (CDT) synthesis in both clinical and broiler C. jejuni genomes and genetic similarities between groups of broiler and clinical C. jejuni reaffirm that C. jejuni from poultry remains a significant threat to public health.}, } @article {pmid33679880, year = {2021}, author = {Mohd Saad, NS and Severn-Ellis, AA and Pradhan, A and Edwards, D and Batley, J}, title = {Genomics Armed With Diversity Leads the Way in Brassica Improvement in a Changing Global Environment.}, journal = {Frontiers in genetics}, volume = {12}, number = {}, pages = {600789}, pmid = {33679880}, issn = {1664-8021}, abstract = {Meeting the needs of a growing world population in the face of imminent climate change is a challenge; breeding of vegetable and oilseed Brassica crops is part of the race in meeting these demands. Available genetic diversity constituting the foundation of breeding is essential in plant improvement. Elite varieties, land races, and crop wild species are important resources of useful variation and are available from existing genepools or genebanks. Conservation of diversity in genepools, genebanks, and even the wild is crucial in preventing the loss of variation for future breeding efforts. In addition, the identification of suitable parental lines and alleles is critical in ensuring the development of resilient Brassica crops. During the past two decades, an increasing number of high-quality nuclear and organellar Brassica genomes have been assembled. 
Whole-genome re-sequencing and the development of pan-genomes are overcoming the limitations of the single reference genome and provide the basis for further exploration. Genomic and complementary omic tools such as microarrays, transcriptomics, epigenetics, and reverse genetics facilitate the study of crop evolution, breeding histories, and the discovery of loci associated with highly sought-after agronomic traits. Furthermore, in genomic selection, predicted breeding values based on phenotype and genome-wide marker scores allow the preselection of promising genotypes, enhancing genetic gains and substantially quickening the breeding cycle. It is clear that genomics, armed with diversity, is set to lead the way in Brassica improvement; however, a multidisciplinary plant breeding approach that includes phenotype = genotype × environment × management interaction will ultimately ensure the selection of resilient Brassica varieties ready for climate change.}, } @article {pmid33677170, year = {2021}, author = {Cornelius, AJ and Huq, M and On, SLW and French, NP and Vandenberg, O and Miller, WG and Lastovica, AJ and Istivan, T and Biggs, PJ}, title = {Genetic characterisation of Campylobacter concisus: Strategies for improved genomospecies discrimination.}, journal = {Systematic and applied microbiology}, volume = {44}, number = {3}, pages = {126187}, doi = {10.1016/j.syapm.2021.126187}, pmid = {33677170}, issn = {1618-0984}, mesh = {Base Composition ; *Campylobacter/classification/genetics ; *Genome, Bacterial ; Genomics ; Nucleic Acid Hybridization ; *Phylogeny ; }, abstract = {Although at least two genetically distinct groups, or genomospecies, have been well documented for Campylobacter concisus, no phenotype has yet been identified for their differentiation and thus formal description as separate species. C. concisus has been isolated from a variety of sites in the human body, including saliva and stool samples from both healthy and diarrhoeic individuals. 
We evaluated the ability of a range of whole genome-based tools to distinguish between the two C. concisus genomospecies (GS) using a collection of 190 C. concisus genomes. Nine genomes from related Campylobacter species were included in some analyses to provide context. Analyses incorporating sequence analysis of multiple ribosomal genes generated similar levels of C. concisus GS discrimination as genome-wide comparisons. The C. concisus genomes formed two groups; GS1 represented by ATCC 33237[T] and GS2 by CCUG 19995. The two C. concisus GS were separated from the nine genomes of related species. GS1 and GS2 also differed in G+C content with medians of 37.56% and 39.51%, respectively. The groups are consistent with previously established GS and are supported by DNA reassociation results. Average Nucleotide Identity using MUMmer (ANIm) and Genome BLAST Distance Phylogeny generated in silico DNA-DNA hybridisation (isDDH) (against ATCC 33237[T] and CCUG 19995), plus G+C content provides cluster-independent GS discrimination suitable for routine use. Pan-genomic analysis identified genes specific to GS1 and GS2. WGS data and genomic species identification methods support the existence of two GS within C. concisus. These data provide genome-level metrics for strain identification to genomospecies level.}, } @article {pmid33669391, year = {2021}, author = {Rosselli, R and La Porta, N and Muresu, R and Stevanato, P and Concheri, G and Squartini, A}, title = {Pangenomics of the Symbiotic Rhizobiales. Core and Accessory Functions Across a Group Endowed with High Levels of Genomic Plasticity.}, journal = {Microorganisms}, volume = {9}, number = {2}, pages = {}, pmid = {33669391}, issn = {2076-2607}, support = {PRAT CPDA154841/15//Università di Padova/ ; }, abstract = {Pangenome analyses reveal major clues on evolutionary instances and critical genome core conservation. The order Rhizobiales encompasses several families with rather disparate ecological attitudes. 
Among them, Rhizobiaceae, Bradyrhizobiaceae, Phyllobacteriaceae and Xanthobacteriaceae, include members proficient in mutualistic symbioses with plants based on the bacterial conversion of N2 into ammonia (nitrogen-fixation). The pangenome of 12 nitrogen-fixing plant symbionts of the Rhizobiales was analyzed yielding total 37,364 loci, with a core genome constituting 700 genes. The percentage of core genes averaged 10.2% over single genomes, and between 5% to 7% were found to be plasmid-associated. The comparison between a representative reference genome and the core genome subset, showed the core genome highly enriched in genes for macromolecule metabolism, ribosomal constituents and overall translation machinery, while membrane/periplasm-associated genes, and transport domains resulted under-represented. The analysis of protein functions revealed that between 1.7% and 4.9% of core proteins could putatively have different functions.}, } @article {pmid33668566, year = {2021}, author = {Iscan, E and Ekin, U and Yildiz, G and Oz, O and Keles, U and Suner, A and Cakan-Akdogan, G and Ozhan, G and Nekulova, M and Vojtesek, B and Uzuner, H and Karakülah, G and Alotaibi, H and Ozturk, M}, title = {TAp73β Can Promote Hepatocellular Carcinoma Dedifferentiation.}, journal = {Cancers}, volume = {13}, number = {4}, pages = {}, pmid = {33668566}, issn = {2072-6694}, support = {113S389//TUBITAK/ ; -//Turkish Academy of Sciences/ ; -//Izmir Biomedicine and Genome Center/ ; MMCI, 00209805//MH CZ - DRO/ ; 19-06530S//Grant Agency of the Czech Republic/ ; No.CZ.02.1.01/0.0/0.0/16_019/0000868//European Regional Development Fund-Project ENOCH/ ; -//EMBO Installation Grant/ ; -//TUBITAK/ ; }, abstract = {Hepatocyte dedifferentiation is a major source of hepatocellular carcinoma (HCC), but its mechanisms are unknown. We explored the p73 expression in HCC tumors and studied the effects of transcriptionally active p73β (TAp73β) in HCC cells. 
Expression profiles of p73 and patient clinical data were collected from the Genomic Data Commons (GDC) data portal and the TSVdb database, respectively. Global gene expression profiles were determined by pan-genomic 54K microarrays. The Gene Set Enrichment Analysis method was used to identify TAp73β-regulated gene sets. The effects of TAp73 isoforms were analyzed in monolayer cell culture, 3D-cell culture and xenograft models in zebrafish using western blot, flow cytometry, fluorescence imaging, real-time polymerase chain reaction (RT-PCR), immunohistochemistry and morphological examination. TAp73 isoforms were significantly upregulated in HCC, and high p73 expression correlated with poor patient survival. The induced expression of TAp73β caused landscape expression changes in genes involved in growth signaling, cell cycle, stress response, immunity, metabolism and development. Hep3B cells overexpressing TAp73β had lost hepatocyte lineage biomarkers including ALB, CYP3A4, AFP, HNF4α. In contrast, TAp73β upregulated genes promoting cholangiocyte lineage such as YAP, JAG1 and ZO-1, accompanied with an increase in metastatic ability. 
Our findings suggest that TAp73β may promote malignant dedifferentiation of HCC cells.}, } @article {pmid33668147, year = {2021}, author = {Shikov, AE and Malovichko, YV and Lobov, AA and Belousova, ME and Nizhnikov, AA and Antonets, KS}, title = {The Distribution of Several Genomic Virulence Determinants Does Not Corroborate the Established Serotyping Classification of Bacillus thuringiensis.}, journal = {International journal of molecular sciences}, volume = {22}, number = {5}, pages = {}, pmid = {33668147}, issn = {1422-0067}, support = {20-316-70020//Russian Foundation for Basic Research/ ; }, mesh = {Bacillus thuringiensis/*classification/genetics/metabolism/pathogenicity ; Bacterial Proteins/genetics/*metabolism ; Chromatography, Liquid ; Flagellin/genetics/*metabolism ; Phylogeny ; Proteome/analysis/*metabolism ; Serotyping/*methods ; Tandem Mass Spectrometry ; *Virulence ; Virulence Factors/genetics/*metabolism ; }, abstract = {Bacillus thuringiensis, commonly referred to as Bt, is an object of the lasting interest of microbiologists due to its highly effective insecticidal properties, which make Bt a prominent source of biologicals. To categorize the exuberance of Bt strains discovered, serotyping assays are utilized in which flagellin serves as a primary seroreactive molecule. Despite its convenience, this approach is not indicative of Bt strains' phenotypes, neither it reflects actual phylogenetic relationships within the species. In this respect, comparative genomic and proteomic techniques appear more informative, but their use in Bt strain classification remains limited. In the present work, we used a bottom-up proteomic approach based on fluorescent two-dimensional difference gel electrophoresis (2D-DIGE) coupled with liquid chromatography/tandem mass spectrometry (LC-MS/MS) protein identification to assess which stage of Bt culture, vegetative or spore, would be more informative for strain characterization. 
To this end, the proteomic differences for the israelensis-attributed strains were assessed to compare sporulating cultures of the virulent derivative to the avirulent one as well as to the vegetative stage virulent bacteria. Using the same approach, virulent spores of the israelensis strain were also compared to the spores of strains belonging to two other major Bt serovars, namely darmstadiensis and thuringiensis. The identified proteins were analyzed regarding the presence of the respective genes in the 104 Bt genome assemblies available at open access with serovar attributions specified. Of 21 proteins identified, 15 were found to be encoded in all the present assemblies at 67% identity threshold, including several virulence factors. Notable, individual phylogenies of these core genes conferred neither the serotyping nor the flagellin-based phylogeny but corroborated the reconstruction based on phylogenomics approaches in terms of tree topology similarity. In its turn, the distribution of accessory protein genes was not confined to the existing serovars. The obtained results indicate that neither gene presence nor the core gene sequence may serve as distinctive bases for the serovar attribution, undermining the notion that the serotyping system reflects strains' phenotypic or genetic similarity. 
We also provide a set of loci, which fit in with the phylogenomics data plausibly and thus may serve for draft phylogeny estimation of the novel strains.}, } @article {pmid33660164, year = {2021}, author = {Mete, O and Ezzat, S and Perry, A and Yamada, S and Uccella, S and Grossman, AB and Asa, SL}, title = {The Pangenomic Classification of Pituitary Neuroendocrine Tumors: Quality Histopathology is Required for Accurate Translational Research.}, journal = {Endocrine pathology}, volume = {32}, number = {3}, pages = {415-417}, pmid = {33660164}, issn = {1559-0097}, mesh = {Humans ; *Neuroendocrine Tumors ; Pituitary Gland ; *Pituitary Neoplasms ; Translational Research, Biomedical ; }, } @article {pmid33659287, year = {2021}, author = {Mizzi, R and Timms, VJ and Price-Carter, ML and Gautam, M and Whittington, R and Heuer, C and Biggs, PJ and Plain, KM}, title = {Comparative Genomics of Mycobacterium avium Subspecies Paratuberculosis Sheep Strains.}, journal = {Frontiers in veterinary science}, volume = {8}, number = {}, pages = {637637}, pmid = {33659287}, issn = {2297-1769}, abstract = {Mycobacterium avium subspecies paratuberculosis (MAP) is the aetiological agent of Johne's disease (JD), a chronic enteritis that causes major losses to the global livestock industry. Further, it has been associated with human Crohn's disease. Several strains of MAP have been identified, the two major groups being sheep strain MAP, which includes the Type I and Type III sub-lineages, and the cattle strain or Type II MAP lineage, of which bison strains are a sub-grouping. Major genotypic, phenotypic and pathogenic variations have been identified in prior comparisons, but the research has predominately focused on cattle strains of MAP. In countries where the sheep industries are more prevalent, however, such as Australia and New Zealand, ovine JD is a substantial burden. 
An information gap exists regarding the genomic differences between sheep strain sub-lineages and the relevance of Type I and Type III MAP in terms of epidemiology and/or pathogenicity. We therefore investigated sheep MAP isolates from Australia and New Zealand using whole genome sequencing. For additional context, sheep MAP genome datasets were downloaded from the Sequence Read Archive and GenBank. The final dataset contained 18 Type III and 16 Type I isolates and the K10 cattle strain MAP reference genome. Using a pan-genome approach, an updated global phylogeny for sheep MAP from de novo assemblies was produced. When rooted with the K10 cattle reference strain, two distinct clades representing the lineages were apparent. The Australian and New Zealand isolates formed a distinct sub-clade within the type I lineage, while the European type I isolates formed another less closely related group. Within the type III lineage, isolates appeared more genetically diverse and were from a greater number of continents. Querying of the pan-genome and verification using BLAST analysis revealed lineage-specific variations (n = 13) including genes responsible for metabolism and stress responses. The genetic differences identified may represent important epidemiological and virulence traits specific to sheep MAP. This knowledge will potentially contribute to improved vaccine development and control measures for these strains.}, } @article {pmid33652876, year = {2021}, author = {Kamli, MR and Alzahrani, NAY and Hajrah, NH and Sabir, JSM and Malik, A}, title = {Genome-Driven Discovery of Enzymes with Industrial Implications from the Genus Aneurinibacillus.}, journal = {Microorganisms}, volume = {9}, number = {3}, pages = {}, pmid = {33652876}, issn = {2076-2607}, abstract = {Bacteria belonging to the genus Aneurinibacillus within the family Paenibacillaceae are Gram-positive, endospore-forming, and rod-shaped bacteria inhabiting diverse environments. 
Currently, there are eight validly described species of Aneurinibacillus; however, several unclassified species have also been reported. Aneurinibacillus spp. have shown the potential for producing secondary metabolites (SMs) and demonstrated diverse types of enzyme activities. These features make them promising candidates with industrial implications. At present, genomes of 9 unique species from the genus Aneurinibacillus are available, which can be utilized to decipher invaluable information on their biosynthetic potential as well as enzyme activities. In this work, we performed the comparative genome analyses of nine Aneurinibacillus species representing the first such comprehensive study of this genus at the genome level. We focused on discovering the biosynthetic, biodegradation, and heavy metal resistance potential of this under-investigated genus. The results indicate that the genomes of Aneurinibacillus contain SM-producing regions with diverse bioactivities, including antimicrobial and antiviral activities. Several carbohydrate-active enzymes (CAZymes) and genes involved in heavy metal resistance were also identified. 
Additionally, a broad range of enzyme classes were also identified in the Aneurinibacillus pan-genomes, making this group of bacteria potential candidates for future investigations with industrial applications.}, } @article {pmid33649946, year = {2021}, author = {Lugli, GA}, title = {Assembly, Annotation, and Comparative Analysis of Bifidobacterial Genomes.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2278}, number = {}, pages = {31-44}, pmid = {33649946}, issn = {1940-6029}, mesh = {Bifidobacterium/*genetics ; *Genome, Bacterial ; Genomics/*methods ; High-Throughput Nucleotide Sequencing/methods ; Molecular Sequence Annotation/methods ; Phylogeny ; Sequence Analysis, DNA/methods ; Whole Genome Sequencing/methods ; }, abstract = {Genome assembly and annotation are two of the key actions that must be undertaken in order to explore the genomic repertoire of (bifido)bacteria. The gathered information can be employed to genomically characterize a given microorganism, and can also be used to perform comparative genome analysis by including other sequenced (bifido)bacterial strains. 
Here, we highlight various bioinformatic programs able to manage next generation sequencing data starting from the assembly of a genome to the comparative analyses between strains.}, } @article {pmid33649550, year = {2021}, author = {Matlock, W and Chau, KK and AbuOun, M and Stubberfield, E and Barker, L and Kavanagh, J and Pickford, H and Gilson, D and Smith, RP and Gweon, HS and Hoosdally, SJ and Swann, J and Sebra, R and Bailey, MJ and Peto, TEA and Crook, DW and Anjum, MF and Read, DS and Walker, AS and Stoesser, N and Shaw, LP}, title = {Genomic network analysis of environmental and livestock F-type plasmid populations.}, journal = {The ISME journal}, volume = {15}, number = {8}, pages = {2322-2335}, pmid = {33649550}, issn = {1751-7370}, support = {/WT_/Wellcome Trust/United Kingdom ; MRF_MRF-145-0004-TPG-AVISO/MRF/MRF/United Kingdom ; 203141/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Anti-Bacterial Agents ; Genomics ; *Livestock ; Phylogeny ; Plasmids/genetics ; *beta-Lactamases/genetics ; }, abstract = {F-type plasmids are diverse and of great clinical significance, often carrying genes conferring antimicrobial resistance (AMR) such as extended-spectrum β-lactamases, particularly in Enterobacterales. Organising this plasmid diversity is challenging, and current knowledge is largely based on plasmids from clinical settings. Here, we present a network community analysis of a large survey of F-type plasmids from environmental (influent, effluent and upstream/downstream waterways surrounding wastewater treatment works) and livestock settings. We use a tractable and scalable methodology to examine the relationship between plasmid metadata and network communities. This reveals how niche (sampling compartment and host genera) partition and shape plasmid diversity. We also perform pangenome-style analyses on network communities. We show that such communities define unique combinations of core genes, with limited overlap. 
Building plasmid phylogenies based on alignments of these core genes, we demonstrate that plasmid accessory function is closely linked to core gene content. Taken together, our results suggest that stable F-type plasmid backbone structures can persist in environmental settings while allowing dramatic variation in accessory gene content that may be linked to niche adaptation. The association of F-type plasmids with AMR may reflect their suitability for rapid niche adaptation.}, } @article {pmid33648592, year = {2021}, author = {Estrada, AA and Gottschalk, M and Rendahl, A and Rossow, S and Marshall-Lund, L and Marthaler, DG and Gebhart, CJ}, title = {Proposed virulence-associated genes of Streptococcus suis isolates from the United States serve as predictors of pathogenicity.}, journal = {Porcine health management}, volume = {7}, number = {1}, pages = {22}, pmid = {33648592}, issn = {2055-5660}, support = {project #00057268//Rapid Agricultural Response Fund, Minnesota Agricultural Experiment Station (US)/ ; }, abstract = {BACKGROUND: There is limited information on the distribution of virulence-associated genes (VAGs) in U.S. Streptococcus suis isolates, resulting in little understanding of the pathogenic potential of these isolates. This lack also reduces our understanding of the epidemiology associated with S. suis in the United States and thus affects the efficiency of control and prevention strategies. In this study we applied whole genome sequencing (WGS)-based approaches for the characterization of S. suis and identification of VAGs.

RESULTS: Of 208 S. suis isolates classified as pathogenic, possibly opportunistic, and commensal pathotypes, the genotype based on the classical VAGs (epf, mrp, and sly encoding the extracellular protein factor, muramidase-release protein, and suilysin, respectively) was identified in 9% (epf+/mrp+/sly+) of the pathogenic pathotype. Using the chi-square test and LASSO regression model, the VAGs ofs (encoding the serum opacity factor) and srtF (encoding sortase F) were selected out of 71 published VAGs as having a significant association with pathotype, and both genes were found in 95% of the pathogenic pathotype. The ofs+/srtF+ genotype was also present in 74% of 'pathogenic' isolates from a separate validation set of isolates. Pan-genome clustering resulted in the differentiation of a group of isolates from five swine production companies into clusters corresponding to clonal complex (CC) and virulence-associated (VA) genotypes. The same CC-VA genotype patterns were identified in multiple production companies, suggesting a lack of association between production company, CC, or VA genotype.

CONCLUSIONS: The proposed ofs and srtF genes were stronger predictors for differentiating pathogenic and commensal S. suis isolates compared to the classical VAGs in two sets of U.S. isolates. Pan-genome analysis in combination with metadata (serotype, ST/CC, VA genotype) was illustrated to be a valuable subtyping tool to describe the genetic diversity of S. suis.}, } @article {pmid33630832, year = {2021}, author = {Louha, S and Meinersmann, RJ and Glenn, TC}, title = {Whole genome genetic variation and linkage disequilibrium in a diverse collection of Listeria monocytogenes isolates.}, journal = {PloS one}, volume = {16}, number = {2}, pages = {e0242297}, pmid = {33630832}, issn = {1932-6203}, mesh = {Food Microbiology ; Genetic Variation ; *Genome, Bacterial ; Humans ; *Linkage Disequilibrium ; Listeria monocytogenes/*genetics/isolation & purification ; Listeriosis/*microbiology ; Multilocus Sequence Typing ; Phylogeny ; }, abstract = {We performed whole-genome multi-locus sequence typing for 2554 genes in a large and heterogenous panel of 180 Listeria monocytogenes strains having diverse geographical and temporal origins. The subtyping data was used for characterizing genetic variation and evaluating patterns of linkage disequilibrium in the pan-genome of L. monocytogenes. Our analysis revealed the presence of strong linkage disequilibrium in L. monocytogenes, with ~99% of genes showing significant non-random associations with a large majority of other genes in the genome. Twenty-seven loci having lower levels of association with other genes were considered to be potential "hot spots" for horizontal gene transfer (i.e., recombination via conjugation, transduction, and/or transformation). The patterns of linkage disequilibrium in L. monocytogenes suggest limited exchange of foreign genetic material in the genome and can be used as a tool for identifying new recombinant strains. 
This can help understand processes contributing to the diversification and evolution of this pathogenic bacteria, thereby facilitating development of effective control measures.}, } @article {pmid33622483, year = {2021}, author = {Lawal, OU and Fraqueza, MJ and Bouchami, O and Worning, P and Bartels, MD and Gonçalves, ML and Paixão, P and Gonçalves, E and Toscano, C and Empel, J and Urbaś, M and Domínguez, MA and Westh, H and de Lencastre, H and Miragaia, M}, title = {Foodborne Origin and Local and Global Spread of Staphylococcus saprophyticus Causing Human Urinary Tract Infections.}, journal = {Emerging infectious diseases}, volume = {27}, number = {3}, pages = {880-893}, pmid = {33622483}, issn = {1080-6059}, mesh = {Animals ; *Community-Acquired Infections ; Humans ; *Staphylococcal Infections ; Staphylococcus saprophyticus ; Swine ; *Urinary Tract Infections ; Virulence Factors ; }, abstract = {Staphylococcus saprophyticus is a primary cause of community-acquired urinary tract infections (UTIs) in young women. S. saprophyticus colonizes humans and animals but basic features of its molecular epidemiology are undetermined. We conducted a phylogenomic analysis of 321 S. saprophyticus isolates collected from human UTIs worldwide during 1997-2017 and 232 isolates from human UTIs and the pig-processing chain in a confined region during 2016-2017. We found epidemiologic and genomic evidence that the meat-production chain is a major source of S. saprophyticus causing human UTIs; human microbiota is another possible origin. Pathogenic S. saprophyticus belonged to 2 lineages with distinctive genetic features that are globally and locally disseminated. Pangenome-wide approaches identified a strong association between pathogenicity and antimicrobial resistance, phages, platelet binding proteins, and an increased recombination rate. Our study provides insight into the origin, transmission, and population structure of pathogenic S. 
saprophyticus and identifies putative new virulence factors.}, } @article {pmid33616628, year = {2021}, author = {Gao, G and Magadan, S and Waldbieser, GC and Youngblood, RC and Wheeler, PA and Scheffler, BE and Thorgaard, GH and Palti, Y}, title = {A long reads-based de-novo assembly of the genome of the Arlee homozygous line reveals chromosomal rearrangements in rainbow trout.}, journal = {G3 (Bethesda, Md.)}, volume = {11}, number = {4}, pages = {}, pmid = {33616628}, issn = {2160-1836}, mesh = {Animals ; Genome ; *Oncorhynchus mykiss/genetics ; Sex Determination Processes ; Y Chromosome ; }, abstract = {Currently, there is still a need to improve the contiguity of the rainbow trout reference genome and to use multiple genetic backgrounds that will represent the genetic diversity of this species. The Arlee doubled haploid line was originated from a domesticated hatchery strain that was originally collected from the northern California coast. The Canu pipeline was used to generate the Arlee line genome de-novo assembly from high coverage PacBio long-reads sequence data. The assembly was further improved with Bionano optical maps and Hi-C proximity ligation sequence data to generate 32 major scaffolds corresponding to the karyotype of the Arlee line (2 N = 64). It is composed of 938 scaffolds with N50 of 39.16 Mb and a total length of 2.33 Gb, of which ∼95% was in 32 chromosome sequences with only 438 gaps between contigs and scaffolds. In rainbow trout the haploid chromosome number can vary from 29 to 32. In the Arlee karyotype the haploid chromosome number is 32 because chromosomes Omy04, 14 and 25 are divided into six acrocentric chromosomes. Additional structural variations that were identified in the Arlee genome included the major inversions on chromosomes Omy05 and Omy20 and additional 15 smaller inversions that will require further validation. 
This is also the first rainbow trout genome assembly that includes a scaffold with the sex-determination gene (sdY) in the chromosome Y sequence. The utility of this genome assembly is shown through the improved annotation of the duplicated genome loci that harbor the IGH genes on chromosomes Omy12 and Omy13.}, } @article {pmid33606689, year = {2021}, author = {Bravo, V and Katz, A and Porte, L and Weitzel, T and Varela, C and Gonzalez-Escalona, N and Blondel, CJ}, title = {Genomic analysis of the diversity, antimicrobial resistance and virulence potential of clinical Campylobacter jejuni and Campylobacter coli strains from Chile.}, journal = {PLoS neglected tropical diseases}, volume = {15}, number = {2}, pages = {e0009207}, pmid = {33606689}, issn = {1935-2735}, support = {55008749/HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Campylobacter Infections ; Campylobacter coli/classification/*genetics ; Campylobacter jejuni/classification/drug effects/*genetics ; Chile ; Drug Resistance, Bacterial/*genetics ; Fluoroquinolones/pharmacology ; Gastroenteritis ; *Genomics ; Humans ; Microbial Sensitivity Tests ; Multigene Family ; Multilocus Sequence Typing ; Phylogeny ; Type IV Secretion Systems ; Type VI Secretion Systems/genetics ; Virulence/genetics ; Virulence Factors/*genetics ; }, abstract = {Campylobacter jejuni and Campylobacter coli are the leading cause of human gastroenteritis in the industrialized world and an emerging threat in developing countries. The incidence of campylobacteriosis in South America is greatly underestimated, mostly due to the lack of adequate diagnostic methods. Accordingly, there is limited genomic and epidemiological data from this region. In the present study, we performed a genome-wide analysis of the genetic diversity, virulence, and antimicrobial resistance of the largest collection of clinical C. jejuni and C. 
coli strains from Chile available to date (n = 81), collected in 2017-2019 in Santiago, Chile. This culture collection accounts for more than one third of the available genome sequences from South American clinical strains. cgMLST analysis identified high genetic diversity as well as 13 novel STs and alleles in both C. jejuni and C. coli. Pangenome and virulome analyses showed a differential distribution of virulence factors, including both plasmid and chromosomally encoded T6SSs and T4SSs. Resistome analysis predicted widespread resistance to fluoroquinolones, but low rates of erythromycin resistance. This study provides valuable genomic and epidemiological data and highlights the need for further genomic epidemiology studies in Chile and other South American countries to better understand molecular epidemiology and antimicrobial resistance of this emerging intestinal pathogen.}, } @article {pmid33602135, year = {2021}, author = {Li, Y and Sun, ZZ and Rong, JC and Xie, BB}, title = {Comparative genomics reveals broad genetic diversity, extensive recombination and nascent ecological adaptation in Micrococcus luteus.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {124}, pmid = {33602135}, issn = {1471-2164}, support = {31770412//National Natural Science Foundation of China/ ; 2016WLJH41//Young Scholars Program of Shandong University/ ; 2020QNQT006//Youth Interdisciplinary Science and Innovative Research Groups of Shandong University/ ; }, mesh = {Animals ; Evolution, Molecular ; Genetic Variation ; *Genome, Bacterial ; Genome-Wide Association Study ; Genomics ; *Micrococcus luteus/genetics ; Phylogeny ; Recombination, Genetic ; }, abstract = {BACKGROUND: Micrococcus luteus is a group of actinobacteria that is widely used in biotechnology and is being thought as an emerging nosocomial pathogen. 
With one of the smallest genomes of free-living actinobacteria, it is found in a wide range of environments, but intraspecies genetic diversity and adaptation strategies to various environments remain unclear. Here, comparative genomics, phylogenomics, and genome-wide association studies were used to investigate the genomic diversity, evolutionary history, and the potential ecological differentiation of the species.

RESULTS: High-quality genomes of 66 M. luteus strains were downloaded from the NCBI GenBank database and core and pan-genome analysis revealed a considerable intraspecies heterogeneity. Phylogenomic analysis, gene content comparison, and average nucleotide identity calculation consistently indicated that the species has diverged into three well-differentiated clades. Population structure analysis further suggested the existence of an unknown ancestor or the fourth, yet unsampled, clade. Reconstruction of gene gain/loss events along the evolutionary history revealed both early events that contributed to the inter-clade divergence and recent events leading to the intra-clade diversity. We also found convincing evidence that recombination has played a key role in the evolutionary process of the species, with up to two-thirds of the core genes having been affected by recombination. Furthermore, distribution of mammal-associated strains (including pathogens) on the phylogenetic tree suggested that the last common ancestor had a free-living lifestyle, and a few recently diverged lineages have developed a mammal-associated lifestyle separately. Consistently, genome-wide association analysis revealed that mammal-associated strains from different lineages shared genes functionally relevant to the host-associated lifestyle, indicating a recent ecological adaption to the new host-associated habitats.

CONCLUSIONS: These results revealed high intraspecies genomic diversity of M. luteus and highlighted that gene gain/loss events and extensive recombination events played key roles in the genome evolution. Our study also indicated that, as a free-living species, some lineages have recently developed or are developing a mammal-associated lifestyle. This study provides insights into the mechanisms that drive the genome evolution and adaption to various environments of a bacterial species.}, } @article {pmid33592318, year = {2021}, author = {Aguirre-Sanchez, JR and Ibarra-Rodriguez, JR and Vega-Lopez, IF and Martínez-Urtaza, J and Chaidez-Quiroz, C}, title = {Genomic signatures of adaptation to natural settings in non-typhoidal Salmonella enterica Serovars Saintpaul, Thompson and Weltevreden.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {90}, number = {}, pages = {104771}, doi = {10.1016/j.meegid.2021.104771}, pmid = {33592318}, issn = {1567-7257}, mesh = {Adaptation, Biological/*genetics ; *Computational Biology ; *Genes, Bacterial ; *Genome, Bacterial ; *Multigene Family ; Salmonella enterica/*genetics ; Whole Genome Sequencing ; }, abstract = {Salmonella enterica is a pathogenic bacterium responsible for intestinal illness and systemic diseases such as typhoid and paratyphoid fevers. Among clinical manifestation classification, non-typhoidal Salmonella is mainly known as foodborne pathogen associated with the consumption of fecal contaminated food and water. Even though Salmonella hosts include humans and warm-blooded animals, it has been found in non-host environments as river water where the bacteria use different strategies to fitness the environment persisting and establishment. 
Now with the availability of WGS and bioinformatics tools, we can explore bacterial genomes with higher resolution to increase our understanding of specific genetic signatures among environmental and clinical isolates, being the goal of this work. Pangenome construction allowed the detection of specific environmental and clinical gene clusters related to metabolism and secretion systems as the main signature respectively. Specifically, D-galactonate degradation pathway was observed mainly in environmental genomes while T3SS and flagellum genes were detected for all clinical but not for all environmental isolates. Gene duplication and pseudogenes accumulation were detected as the main adaptation strategy for environmental isolates; thus, isolation source may play an important role in genome plasticity, conferring a selective advantage to survive and persist for environmental Salmonella isolates. Intact prophage sequences with cargo genes were observable for both isolation sources playing an important role in virulence contribution.}, } @article {pmid33590101, year = {2021}, author = {Pfeifer, E and Moura de Sousa, JA and Touchon, M and Rocha, EPC}, title = {Bacteria have numerous distinctive groups of phage-plasmids with conserved phage and variable plasmid gene repertoires.}, journal = {Nucleic acids research}, volume = {49}, number = {5}, pages = {2655-2673}, pmid = {33590101}, issn = {1362-4962}, mesh = {Acinetobacter/genetics ; Bacteria/*genetics ; Bacteriophages/*genetics ; Databases, Nucleic Acid ; Enterobacteriaceae/enzymology/genetics ; Genes, Bacterial ; Plasmids/*genetics ; Prophages/*genetics ; Telomerase/genetics ; }, abstract = {Plasmids and temperate phages are key contributors to bacterial evolution. They are usually regarded as very distinct. However, some elements, termed phage-plasmids, are known to be both plasmids and phages, e.g. P1, N15 or SSU5. The number, distribution, relatedness and characteristics of these phage-plasmids are poorly known. 
Here, we screened for these elements among ca. 2500 phages and 12000 plasmids and identified 780 phage-plasmids across very diverse bacterial phyla. We grouped 92% of them by similarity of gene repertoires to eight defined groups and 18 other broader communities of elements. The existence of these large groups suggests that phage-plasmids are ancient. Their gene repertoires are large, the average element is larger than an average phage or plasmid, and they include slightly more homologs to phages than to plasmids. We analyzed the pangenomes and the genetic organization of each group of phage-plasmids and found the key phage genes to be conserved and co-localized within distinct groups, whereas genes with homologs in plasmids are much more variable and include most accessory genes. Phage-plasmids are a sizeable fraction of the sequenced plasmids (∼7%) and phages (∼5%), and could have key roles in bridging the genetic divide between phages and other mobile genetic elements.}, } @article {pmid33587228, year = {2021}, author = {Matarrita-Carranza, B and Murillo-Cruz, C and Avendaño, R and Ríos, MI and Chavarría, M and Gómez-Calvo, ML and Tamayo-Castillo, G and Araya, JJ and Pinto-Tomás, AA}, title = {Streptomyces sp. M54: an actinobacteria associated with a neotropical social wasp with high potential for antibiotic production.}, journal = {Antonie van Leeuwenhoek}, volume = {114}, number = {4}, pages = {379-398}, pmid = {33587228}, issn = {1572-9699}, support = {research projects 801-B0-538, 810-B5-772, and 809-B6-656//Sistema de Estudios de Posgrado and Vicerrectoría de Investigación, Universidad de Costa Rica/ ; }, mesh = {*Actinobacteria/genetics ; Animals ; Anti-Bacterial Agents/pharmacology ; Chromatography, Liquid ; Humans ; Hypocreales ; Phylogeny ; *Streptomyces/genetics ; Tandem Mass Spectrometry ; *Wasps ; }, abstract = {Streptomyces symbionts in insects have shown to be a valuable source of new antibiotics. 
Here, we report the genome sequence and the potential for antibiotic production of "Streptomyces sp. M54", an Actinobacteria associated with the eusocial wasp, Polybia plebeja. The Streptomyces sp. M54 genome is composed of a chromosome (7.96 Mb), and a plasmid (1.91 Kb) and harbors 30 biosynthetic gene clusters for secondary metabolites, of which only one third has been previously characterized. Growth inhibition bioassays show that this bacterium produces antimicrobial compounds that are active against Hirsutella citriformis, a natural fungal enemy of its host, and the human pathogens Staphylococcus aureus and Candida albicans. Analyses through TLC-bioautography, LC-MS/MS and NMR allowed the identification of five macrocyclic ionophore antibiotics, with previously reported antibacterial, antitumor and antiviral properties. Phylogenetic analyses placed Streptomyces sp. M54 in a clade of other host-associated strains taxonomically related to Streptomyces griseus. Pangenomic and ANI analyses confirm the identity of one of its closest relatives as Streptomyces sp. LaPpAH-199, a strain isolated from an ant-plant symbiosis in Africa. In summary, our results suggest an insect-microbe association in distant geographic areas and showcase the potential of Streptomyces sp. M54 and related strains for the discovery of novel antibiotics.}, } @article {pmid33584592, year = {2020}, author = {Gutiérrez, S and Díaz, L and Reyes-Jara, A and Yang, X and Meng, J and González-Escalona, N and Toro, M}, title = {Whole-Genome Phylogenetic Analysis Reveals a Wide Diversity of Non-O157 STEC Isolated From Ground Beef and Cattle Feces.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {622663}, pmid = {33584592}, issn = {1664-302X}, abstract = {Shiga toxin-producing Escherichia coli (STEC) causes foodborne outbreaks that can lead to complications such as hemolytic uremic syndrome. 
Their main reservoir is cattle, and ground beef has been frequently associated with disease and outbreaks. In this study, we attempted to understand the genetic relationship among STEC isolated in Chile from different sources, their relationship to STEC from the rest of the world, and to identify molecular markers of Chilean STEC. We sequenced 62 STEC isolated in Chile using MiSeq Illumina. In silico typing was determined using tools of the Center Genomic Epidemiology, Denmark University (CGE/DTU). Genomes of our local STEC collection were compared with 113 STEC isolated worldwide through a core genome MLST (cgMLST) approach, and we also searched for distinct genes to be used as molecular markers of Chilean isolates. Genomes in our local collection were grouped based on serogroup and sequence type, and clusters were formed within local STEC. In the worldwide STEC analysis, Chilean STEC did not cluster with genomes of the rest of the world suggesting that they are not phylogenetically related to previously described STEC. The pangenome of our STEC collection was 11,650 genes, but we did not identify distinct molecular markers of local STEC. Our results showed that there may be local emerging STEC with unique features, nevertheless, no molecular markers were detected. Therefore, there might be elements such as a syntenic organization that might explain differential clustering detected between local and worldwide STEC.}, } @article {pmid33575648, year = {2021}, author = {Perrin, A and Rocha, EPC}, title = {PanACoTA: a modular tool for massive microbial comparative genomics.}, journal = {NAR genomics and bioinformatics}, volume = {3}, number = {1}, pages = {lqaa106}, pmid = {33575648}, issn = {2631-9268}, abstract = {The study of the gene repertoires of microbial species, their pangenomes, has become a key part of microbial evolution and functional genomics. 
Yet, the increasing number of genomes available complicates the establishment of the basic building blocks of comparative genomics. Here, we present PanACoTA (https://github.com/gem-pasteur/PanACoTA), a tool that allows to download all genomes of a species, build a database with those passing quality and redundancy controls, uniformly annotate and then build their pangenome, several variants of core genomes, their alignments and a rapid but accurate phylogenetic tree. While many programs building pangenomes have become available in the last few years, we have focused on a modular method, that tackles all the key steps of the process, from download to phylogenetic inference. While all steps are integrated, they can also be run separately and multiple times to allow rapid and extensive exploration of the parameters of interest. PanACoTA is built in Python3, includes a singularity container and features to facilitate its future development. We believe PanACoTA is an interesting addition to the current set of comparative genomics tools, since it will accelerate and standardize the more routine parts of the work, allowing microbial genomicists to more quickly tackle their specific questions.}, } @article {pmid33572680, year = {2021}, author = {Lin, YT and Lee, CC and Leu, WM and Wu, JJ and Huang, YC and Meng, M}, title = {Fungicidal Activity of Volatile Organic Compounds Emitted by Burkholderia gladioli Strain BBB-01.}, journal = {Molecules (Basel, Switzerland)}, volume = {26}, number = {3}, pages = {}, pmid = {33572680}, issn = {1420-3049}, support = {106-2313-B-005-021-MY3//the Ministry of Science and Technology, Taiwan, ROC/ ; }, mesh = {Antifungal Agents/*metabolism/*pharmacology ; Burkholderia gladioli/*metabolism ; Plant Diseases/microbiology/prevention & control ; Volatile Organic Compounds/*metabolism/*pharmacology ; }, abstract = {A Burkholderia gladioli strain, named BBB-01, was isolated from rice shoots based on the confrontation plate assay activity against 
several plant pathogenic fungi. The genome of this bacterial strain consists of two circular chromosomes and one plasmid with 8,201,484 base pairs in total. Pangenome analysis of 23 B. gladioli strains suggests that B. gladioli BBB-01 has the closest evolutionary relationship to B. gladioli pv. gladioli and B. gladioli pv. agaricicola. B. gladioli BBB-01 emitted dimethyl disulfide and 2,5-dimethylfuran when it was cultivated in lysogeny broth and potato dextrose broth, respectively. Dimethyl disulfide is a well-known pesticide, while the bioactivity of 2,5-dimethylfuran has not been reported. In this study, the inhibition activity of the vapor of these two compounds was examined against phytopathogenic fungi, including Magnaporthe oryzae, Gibberella fujikuroi, Sarocladium oryzae, Phellinus noxius and Colletotrichum fructicola, and human pathogen Candida albicans. In general, 2,5-dimethylfuran is more potent than dimethyl disulfide in suppressing the growth of the tested fungi, suggesting that 2,5-dimethylfuran is a potential fumigant to control plant fungal disease.}, } @article {pmid33572241, year = {2021}, author = {Chen, X and Li, R and Wang, Y and Li, A}, title = {Genomic Characterization Provides an Insight into the Pathogenicity of the Poplar Canker Bacterium Lonsdalea populi.}, journal = {Genes}, volume = {12}, number = {2}, pages = {}, pmid = {33572241}, issn = {2073-4425}, mesh = {China ; Europe ; Gammaproteobacteria/*genetics/pathogenicity ; *Genomics ; Plant Diseases/genetics/*microbiology ; Populus/*microbiology ; RNA, Ribosomal, 16S/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {An emerging poplar canker caused by the gram-negative bacterium, Lonsdalea populi, has led to high mortality of hybrid poplars Populus × euramericana in China and Europe. The molecular bases of pathogenicity and bark adaptation of L. populi have become a focus of recent research. 
This study revealed the whole genome sequence and identified putative virulence factors of L. populi. A high-quality L. populi genome sequence was assembled de novo, with a genome size of 3,859,707 bp, containing approximately 3434 genes and 107 RNAs (75 tRNA, 22 rRNA, and 10 ncRNA). The L. populi genome contained 380 virulence-associated genes, mainly encoding for adhesion, extracellular enzymes, secretory systems, and two-component transduction systems. The genome had 110 carbohydrate-active enzyme (CAZy)-coding genes and putative secreted proteins. The antibiotic-resistance database annotation listed that L. populi was resistant to penicillin, fluoroquinolone, and kasugamycin. Analysis of comparative genomics found that L. populi exhibited the highest homology with the L. britannica genome and L. populi encompassed 1905 specific genes, 1769 dispensable genes, and 1381 conserved genes, suggesting high evolutionary diversity and genomic plasticity. Moreover, the pan genome analysis revealed that the N-5-1 genome is an open genome. These findings provide important resources for understanding the molecular basis of the pathogenicity and biology of L. populi and the poplar-bacterium interaction.}, } @article {pmid33567604, year = {2021}, author = {Mao, B and Yin, R and Li, X and Cui, S and Zhang, H and Zhao, J and Chen, W}, title = {Comparative Genomic Analysis of Lactiplantibacillus plantarum Isolated from Different Niches.}, journal = {Genes}, volume = {12}, number = {2}, pages = {}, pmid = {33567604}, issn = {2073-4425}, mesh = {Adaptation, Physiological/*genetics ; Feces/microbiology ; Fermentation/genetics ; Genome, Bacterial/*genetics ; Humans ; Lactobacillaceae/*genetics ; Phenotype ; Phylogeny ; }, abstract = {Lactiplantibacillus plantarum can adapt to a variety of niches and is widely distributed in many sources. We used comparative genomics to explore the differences in the genome and in the physiological characteristics of L. 
plantarum isolated from pickles, fermented sauce, and human feces. The relationships between genotypes and phenotypes were analyzed to address the effects of isolation source on the genetic variation of L. plantarum. The comparative genomic results indicate that the numbers of unique genes in the different strains were niche-dependent. L. plantarum isolated from fecal sources generally had more strain-specific genes than L. plantarum isolated from pickles. The phylogenetic tree and average nucleotide identity (ANI) results indicate that L. plantarum in pickles and fermented sauce clustered independently, whereas the fecal L. plantarum was distributed more uniformly in the phylogenetic tree. The pan-genome curve indicated that the L. plantarum exhibited high genomic diversity. Based on the analysis of the carbohydrate active enzyme and carbohydrate-use abilities, we found that L. plantarum strains isolated from different sources exhibited different expression of the Glycoside Hydrolases (GH) and Glycosyl Transferases (GT) families and that the expression patterns of carbohydrate active enzymes were consistent with the evolution relationships of the strains. L. 
plantarum strains exhibited niche-specific characteristics and the results provided better understanding on genetics of this species.}, } @article {pmid33565958, year = {2021}, author = {Yamaguchi, M and Win, HPM and Higashi, K and Ono, M and Hirose, Y and Motooka, D and Okuzaki, D and Aye, MM and Htun, MM and Thu, HM and Kawabata, S}, title = {Epidemiological analysis of pneumococcal strains isolated at Yangon Children's Hospital in Myanmar via whole-genome sequencing-based methods.}, journal = {Microbial genomics}, volume = {7}, number = {2}, pages = {}, pmid = {33565958}, issn = {2057-5858}, mesh = {Azithromycin/pharmacology ; Bacterial Typing Techniques ; Child, Preschool ; *Drug Resistance, Multiple, Bacterial ; Female ; High-Throughput Nucleotide Sequencing ; Hospitals, Pediatric ; Humans ; Infant ; Male ; Microbial Sensitivity Tests ; Multilocus Sequence Typing/*methods ; Myanmar ; Phylogeny ; Pneumococcal Infections/*diagnosis ; Respiratory Tract Infections/*microbiology ; Streptococcus pneumoniae/*classification/genetics/isolation & purification ; Tetracycline/pharmacology ; Whole Genome Sequencing/*methods ; }, abstract = {Streptococcus pneumoniae causes over one million deaths from lower respiratory infections per annum worldwide. Although mortality is very high in Southeast Asian countries, molecular epidemiological information remains unavailable for some countries. In this study, we report, for the first time, the whole-genome sequences and genetic profiles of pneumococcal strains isolated in Myanmar. We isolated 60 streptococcal strains from 300 children with acute respiratory infection at Yangon Children's Hospital in Myanmar. We obtained whole-genome sequences and identified the species, serotypes, sequence types, antimicrobial resistance (AMR) profiles, virulence factor profiles and pangenome structure using sequencing-based analysis. Average nucleotide identity analysis indicated that 58 strains were S. 
pneumoniae and the other 2 strains were Streptococcus mitis. The major serotype was 19F (11 strains), followed by 6E (6B genetic variant; 7 strains) and 15 other serotypes; 5 untypable strains were also detected. Multilocus sequence typing analysis revealed 39 different sequence types, including 11 novel ones. In addition, genetic profiling indicated that AMR genes and mutations spread among pneumococcal strains in Myanmar. A minimum inhibitory concentration assay indicated that several pneumococcal strains had acquired azithromycin and tetracycline resistance, whereas no strains were found to be resistant against levofloxacin and high-dose penicillin G. Phylogenetic and pangenome analysis showed various pneumococcal lineages and that the pneumococcal strains contain a rich and mobile gene pool, providing them with the ability to adapt to selective pressures. This molecular epidemiological information can help in tracking global infection and supporting AMR control in addition to public health interventions in Myanmar.}, } @article {pmid33565589, year = {2021}, author = {Yocca, AE and Lu, Z and Schmitz, RJ and Freeling, M and Edger, PP}, title = {Evolution of Conserved Noncoding Sequences in Arabidopsis thaliana.}, journal = {Molecular biology and evolution}, volume = {38}, number = {7}, pages = {2692-2703}, pmid = {33565589}, issn = {1537-1719}, mesh = {Arabidopsis/*genetics ; *Conserved Sequence ; *Evolution, Molecular ; Gene Duplication ; *Genetic Variation ; Genome, Plant ; Regulatory Sequences, Nucleic Acid/*genetics ; Selection, Genetic ; }, abstract = {Recent pangenome studies have revealed a large fraction of the gene content within a species exhibits presence-absence variation (PAV). However, coding regions alone provide an incomplete assessment of functional genomic sequence variation at the species level. 
Little to no attention has been paid to noncoding regulatory regions in pangenome studies, though these sequences directly modulate gene expression and phenotype. To uncover regulatory genetic variation, we generated chromosome-scale genome assemblies for thirty Arabidopsis thaliana accessions from multiple distinct habitats and characterized species level variation in Conserved Noncoding Sequences (CNS). Our analyses uncovered not only PAV and positional variation (PosV) but that diversity in CNS is nonrandom, with variants shared across different accessions. Using evolutionary analyses and chromatin accessibility data, we provide further evidence supporting roles for conserved and variable CNS in gene regulation. Additionally, our data suggests that transposable elements contribute to CNS variation. Characterizing species-level diversity in all functional genomic sequences may later uncover previously unknown mechanistic links between genotype and phenotype.}, } @article {pmid33564084, year = {2021}, author = {Grazziotin, AL and Vidal, NM and Hoepers, PG and Reis, TFM and Mesa, D and Caron, LF and Ingberman, M and Beirão, BCB and Zuffo, JP and Fonseca, BB}, title = {Comparative genomics of a novel clade shed light on the evolution of the genus Erysipelothrix and characterise an emerging species.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {3383}, pmid = {33564084}, issn = {2045-2322}, mesh = {Animals ; DNA, Bacterial/*genetics ; Erysipelothrix/classification/*genetics/isolation & purification ; Erysipelothrix Infections/epidemiology/genetics ; *Genome, Bacterial ; Genomics ; *Phylogeny ; Poultry Diseases/epidemiology/genetics ; *Sequence Analysis, DNA ; Turkey ; }, abstract = {Erysipelothrix sp. isolates obtained from a deadly outbreak in farmed turkeys were sequenced and compared to representatives of the genus. 
Phylogenetic trees—supported by digital DNA:DNA hybridization and Average Nucleotide Identity—revealed a novel monophyletic clade comprising isolates from pigs, turkeys, and fish, including isolates previously described as E. sp. Strain 2. Genes coding for the SpaC protein, typically found in E. sp. Strain 2, were detected in all isolates of the clade. Therefore, we confirm E. sp. Strain 2 represents a unique species that may be isolated from a broad host range, and the name "Erysipelothrix takahashiae" is suggested. Core genome analysis showed that the pathogenic species of this genus, E. rhusiopathiae and the clade E. sp. Strain 2, are enriched in core functionalities related to nutrient uptake and transport, but not necessarily homologous pathways. For instance, whereas the aerobic DctA transporter may uptake C4-dicarboxylates in both species, the anaerobic DcuC transporter is exclusive of the E. sp. Strain 2. Remarkably, the pan-genome analysis uncovered that genes related to transport and metabolism, recombination and repair, translation and transcription in the fish isolate, within the novel clade, have undergone a genomic reduction through pseudogenization. 
This reflects distinct selective pressures shaping the genome of species and strains within the genus Erysipelothrix while adapting to their respective niches.}, } @article {pmid33564053, year = {2021}, author = {Costa, D and Lévesque, S and Kumar, N and Fresia, P and Ferrés, I and Lawley, TD and Iraola, G}, title = {Pangenome analysis reveals genetic isolation in Campylobacter hyointestinalis subspecies adapted to different mammalian hosts.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {3431}, pmid = {33564053}, issn = {2045-2322}, mesh = {*Adaptation, Physiological ; Animals ; *Campylobacter hyointestinalis/genetics/isolation & purification/pathogenicity ; *Genetic Variation ; *Genome, Bacterial ; *Phylogeny ; Swine ; }, abstract = {Campylobacter hyointestinalis is an emerging pathogen currently divided in two subspecies: C. hyointestinalis subsp. lawsonii which is predominantly recovered from pigs, and C. hyointestinalis subsp. hyointestinalis which can be found in a much wider range of mammalian hosts. Despite C. hyointestinalis being reported as an emerging pathogen, its evolutionary and host-associated diversification patterns are still vastly unexplored. For this reason, we generated whole-genome sequences of 13 C. hyointestinalis subsp. hyointestinalis strains and performed a comprehensive comparative analysis including publicly available C. hyointestinalis subsp. hyointestinalis and C. hyointestinalis subsp. lawsonii genomes, to gain insight into the genomic variation of these differentially-adapted subspecies. Both subspecies are distinct phylogenetic lineages which present an apparent barrier to homologous recombination, suggesting genetic isolation. This is further supported by accessory gene patterns that recapitulate the core genome phylogeny. Additionally, C. hyointestinalis subsp. 
hyointestinalis presents a bigger and more diverse accessory genome, which probably reflects its capacity to colonize different mammalian hosts unlike C. hyointestinalis subsp. lawsonii that is presumably host-restricted. This greater plasticity in the accessory genome of C. hyointestinalis subsp. hyointestinalis correlates to a higher incidence of genome-wide recombination events, that may be the underlying mechanism driving its diversification. Concordantly, both subspecies present distinct patterns of gene families involved in genome plasticity and DNA repair like CRISPR-associated proteins and restriction-modification systems. Together, our results provide an overview of the genetic mechanisms shaping the genomes of C. hyointestinalis subspecies, contributing to understand the biology of Campylobacter species that are increasingly recognized as emerging pathogens.}, } @article {pmid33563309, year = {2021}, author = {Sielemann, K and Weisshaar, B and Pucker, B}, title = {Reference-based QUantification Of gene Dispensability (QUOD).}, journal = {Plant methods}, volume = {17}, number = {1}, pages = {18}, pmid = {33563309}, issn = {1746-4811}, support = {DILS//Universität Bielefeld/ ; }, abstract = {BACKGROUND: Dispensability of genes in a phylogenetic lineage, e.g. a species, genus, or higher-level clade, is gaining relevance as most genome sequencing projects move to a pangenome level. Most analyses classify genes as core genes, which are present in all investigated individual genomes, and dispensable genes, which only occur in a single or a few investigated genomes. The binary classification as 'core' or 'dispensable' is often based on arbitrary cutoffs of presence/absence in the analysed genomes. Even when extended to 'conditionally dispensable', this concept still requires the assignment of genes to distinct groups.

RESULTS: Here, we present a new method which overcomes this distinct classification by quantifying gene dispensability and present a dedicated tool for reference-based QUantification Of gene Dispensability (QUOD). As a proof of concept, sequence data of 966 Arabidopsis thaliana accessions (Ath-966) were processed to calculate a gene-specific dispensability score for each gene based on normalised coverage in read mappings. We validated this score by comparison of highly conserved Benchmarking Universal Single Copy Orthologs (BUSCOs) to all other genes. The average scores of BUSCOs were significantly lower than the scores of non-BUSCOs. Analysis of variation demonstrated lower variation values between replicates of a single accession than between iteratively, randomly selected accessions from the whole dataset Ath-966. Functional investigations revealed defense and antimicrobial response genes among the genes with high-dispensability scores.

CONCLUSIONS: Instead of classifying a gene as core or dispensable, QUOD assigns a dispensability score to each gene. Hence, QUOD facilitates the identification of candidate dispensable genes, associated with high dispensability scores, which often underlie lineage-specific adaptation to varying environmental conditions.}, } @article {pmid33563283, year = {2021}, author = {Sela, I and Wolf, YI and Koonin, EV}, title = {Assessment of assumptions underlying models of prokaryotic pangenome evolution.}, journal = {BMC biology}, volume = {19}, number = {1}, pages = {27}, pmid = {33563283}, issn = {1741-7007}, mesh = {Archaea/*genetics ; Bacteria/*genetics ; *Evolution, Molecular ; *Metagenome ; Models, Genetic ; }, abstract = {BACKGROUND: The genomes of bacteria and archaea evolve by extensive loss and gain of genes which, for any group of related prokaryotic genomes, result in the formation of a pangenome with the universal, asymmetrical U-shaped distribution of gene commonality. However, the evolutionary factors that define the specific shape of this distribution are not thoroughly understood.

RESULTS: We investigate the fit of simple models of genome evolution to the empirically observed gene commonality distributions and genome intersections for 33 groups of closely related bacterial genomes. A model with an infinite external gene pool available for gene acquisition and constant genome size (IGP-CGS model), and two gene turnover rates, one for slow- and the other one for fast-evolving genes, allows two approaches to estimate the parameters for gene content dynamics. One is by fitting the model prediction to the distribution of the number of genes shared by precisely k genomes (gene commonality distribution) and another by analyzing the distribution of the number of genes common for k genome sets (k-cores). Both approaches produce a comparable overall quality of fit, although the former significantly overestimates the number of the universally conserved genes, while the latter overestimates the number of singletons. We further explore the effect of dropping each of the assumptions of the IGP-CGS model on the fit to the gene commonality distributions and show that models with either a finite gene pool or unequal rates of gene loss and gain (greater gene loss rate) eliminate the overestimate of the number of singletons or the core genome size.

CONCLUSIONS: We examine the assumptions that are usually adopted for modeling the evolution of the U-shaped gene commonality distributions in prokaryote genomes, namely, those of infinitely many genes and constant genome size. The combined analysis of genome intersections and gene commonality suggests that at least one of these assumptions is invalid. The violation of both these assumptions reflects the limited ability of prokaryotes to gain new genes. This limitation seems to stem, at least partly, from the horizontal gene transfer barrier, i.e., the cost of accommodation of foreign genes by prokaryotes. Further development of models taking into account the complexity of microbial evolution is necessary for an improved understanding of the evolution of prokaryotes.}, } @article {pmid33563209, year = {2021}, author = {Maeno, S and Nishimura, H and Tanizawa, Y and Dicks, L and Arita, M and Endo, A}, title = {Unique niche-specific adaptation of fructophilic lactic acid bacteria and proposal of three Apilactobacillus species as novel members of the group.}, journal = {BMC microbiology}, volume = {21}, number = {1}, pages = {41}, pmid = {33563209}, issn = {1471-2180}, mesh = {*Adaptation, Physiological ; Bacterial Proteins/genetics ; DNA, Bacterial/genetics ; Fructose/*metabolism ; Genome, Bacterial ; Genomics ; Glucose/metabolism ; Lactobacillales/classification/*genetics/*metabolism ; Leuconostocaceae/*classification/*genetics/metabolism ; Phylogeny ; }, abstract = {BACKGROUND: Fructophilic lactic acid bacteria (FLAB) found in D-fructose rich niches prefer D-fructose over D-glucose as a growth substrate. They need electron acceptors for growth on D-glucose. The organisms share carbohydrate metabolic properties. Fructobacillus spp., Apilactobacillus kunkeei, and Apilactobacillus apinorum are members of this unique group. 
Here we studied the fructophilic characteristics of recently described species Apilactobacillus micheneri, Apilactobacillus quenuiae, and Apilactobacillus timberlakei.

RESULTS: The three species prefer D-fructose over D-glucose and only metabolize D-glucose in the presence of electron acceptors. The genomic characteristics of the three species, i.e. small genomes and thus a low number of coding DNA sequences, few genes involved in carbohydrate transport and metabolism, and partial deletion of adhE gene, are characteristic of FLAB. The three species thus are novel members of FLAB. Reduction of genes involved in carbohydrate transport and metabolism in accordance with reduction of genome size were the common characteristics of the family Lactobacillaceae, but FLAB markedly reduced the gene numbers more than other species in the family. Pan-genome analysis of genes involved in metabolism displayed a lack of specific carbohydrate metabolic pathways in FLAB, leading to a unique cluster separation.

CONCLUSIONS: The present study expanded FLAB group. Fructose-rich environments have induced similar evolution in phylogenetically distant FLAB species. These are examples of convergent evolution of LAB.}, } @article {pmid33562141, year = {2021}, author = {Chandrasekar, SS and Phanse, Y and Hildebrand, RE and Hanafy, M and Wu, CW and Hansen, CH and Osorio, JE and Suresh, M and Talaat, AM}, title = {Localized and Systemic Immune Responses against SARS-CoV-2 Following Mucosal Immunization.}, journal = {Vaccines}, volume = {9}, number = {2}, pages = {}, pmid = {33562141}, issn = {2076-393X}, support = {R21 AI149793/AI/NIAID NIH HHS/United States ; U01 AI124299/AI/NIAID NIH HHS/United States ; SEED Fund//Wisconsin Alumni Research Foundation/ ; 2019-05849//U.S. Department of Agriculture/ ; }, abstract = {The rapid transmission of SARS-CoV-2 in the USA and worldwide necessitates the development of multiple vaccines to combat the COVID-19 global pandemic. Previously, we showed that a particulate adjuvant system, quil-A-loaded chitosan (QAC) nanoparticles, can elicit robust immunity combined with plasmid vaccines when used against avian coronavirus. Here, we report on the immune responses elicited by mucosal homologous plasmid and a heterologous immunization strategy using a plasmid vaccine and a Modified Vaccinia Ankara (MVA) expressing SARS-CoV-2 spike (S) and nucleocapsid (N) antigens. Only the heterologous intranasal immunization strategy elicited neutralizing antibodies against SARS-CoV-2 in serum and bronchoalveolar lavage of mice, suggesting a protective vaccine. The same prime/boost strategy led to the induction of type 1 and type 17 T-cell responses and polyfunctional T-cells expressing multiple type 1 cytokines (e.g., IFN-γ, TNFα, IL-2) in the lungs and spleens of vaccinated mice. In contrast, the plasmid homologous vaccine strategy led to the induction of local mono and polyfunctional T-cells secreting IFN-γ. 
Outcomes of this study support the potential of QAC-nano vaccines to elicit significant mucosal immune responses against respiratory coronaviruses.}, } @article {pmid33556879, year = {2021}, author = {Gasparini, K and Moreira, JDR and Peres, LEP and Zsögön, A}, title = {De novo domestication of wild species to create crops with increased resilience and nutritional value.}, journal = {Current opinion in plant biology}, volume = {60}, number = {}, pages = {102006}, doi = {10.1016/j.pbi.2021.102006}, pmid = {33556879}, issn = {1879-0356}, mesh = {Crops, Agricultural/genetics ; *Domestication ; Gene Editing ; Nutritive Value ; *Plant Breeding ; }, abstract = {Creating crops with resistance to drought, soil salinity and insect damage, that simultaneously have higher nutritional quality, is challenging to conventional breeding due to the complex and diffuse genetic basis of those traits. Recent advances in gene editing technology, such as base editors and prime-editing, coupled with a deeper understanding of the genetic basis of domestication delivered by the analysis of crop 'pangenomes', open the exciting prospect of creating novel crops via manipulation of domestication-related genes in wild species. A de novo domestication platform may allow rapid and precise conversion of crop wild relatives into crops, while retaining many of the valuable resilience and nutritional traits left behind during domestication and breeding. Using the Solanaceae family as case in point, we discuss how such a knowledge-driven pipeline could be exploited to contribute to food security over the coming decades.}, } @article {pmid33547057, year = {2021}, author = {Richards, VP and Nigsch, A and Pavinski Bitar, P and Sun, Q and Stuber, T and Ceres, K and Smith, RL and Robbe Austerman, S and Schukken, Y and Grohn, YT and Stanhope, MJ}, title = {Evolutionary genomic and bacteria GWAS analysis of Mycobacterium avium subsp. 
paratuberculosis and dairy cattle Johne's disease phenotypes.}, journal = {Applied and environmental microbiology}, volume = {87}, number = {8}, pages = {}, pmid = {33547057}, issn = {1098-5336}, abstract = {Mycobacterium avium subsp. paratuberculosis (MAP) is the causative agent of Johne's disease in ruminants, which has important health consequences for dairy cattle. The Regional Dairy Quality Management Alliance (RDQMA) project is a multistate research program involving MAP isolates taken from three intensively studied commercial dairy farms in the northeastern United States, which emphasized longitudinal data collection of both MAP isolates and animal health in three regional dairy herds for a period of about 7 years. This paper reports the results of a pan-GWAS analysis involving 318 MAP isolates and dairy cow Johne's disease phenotypes, taken from these three farms. Based on our highly curated accessory gene count the pan-GWAS analysis identified several MAP genes associated with bovine Johne's disease phenotypes scored from these three farms, with some of the genes having functions suggestive of possible cause/effect relationships to these phenotypes. This paper reports a pan-genomic comparative analysis between MAP and Mycobacterium tuberculosis, assessing functional Gene Ontology category enrichments between these taxa. Finally, we also provide a population genomic perspective on the effectiveness of herd isolation, involving closed dairy farms, in preventing MAP inter-farm cross infection on a micro-geographic scale. IMPORTANCE: Mycobacterium avium subsp. paratuberculosis (MAP) is the causative agent of Johne's disease in ruminants, which has important health consequences for dairy cattle, and enormous economic consequences for the dairy industry. Understanding which genes in this bacterium are correlated with key disease phenotypes can lead to functional experiments targeting these genes and ultimately lead to improved control strategies. 
This study represents a rare example of a prolonged longitudinal study of dairy cattle where the disease was measured and the bacteria were isolated from the same cows. The genome sequences of over 300 MAP isolates were analyzed for genes that were correlated with a wide range of Johne's disease phenotypes. A number of genes were identified that were significantly associated with several aspects of the disease and suggestive of further experimental follow-up.}, } @article {pmid33541841, year = {2021}, author = {Koonin, EV and Makarova, KS and Wolf, YI}, title = {Evolution of Microbial Genomics: Conceptual Shifts over a Quarter Century.}, journal = {Trends in microbiology}, volume = {29}, number = {7}, pages = {582-592}, pmid = {33541841}, issn = {1878-4380}, support = {ZIA LM000073/ImNIH/Intramural NIH HHS/United States ; }, mesh = {Archaea/*genetics ; Bacteria/classification/*genetics ; *Evolution, Molecular ; Genetic Variation ; *Genome, Microbial ; Genomics/*methods/*trends ; Humans ; Phylogeny ; }, abstract = {Prokaryote genomics started in earnest in 1995, with the complete sequences of two small bacterial genomes, those of Haemophilus influenzae and Mycoplasma genitalium. During the next quarter century, the prokaryote genome database has been growing exponentially, with no saturation in sight. For most of these 25 years, genome sequencing remained limited to cultivable microbes. Together with next-generation sequencing methods, advances in metagenomics and single-cell genomics have lifted this limitation, providing for an increasingly unbiased characterization of the global prokaryote diversity. Advances in computational genomics followed the progress of genome sequencing, even if occasionally lagging behind. Several major new branches of bacteria and archaea were discovered, including Asgard archaea, the apparent closest relatives of eukaryotes and expansive groups of bacteria and archaea with small genomes thought to be symbionts of other prokaryotes. 
Comparative analysis of numerous prokaryote genomes spanning a wide range of evolutionary distances changed the conceptual foundations of microbiology, supplanting the notion of species genomes with fixed gene sets with that of dynamic pangenomes and the notion of a single Tree of Life (ToL) with a statistical tree-like trend among individual gene trees. Strides were also made towards a theory and quantitative laws of prokaryote genome evolution.}, } @article {pmid33540616, year = {2021}, author = {Fiedler, S and Ambros, IM and Glogova, E and Benesch, M and Urban, C and Mayer, M and Ebetsberger-Dachs, G and Bardi, E and Jones, N and Gamper, A and Meister, B and Crazzolara, R and Amann, G and Dieckmann, K and Horcher, E and Kerbl, R and Brunner-Herglotz, B and Ziegler, A and Ambros, PF and Ladenstein, R}, title = {Long-Term Outcome and Role of Biology within Risk-Adapted Treatment Strategies: The Austrian Neuroblastoma Trial A-NB94.}, journal = {Cancers}, volume = {13}, number = {3}, pages = {}, pmid = {33540616}, issn = {2072-6694}, support = {-//St. Anna Kinderkrebsforschung, Vienna, Austria/ ; 16611//Austrian National Bank (OeNB), Vienna, Austria/ ; I 2799-B28//Austrian Science Fund (FWF)/ ; }, abstract = {We evaluated long-term outcome and genomic profiles in the Austrian Neuroblastoma Trial A-NB94 which applied a risk-adapted strategy of treatment (RAST) using stage, age and MYCN amplification (MNA) status for stratification. RAST ranged from surgery only to intensity-adjusted chemotherapy, single or multiple courses of high-dose chemotherapy (HDT) followed by autologous stem cell rescue depending on response to induction chemotherapy, and irradiation to the primary tumor site. Segmental chromosomal alterations (SCAs) were investigated retrospectively using multi- and pan-genomic techniques. The A-NB94 trial enrolled 163 patients. 
Patients with localized disease had an excellent ten-year (10y) event free survival (EFS) and overall survival (OS) of 99 ± 1% and 93 ± 2% whilst it was 80 ± 13% and 90 ± 9% for infants with stage 4S and for infants with stage 4 non-MNA disease both 83 ± 15%. Stage 4 patients either >12 months or ≤12 months but with MNA had a 10y-EFS and OS of 45 ± 8% and 47 ± 8%, respectively. SCAs were present in increasing frequencies according to stage and age: in 29% of localized tumors but in 92% of stage 4 tumors (p < 0.001), and in 39% of patients ≤ 12 months but in 63% of patients > 12 months (p < 0.001). RAST successfully reduced chemotherapy exposure in low- and intermediate-risk patients with excellent long-term results while the outcome of high-risk disease met contemporary trials.}, } @article {pmid33536414, year = {2021}, author = {Mageiros, L and Méric, G and Bayliss, SC and Pensar, J and Pascoe, B and Mourkas, E and Calland, JK and Yahara, K and Murray, S and Wilkinson, TS and Williams, LK and Hitchings, MD and Porter, J and Kemmett, K and Feil, EJ and Jolley, KA and Williams, NJ and Corander, J and Sheppard, SK}, title = {Genome evolution and the emergence of pathogenicity in avian Escherichia coli.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {765}, pmid = {33536414}, issn = {2041-1723}, support = {/WT_/Wellcome Trust/United Kingdom ; 088786/C/09/Z/WT_/Wellcome Trust/United Kingdom ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; MR/M501608/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Animals ; Chickens ; Escherichia coli/classification/*genetics/pathogenicity ; Escherichia coli Infections/diagnosis/microbiology/*prevention & control ; *Evolution, Molecular ; Genes, Bacterial ; Genetic Variation ; Genome, Bacterial/*genetics ; Genome-Wide Association Study/methods ; Genotype ; Humans ; Phylogeny ; Poultry Diseases/diagnosis/microbiology/*prevention & control ; Virulence/genetics ; }, abstract = {Chickens 
are the most common birds on Earth and colibacillosis is among the most common diseases affecting them. This major threat to animal welfare and safe sustainable food production is difficult to combat because the etiological agent, avian pathogenic Escherichia coli (APEC), emerges from ubiquitous commensal gut bacteria, with no single virulence gene present in all disease-causing isolates. Here, we address the underlying evolutionary mechanisms of extraintestinal spread and systemic infection in poultry. Combining population scale comparative genomics and pangenome-wide association studies, we compare E. coli from commensal carriage and systemic infections. We identify phylogroup-specific and species-wide genetic elements that are enriched in APEC, including pathogenicity-associated variation in 143 genes that have diverse functions, including genes involved in metabolism, lipopolysaccharide synthesis, heat shock response, antimicrobial resistance and toxicity. We find that horizontal gene transfer spreads pathogenicity elements, allowing divergent clones to cause infection. Finally, a Random Forest model prediction of disease status (carriage vs. disease) identifies pathogenic strains in the emergent ST-117 poultry-associated lineage with 73% accuracy, demonstrating the potential for early identification of emergent APEC in healthy flocks.}, } @article {pmid33533167, year = {2021}, author = {Glick, L and Mayrose, I}, title = {Panoramic: A package for constructing eukaryotic pan-genomes.}, journal = {Molecular ecology resources}, volume = {21}, number = {4}, pages = {1393-1403}, doi = {10.1111/1755-0998.13344}, pmid = {33533167}, issn = {1755-0998}, support = {US-5089-18//BARD US-Israel Agricultural Research and Development Fund/ ; //Fellowship from the Edmond J. 
Safra Center for Bioinformatics at Tel-Aviv University/ ; }, mesh = {*Eukaryota/genetics ; *Genome ; *Genomics ; Reproducibility of Results ; *Software ; }, abstract = {The study of intraspecific genomic variation in eukaryotic species has been the focus of numerous genome resequencing projects in recent years. One emerging approach for the analysis of intraspecific diversity uses the concept of a pan-genome, which theoretically represents the full set of genomic sequences and coding genes from all individuals of a given species. This approach has many advantages over reference-based methods and has been successfully applied to study both prokaryotic and eukaryotic species. However, the process of pan-genome construction still presents considerable scientific and technical challenges, especially for eukaryotic species with large and complex genomes. Although general approaches for the construction of pan-genomes have been devised, currently available software tools implement only certain modules of the entire computational procedure. Therefore, each pan-genome project requires the development of tailored analysis pipelines, thus complicating and prolonging the process and impairing research reproducibility and comparison across studies. Here, we present Panoramic, a software package for the automatic construction of eukaryotic pan-genomes. Panoramic takes raw sequencing reads as input and applies two alternative approaches for pan-genome construction. Panoramic makes pan-genome construction a considerably easier task by providing simple user interface and efficient data processing algorithms. 
We demonstrate the use of Panoramic by constructing the pan-genome of the model plant species Arabidopsis thaliana from sequencing data of 20 diverse ecotypes.}, } @article {pmid33532821, year = {2021}, author = {Schulz, T and Wittler, R and Rahmann, S and Hach, F and Stoye, J}, title = {Detecting high-scoring local alignments in pangenome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {37}, number = {16}, pages = {2266-2274}, pmid = {33532821}, issn = {1367-4811}, support = {031A537B//German Network for Bioinformatics Infrastructure/ ; //European Union's Horizon 2020 research and innovation program/ ; 872539//Marie Skłodowska-Curie/ ; GRK 1906//DFG/ ; RGPIN-05952//National Science and Engineering Council of Canada/ ; SCH-2020-0370//Michael Smith Foundation for Health Research/ ; }, abstract = {MOTIVATION: Increasing amounts of individual genomes sequenced per species motivate the usage of pangenomic approaches. Pangenomes may be represented as graphical structures, e.g. compacted colored de Bruijn graphs, which offer a low memory usage and facilitate reference-free sequence comparisons. While sequence-to-graph mapping to graphical pangenomes has been studied for some time, no local alignment search tool in the vein of BLAST has been proposed yet.

RESULTS: We present a new heuristic method to find maximum scoring local alignments of a DNA query sequence to a pangenome represented as a compacted colored de Bruijn graph. Our approach additionally allows a comparison of similarity among sequences within the pangenome. We show that local alignment scores follow an exponential-tail distribution similar to BLAST scores, and we discuss how to estimate its parameters to separate local alignments representing sequence homology from spurious findings. An implementation of our method is presented, and its performance and usability are shown. Our approach scales sublinearly in running time and memory usage with respect to the number of genomes under consideration. This is an advantage over classical methods that do not make use of sequence similarity within the pangenome.

AVAILABILITY AND IMPLEMENTATION: Source code and test data are available from https://gitlab.ub.uni-bielefeld.de/gi/plast.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid33532671, year = {2021}, author = {Aishwarya, S and Gunasekaran, K and Sagaya Jansi, R and Sangeetha, G}, title = {From genomes to molecular dynamics - A bottom up approach in extrication of SARS CoV-2 main protease inhibitors.}, journal = {Computational toxicology (Amsterdam, Netherlands)}, volume = {18}, number = {}, pages = {100156}, pmid = {33532671}, issn = {2468-1113}, abstract = {The recent pandemic Coronavirus disease-19 outbreak had traumatized global countries since its origin in late December 2019. Though the virus originated in China, it has spread rapidly across the world due its firmly established community transmission. To successfully tackle the spread and further infection, there needs a clear multidimensional understanding of the molecular mechanisms. Henceforth, 942 viral genome sequences were analysed to predict the core genomes crucial in virus life cycle. Additionally, 35 small interfering RNA transcripts were predicted that can target specifically the viral core proteins and reduce pathogenesis. The crystal structure of Covid-19 main protease-6LU7 was chosen as an attractive target due to the factors that there were fewer mutations and whose structure had significant identity to the annotated protein sequence of the core genome. Drug repurposing of both recruiting and non recruiting drugs was carried out through molecular docking procedures to recognize bitolterol as a good inhibitor of Covid-19 protease. The study was extended further to screen antiviral phytocompounds through quantitative structure activity relationship and molecular docking to identify davidigenin, from licorice as the best novel lead with good interactions and binding energy. The docking of the best compounds in all three categories was validated with molecular dynamics simulations which implied stable binding of the drug and lead molecule. 
Though the studies need clinical evaluations, the results are suggestive of curbing the pandemic.}, } @article {pmid33528277, year = {2021}, author = {Suryaletha, K and Chandrika, SK and Thomas, S}, title = {Comprehensive genomics depict accessory genes encoding pathogenicity and biofilm determinants in Enterococcus faecalis.}, journal = {Future microbiology}, volume = {16}, number = {3}, pages = {175-184}, doi = {10.2217/fmb-2020-0111}, pmid = {33528277}, issn = {1746-0921}, mesh = {Bacterial Proteins/*genetics/metabolism ; *Biofilms ; CRISPR-Cas Systems ; Enterococcus faecalis/*genetics/*pathogenicity/physiology ; Genome, Bacterial ; Genomic Islands ; Gram-Positive Bacterial Infections/microbiology ; Humans ; Plasmids/genetics/metabolism ; Virulence ; }, abstract = {Aim: Enterococcus faecalis is a leading nosocomial pathogen in biofilm-associated polymicrobial infections. The study aims to understand pathogenicity and biofilm determinants of the pathogen by genome analysis. Methodology: Genome sequencing of a strong biofilm forming clinical isolate Enterococcus faecalis SK460 devoid of Fsr quorum-signaling system, was performed and comparative genomics was carried out among a set of pathogenic biofilm formers and nonpathogenic weak biofilm formers. Results: Analysis revealed a pool of virulence and adhesion related factors associated with pathogenicity. Absence of CRISPR-Cas system facilitated acquisition of pheromone responsive plasmid, pathogenicity island and phages. Comprehensive analysis identified a subset of accessory genes encoding polysaccharide lyase, sugar phosphotransferase system, phage proteins and transcriptional regulators exclusively in pathogenic biofilm formers. 
Conclusion: The study identified a set of genes specific to pathogenic biofilm formers and these can act as targets which in turn help to develop future treatment endeavors against enterococcal infections.}, } @article {pmid33526912, year = {2021}, author = {Przewieslik-Allen, AM and Wilkinson, PA and Burridge, AJ and Winfield, MO and Dai, X and Beaumont, M and King, J and Yang, CY and Griffiths, S and Wingen, LU and Horsnell, R and Bentley, AR and Shewry, P and Barker, GLA and Edwards, KJ}, title = {The role of gene flow and chromosomal instability in shaping the bread wheat genome.}, journal = {Nature plants}, volume = {7}, number = {2}, pages = {172-183}, pmid = {33526912}, issn = {2055-0278}, support = {BB/N021061/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/J/000PR9781/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bread ; *Chromosomal Instability ; *Gene Flow ; Genetic Variation ; *Genome, Plant ; Genotype ; Plant Breeding/*methods ; Polymorphism, Single Nucleotide ; Triticum/*genetics ; }, abstract = {Bread wheat (Triticum aestivum) is one of the world's most important crops; however, a low level of genetic diversity within commercial breeding accessions can significantly limit breeding potential. In contrast, wheat relatives exhibit considerable genetic variation and so potentially provide a valuable source of novel alleles for use in breeding new cultivars. Historically, gene flow between wheat and its relatives may have contributed novel alleles to the bread wheat pangenome. To assess the contribution made by wheat relatives to genetic diversity in bread wheat, we used markers based on single nucleotide polymorphisms to compare bread wheat accessions, created in the past 150 years, with 45 related species. 
We show that many bread wheat accessions share near-identical haplotype blocks with close relatives of wheat's diploid and tetraploid progenitors, while some show evidence of introgressions from more distant species and structural variation between accessions. Hence, introgressions and chromosomal rearrangements appear to have made a major contribution to genetic diversity in cultivar collections. As gene flow from relatives to bread wheat is an ongoing process, we assess the impact that introgressions might have on future breeding strategies.}, } @article {pmid33526758, year = {2021}, author = {Cho, SH and Jeong, Y and Lee, E and Ko, SR and Ahn, CY and Oh, HM and Cho, BK and Cho, S}, title = {Assessment of Erythrobacter Species Diversity through Pan-Genome Analysis with Newly Isolated Erythrobacter sp. 3-20A1M.}, journal = {Journal of microbiology and biotechnology}, volume = {31}, number = {4}, pages = {601-609}, pmid = {33526758}, issn = {1738-8872}, mesh = {Base Composition ; Biosynthetic Pathways ; Carotenoids ; Genome Size ; *Genome, Bacterial ; Multigene Family ; *Phylogeny ; Republic of Korea ; Seawater/microbiology ; Secondary Metabolism ; Sphingomonadaceae/*classification/isolation & purification ; }, abstract = {Erythrobacter species are extensively studied marine bacteria that produce various carotenoids. Due to their photoheterotrophic ability, it has been suggested that they play a crucial role in marine ecosystems. It is essential to identify the genome sequence and the genes of the species to predict their role in the marine ecosystem. In this study, we report the complete genome sequence of the marine bacterium Erythrobacter sp. 3-20A1M. The genome size was 3.1 Mbp and its GC content was 64.8%. In total, 2998 genetic features were annotated, of which 2882 were annotated as functional coding genes. Using the genetic information of Erythrobacter sp. 3-20A1M, we performed pangenome analysis with other Erythrobacter species. 
This revealed highly conserved secondary metabolite biosynthesis-related COG functions across Erythrobacter species. Through subsequent secondary metabolite biosynthetic gene cluster prediction and KEGG analysis, the carotenoid biosynthetic pathway was proven conserved in all Erythrobacter species, except for the spheroidene and spirilloxanthin pathways, which are only found in photosynthetic Erythrobacter species. The presence of virulence genes, especially the plant-algae cell wall degrading genes, revealed that Erythrobacter sp. 3-20A1M is a potential marine plant-algae scavenger.}, } @article {pmid33526285, year = {2021}, author = {Takahashi, T and Lee, S and Kim, S}, title = {Genomic characteristics of Streptococcus agalactiae based on the pan-genome orthologous group analysis according to invasiveness and capsular genotype.}, journal = {Journal of infection and chemotherapy : official journal of the Japan Society of Chemotherapy}, volume = {27}, number = {6}, pages = {814-819}, doi = {10.1016/j.jiac.2021.01.008}, pmid = {33526285}, issn = {1437-7780}, mesh = {Female ; Genomics ; Genotype ; Humans ; Phylogeny ; Republic of Korea ; *Streptococcal Infections ; *Streptococcus agalactiae/genetics ; }, abstract = {OBJECTIVE: Following the construction of a bacterial pan-genome from the whole genome sequences on a web-based pipeline, all coding DNA sequences (CDSs) can be clustered into pan-genome orthologous groups (POGs), which is a similar approach to comparative genome hybridization on glass microscope slides. We aimed to clarify the genomic characteristics of Streptococcus agalactiae based on the POG analysis.

METHODS: Sixty-six S. agalactiae isolates obtained from invasive specimens (blood and cerebrospinal fluid) and non-invasive specimens (urine and vaginal discharge) between 2010 and 2017 in Korea were subjected to whole genome sequencing (WGS). Based on the WGS data, we conducted the POG analysis and constructed a phylogenetic tree along with capsular polysaccharide (CPS) genotyping. We compared the genomics of invasive vs. non-invasive isolates, as well as CPS III vs. non-CPS III genotypes.

RESULTS: Predicted pan- and core-genome sizes were 3416 and 1658 genes, respectively. We found four clusters consisting of CPS genotypes (III, VIII, Ib/VI, and Ia) in the phylogenetic tree. There were significant differences in two metabolic pathways specific to invasiveness, and in six metabolic pathways specific to CPS III type produced by CDSs.

CONCLUSION: Our observations reveal the pan- and core-genome sizes, four clusters of genomes distributed by CPS genotypes, and unique CDS features of S. agalactiae by comparative genomics in terms of invasiveness and CPS genotype.}, } @article {pmid33520430, year = {2021}, author = {Naorem, RS and Blom, J and Fekete, C}, title = {Genome-wide comparison of four MRSA clinical isolates from Germany and Hungary.}, journal = {PeerJ}, volume = {9}, number = {}, pages = {e10185}, pmid = {33520430}, issn = {2167-8359}, abstract = {Staphylococcus aureus is a drug-resistant pathogen, capable of colonizing diverse ecological niches and causing a broad spectrum of infections related to a community and healthcare. In this study, we choose four methicillin-resistant S. aureus (MRSA) clinical isolates from Germany and Hungary based on our previous polyphasic characterization finding. We assumed that the selected strains have a different genetic background in terms of the presence of resistance and virulence genes, prophages, plasmids, and secondary metabolite biosynthesis genes that may play a crucial role in niche adaptation and pathogenesis. To clarify these assumptions, we performed a comparative genome analysis of these strains and observed many differences in their genomic compositions. The Hungarian isolates (SA H27 and SA H32) with ST22-SCCmec type IVa have fewer genes for multiple-drug resistance, virulence, and prophages reported in Germany isolates. Germany isolate, SA G6 acquires aminoglycoside (ant(6)-Ia and aph(3')-III) and nucleoside (sat-4) resistance genes via phage transduction and may determine its pathogenic potential. The comparative genome study allowed the segregation of isolates of geographical origin and differentiation of the clinical isolates from the commensal isolates. This study suggested that Germany and Hungarian isolates are genetically diverse and showing variation among them due to the gain or loss of mobile genetic elements (MGEs). 
An interesting finding is the addition of SA G6 genome responsible for the drastic decline of the core/pan-genome ratio curve and causing the pan-genome to open wider. Functional characterizations revealed that S. aureus isolates survival are maintained by the amino acids catabolism and favor adaptation to growing in a protein-rich medium. The dispersible and singleton genes content of S. aureus genomes allows us to understand the genetic variation among the CC5 and CC22 groups. The strains with the same genetic background were clustered together, which suggests that these strains are highly alike; however, comparative genome analysis exposed that the acquisition of phage elements, and plasmids through the events of MGEs transfer contribute to differences in their phenotypic characters. This comparative genome analysis would improve the knowledge about the pathogenic S. aureus strain's characterization, and responsible for clinically important phenotypic differences among the S. aureus strains.}, } @article {pmid33519795, year = {2020}, author = {Li, F and Ye, Q and Chen, M and Zhang, J and Xue, L and Wang, J and Wu, S and Zeng, H and Gu, Q and Zhang, Y and Wei, X and Ding, Y and Wu, Q}, title = {Multiplex PCR for the Identification of Pathogenic Listeria in Flammulina velutipes Plant Based on Novel Specific Targets Revealed by Pan-Genome Analysis.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {634255}, pmid = {33519795}, issn = {1664-302X}, abstract = {Listeria spp. is an important foodborne disease agent, often found in the fresh mushroom (Flammulina velutipes) and its production environment. The aim of this study was to develop multiplex PCR for rapid identification of Listeria monocytogenes and Listeria ivanovii, and nonpathogenic Listeria in F. velutipes plants. Pan-genome analysis was first used to identify five novel Listeria-specific targets: one for the Listeria genus, one for L. monocytogenes, and three for L. ivanovii. 
Primers for the novel targets were highly specific in individual reactions. The detection limits were $10^{3}$-$10^{4}$ CFU/mL, meeting the requirements of molecular detection. A mPCR assay for the identification of pathogenic Listeria, with primers targeting the novel genes specific for Listeria genus (LMOSLCC2755_0944), L. monocytogenes (LMOSLCC2755_0090), and L. ivanovii (queT_1) was then designed. The assay specificity was robustly verified by analyzing nonpathogenic Listeria and non-Listeria spp. strains. The determined detection limits were 2.0 × $10^{3}$ CFU/mL for L. monocytogenes and 3.4 × $10^{3}$ CFU/mL for L. ivanovii, for pure culture analysis. Further, the assay detected 7.6 × $10^{4}$ to 7.6 × $10^{0}$ CFU/10 g of pathogenic Listeria spiked into F. velutipes samples following 4-12 h enrichment. The assay feasibility was evaluated by comparing with a traditional culture-based method, by analyzing 129 samples collected from different F. velutipes plants. The prevalence of Listeria spp. and L. monocytogenes was 58.1% and 41.1%, respectively. The calculated κ factors for Listeria spp., L. monocytogenes, and L. ivanovii were 0.97, 0.97, and 1, respectively. The results of the novel mPCR assay were highly consistent with those of the culture-based method. 
The new assay thus will allow rapid, specific, and accurate detection and monitoring of pathogenic Listeria in food and its production environment.}, } @article {pmid33516973, year = {2021}, author = {Molina-Mora, JA and Chinchilla-Montero, D and García-Batán, R and García, F}, title = {Genomic context of the two integrons of ST-111 Pseudomonas aeruginosa AG1: A VIM-2-carrying old-acquaintance and a novel IMP-18-carrying integron.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {89}, number = {}, pages = {104740}, doi = {10.1016/j.meegid.2021.104740}, pmid = {33516973}, issn = {1567-7257}, mesh = {Bacterial Proteins/*genetics ; *Genome, Bacterial ; *Integrons ; Pseudomonas aeruginosa/*genetics ; beta-Lactamases/*metabolism ; }, abstract = {Pseudomonas aeruginosa is an opportunist and versatile organism responsible for infections mainly in immunocompromised hosts. This pathogen has high intrinsic resistance to most antimicrobials. P. aeruginosa AG1 (PaeAG1) is a Costa Rican high-risk ST-111 strain with resistance to multiple antibiotics, including carbapenems, due to the activity of VIM-2 and IMP-18 metallo-β-lactamases (MBLs). These genes are harbored in two class 1 integrons located in one out of the 57 PaeAG1 genomic islands. However, the genomic context associated to these determinants in PaeAG1 and other P. aeruginosa strains is unclear. Thus, we first assessed the transcriptional activity of VIM-2 and IMP-18 genes when exposed to imipenem (a carbapenem) by RT-qPCR. To select related genomes to PaeAG1, we implemented a pan-genome analysis to define and up-date the phylogenetic relationship among complete P. aeruginosa genomes. We also studied the PaeAG1 genomic islands content in the related strains and finally we described the architecture and possible evolutionary steps of the genomic regions around the VIM-2- and IMP-18-carrying integrons. 
Expression of VIM-2 and IMP-18 genes was demonstrated to be induced after imipenem exposure. In a subsequent comparative genomics analysis with 211 strains, the P. aeruginosa pan-genome revealed that complete genome sequences are able to separate clones by MLST profile, including a clear ST-111 cluster with PaeAG1. The PaeAG1 genomic islands were found to define a diverse presence/absence pattern among related genomes. Finally, landscape reconstruction of genomic regions showed that VIM-2-carrying integron (In59-like) is an old-acquaintance element harbored in the same known region found in other two ST-111 strains. Also, PaeAG1 has an exclusive genomic region containing a novel IMP-18-carrying integron (registered as In1666), with an arrangement never reported before. Altogether, we provide new insights about the genomic determinants associated with the resistance to carbapenems in this high-risk P. aeruginosa using comparative genomics.}, } @article {pmid33513905, year = {2021}, author = {Brondani, VB and Lacombe, AMF and Mariani, BMP and Montenegro, L and Soares, IC and Bezerra-Neto, JE and Tanno, FY and Srougi, V and Chambo, JL and Mendonca, BB and Almeida, MQ and Zerbini, MCN and Fragoso, MCBV}, title = {Low Protein Expression of both ATRX and ZNRF3 as Novel Negative Prognostic Markers of Adult Adrenocortical Carcinoma.}, journal = {International journal of molecular sciences}, volume = {22}, number = {3}, pages = {}, pmid = {33513905}, issn = {1422-0067}, support = {2017/26345-5//Fundação de Amparo à Pesquisa do Estado de São Paulo/ ; }, mesh = {Adolescent ; Adrenal Cortex Neoplasms/*metabolism/*mortality/pathology ; Adrenocortical Carcinoma/*metabolism/*mortality/pathology ; Adult ; Aged ; Aged, 80 and over ; Biomarkers, Tumor/metabolism ; Cohort Studies ; Disease-Free Survival ; Female ; Humans ; Immunohistochemistry ; Kaplan-Meier Estimate ; Ki-67 Antigen/metabolism ; Male ; Middle Aged ; Neoplasm Recurrence, Local/*metabolism/pathology ; Prognosis ; 
Regression Analysis ; Tissue Array Analysis ; Ubiquitin-Protein Ligases/*metabolism ; X-linked Nuclear Protein/*metabolism ; }, abstract = {Adrenocortical carcinoma (ACC) is a rare malignancy that is associated with a dismal prognosis. Pan-genomic studies have demonstrated the involvement of ATRX and ZNRF3 genes in adrenocortical tumorigenesis. Our aims were to evaluate the protein expression of ATRX and ZNRF3 in a cohort of 82 adults with ACC and to establish their prognostic value. Two pathologists analyzed immuno-stained slides of a tissue microarray. The low protein expression of ATRX and ZNRF3 was associated with a decrease in overall survival (OS) (p = 0.045, p = 0.012, respectively). The Cox regression for ATRX protein expression of >1.5 showed a hazard ratio (HR) for OS of 0.521 (95% CI 0.273-0.997; p = 0.049) when compared with ≤1.5; for ZNRF3 expression >2, the HR for OS was 0.441 (95% CI, 0.229-0.852; p = 0.015) when compared with ≤2. High ATRX and ZNRF3 protein expressions were associated with optimistic recurrence-free survival (RFS) (p = 0.027 and p = 0.005, respectively). The Cox regression of RFS showed an HR of 0.332 (95%CI, 0.111-0.932) for ATRX expression >2.7 (p = 0.037), and an HR of 0.333 (95%CI, 0.140-0.790) for ZNRF3 expression >2 (p = 0.013). 
In conclusion, low protein expression of ATRX and ZNRF3 are negative prognostic markers of ACC; however, different cohorts should be evaluated to validate these findings.}, } @article {pmid33507916, year = {2021}, author = {Vázquez-Rosas-Landa, M and Sánchez-Rangel, D and Hernández-Domínguez, EE and Pérez-Torres, CA and López-Buenfil, A and de Jesús García-Ávila, C and Carrillo-Hernández, ED and Castañeda-Casasola, CC and Rodríguez-Haas, B and Pérez-Lira, J and Villafán, E and Alonso-Sánchez, A and Ibarra-Laclette, E}, title = {Design of a diagnostic system based on molecular markers derived from the ascomycetes pan-genome analysis: The case of Fusarium dieback disease.}, journal = {PloS one}, volume = {16}, number = {1}, pages = {e0246079}, pmid = {33507916}, issn = {1932-6203}, mesh = {Animals ; *Ascomycota/classification/genetics ; Coleoptera/*microbiology ; *Fusarium/classification/genetics ; *Genome, Fungal ; Persea/*microbiology ; Plant Diseases/*microbiology ; }, abstract = {A key factor to take actions against phytosanitary problems is the accurate and rapid detection of the causal agent. Here, we develop a molecular diagnostics system based on comparative genomics to easily identify fusariosis and specific pathogenic species as the Fusarium kuroshium, the symbiont of the ambrosia beetle Euwallaceae kuroshio Gomez and Hulcr which is responsible for Fusarium dieback disease in San Diego CA, USA. We performed a pan-genome analysis using sixty-three ascomycetes fungi species including phytopathogens and fungi associated with the ambrosia beetles. Pan-genome analysis revealed that 2,631 orthologue genes are only shared by Fusarium spp., and on average 3,941 (SD ± 1,418.6) are species-specific genes. These genes were used for PCR primer design and tested on DNA isolated from i) different strains of ascomycete species, ii) artificially infected avocado stems and iii) plant tissue of field-collected samples presumably infected. 
Our results let us propose a useful set of primers to either identify any species from Fusarium genus or, in a specific manner, species such as F. kuroshium, F. oxysporum, and F. graminearum. The results suggest that the molecular strategy employed in this study can be expanded to design primers against different types of pathogens responsible for provoking critical plant diseases.}, } @article {pmid33505375, year = {2020}, author = {Candeliere, F and Raimondi, S and Spampinato, G and Tay, MYF and Amaretti, A and Schlundt, J and Rossi, M}, title = {Comparative Genomics of Leuconostoc carnosum.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {605127}, pmid = {33505375}, issn = {1664-302X}, abstract = {Leuconostoc carnosum is a known colonizer of meat-related food matrices. It reaches remarkably high loads during the shelf life in packaged meat products and plays a role in spoilage, although preservative effects have been proposed for some strains. In this study, the draft genomes of 17 strains of L. carnosum (i.e., all the strains that have been sequenced so far) were compared to decipher their metabolic and functional potential and to determine their role in food transformations. Genome comparison and pathway reconstruction indicated that L. carnosum is a compact group of closely related heterofermentative bacteria sharing most of the metabolic features. Adaptation to a nitrogen-rich environment, such as meat, is evidenced by 23 peptidase genes identified in the core genome and by the autotrophy for nitrogen compounds including several amino acids, vitamins, and cofactors. Genes encoding the decarboxylases yielding biogenic amines were not present. All the strains harbored 1-4 of 32 different plasmids, bearing functions associated to proteins hydrolysis, transport of amino acids and oligopeptides, exopolysaccharides, and various resistances (e.g., to environmental stresses, bacteriophages, and heavy metals). 
Functions associated to bacteriocin synthesis, secretion, and immunity were also found in plasmids. While genes for lactococcin were found in most plasmids, only three harbored the genes for leucocin B, a class IIa antilisterial bacteriocin. Determinants of antibiotic resistances were absent in both plasmids and chromosomes.}, } @article {pmid33505359, year = {2020}, author = {Bryan, NC and Lebreton, F and Gilmore, M and Ruvkun, G and Zuber, MT and Carr, CE}, title = {Genomic and Functional Characterization of Enterococcus faecalis Isolates Recovered From the International Space Station and Their Potential for Pathogenicity.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {515319}, pmid = {33505359}, issn = {1664-302X}, support = {80NSSC17K0688/ImNASA/Intramural NASA/United States ; }, abstract = {Enterococcus faecalis is a multidrug resistant, opportunistic human pathogen and a leading cause of hospital acquired infections. Recently, isolates have been recovered from the air and surfaces onboard the International Space Station (ISS). Pangenomic and functional analyses were carried out to assess their potential impact on astronaut health. Genomes of each ISS isolate, and both clinical and commensal reference strains, were evaluated for their core and unique gene content, acquired antibiotic resistance genes, phage, plasmid content, and virulence traits. In order to determine their potential survival when outside of the human host, isolates were also challenged with three weeks of desiccation at 30% relative humidity. Finally, pathogenicity of the ISS strains was evaluated in the model organism Caenorhabditis elegans. At the culmination of this study, there were no defining signatures that separated known pathogenic strains from the more commensal phenotypes using the currently available resources. 
As a result, the current reliance on database information alone must be shifted to experimentally evaluated genotypic and phenotypic characteristics of clinically relevant microorganisms.}, } @article {pmid33500331, year = {2021}, author = {Rajput, A and Seif, Y and Choudhary, KS and Dalldorf, C and Poudel, S and Monk, JM and Palsson, BO}, title = {Pangenome Analytics Reveal Two-Component Systems as Conserved Targets in ESKAPEE Pathogens.}, journal = {mSystems}, volume = {6}, number = {1}, pages = {}, pmid = {33500331}, issn = {2379-5077}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, abstract = {The two-component system (TCS) helps bacteria sense and respond to environmental stimuli through histidine kinases and response regulators. TCSs are the largest family of multistep signal transduction processes, and they are involved in many important cellular processes such as antibiotic resistance, pathogenicity, quorum sensing, osmotic stress, and biofilms. Here, we perform the first comprehensive study to highlight the role of TCSs as potential drug targets against ESKAPEE (Enterococcus faecium, Staphylococcus aureus, Klebsiella pneumoniae, Acinetobacter baumannii, Pseudomonas aeruginosa, Enterobacter spp., and Escherichia coli) pathogens through annotation, mapping, pangenomic status, gene orientation, and sequence variation analysis. The distribution of the TCSs is group specific with regard to Gram-positive and Gram-negative bacteria, except for KdpDE. The TCSs among ESKAPEE pathogens form closed pangenomes, except for Pseudomonas aeruginosa. Furthermore, their conserved nature due to closed pangenomes might make them good drug targets. Fitness score analysis suggests that any mutation in some TCSs such as BaeSR, ArcBA, EvgSA, and AtoSC, etc., might be lethal to the cell. Taken together, the results of this pangenomic assessment of TCSs reveal a range of strategies deployed by the ESKAPEE pathogens to manifest pathogenicity and antibiotic resistance. 
This study further suggests that the conserved features of TCSs might make them an attractive group of potential targets with which to address antibiotic resistance. IMPORTANCE The ESKAPEE pathogens are the leading cause of health care-associated infections worldwide. Two-component systems (TCSs) can be used as effective targets against pathogenic bacteria since they are ubiquitous and manage various vital functions such as antibiotic resistance, virulence, biofilms, quorum sensing, and pH balance, among others. This study provides a comprehensive overview of the pangenomic status of the TCSs among ESKAPEE pathogens. The annotation and pangenomic analysis of TCSs show that they are significantly distributed and conserved among the pathogens, as most of them form closed pangenomes. Furthermore, our analysis also reveals that the removal of the TCSs significantly affects the fitness of the cell. Hence, they may be used as promising drug targets against bacteria.}, } @article {pmid33499002, year = {2021}, author = {Paley, S and Billington, R and Herson, J and Krummenacker, M and Karp, PD}, title = {Pathway Tools Visualization of Organism-Scale Metabolic Networks.}, journal = {Metabolites}, volume = {11}, number = {2}, pages = {}, pmid = {33499002}, issn = {2218-1989}, support = {R01 GM075742/GM/NIGMS NIH HHS/United States ; R01 GM077678/GM/NIGMS NIH HHS/United States ; R01 GM080746/GM/NIGMS NIH HHS/United States ; GM075742, GM077678, and GM080746/NH/NIH HHS/United States ; }, abstract = {Metabolomics, synthetic biology, and microbiome research demand information about organism-scale metabolic networks. The convergence of genome sequencing and computational inference of metabolic networks has enabled great progress toward satisfying that demand by generating metabolic reconstructions from the genomes of thousands of sequenced organisms. 
Visualization of whole metabolic networks is critical for aiding researchers in understanding, analyzing, and exploiting those reconstructions. We have developed bioinformatics software tools that automatically generate a full metabolic-network diagram for an organism, and that enable searching and analyses of the network. The software generates metabolic-network diagrams for unicellular organisms, for multi-cellular organisms, and for pan-genomes and organism communities. Search tools enable users to find genes, metabolites, enzymes, reactions, and pathways within a diagram. The diagrams are zoomable to enable researchers to study local neighborhoods in detail and to see the big picture. The diagrams also serve as tools for comparison of metabolic networks and for interpreting high-throughput datasets, including transcriptomics, metabolomics, and reaction fluxes computed by metabolic models. These data can be overlaid on the metabolic charts to produce animated zoomable displays of metabolic flux and metabolite abundance. The BioCyc.org website contains whole-network diagrams for more than 18,000 sequenced organisms. 
The ready availability of organism-specific metabolic network diagrams and associated tools for almost any sequenced organism are useful for researchers working to better understand the metabolism of their organism and to interpret high-throughput datasets in a metabolic context.}, } @article {pmid33488540, year = {2020}, author = {Freitas-Silva, J and de Oliveira, BFR and Vigoder, FM and Muricy, G and Dobson, ADW and Laport, MS}, title = {Peeling the Layers Away: The Genomic Characterization of Bacillus pumilus 64-1, an Isolate With Antimicrobial Activity From the Marine Sponge Plakina cyanorosea (Porifera, Homoscleromorpha).}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {592735}, pmid = {33488540}, issn = {1664-302X}, abstract = {Bacillus pumilus 64-1, a bacterial strain isolated from the marine sponge Plakina cyanorosea, which exhibits antimicrobial activity against both pathogenic and drug-resistant Gram-positive and Gram-negative bacteria. This study aimed to conduct an in-depth genomic analysis of this bioactive sponge-derived strain. The nearly complete genome of strain 64-1 consists of 3.6 Mbp (41.5% GC), which includes 3,705 coding sequences (CDS). An open pangenome was observed when limiting to the type strains of the B. pumilus group and aquatic-derived B. pumilus representatives. The genome appears to encode for at least 12 potential biosynthetic gene clusters (BGCs), including both types I and III polyketide synthases (PKS), non-ribosomal peptide synthetases (NRPS), and one NRPS-T1PKS hybrid, among others. In particular, bacilysin and other bacteriocin-coding genes were found and may be associated with the detected antimicrobial activity. Strain 64-1 also appears to possess a broad repertoire of genes encoding for plant cell wall-degrading carbohydrate-active enzymes (CAZymes). 
A myriad of genes which may be involved in various process required by the strain in its marine habitat, such as those encoding for osmoprotectory transport systems and the biosynthesis of compatible solutes were also present. Several heavy metal tolerance genes are also present, together with various mobile elements including a region encoding for a type III-B Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) region, four prophage segments and transposase elements. This is the first report on the genomic characterization of a cultivable bacterial member of the Plakina cyanorosea holobiont.}, } @article {pmid33484244, year = {2021}, author = {Jayakodi, M and Schreiber, M and Stein, N and Mascher, M}, title = {Building pan-genome infrastructures for crop plants and their use in association genetics.}, journal = {DNA research : an international journal for rapid publication of reports on genes and genomes}, volume = {28}, number = {1}, pages = {}, pmid = {33484244}, issn = {1756-1663}, mesh = {Computational Biology ; Epigenomics ; Gene Expression Profiling ; Genetic Variation ; *Genome, Plant ; Genomics/*methods ; *High-Throughput Nucleotide Sequencing ; Plants/*genetics ; Sequence Analysis, DNA ; Sequence Analysis, RNA ; Transcriptome ; }, abstract = {Pan-genomic studies aim at representing the entire sequence diversity within a species to provide useful resources for evolutionary studies, functional genomics and breeding of cultivated plants. Cost reductions in high-throughput sequencing and advances in sequence assembly algorithms have made it possible to create multiple reference genomes along with a catalogue of all forms of genetic variations in plant species with large and complex or polyploid genomes. In this review, we summarize the current approaches to building pan-genomes as an in silico representation of plant sequence diversity and outline relevant methods for their effective utilization in linking structural with phenotypic variation. 
We propose as future research avenues (i) transcriptomic and epigenomic studies across multiple reference genomes and (ii) the development of user-friendly and feature-rich pan-genome browsers.}, } @article {pmid33479307, year = {2021}, author = {Rani, A and Ravindran, VB and Surapaneni, A and Shahsavari, E and Haleyur, N and Mantri, N and Ball, AS}, title = {Evaluation and comparison of recombinase polymerase amplification coupled with lateral-flow bioassay for Escherichia coli O157:H7 detection using diifeerent genes.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {1881}, pmid = {33479307}, issn = {2045-2322}, mesh = {Animals ; Biological Assay/*methods ; Carbohydrate Epimerases/*genetics ; DNA, Bacterial/genetics ; Drinking Water/microbiology ; Escherichia coli O157/*genetics/isolation & purification ; Escherichia coli Proteins/*genetics ; Flagellin/*genetics ; Food Microbiology/methods ; Fruit and Vegetable Juices/microbiology ; Humans ; Milk/microbiology ; Polymerase Chain Reaction/*methods ; Recombinases/metabolism ; Reproducibility of Results ; Shiga Toxin/*genetics ; Transaminases/*genetics ; }, abstract = {Shiga toxin-producing Escherichia coli serotype O157:H7 is a food and waterborne zoonotic pathogen causing gastroenteritis in humans. Rapid and simple detection in water and food is imperative to control its spread. However, traditional microbial detection approaches are time-consuming, expensive and complex to operate at the point-of-care without professional training. We present a rapid, simple, sensitive, specific and portable method for detection of E. coli O157:H7 in drinking water, apple juice and milk. We evaluated the effect of gene selection in detecting E. coli O157:H7 using recombinase polymerase amplification coupled with a lateral flow assay using rfbE, fliC and stx gene targets. As low as 100 ag and 1 fg DNA, 4-5 CFU/mL and 10[1] CFU/mL of E. 
coli O157:H7 was detected using the stx and rfbE gene targets respectively with 100% specificity, whilst the detection limit was 10 fg DNA and 10[2] CFU/mL for the fliC gene target, with 72.8% specificity. The RPA-LFA can be completed within 8 min at temperatures between 37 and 42 °C with reduced handling and simple equipment requirements. The test threshold amplification of the target was achieved in 5-30 min of incubation. In conclusion, RPA-LFA represents a potential rapid and effective alternative to conventional methods for the monitoring of E. coli O157:H7 in food and water.}, } @article {pmid33477842, year = {2021}, author = {Ruiz-Roldán, L and de Toro, M and Sáenz, Y}, title = {Whole Genome Analysis of Environmental Pseudomonas mendocina Strains: Virulence Mechanisms and Phylogeny.}, journal = {Genes}, volume = {12}, number = {1}, pages = {}, pmid = {33477842}, issn = {2073-4425}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology/therapeutic use ; Bacterial Proteins/genetics ; Drug Resistance, Microbial/genetics ; Ducks/microbiology ; Endocarditis/drug therapy/*microbiology ; Endodeoxyribonucleases/genetics ; Feces/microbiology ; Food Microbiology ; *Genome, Bacterial ; Humans ; Lettuce/microbiology ; Phylogeny ; Pseudomonas mendocina/genetics/*pathogenicity ; Sepsis/drug therapy/*microbiology ; Virulence Factors/*genetics ; Whole Genome Sequencing ; }, abstract = {Pseudomonas mendocina is an environmental bacterium, rarely isolated in clinical specimens, although it has been described as producing endocarditis and sepsis. Little is known about its genome. Whole genome sequencing can be used to learn about the phylogeny, evolution, or pathogenicity of these isolates. Thus, the aim of this study was to analyze the resistome, virulome, and phylogenetic relationship of two P. mendocina strains, Ps542 and Ps799, isolated from a healthy Anas platyrhynchos fecal sample and a lettuce, respectively. 
Among all of the small number of P. mendocina genomes available in the National Center for Biotechnology Information (NCBI) repository, both strains were placed within one of two well-defined phylogenetic clusters. Both P. mendocina strains lacked antimicrobial resistance genes, but the Ps799 genome showed a MOBP3 family relaxase. Nevertheless, this study revealed that P. mendocina possesses an important number of virulence factors, including a leukotoxin, flagella, pili, and the Type 2 and Type 6 Secretion Systems, that could be responsible for their pathogenesis. More phenotypical and in vivo studies are needed to deepen the association with human infections and the potential P. mendocina pathogenicity.}, } @article {pmid33477474, year = {2021}, author = {Guardiola-Avila, I and Sánchez-Busó, L and Acedo-Félix, E and Gomez-Gil, B and Zúñiga-Cabrera, M and González-Candelas, F and Noriega-Orozco, L}, title = {Core and Accessory Genome Analysis of Vibrio mimicus.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, pmid = {33477474}, issn = {2076-2607}, abstract = {Vibrio mimicus is an emerging pathogen, mainly associated with contaminated seafood consumption. However, little is known about its evolution, biodiversity, and pathogenic potential. This study analyzes the pan-, core, and accessory genomes of nine V. mimicus strains. The core genome yielded 2424 genes in chromosome I (ChI) and 822 genes in chromosome II (ChII), with an accessory genome comprising an average of 10.9% of the whole genome for ChI and 29% for ChII. Core genome phylogenetic trees were obtained, and V. mimicus ATCC-33654 strain was the closest to the outgroup in both chromosomes. Additionally, a phylogenetic study of eight conserved genes (ftsZ, gapA, gyrB, topA, rpoA, recA, mreB, and pyrH), including Vibrio cholerae, Vibrio parilis, Vibrio metoecus, and Vibrio caribbenthicus, clearly showed clade differentiation. 
The main virulence genes found in ChI corresponded with type I secretion proteins, extracellular components, flagellar proteins, and potential regulators, while, in ChII, the main categories were type-I secretion proteins, chemotaxis proteins, and antibiotic resistance proteins. The accessory genome was characterized by the presence of mobile elements and toxin encoding genes in both chromosomes. Based on the genome atlas, it was possible to characterize differential regions between strains. The pan-genome of V. mimicus encompassed 3539 genes for ChI and 2355 genes for ChII. These results give us an insight into the virulence and gene content of V. mimicus, as well as constitute the first approach to its diversity.}, } @article {pmid33466262, year = {2021}, author = {Nguyen, HTL and Kasapis, S and Mantri, N}, title = {Physicochemical Properties and Effects of Honeys on Key Biomarkers of Oxidative Stress and Cholesterol Homeostasis in HepG2 Cells.}, journal = {Nutrients}, volume = {13}, number = {1}, pages = {}, pmid = {33466262}, issn = {2072-6643}, mesh = {Antioxidants/metabolism ; *Biomarkers ; Cell Survival/drug effects ; *Chemical Phenomena ; Cholesterol/*metabolism ; Gene Expression Regulation/drug effects ; Hep G2 Cells ; Homeostasis/*drug effects ; Honey/*analysis ; Humans ; Lipid Metabolism/drug effects ; Oxidative Stress/*drug effects ; Spectrum Analysis ; }, abstract = {Manuka honey and newly developed honeys (arjuna, guggul, jiaogulan and olive) were examined for their physicochemical, biochemical properties and effects on oxidative stress and cholesterol homeostasis in fatty acid-induced HepG2 cells. The honeys exhibited standard moisture content (<20%), electrical conductivity (<0.8 mS/cm), acidic pH, and monosaccharides (>60%), except olive honey (<60% total monosaccharides). They all expressed non-Newtonian behavior and 05 typical regions of the FTIR spectra as those of natural ones. 
Guggul and arjuna, manuka honeys showed the highest phenolic contents, correlating with their significant antioxidant activities. Arjuna, guggul and manuka honeys demonstrated the agreement of total cholesterol reduction and the transcriptional levels of AMPK, SREBP2, HCMGR, LDLR, LXRα. Jiaogulan honey showed the least antioxidant content and activity, but it was the most cytotoxic. Both jiaogulan and olive honeys modulated the tested gene in the pattern that should lead to a lower TC content, but this reduction did not occur after 24 h. All 2% concentrations of tested honeys elicited a clearer effect on NQO1 gene expression. In conclusion, the new honeys complied with international norms for natural honeys and we provide partial evidence for the protective effects of manuka, arjuna and guggul honeys amongst the tested ones on key biomarkers of oxidative stress and cholesterol homeostasis, pending further studies to better understand their modes of action.}, } @article {pmid33458736, year = {2021}, author = {Pais, AKL and Silva, JRD and Santos, LVSD and Albuquerque, GMR and Farias, ARG and Silva Junior, WJ and Balbino, VQ and Silva, AMF and Gama, MASD and Souza, EB}, title = {Genomic sequencing of different sequevars of Ralstonia solanacearum belonging to the Moko ecotype.}, journal = {Genetics and molecular biology}, volume = {44}, number = {1}, pages = {e20200172}, pmid = {33458736}, issn = {1415-4757}, abstract = {Banana vascular wilt or Moko is a disease caused by Ralstonia solanacearum. This study aimed to sequence, assemble, annotate, and compare the genomes of R. solanacearum Moko ecotypes of different sequevar strains from Brazil. Average nucleotide identity analyses demonstrated a high correlation (> 96%) between the genome sequences of strains CCRMRs277 (sequevar IIA-24), CCRMRs287 (IIB-4), CCRMRs304 (IIA-24), and CCRMRsB7 (IIB-25), which were grouped into phylotypes IIA and IIB. 
The number of coding sequences present in chromosomes and megaplasmids varied from 3,070 to 3,521 and 1,669 to 1,750, respectively. Pangenome analysis identified 3,378 clusters in the chromosomes, of which 2,604 were shared by all four analyzed genomes and 2,580 were single copies. In megaplasmids, 1,834 clusters were identified, of which 1,005 were shared by all four genomes and 992 were identified as single copies. Strains CCRMRsB7 and CCRMRs287 differed from the others by having unique clusters in both their chromosomes and megaplasmids, and CCRMRsB7 possessed the largest genome among all Moko ecotype strains sequenced to date. Therefore, the genomic information obtained in this study provides a theoretical basis for the identification, characterization, and phylogenetic analysis of R. solanacearum Moko ecotypes.}, } @article {pmid33455693, year = {2021}, author = {Navarro, S and Cuatrecasas, M and Hernández-Losa, J and Landolfi, S and Musulén, E and Ramón Y Cajal, S and García-Carbonero, R and García-Foncillas, J and Pérez-Segura, P and Salazar, R and Vera, R and García-Alfonso, P}, title = {[Update of the recommendations for the determination of biomarkers in colorectal carcinoma. 
National Consensus of the Spanish Society of Medical Oncology and the Spanish Society of Pathology].}, journal = {Revista espanola de patologia : publicacion oficial de la Sociedad Espanola de Anatomia Patologica y de la Sociedad Espanola de Citologia}, volume = {54}, number = {1}, pages = {41-54}, doi = {10.1016/j.patol.2020.07.004}, pmid = {33455693}, issn = {1988-561X}, mesh = {Adenomatous Polyposis Coli/genetics ; Biomarkers, Tumor/*genetics ; Colorectal Neoplasms/*genetics/*pathology ; Colorectal Neoplasms, Hereditary Nonpolyposis/genetics ; *Consensus ; GTP Phosphohydrolases/genetics ; Gene Fusion ; Genes, erbB-2 ; Genes, ras ; Genetic Markers ; *Genetic Predisposition to Disease ; High-Throughput Nucleotide Sequencing ; Humans ; Liquid Biopsy ; Lymph Nodes/pathology ; Medical Oncology ; Membrane Proteins/genetics ; *Mutation ; Pathology, Clinical ; Proto-Oncogene Proteins B-raf/genetics ; Proto-Oncogene Proteins p21(ras)/genetics ; Receptor, trkA/genetics ; Societies, Medical ; }, abstract = {This update of the consensus of the Spanish Society of Medical Oncology (Sociedad Española de Oncología Médica - SEOM) and the Spanish Society of Pathology (Sociedad Española de Anatomía Patológica - SEAP), reviews the advances in the analysis of biomarkers in advanced colorectal cancer (CRC) as well as susceptibility markers of hereditary CRC and molecular biomarkers of localized CRC. Recently published information on the essential determination of KRAS, NRAS and BRAF mutations and the possible benefits of determining the amplification of human epidermal growth factor receptor 2 (HER2), the expression of proteins in the DNA repair pathway and the study of NTRK fusions are also evaluated. From a pathological point of view, the importance of analysing the tumour budding and poorly differentiated clusters and its prognostic value in CRC is reviewed, as well as the impact of molecular lymph node analysis on lymph node staging in CRC. 
The incorporation of pan-genomic technologies, such as next-generation sequencing (NGS) and liquid biopsy in the clinical management of patients with CRC is also outlined. All these aspects are developed in this guide which, like the previous one, will be revised when necessary in the future.}, } @article {pmid33452249, year = {2021}, author = {Rai, A and Hirakawa, H and Nakabayashi, R and Kikuchi, S and Hayashi, K and Rai, M and Tsugawa, H and Nakaya, T and Mori, T and Nagasaki, H and Fukushi, R and Kusuya, Y and Takahashi, H and Uchiyama, H and Toyoda, A and Hikosaka, S and Goto, E and Saito, K and Yamazaki, M}, title = {Chromosome-level genome assembly of Ophiorrhiza pumila reveals the evolution of camptothecin biosynthesis.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {405}, pmid = {33452249}, issn = {2041-1723}, mesh = {Biosynthetic Pathways/genetics ; Camptothecin/*biosynthesis ; Chromosomes, Plant/genetics ; Contig Mapping ; *Evolution, Molecular ; Genome, Plant/*genetics ; Genomics ; Plant Proteins/*genetics/metabolism ; Plant Roots/metabolism ; Plants, Medicinal/genetics/metabolism ; Rubiaceae/genetics/*metabolism ; Vinca Alkaloids/biosynthesis ; }, abstract = {Plant genomes remain highly fragmented and are often characterized by hundreds to thousands of assembly gaps. Here, we report chromosome-level reference and phased genome assembly of Ophiorrhiza pumila, a camptothecin-producing medicinal plant, through an ordered multi-scaffolding and experimental validation approach. With 21 assembly gaps and a contig N50 of 18.49 Mb, Ophiorrhiza genome is one of the most complete plant genomes assembled to date. We also report 273 nitrogen-containing metabolites, including diverse monoterpene indole alkaloids (MIAs). A comparative genomics approach identifies strictosidine biogenesis as the origin of MIA evolution. 
The emergence of strictosidine biosynthesis-catalyzing enzymes precede downstream enzymes' evolution post γ whole-genome triplication, which occurred approximately 110 Mya in O. pumila, and before the whole-genome duplication in Camptotheca acuminata identified here. Combining comparative genome analysis, multi-omics analysis, and metabolic gene-cluster analysis, we propose a working model for MIA evolution, and a pangenome for MIA biosynthesis, which will help in establishing a sustainable supply of camptothecin.}, } @article {pmid33446715, year = {2021}, author = {Bravakos, P and Mandalakis, M and Nomikou, P and Anastasiou, TI and Kristoffersen, JB and Stavroulaki, M and Kilias, S and Kotoulas, G and Magoulas, A and Polymenakou, PN}, title = {Genomic adaptation of Pseudomonas strains to acidity and antibiotics in hydrothermal vents at Kolumbo submarine volcano, Greece.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {1336}, pmid = {33446715}, issn = {2045-2322}, mesh = {Adaptation, Physiological/*genetics ; Drug Resistance, Multiple, Bacterial/*genetics ; *Genome, Bacterial ; Greece ; Hydrothermal Vents/*microbiology ; Pseudomonas/*genetics/isolation & purification ; Seawater/*microbiology ; }, abstract = {Although the rise of antibiotic and multidrug resistant bacteria is one of the biggest current threats to human health, our understanding of the mechanisms involved in antibiotic resistance selection remains scarce. We performed whole genome sequencing of 21 Pseudomonas strains, previously isolated from an active submarine volcano of Greece, the Kolumbo volcano. Our goal was to identify the genetic basis of the enhanced co-tolerance to antibiotics and acidity of these Pseudomonas strains. Pangenome analysis identified 10,908 Gene Clusters (GCs). 
It revealed that the numbers of phage-related GCs and sigma factors, which both provide the mechanisms of adaptation to environmental stressors, were much higher in the high tolerant Pseudomonas strains compared to the rest ones. All identified GCs of these strains were associated with antimicrobial and multidrug resistance. The present study provides strong evidence that the CO2-rich seawater of the volcano associated with low pH might be a reservoir of microorganisms carrying multidrug efflux-mediated systems and pumps. We, therefore, suggest further studies of other extreme environments (or ecosystems) and their associated physicochemical parameters (or factors) in the rise of antibiotic resistance.}, } @article {pmid33438035, year = {2021}, author = {Heaton, MP and Smith, TPL and Bickhart, DM and Vander Ley, BL and Kuehn, LA and Oppenheimer, J and Shafer, WR and Schuetze, FT and Stroud, B and McClure, JC and Barfield, JP and Blackburn, HD and Kalbfleisch, TS and Davenport, KM and Kuhn, KL and Green, RE and Shapiro, B and Rosen, BD}, title = {A Reference Genome Assembly of Simmental Cattle, Bos taurus taurus.}, journal = {The Journal of heredity}, volume = {112}, number = {2}, pages = {184-191}, pmid = {33438035}, issn = {1465-7333}, support = {T32 HG008345/HG/NHGRI NIH HHS/United States ; }, mesh = {Animals ; Bison ; Cattle/*genetics ; Chromosome Mapping ; Female ; *Genome ; Haplotypes ; Male ; }, abstract = {Genomics research has relied principally on the establishment and curation of a reference genome for the species. However, it is increasingly recognized that a single reference genome cannot fully describe the extent of genetic variation within many widely distributed species. Pangenome representations are based on high-quality genome assemblies of multiple individuals and intended to represent the broadest possible diversity within a species. 
A Bovine Pangenome Consortium (BPC) has recently been established to begin assembling genomes from more than 600 recognized breeds of cattle, together with other related species to provide information on ancestral alleles and haplotypes. Previously reported de novo genome assemblies for Angus, Brahman, Hereford, and Highland breeds of cattle are part of the initial BPC effort. The present report describes a complete single haplotype assembly at chromosome-scale for a fullblood Simmental cow from an F1 bison-cattle hybrid fetus by trio binning. Simmental cattle, also known as Fleckvieh due to their red and white spots, originated in central Europe in the 1830s as a triple-purpose breed selected for draught, meat, and dairy production. There are over 50 million Simmental cattle in the world, known today for their fast growth and beef yields. This assembly (ARS_Simm1.0) is similar in length to the other bovine assemblies at 2.86 Gb, with a scaffold N50 of 102 Mb (max scaffold 156.8 Mb) and meets or exceeds the continuity of the best Bos taurus reference assemblies to date.}, } @article {pmid33433885, year = {2021}, author = {Papathomas, TG and Suurd, DPD and Pacak, K and Tischler, AS and Vriens, MR and Lam, AK and de Krijger, RR}, title = {What Have We Learned from Molecular Biology of Paragangliomas and Pheochromocytomas?}, journal = {Endocrine pathology}, volume = {32}, number = {1}, pages = {134-153}, pmid = {33433885}, issn = {1559-0097}, mesh = {Adrenal Gland Neoplasms/*genetics/*pathology ; Genetic Predisposition to Disease ; Genomics ; Genotype ; Humans ; Paraganglioma/*genetics/*pathology ; Pheochromocytoma/*genetics/*pathology ; }, abstract = {Recent advances in molecular genetics and genomics have led to increased understanding of the aetiopathogenesis of pheochromocytomas and paragangliomas (PPGLs). 
Thus, pan-genomic studies now provide a comprehensive integrated genomic analysis of PPGLs into distinct molecularly defined subtypes concordant with tumour genotypes. In addition, new embryological discoveries have refined the concept of how normal paraganglia develop, potentially establishing a developmental basis for genotype-phenotype correlations for PPGLs. The challenge for modern pathology is to translate these scientific discoveries into routine practice, which will be based largely on histopathology for the foreseeable future. Here, we review recent progress concerning the cell of origin and molecular pathogenesis of PPGLs, including pathogenetic mechanisms, genetic susceptibility and molecular classification. The current roles and tools of pathologists are considered from a histopathological perspective, including differential diagnoses, genotype-phenotype correlations and the use of immunohistochemistry in identifying hereditary predisposition and validating genetic variants of unknown significance. Current and potential molecular prognosticators are also presented with the hope that predictive molecular biomarkers will be integrated into risk stratification scoring systems to assess the metastatic potential of these intriguing neoplasms and identify potential drug targets.}, } @article {pmid33431432, year = {2021}, author = {Hasan, NA and Norton, GJ and Virdi, R and Epperson, LE and Vang, CK and Hellbusch, B and Bai, X and Chan, ED and Strong, M and Honda, JR}, title = {Measurable genomic changes in Mycobacterium avium subsp. hominissuis after long-term adaptation in Acanthamoeba lenticulata and reduced persistence in macrophages.}, journal = {Journal of bacteriology}, volume = {203}, number = {6}, pages = {}, pmid = {33431432}, issn = {1098-5530}, abstract = {Free-living amoebae are ubiquitous in aquatic environments and act as environmental reservoirs for nontuberculous mycobacteria. Mycobacterium avium subsp. 
hominissuis recovered from Acanthamoeba has been demonstrated to be more virulent in both human and murine models. Here, we investigate the persistence of M. avium subsp. hominissuis after short-term (2 weeks) and long-term (42 weeks) co-culture in Acanthamoeba lenticulata. We hypothesize that A. lenticulata-adapted M. avium subsp. hominissuis demonstrate phenotypic and genomic changes facilitating intracellular persistence in naïve Acanthamoeba and human macrophages. M. avium subsp. hominissuis CFU in co-culture with A. lenticulata were recorded every 2 weeks up to 60 weeks. While A. lenticulata-associated M. avium subsp. hominissuis CFU did not significantly change across 60 weeks of co-culture, longer adaptation time in amoebae reduced colony size. Isolates recovered after 2 or 42 weeks of amoebae co-culture were referred as "early-adapted" and "late-adapted" M. avium subsp. hominissuis, respectively. Whole genome sequencing was performed on amoebae-adapted isolates with pan-genome comparisons to the original M. avium subsp. hominissuis isolate. Next, amoebae-adapted isolates were assessed for their persistence in A. lenticulata, A. castellanii, and human THP-1 macrophages. Multiplex cytokine/chemokine analyses were conducted on THP-1 culture supernatants. Compared to the original isolate, counts of late-adapted M. avium subsp. hominissuis were reduced in Acanthamoeba and contrary to expectations, lower counts were also observed in THP-1 macrophages with concomitant decrease in TNFa, IL-6, and MIP-1b suggesting that host adaptation may influence the inflammatory properties of M. avium. IMPORTANCE Short-term interaction between Acanthamoeba and M. avium has been demonstrated to increase infectivity in human and murine models of infection, establishing the paradigm that amoebae "train" M. avium in the environment by selecting for phenotypes capable of enduring in human cells. 
We investigate this phenomenon further by determining the consequence of long-term amoebae adaptation on M. avium subsp. hominissuis persistence in host cells. We monitored genomic changes across long-term Acanthamoeba co-culture and report significant changes to the M. avium subsp. hominissuis genome in response to amoebae-adaptation and reduced colony size. Furthermore, we examined isolates co-cultured with A. lenticulata for 2 or 42 weeks and provide biological evidence that long-term co-culture in amoebae reduces M. avium persistence in human macrophages.}, } @article {pmid33430372, year = {2021}, author = {Firrao, G and Scortichini, M and Pagliari, L}, title = {Orthology-Based Estimate of the Contribution of Horizontal Gene Transfer from Distantly Related Bacteria to the Intraspecific Diversity and Differentiation of Xylella fastidiosa.}, journal = {Pathogens (Basel, Switzerland)}, volume = {10}, number = {1}, pages = {}, pmid = {33430372}, issn = {2076-0817}, abstract = {Xylella fastidiosa is a xylem-limited bacterium phylogenetically related to the xanthomonads, with an unusually large and diversified range of plant hosts. To ascertain the origin of its peculiarities, its pan-genome was scanned to identify the genes that are not coherent with its phylogenetic position within the order Xanthomonadales. The results of the analysis revealed that a large fraction of the genes of the Xylella pan-genome have no ortholog or close paralog in the order Xanthomonadales. For a significant part of the genes, the closest homologue was found in bacteria belonging to distantly related taxonomic groups, most frequently in the Betaproteobacteria. Other species, such as Xanthomonas vasicola and Xanthomonas albilineans which were investigated for comparison, did not show a similar genetic contribution from distant branches of the prokaryotic tree of life. 
This finding indicates that the process of acquisition of DNA from the environment is still a relevant component of Xylella fastidiosa evolution. Although the ability of Xylella fastidiosa strains to recombine among themselves is well known, the results of the pan-genome analyses stressed the additional relevance of environmental DNA in shaping their genomes, with potential consequences on their phytopathological features.}, } @article {pmid33429431, year = {2021}, author = {Du, H and Diao, C and Zhao, P and Zhou, L and Liu, JF}, title = {Integrated hybrid de novo assembly technologies to obtain high-quality pig genome using short and long reads.}, journal = {Briefings in bioinformatics}, volume = {22}, number = {5}, pages = {}, doi = {10.1093/bib/bbaa399}, pmid = {33429431}, issn = {1477-4054}, mesh = {*Algorithms ; Animals ; Chromosome Mapping/*methods ; Contig Mapping/*methods ; Gene Library ; *Genome ; High-Throughput Nucleotide Sequencing ; Male ; Sequence Analysis, DNA ; Software ; Swine/*genetics ; }, abstract = {With the rapid progress of sequencing technologies, various types of sequencing reads and assembly algorithms have been designed to construct genome assemblies. Although recent studies have attempted to evaluate the appropriate type of sequencing reads and algorithms for assembling high-quality genomes, it is still a challenge to set the correct combination for constructing animal genomes. Here, we present a comparative performance assessment of 14 assembly combinations-9 software programs with different short and long reads of Duroc pig. Based on the results of the optimization process for genome construction, we designed an integrated hybrid de novo assembly pipeline, HSCG, and constructed a draft genome for Duroc pig. Comparison between the new genome and Sus scrofa 11.1 revealed important breakpoints in two S. scrofa 11.1 genes. 
Our findings may provide new insights into the pan-genome analysis studies of agricultural animals, and the integrated assembly pipeline may serve as a guide for the assembly of other animal genomes.}, } @article {pmid33428861, year = {2021}, author = {Harrison, F and Smyth, AR}, title = {Professor Pangloss and the Pangenome: Does Staphylococcus aureus Have the Best of All Possible Worlds?.}, journal = {American journal of respiratory and critical care medicine}, volume = {203}, number = {9}, pages = {1055-1057}, pmid = {33428861}, issn = {1535-4970}, support = {MR/R001898/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Genomics ; Humans ; *Staphylococcal Infections ; *Staphylococcus aureus/genetics ; }, } @article {pmid33423895, year = {2021}, author = {Domingo-Sananes, MR and McInerney, JO}, title = {Mechanisms That Shape Microbial Pangenomes.}, journal = {Trends in microbiology}, volume = {29}, number = {6}, pages = {493-503}, doi = {10.1016/j.tim.2020.12.004}, pmid = {33423895}, issn = {1878-4380}, mesh = {Bacteria/*genetics ; *Evolution, Molecular ; *Genetic Fitness ; *Genome, Bacterial ; Phylogeny ; }, abstract = {Analyses of multiple whole-genome sequences from the same species have revealed that differences in gene content can be substantial, particularly in prokaryotes. Such variation has led to the recognition of pangenomes, the complete set of genes present in a species - consisting of core genes, present in all individuals, and accessory genes whose presence is variable. Questions now arise about how pangenomes originate and evolve. We describe how gene content variation can arise as a result of the combination of several processes, including random drift, selection, gain/loss balance, and the influence of ecological and epistatic interactions. 
We believe that identifying the contributions of these processes to pangenomes will need novel theoretical approaches and empirical data.}, } @article {pmid33421537, year = {2021}, author = {Wang, M and Ruan, R and Li, H}, title = {The completed genome sequence of the pathogenic ascomycete fungus Penicillium digitatum.}, journal = {Genomics}, volume = {113}, number = {2}, pages = {439-446}, doi = {10.1016/j.ygeno.2021.01.001}, pmid = {33421537}, issn = {1089-8646}, mesh = {Conserved Sequence ; Fungal Proteins/genetics ; *Genome, Fungal ; Molecular Sequence Annotation ; Penicillium/*genetics ; Secondary Metabolism/genetics ; Whole Genome Sequencing ; }, abstract = {P. digitatum, the causative agent of green mold, is one of the most destructive pathogens in the citrus industry. To facilitate basal researches on this important plant pathogen, here we report a finished genome sequence for P. digitatum strain PDW03 using a combination of Illumina, PacBio, and Hi-C sequencing technologies. The assembly comprised 6 chromosomes from telomere to telomere and encodes approximately 9000 proteins. Genomic re-analyses identified 302 Carbohydrate-active enzymes, 420 secreted proteins, and 39 secondary metabolite (SM) gene clusters. Furthermore, we found 10 fragmentary SM clusters in the P. digitatum PDW03 genome. Pangenome analysis based on 5 P. digitatum genomes available showed that conserved orthogroups account for ~68% of the species pangenome. Taken together, this fully completed P. 
digitatum genome will provide an optimum resource for further researches to investigate the driving forces of fungal host switch and effectors functioning in plant-pathogen interaction.}, } @article {pmid33419343, year = {2020}, author = {Higdon, SM and Huang, BC and Bennett, AB and Weimer, BC}, title = {Identification of Nitrogen Fixation Genes in Lactococcus Isolated from Maize Using Population Genomics and Machine Learning.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, pmid = {33419343}, issn = {2076-2607}, support = {2019-67013-29724//united states department of agriculture - NIFA/ ; }, abstract = {Sierra Mixe maize is a landrace variety from Oaxaca, Mexico, that utilizes nitrogen derived from the atmosphere via an undefined nitrogen fixation mechanism. The diazotrophic microbiota associated with the plant's mucilaginous aerial root exudate composed of complex carbohydrates was previously identified and characterized by our group where we found 23 lactococci capable of biological nitrogen fixation (BNF) without containing any of the proposed essential genes for this trait (nifHDKENB). To determine the genes in Lactococcus associated with this phenotype, we selected 70 lactococci from the dairy industry that are not known to be diazotrophic to conduct a comparative population genomic analysis. This showed that the diazotrophic lactococcal genomes were distinctly different from the dairy isolates. Examining the pangenome followed by genome-wide association study and machine learning identified genes with the functions needed for BNF in the maize isolates that were absent from the dairy isolates. Many of the putative genes received an 'unknown' annotation, which led to the domain analysis of the 135 homologs. This revealed genes with molecular functions needed for BNF, including mucilage carbohydrate catabolism, glycan-mediated host adhesion, iron/siderophore utilization, and oxidation/reduction control. 
This is the first report of this pathway in this organism to underpin BNF. Consequently, we proposed a model needed for BNF in lactococci that plausibly accounts for BNF in the absence of the nif operon in this organism.}, } @article {pmid33417534, year = {2021}, author = {Horesh, G and Blackwell, GA and Tonkin-Hill, G and Corander, J and Heinz, E and Thomson, NR}, title = {A comprehensive and high-quality collection of Escherichia coli genomes and their genes.}, journal = {Microbial genomics}, volume = {7}, number = {2}, pages = {}, pmid = {33417534}, issn = {2057-5858}, support = {206194/WT_/Wellcome Trust/United Kingdom ; 204016/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Access to Information ; Computational Biology/methods ; Data Curation ; *Databases, Genetic ; Escherichia coli/classification/*genetics ; Escherichia coli Proteins/*genetics ; Gene Flow ; Genome, Bacterial ; Shigella/classification/*genetics ; }, abstract = {Escherichia coli is a highly diverse organism that includes a range of commensal and pathogenic variants found across a range of niches and worldwide. In addition to causing severe intestinal and extraintestinal disease, E. coli is considered a priority pathogen due to high levels of observed drug resistance. The diversity in the E. coli population is driven by high genome plasticity and a very large gene pool. All these have made E. coli one of the most well-studied organisms, as well as a commonly used laboratory strain. Today, there are thousands of sequenced E. coli genomes stored in public databases. While data is widely available, accessing the information in order to perform analyses can still be a challenge. Collecting relevant available data requires accessing different sources, where data may be stored in a range of formats, and often requires further manipulation and processing to apply various analyses and extract useful information. In this study, we collated and intensely curated a collection of over 10 000 E. 
coli and Shigella genomes to provide a single, uniform, high-quality dataset. Shigella were included as they are considered specialized pathovars of E. coli. We provide these data in a number of easily accessible formats that can be used as the foundation for future studies addressing the biological differences between E. coli lineages and the distribution and flow of genes in the E. coli population at a high resolution. The analysis we present emphasizes our lack of understanding of the true diversity of the E. coli species, and the biased nature of our current understanding of the genetic diversity of such a key pathogen.}, } @article {pmid33413118, year = {2021}, author = {Nzoyikorera, N and Diawara, I and Fresia, P and Maaloum, F and Katfy, K and Nayme, K and Maaloum, M and Cornick, J and Chaguza, C and Timinouni, M and Belabess, H and Zerouali, K and Elmdaghri, N}, title = {Whole genomic comparative analysis of Streptococcus pneumoniae serotype 1 isolates causing invasive and non-invasive infections among children under 5 years in Casablanca, Morocco.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {39}, pmid = {33413118}, issn = {1471-2164}, support = {MR/R003076/1/MRC_/Medical Research Council/United Kingdom ; OPP1023440//Bill and Melinda Gates Foundation/ ; }, mesh = {Bayes Theorem ; Child ; Child, Preschool ; Genomics ; Humans ; Morocco/epidemiology ; Phylogeny ; *Pneumococcal Infections/epidemiology ; Pneumococcal Vaccines ; Serogroup ; Serotyping ; *Streptococcus pneumoniae/genetics ; }, abstract = {BACKGROUND: Streptococcus pneumoniae serotype 1 remains a leading cause of invasive pneumococcal diseases, even in countries with PCV-10/PCV-13 vaccine implementation. 
The main objective of this study, which is part of the Pneumococcal African Genome project (PAGe), was to determine the phylogenetic relationships of serotype 1 isolates recovered from paediatric patients in Casablanca (Morocco), compared to those from other African countries, and to investigate the contribution of accessory genes and recombination events to the genetic diversity of this serotype.

RESULTS: The average genome size of the six pneumococcal serotype 1 isolates from Casablanca was 2,227,119 bp, and the average content of coding sequences was 2113, ranging from 2041 to 2161. Pangenome analysis of the 80 genomes used in this study revealed 1685 core genes and 1805 accessory genes. The phylogenetic tree based on core genes and the hierarchical Bayesian clustering analysis revealed five sublineages with a phylogeographic structure by country. The Moroccan strains cluster in two different lineages: the five invasive strains cluster together in a divergent clade distantly related to the non-invasive strain, which clusters with all the serotype 1 genomes from Africa.

CONCLUSIONS: The whole genome sequencing provides increased resolution analysis of the highly virulent serotype 1 in Casablanca, Morocco. Our results are concordant with previous works, showing that the phylogeography of S. pneumoniae serotype 1 is structured by country, and despite the small size (six isolates) of the Moroccan sample, our analysis shows the genetic cohesion of the Moroccan invasive isolates.}, } @article {pmid33401455, year = {2021}, author = {Badhan, S and Ball, AS and Mantri, N}, title = {First Report of CRISPR/Cas9 Mediated DNA-Free Editing of 4CL and RVE7 Genes in Chickpea Protoplasts.}, journal = {International journal of molecular sciences}, volume = {22}, number = {1}, pages = {}, pmid = {33401455}, issn = {1422-0067}, mesh = {*CRISPR-Associated Protein 9 ; Cicer/enzymology/*genetics/metabolism/physiology ; Coenzyme A Ligases/*genetics/metabolism/physiology ; Droughts ; Gene Editing/*methods ; Gene Knockout Techniques ; Lignin/biosynthesis ; Plant Proteins/genetics ; Plants, Genetically Modified ; *Stress, Physiological ; Transcription Factors/*genetics/metabolism/physiology ; }, abstract = {The current genome editing system Clustered Regularly Interspaced Short Palindromic Repeats Cas9 (CRISPR/Cas9) has already confirmed its proficiency, adaptability, and simplicity in several plant-based applications. Together with the availability of a vast amount of genome data and transcriptome data, CRISPR/Cas9 presents a massive opportunity for plant breeders and researchers. The successful delivery of ribonucleoproteins (RNPs), which are composed of Cas9 enzyme and a synthetically designed single guide RNA (sgRNA) and are used in combination with various transformation methods or lately available novel nanoparticle-based delivery approaches, allows targeted mutagenesis in plants species. Even though this editing technique is limitless, it has still not been employed in many plant species to date. 
Chickpea is the second most crucial winter grain crop cultivated worldwide; there are currently no reports on CRISPR/Cas9 gene editing in chickpea. Here, we selected the 4-coumarate ligase (4CL) and Reveille 7 (RVE7) genes, both associated with drought tolerance for CRISPR/Cas9 editing in chickpea protoplast. The 4CL represents a key enzyme involved in phenylpropanoid metabolism in the lignin biosynthesis pathway. It regulates the accumulation of lignin under stress conditions in several plants. The RVE7 is a MYB transcription factor which is part of regulating circadian rhythm in plants. The knockout of these selected genes in the chickpea protoplast using DNA-free CRISPR/Cas9 editing represents a novel approach for achieving targeted mutagenesis in chickpea. Results showed high-efficiency editing was achieved for RVE7 gene in vivo compared to the 4CL gene. This study will help unravel the role of these genes under drought stress and understand the complex drought stress mechanism pathways. 
This is the first study in chickpea protoplast utilizing CRISPR/Cas9 DNA free gene editing of drought tolerance associated genes.}, } @article {pmid33397904, year = {2021}, author = {Yahara, K and Suzuki, M and Hirabayashi, A and Suda, W and Hattori, M and Suzuki, Y and Okazaki, Y}, title = {Long-read metagenomics using PromethION uncovers oral bacteriophages and their interaction with host bacteria.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {27}, pmid = {33397904}, issn = {2041-1723}, mesh = {Bacteria/*virology ; Bacteriophages/*genetics ; Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; DNA Contamination ; DNA, Viral/genetics ; Drug Resistance, Microbial/genetics ; Genes, Viral ; Genome, Bacterial ; *High-Throughput Nucleotide Sequencing ; Host-Pathogen Interactions/*genetics ; Humans ; Integrases/genetics ; Metagenome ; *Metagenomics ; Mouth/*microbiology/*virology ; Prophages/genetics ; Proteomics ; Streptococcus/virology ; }, abstract = {Bacteriophages (phages), or bacterial viruses, are very diverse and highly abundant worldwide, including as a part of the human microbiomes. Although a few metagenomic studies have focused on oral phages, they relied on short-read sequencing. Here, we conduct a long-read metagenomic study of human saliva using PromethION. Our analyses, which integrate both PromethION and HiSeq data of >30 Gb per sample with low human DNA contamination, identify hundreds of viral contigs; 0-43.8% and 12.5-56.3% of the confidently predicted phages and prophages, respectively, do not cluster with those reported previously. Our analyses demonstrate enhanced scaffolding, and the ability to place a prophage in its host genomic context and enable its taxonomic classification. Our analyses also identify a Streptococcus phage/prophage group and nine jumbo phages/prophages. 86% of the phage/prophage group and 67% of the jumbo phages/prophages contain remote homologs of antimicrobial resistance genes. 
Pan-genome analysis of the phages/prophages reveals remarkable diversity, identifying 0.3% and 86.4% of the genes as core and singletons, respectively. Furthermore, our study suggests that oral phages present in human saliva are under selective pressure to escape CRISPR immunity. Our study demonstrates the power of long-read metagenomics utilizing PromethION in uncovering bacteriophages and their interaction with host bacteria.}, } @article {pmid33397434, year = {2021}, author = {Della Coletta, R and Qiu, Y and Ou, S and Hufford, MB and Hirsch, CN}, title = {How the pan-genome is changing crop genomics and improvement.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {3}, pmid = {33397434}, issn = {1474-760X}, mesh = {Computational Biology ; Crops, Agricultural/*genetics ; DNA Transposable Elements ; Genetic Variation ; *Genome, Plant ; Genomics/*methods ; }, abstract = {Crop genomics has seen dramatic advances in recent years due to improvements in sequencing technology, assembly methods, and computational resources. These advances have led to the development of new tools to facilitate crop improvement. The study of structural variation within species and the characterization of the pan-genome has revealed extensive genome content variation among individuals within a species that is paradigm shifting to crop genomics and improvement. 
Here, we review advances in crop genomics and how utilization of these tools is shifting in light of pan-genomes that are becoming available for many crop species.}, } @article {pmid33396617, year = {2020}, author = {Fontana, F and Alessandri, G and Lugli, GA and Mancabelli, L and Longhi, G and Anzalone, R and Viappiani, A and Ventura, M and Turroni, F and Milani, C}, title = {Probiogenomics Analysis of 97 Lactobacillus crispatus Strains as a Tool for the Identification of Promising Next-Generation Probiotics.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, pmid = {33396617}, issn = {2076-2607}, support = {-//Fondazione Cariparma/ ; }, abstract = {Members of the genus Lactobacillus represent the most common colonizers of the human vagina and are well-known for preserving vaginal health and contrasting the colonization of opportunistic pathogens. Remarkably, high abundance of Lactobacillus crispatus in the vaginal environment has been linked to vaginal health, leading to the widespread use of many L. crispatus strains as probiotics. Nevertheless, despite the scientific and industrial relevance of this species, a comprehensive investigation of the genomics of L. crispatus taxon is still missing. For this reason, we have performed a comparative genomics analysis of 97 L. crispatus strains, encompassing 16 strains sequenced in the framework of this study alongside 81 additional publicly available genome sequences, thus allowing the dissection of the L. crispatus pan-genome and core-genome, followed by a comprehensive phylogenetic analysis based on the predicted core genes that revealed clustering based on ecological origin. Subsequently, a genomics-targeted approach, i.e., probiogenomics analysis, was applied for in-depth analysis of the eight L. crispatus strains of human origin sequenced in this study. 
In detail their genetic repertoire was screened for strain-specific genes responsible for phenotypic features that may guide the identification of optimal candidates for next-generation probiotics. The latter includes bacteriocin production, carbohydrates transport and metabolism, as well as a range of features that may be responsible for improved ecological fitness. In silico results regarding the genetic repertoire involved in carbohydrate metabolism were also validated by growth assays on a range of sugars, leading to the selection of putative novel probiotic strains.}, } @article {pmid33396198, year = {2020}, author = {Cechova, M}, title = {Probably Correct: Rescuing Repeats with Short and Long Reads.}, journal = {Genes}, volume = {12}, number = {1}, pages = {}, pmid = {33396198}, issn = {2073-4425}, mesh = {Centromere/chemistry ; Chromosome Mapping/*methods ; Computational Biology/methods ; DNA Methylation ; Genome Size ; *Genome, Human ; Humans ; *Microsatellite Repeats ; Sex Chromosomes/*chemistry ; Telomere/chemistry ; }, abstract = {Ever since the introduction of high-throughput sequencing following the human genome project, assembling short reads into a reference of sufficient quality posed a significant problem as a large portion of the human genome-estimated 50-69%-is repetitive. As a result, a sizable proportion of sequencing reads is multi-mapping, i.e., without a unique placement in the genome. The two key parameters for whether or not a read is multi-mapping are the read length and genome complexity. Long reads are now able to span difficult, heterochromatic regions, including full centromeres, and characterize chromosomes from "telomere to telomere". Moreover, identical reads or repeat arrays can be differentiated based on their epigenetic marks, such as methylation patterns, aiding in the assembly process. 
This is despite the fact that long reads still contain a modest percentage of sequencing errors, disorienting the aligners and assemblers both in accuracy and speed. Here, I review the proposed and implemented solutions to the repeat resolution and the multi-mapping read problem, as well as the downstream consequences of reference choice, repeat masking, and proper representation of sex chromosomes. I also consider the forthcoming challenges and solutions with regards to long reads, where we expect the shift from the problem of repeat localization within a single individual to the problem of repeat positioning within pangenomes.}, } @article {pmid33383865, year = {2020}, author = {Wibberg, D and Price-Carter, M and Rückert, C and Blom, J and Möbius, P}, title = {Complete Genome Sequence of Ovine Mycobacterium avium subsp. paratuberculosis Strain JIII-386 (MAP-S/type III) and Its Comparison to MAP-S/type I, MAP-C, and M. avium Complex Genomes.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, pmid = {33383865}, issn = {2076-2607}, abstract = {Mycobacterium avium (M. a.) subsp. paratuberculosis (MAP) is a worldwide-distributed obligate pathogen in ruminants causing Johne's disease. Due to a lack of complete subtype III genome sequences, there is not yet conclusive information about genetic differences between strains of cattle (MAP-C, type II) and sheep (MAP-S) type, and especially between MAP-S subtypes I, and III. Here we present the complete, circular genome of MAP-S/type III strain JIII-386 (DE) closed by Nanopore-technology and its comparison with MAP-S/type I closed genome of strain Telford (AUS), MAP-S/type III draft genome of strain S397 (U.S.), twelve closed MAP-C strains, and eight closed M.-a.-complex-strains. Structural comparative alignments revealed clearly the mosaic nature of MAP, emphasized differences between the subtypes and the higher diversity of MAP-S genomes. 
The comparison of various genomic elements including transposases and genomic islands provide new insights in MAP genomics. MAP type specific phenotypic features may be attributed to genes of known large sequence polymorphisms (LSP[S]s) regions I-IV and deletions #1 and #2, confirmed here, but could also result from identified frameshifts or interruptions of various virulence-associated genes (e.g., mbtC in MAP-S). Comprehensive core and pan genome analysis uncovered unique genes (e.g., cytochromes) and genes probably acquired by horizontal gene transfer in different MAP-types and subtypes, but also emphasized the highly conserved and close relationship, and the complex evolution of M.-a.-strains.}, } @article {pmid33383801, year = {2020}, author = {Yang, SM and Baek, J and Kim, E and Kim, HB and Ko, S and Kim, D and Yoon, H and Kim, HY}, title = {Development of a Genoserotyping Method for Salmonella Infantis Detection on the Basis of Pangenome Analysis.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, pmid = {33383801}, issn = {2076-2607}, support = {19162MFDS042//Ministry of Food and Drug Safety/ ; }, abstract = {In recent years, Salmonella Infantis has become a predominant serovariant in clinical and poultry isolates, thereby imposing a substantial economic burden on both public health and the livestock industry. With the aim of coping with the steep increase in serovar Infantis prevalence, a polymerase chain reaction (PCR)-based rapid and accurate diagnostic assay was developed in this study through pangenome profiling of 60 Salmonella serovars. A gene marker, SIN_02055, was identified, which is present in the S. Infantis genome but not in the pangenome of the other serovars. Primers specific to SIN_02055 were used to accurately detect serovar Infantis, and to successfully differentiate Infantis from the other 59 serovars in real-time PCR with a R[2] of 0.999 and an efficiency of 95.76%. 
The developed method was applied to 54 Salmonella strains belonging to eight dominant serovars, and distinguished Infantis from the other seven serovars with an accuracy of 100%. The diagnostic primer set also did not show false positive amplification with 32 strains from eight non-Salmonella bacterial species. This cost-effective and rapid method can be considered an alternative to the classic serotyping using antisera.}, } @article {pmid33381850, year = {2020}, author = {Bazin, A and Gautreau, G and Médigue, C and Vallenet, D and Calteau, A}, title = {panRGP: a pangenome-based method to predict genomic islands and explore their diversity.}, journal = {Bioinformatics (Oxford, England)}, volume = {36}, number = {Suppl_2}, pages = {i651-i658}, doi = {10.1093/bioinformatics/btaa792}, pmid = {33381850}, issn = {1367-4811}, mesh = {Gene Transfer, Horizontal ; *Genomic Islands/genetics ; Genomics ; Metagenome ; *Software ; }, abstract = {MOTIVATION: Horizontal gene transfer (HGT) is a major source of variability in prokaryotic genomes. Regions of genome plasticity (RGPs) are clusters of genes located in highly variable genomic regions. Most of them arise from HGT and correspond to genomic islands (GIs). The study of those regions at the species level has become increasingly difficult with the data deluge of genomes. To date, no methods are available to identify GIs using hundreds of genomes to explore their diversity.

RESULTS: We present here the panRGP method that predicts RGPs using pangenome graphs made of all available genomes for a given species. It allows the study of thousands of genomes in order to access the diversity of RGPs and to predict spots of insertions. It gave the best predictions when benchmarked alongside other GI detection tools against a reference dataset. In addition, we illustrated its use on metagenome assembled genomes by redefining the borders of the leuX tRNA hotspot, a well-studied spot of insertion in Escherichia coli. panRGP is a scalable and reliable tool to predict GIs and spots, making it an ideal approach for large comparative studies.

The methods presented in the current work are available through the following software: https://github.com/labgem/PPanGGOLiN. Detailed results and scripts to compute the benchmark metrics are available at https://github.com/axbazin/panrgp_supdata.}, } @article {pmid33375492, year = {2020}, author = {Surachat, K and Kantachote, D and Deachamag, P and Wonglapsuwan, M}, title = {Genomic Insight into Pediococcus acidilactici HN9, a Potential Probiotic Strain Isolated from the Traditional Thai-Style Fermented Beef Nhang.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, pmid = {33375492}, issn = {2076-2607}, abstract = {Pediococcus acidilactici HN9 is a beneficial lactic acid bacterium isolated from Nhang, a traditional Thai-style fermented beef. In this study, the molecular properties of P. acidilactici HN9 were characterized to provide insights into its potential probiotic activity. Specifically, this work sought to report the complete genome of P. acidilactici HN9 and perform a comparative genome analysis with other bacterial strains belonging to the genus Pediococcus. Genomic features of HN9 were compared with those of all other bacterial Pediococcus strains to examine the adaptation, evolutionary relationships, and diversity within this genus. Additionally, several bioinformatic approaches were used to investigate phylogenetic relationships, genome stability, virulence factors, bacteriocin production, and antimicrobial resistance genes of the HN9 strain, as well as to ensure its safety as a potential starter culture in food applications. A 2,034,522 bp circular chromosome and two circular plasmids, designated pHN9-1 (42,239-bp) and pHN9-2 (30,711-bp), were detected, and used for pan-genome analysis, as well as for identification of bacteriocin-encoding genes in 129 strains belonging to all Pediococcus species. Two CRISPR regions were identified in P. acidilactici HN9, including type II-A CRISPR/CRISPR-associated (Cas). 
This study provides an in-depth analysis on P. acidilactici HN9, facilitating a better understanding of its adaptability to different environments and its mechanism to maintain genome stability over time.}, } @article {pmid33371442, year = {2020}, author = {Blesa, A and Baquedano, I and González-de la Fuente, S and Mencía, M and Berenguer, J}, title = {Integrative and Conjugative Element ICETh1 Functions as a Pangenomic DNA Capture Module in Thermus thermophilus.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, pmid = {33371442}, issn = {2076-2607}, support = {PID2019-109073RB-I00//Spanish Ministry of Science and Innovation/ ; BIO2016-77031-R//Spanish Ministry of Science and Innovation/ ; }, abstract = {Transjugation is an unconventional conjugation mechanism in Thermus thermophilus (Tth) that involves the active participation of both mating partners, encompassing a DNA secretion system (DSS) in the donor and an active natural competence apparatus (NCA) in the recipient cells. DSS is encoded within an integrative and conjugative element (ICETh1) in the strain Tth HB27, whereas the NCA is constitutively expressed in both mates. Previous experiments suggested the presence of multiple origins of transfer along the genome, which could generate genomic mosaicity among the progeny. Here, we designed transjugation experiments between two closely related strains of Tth with highly syntenic genomes, containing enough single nucleotide polymorphisms to allow precise parenthood analysis. Individual clones from the progeny were sequenced, revealing their origin as derivatives of our ICETh1-containing intended "donor" strain (HB27), which had acquired separate fragments from the genome of the ICETh1-free HB8 cells, which are our intended recipient. Due to the bidirectional nature of transjugation, only assays employing competence-defective HB27 derivatives as donors allowed the recovery of HB8-derived progeny. 
These results show a preference for a retrotransfer mechanism in transjugation in ICETh1-bearing strains, supporting an inter-strain gene-capture function for ICETh1. This function could benefit the donor-capable host by facilitating the acquisition of adaptive traits from external sources, ultimately increasing the open pangenome of Thermus, maximizing the potential repertoire of physiological and phenotypical traits related to adaptation and speciation.}, } @article {pmid33365333, year = {2020}, author = {Alawneh, JI and Vezina, B and Ramay, HR and Al-Harbi, H and James, AS and Soust, M and Moore, RJ and Olchowy, TWJ}, title = {Survey and Sequence Characterization of Bovine Mastitis-Associated Escherichia coli in Dairy Herds.}, journal = {Frontiers in veterinary science}, volume = {7}, number = {}, pages = {582297}, pmid = {33365333}, issn = {2297-1769}, abstract = {Escherichia coli is frequently associated with mastitis in cattle. "Pathogenic" and "commensal" isolates appear to be genetically similar. With a few exceptions, no notable genotypic differences have been found between commensal and mastitis-associated E. coli. In this study, 24 E. coli strains were isolated from dairy cows with clinical mastitis in three geographic regions of Australia (North Queensland, South Queensland, and Victoria), sequenced, then genomically surveyed. There was no observed relationship between sequence type (ST) and region (p = 0.51). The most common Multi Locus Sequence Type was ST10 (38%), then ST4429 (13%). Pangenomic analysis revealed a soft-core genome of 3,463 genes, including genes associated with antibiotic resistance, chemotaxis, motility, adhesion, biofilm formation, and pili. A total of 36 different plasmids were identified and generally found to have local distributions (p = 0.02). 
Only 2 plasmids contained antibiotic resistance genes, a p1303_5-like plasmid encoding multidrug-resistance (trimethoprim, quaternary ammonium, beta-lactam, streptomycin, sulfonamide, and kanamycin) from two North Queensland isolates on the same farm, while three Victorian isolates from the same farm contained a pCFSAN004177P_01-like plasmid encoding tetracycline-resistance. This pattern is consistent with a local spread of antibiotic resistance through plasmids of bovine mastitis cases. Notably, co-occurrence of plasmids containing virulence factors/antibiotic resistance with putative mobilization was rare, though the multidrug resistant p1303_5-like plasmid was predicted to be conjugative and is of some concern. This survey has provided greater understanding of antibiotic resistance within E. coli-associated bovine mastitis which will allow greater prediction and improved decision making in disease management.}, } @article {pmid33362726, year = {2020}, author = {Verma, DK and Chaudhary, C and Singh, L and Sidhu, C and Siddhardha, B and Prasad, SE and Thakur, KG}, title = {Isolation and Taxonomic Characterization of Novel Haloarchaeal Isolates From Indian Solar Saltern: A Brief Review on Distribution of Bacteriorhodopsins and V-Type ATPases in Haloarchaea.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {554927}, pmid = {33362726}, issn = {1664-302X}, abstract = {Haloarchaea inhabit high salinity environments worldwide. They are a potentially rich source of crucial biomolecules like carotenoids and industrially useful proteins. However, diversity in haloarchaea present in Indian high salinity environments is poorly studied. In the present study, we isolated 12 haloarchaeal strains from hypersaline Kottakuppam, Tamil Nadu solar saltern in India. 16S rRNA based taxonomic characterization of these isolates suggested that nine of them are novel strains that belong to genera Haloarcula, Halomicrobium, and Haloferax. 
Transmission electron microscopy suggests the polymorphic nature of these haloarchaeal isolates. Most of the haloarchaeal species are known to be high producers of carotenoids. We were able to isolate carotenoids from all these 12 isolates. The UV-Vis spectroscopy-based analysis suggests that bacterioruberin and lycopene are the major carotenoids produced by these isolates. Based on the visual inspection of the purified carotenoids, the isolates were classified into two broad categories i.e., yellow and orange, attributed to the differences in the ratio of bacterioruberin and lycopene as confirmed by the UV-Vis spectral analysis. Using a PCR-based screening assay, we were able to detect the presence of the bacteriorhodopsin gene (bop) in 11 isolates. We performed whole-genome sequencing for three bop positive and one bop negative haloarchaeal isolates. Whole-genome sequencing, followed by pan-genome analysis identified multiple unique genes involved in various biological functions. We also successfully cloned, expressed, and purified functional recombinant bacteriorhodopsin (BR) from one of the isolates using Escherichia coli as an expression host. BR has light-driven proton pumping activity resulting in the proton gradient across the membrane, which is utilized by V-Type ATPases to produce ATP. We analyzed the distribution of bop and other accessory genes involved in functional BR expression and ATP synthesis in all the representative haloarchaeal species. Our bioinformatics-based analysis of all the sequenced members of genus Haloarcula suggests that bop, if present, is usually inserted between the genes coding for B and D subunits of the V-type ATPases operon. This study provides new insights into the genomic variations in haloarchaea and reports expression of new BR variant having good expression in functional form in E. 
coli.}, } @article {pmid33360877, year = {2021}, author = {Li, F and Ye, Q and Chen, M and Zhou, B and Xiang, X and Wang, C and Shang, Y and Zhang, J and Pang, R and Wang, J and Xue, L and Cai, S and Ding, Y and Wu, Q}, title = {Mining of novel target genes through pan-genome analysis for multiplex PCR differentiation of the major Listeria monocytogenes serotypes.}, journal = {International journal of food microbiology}, volume = {339}, number = {}, pages = {109026}, doi = {10.1016/j.ijfoodmicro.2020.109026}, pmid = {33360877}, issn = {1879-3460}, mesh = {DNA Primers/genetics ; Food Microbiology ; Genome, Bacterial/genetics ; Listeria monocytogenes/*genetics ; Listeriosis/microbiology ; *Multiplex Polymerase Chain Reaction ; Serogroup ; Serotyping/*methods ; }, abstract = {The abundant information provided by the pan-genome analysis approach reveals the diversity among Listeria monocytogenes serotypes. The objective of this study was to mine novel target genes using pan-genome analysis for multiplex PCR detection and differentiation of the major L. monocytogenes serotypes present in food. Pan-genome analysis and PCR validation revealed a total of 10 specific targets: one for lineage I, two for serogroup I.1, one for serogroup I.2, two for lineage II, one for serogroup II.1, three for lineage III. Primers for the novel targets were highly specific in individual reactions. The detection limits were 10[3]-10[4] colony-forming units (CFU)/mL in pure bacterial cultures, meeting the requirements of molecular detection. Based on these novel targets, two new "lineage" multiplex PCR assays were developed to simultaneously distinguish between three lineages (I, II, and III) and five major serotypes (1/2a, 1/2b, 1/2c, 4b, and 4c) of L. monocytogenes. The detection limits of lineage I and lineage II&III mPCRs were 0.771 pg/μL and 1.76 pg/μL genomic DNA, respectively. The specificity of the mPCRs was robustly verified using other L. monocytogenes and non-L. 
monocytogenes serotypes. These results suggest that the two "lineage" multiplex PCRs based on novel targets offer a promising approach for accurate, sensitive, and rapid identification of L. monocytogenes serotypes.}, } @article {pmid33360414, year = {2021}, author = {Wu, D and Zhang, XJ and Liu, HC and Zhou, YG and Wu, XL and Nie, Y and Kang, YQ and Cai, M}, title = {Azospirillum oleiclasticum sp. nov, a nitrogen-fixing and heavy oil degrading bacterium isolated from an oil production mixture of Yumen Oilfield.}, journal = {Systematic and applied microbiology}, volume = {44}, number = {1}, pages = {126171}, doi = {10.1016/j.syapm.2020.126171}, pmid = {33360414}, issn = {1618-0984}, mesh = {Azospirillum/*classification/isolation & purification ; Bacterial Typing Techniques ; China ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; *Nitrogen Fixation ; Nucleic Acid Hybridization ; Oil and Gas Fields/*microbiology ; Petroleum/*metabolism ; Phospholipids/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Ubiquinone/analogs & derivatives/chemistry ; }, abstract = {Two nitrogen-fixing and heavy oil degrading strains, designated RWY-5-1-1[T] and ROY-1-1-2, were isolated from an oil production mixture from Yumen Oilfield in China. The 16S rRNA gene sequence showed they belong to Azospirillum and have less than 96.1 % pairwise similarity with each species in this genus. The average nucleotide identity and digital DNA-DNA hybridization values between them and other type strains of Azospirillum species were less than 75.69 % and 22.0 %, respectively, both below the species delineation threshold. Pan-genomic analysis showed that the novel isolate RWY-5-1-1[T] shared 2145 core gene families with other type strains in Azospirillum, and the number of strain-specific gene families was 1623, almost two times more than the number known from other species. 
Furthermore, genes related to nitrogenase, hydrocarbon degradation and biosurfactant production were found in the isolates' genomes. Also, this strain was capable of reducing acetylene to ethylene at a rate of 22 nmol ethylene h[-1] (10[8] cells) and degrading heavy oil at a rate of 36.2 %. The major fatty acids and polar lipids were summed feature 8 (C18:1ω7c/C18:1ω6c), and phosphatidylethanolamine, diphosphatidylglycerol, phosphatidylglycerol, and phosphatidylcholine. Furthermore, a combination of phenotypic, chemotaxonomic, phylogenetic and genotypic data clearly indicated that strains RWY-5-1-1[T] and ROY-1-1-2 represent a novel species, for which the name Azospirillum oleiclasticum sp. nov. is proposed. The type strain is RWY-5-1-1[T] (=CGMCC 1.13426[T] =KCTC 72259[T]). Azospirillum novel strains with the ability of heavy oil degradation associated with the promotion of plant growth have never been reported to date.}, } @article {pmid33360413, year = {2021}, author = {Lassalle, F and Dastgheib, SMM and Zhao, FJ and Zhang, J and Verbarg, S and Frühling, A and Brinkmann, H and Osborne, TH and Sikorski, J and Balloux, F and Didelot, X and Santini, JM and Petersen, J}, title = {Phylogenomics reveals the basis of adaptation of Pseudorhizobium species to extreme environments and supports a taxonomic revision of the genus.}, journal = {Systematic and applied microbiology}, volume = {44}, number = {1}, pages = {126165}, doi = {10.1016/j.syapm.2020.126165}, pmid = {33360413}, issn = {1618-0984}, support = {MR/N010760/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Bacterial Proteins/genetics ; Bacterial Typing Techniques ; DNA, Bacterial/genetics ; *Extreme Environments ; Fatty Acids/chemistry ; Genome, Bacterial ; Nucleic Acid Hybridization ; *Phylogeny ; Rhizobiaceae/*classification ; Rhizobium ; Sequence Analysis, DNA ; }, abstract = {The family Rhizobiaceae includes many genera of soil bacteria, often isolated for their association with plants. 
Herein, we investigate the genomic diversity of a group of Rhizobium species and unclassified strains isolated from atypical environments, including seawater, rock matrix or polluted soil. Based on whole-genome similarity and core genome phylogeny, we show that this group corresponds to the genus Pseudorhizobium. We thus reclassify Rhizobium halotolerans, R. marinum, R. flavum and R. endolithicum as P. halotolerans sp. nov., P. marinum comb. nov., P. flavum comb. nov. and P. endolithicum comb. nov., respectively, and show that P. pelagicum is a synonym of P. marinum. We also delineate a new chemolithoautotroph species, P. banfieldiae sp. nov., whose type strain is NT-26[T] (=DSM 106348[T]=CFBP 8663[T]). This genome-based classification was supported by a chemotaxonomic comparison, with increasing taxonomic resolution provided by fatty acid, protein and metabolic profiles. In addition, we used a phylogenetic approach to infer scenarios of duplication, horizontal transfer and loss for all genes in the Pseudorhizobium pangenome. We thus identify the key functions associated with the diversification of each species and higher clades, shedding light on the mechanisms of adaptation to their respective ecological niches. 
Respiratory proteins acquired at the origin of Pseudorhizobium were combined with clade-specific genes to enable different strategies for detoxification and nutrition in harsh, nutrient-poor environments.}, } @article {pmid33351797, year = {2020}, author = {Kloosterman, AM and Cimermancic, P and Elsayed, SS and Du, C and Hadjithomas, M and Donia, MS and Fischbach, MA and van Wezel, GP and Medema, MH}, title = {Expansion of RiPP biosynthetic space through integration of pan-genomics and machine learning uncovers a novel class of lanthipeptides.}, journal = {PLoS biology}, volume = {18}, number = {12}, pages = {e3001026}, pmid = {33351797}, issn = {1545-7885}, mesh = {Algorithms ; Bacteriocins/*genetics/metabolism ; Biological Products/analysis/metabolism ; Computational Biology/methods ; Genome/genetics ; Genomics/*methods ; Machine Learning ; Multigene Family/genetics ; Peptides/genetics ; Protein Processing, Post-Translational/*genetics/physiology ; Ribosomes/metabolism ; }, abstract = {Microbial natural products constitute a wide variety of chemical compounds, many which can have antibiotic, antiviral, or anticancer properties that make them interesting for clinical purposes. Natural product classes include polyketides (PKs), nonribosomal peptides (NRPs), and ribosomally synthesized and post-translationally modified peptides (RiPPs). While variants of biosynthetic gene clusters (BGCs) for known classes of natural products are easy to identify in genome sequences, BGCs for new compound classes escape attention. In particular, evidence is accumulating that for RiPPs, subclasses known thus far may only represent the tip of an iceberg. Here, we present decRiPPter (Data-driven Exploratory Class-independent RiPP TrackER), a RiPP genome mining algorithm aimed at the discovery of novel RiPP classes. 
DecRiPPter combines a Support Vector Machine (SVM) that identifies candidate RiPP precursors with pan-genomic analyses to identify which of these are encoded within operon-like structures that are part of the accessory genome of a genus. Subsequently, it prioritizes such regions based on the presence of new enzymology and based on patterns of gene cluster and precursor peptide conservation across species. We then applied decRiPPter to mine 1,295 Streptomyces genomes, which led to the identification of 42 new candidate RiPP families that could not be found by existing programs. One of these was studied further and elucidated as a representative of a novel subfamily of lanthipeptides, which we designate class V. The 2D structure of the new RiPP, which we name pristinin A3 (1), was solved using nuclear magnetic resonance (NMR), tandem mass spectrometry (MS/MS) data, and chemical labeling. Two previously unidentified modifying enzymes are proposed to create the hallmark lanthionine bridges. 
Taken together, our work highlights how novel natural product families can be discovered by methods going beyond sequence similarity searches to integrate multiple pathway discovery criteria.}, } @article {pmid33347948, year = {2021}, author = {Gontijo, MTP and Vidigal, PMP and Lopez, MES and Brocchi, M}, title = {Bacteriophages that infect Gram-negative bacteria as source of signal-arrest-release motif lysins.}, journal = {Research in microbiology}, volume = {172}, number = {2}, pages = {103794}, doi = {10.1016/j.resmic.2020.103794}, pmid = {33347948}, issn = {1769-7123}, mesh = {Amino Acid Motifs ; Bacterial Outer Membrane ; Bacteriolysis ; Bacteriophages/*enzymology/*genetics ; Biodiversity ; Drug Resistance, Multiple, Bacterial ; Evolution, Molecular ; Genome, Bacterial ; Genome, Viral ; Gram-Negative Bacteria/genetics/*virology ; *Protein Sorting Signals ; Viral Proteins/*genetics/isolation & purification ; }, abstract = {Treatment of infections caused by multidrug-resistant (MDR) Gram-negative bacteria is challenging, a potential solution for which is the use of bacteriophage-derived lytic enzymes. However, the exogenous action of bacteriophage lysins against Gram-negative bacteria is hindered due to the presence of an impermeable outer membrane in these bacteria. Nevertheless, recent research has demonstrated that some lysins are capable of permeating the outer membrane of Gram-negative bacteria with the help of signal peptides. In the present study, we investigated the genomes of 309 bacteriophages that infect Gram-negative pathogens of clinical interest in order to determine the evolutionary markers of signal peptide-containing lysins. Complete genomes displayed 265 putative lysins, of which 17 (6.41%) contained signal-arrest-release motifs and 41 (15.47%) contained cleavable signal peptides. There was no apparent relationship between host specificity and lysin diversity. 
Nevertheless, the evolution of lysin genes might not be independent of the rest of the bacteriophage genome since pan-genome clustering and lysin diversity appear to be correlated. In addition, signal peptide- and signal-arrest-release-containing lysins were monophyletically distributed in the protein cladogram, suggesting that the natural selection of holin-independent lysins is divergent. Our study screened 58 (21.89%) out of 265 potential candidates for in vitro experimentation against MDR bacteria.}, } @article {pmid33347550, year = {2020}, author = {Bryant, E and Shen, Z and Mannion, A and Patterson, M and Buczek, J and Fox, JG}, title = {Campylobacter taeniopygiae sp. nov., Campylobacter aviculae sp. nov., and Campylobacter estrildidarum sp. nov., Novel Species Isolated from Laboratory-Maintained Zebra Finches.}, journal = {Avian diseases}, volume = {64}, number = {4}, pages = {457-466}, doi = {10.1637/aviandiseases-D-20-00019}, pmid = {33347550}, issn = {1938-4351}, support = {T32 OD010978/OD/NIH HHS/United States ; }, mesh = {Animals ; Animals, Laboratory ; Bird Diseases/*epidemiology/microbiology ; Campylobacter/*classification ; Campylobacter Infections/epidemiology/microbiology/*veterinary ; Finches ; Massachusetts/epidemiology ; Prevalence ; *Songbirds ; }, abstract = {Zebra finches (Taeniopygia guttata) are a laboratory animal species commonly used for modeling neurobiology and learning. Historically, using bacterial culture, biochemical analysis, and 16S ribosomal RNA gene sequencing, bacterial isolates from feces of finches housed at Massachusetts Institute of Technology had been presumptively diagnosed as Campylobacter jejuni, which is commonly isolated from both domestic and wild birds. Although the zebra finches were not clinically affected, C. jejuni is a known zoonotic pathogen that causes gastroenteritis in humans worldwide. 
Human transmission is predominantly foodborne and associated with the consumption of contaminated poultry; however, humans can also become infected from contact with C. jejuni-infected reservoir hosts. Because C. jejuni-infected finches pose a risk to research personnel, a study was undertaken to investigate the prevalence and taxonomic identification of Campylobacter spp. present in the finch colony. Campylobacter spp. were isolated from a total of 26 finch fecal samples collected in 2003, 2010, and 2017. 16S ribosomal RNA sequencing of all isolates determined that they shared 99% identity with either C. jejuni or Campylobacter lari. Sixteen of the isolates were subjected to further biochemical characterization and atpA and rpoB gene sequence analysis. Based on these analyses, three clusters of Campylobacter species were identified. The draft whole-genome sequences were determined for one representative isolate from each cluster. A pan-genomic phylogenetic tree, average nucleotide identity, digital DNA-DNA hybridization, and orthologous gene analyses indicated that each isolate was its own novel species, distinct from C. jejuni and other avian Campylobacter species. 
We have named these novel species Campylobacter taeniopygiae, Campylobacter aviculae, and Campylobacter estrildidarum, and in each novel species, we identified virulence genes suggesting their pathogenic and zoonotic potential.}, } @article {pmid33347470, year = {2020}, author = {Viana, MVC and Profeta, R and da Silva, AL and Hurtado, R and Cerqueira, JC and Ribeiro, BFS and Almeida, MO and Morais-Rodrigues, F and Soares, SC and Oliveira, M and Tavares, L and Figueiredo, H and Wattam, AR and Barh, D and Ghosh, P and Silva, A and Azevedo, V}, title = {Taxonomic classification of strain PO100/5 shows a broader geographic distribution and genetic markers of the recently described Corynebacterium silvaticum.}, journal = {PloS one}, volume = {15}, number = {12}, pages = {e0244210}, pmid = {33347470}, issn = {1932-6203}, support = {HHSN272201400027C/AI/NIAID NIH HHS/United States ; }, mesh = {Corynebacterium/classification/*genetics/metabolism ; *Ecosystem ; Genetic Markers ; *Genome, Bacterial ; *Phylogeny ; Phylogeography ; Polymorphism, Genetic ; }, abstract = {The bacterial strain PO100/5 was isolated from a skin abscess taken from a pig (Sus scrofa domesticus) in the Alentejo region of southern Portugal. It was identified as Corynebacterium pseudotuberculosis using biochemical tests, multiplex PCR and Pulsed Field Gel Electrophoresis. After genome sequencing and rpoB phylogeny, the strain was classified as C. ulcerans. To better understand the taxonomy of this strain and improve identification methods, we compared strain PO100/5 to other publicly available genomes from C. diphtheriae group. Taxonomic analysis reclassified it and three other strains as the recently described C. silvaticum, which have been isolated from wild boar and roe deer in Germany and Austria. The results showed that PO100/5 is the first sequenced genome of a C. 
silvaticum strain from livestock and a different geographical region, has the unique sequence type ST709, and could produce the diphtheriae toxin, along with strain 05-13. Genomic analysis of PO100/5 showed four prophages, and eight conserved genomic islands in comparison to C. ulcerans. Pangenome analysis of 38 C. silvaticum and 76 C. ulcerans genomes suggested that C. silvaticum is a genetically homogeneous species, with 73.6% of its genes conserved and a pangenome near to be closed (α > 0.952). There are 172 genes that are unique to C. silvaticum in comparison to C. ulcerans. Most of these conserved genes are related to nutrient uptake and metabolism, prophages or immunity against them, and could be genetic markers for species identification. Strains PO100/5 (livestock) and KL0182T (wild boar) were predicted to be potential human pathogens. This information may be useful for identification and surveillance of this pathogen.}, } @article {pmid33344086, year = {2020}, author = {Hansen, MJ and Kudirkiene, E and Dalsgaard, I}, title = {Analysis of 44 Vibrio anguillarum genomes reveals high genetic diversity.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e10451}, pmid = {33344086}, issn = {2167-8359}, abstract = {Vibriosis, a hemorrhagic septicemic disease caused by the bacterium Vibrio anguillarum, is an important bacterial infection in Danish sea-reared rainbow trout. Despite vaccination, outbreaks still occur, likely because the vaccine is based on V. anguillarum strains from abroad/other hosts than rainbow trout. Information about the genetic diversity of V. anguillarum specifically in Danish rainbow trout, is required to investigate this claim. Consequently, the aim of the present investigation was to sequence and to characterize a collection of 44 V. anguillarum strains obtained primarily from vibriosis outbreaks in Danish rainbow trout. 
The strains were sequenced, de novo assembled, and the genomes examined for the presence of plasmids, virulence, and acquired antibiotic resistance genes. To investigate the phylogeny, single nucleotide polymorphisms were identified, and the pan-genome was calculated. All strains carried tet(34) encoding tetracycline resistance, and 36 strains also contained qnrVC6 for increased fluoroquinolone/quinolone resistance. But interestingly, all strains were phenotypically sensitive to both oxytetracycline and oxolinic acid. Almost all serotype O1 strains contained a pJM1-like plasmid and nine serotype O2A strains carried the plasmid p15. The distribution of virulence genes was rather similar across the strains, although evident variance among serotypes was observed. Most significantly, almost all serotype O2 and O3 strains, as well as the serotype O1 strain without a pJM1-like plasmid, carried genes encoding piscibactin biosynthesis. This supports the hypothesis that piscibactin plays a crucial role in virulence for pathogenic strains lacking the anguibactin system. The phylogenetic analysis and pan-genome calculations revealed great diversity within V. anguillarum. Serotype O1 strains were in general very similar, whereas considerable variation was found among serotype O2A strains. The great diversity within the V. anguillarum serotype O2A genomes is most likely the reason why vaccines provide good protection from some strains, but not from others. Hopefully, the new genomic data and knowledge provided in this study might help develop an optimized vaccine against V. 
anguillarum in the future to reduce the use of antibiotics, minimize economic losses and improve the welfare of the fish.}, } @article {pmid33343549, year = {2020}, author = {Ghaly, TM and Paulsen, IT and Sajjad, A and Tetu, SG and Gillings, MR}, title = {A Novel Family of Acinetobacter Mega-Plasmids Are Disseminating Multi-Drug Resistance Across the Globe While Acquiring Location-Specific Accessory Genes.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {605952}, pmid = {33343549}, issn = {1664-302X}, abstract = {Acinetobacter species are emerging as major nosocomial pathogens, aided by their ability to acquire resistance to all classes of antibiotics. A key factor leading to their multi-drug resistance phenotypes is the acquisition of a wide variety of mobile genetic elements, particularly large conjugative plasmids. Here, we characterize a family of 21 multi-drug resistance mega-plasmids in 11 different Acinetobacter species isolated from various locations across the globe. The plasmid family exhibits a highly dynamic and diverse accessory genome, including 221 antibiotic resistance genes (ARGs) that confer resistance to 13 classes of antibiotics. We show that plasmids isolated within the same geographic region are often evolutionarily divergent members of this family based on their core-genome, yet they exhibit a more similar accessory genome. Individual plasmids, therefore, can disseminate to different locations around the globe, where they then appear to acquire diverse sets of accessory genes from their local surroundings. Further, we show that plasmids from several geographic regions were enriched with location-specific functional traits. Together, our findings show that these mega-plasmids can transmit across species boundaries, have the capacity for global dissemination, can accumulate a diverse suite of location-specific accessory genes, and can confer multi-drug resistance phenotypes of significant concern for human health. 
We therefore highlight this previously undescribed plasmid family as a serious threat to healthcare systems worldwide. These findings also add to the growing concern that mega-plasmids are key disseminators of antibiotic resistance and require global surveillance.}, } @article {pmid33339499, year = {2020}, author = {Cai, Z and Guo, Q and Yao, Z and Zheng, W and Xie, J and Bai, S and Zhang, H}, title = {Comparative genomics of Klebsiella michiganensis BD177 and related members of Klebsiella sp. reveal the symbiotic relationship with Bactrocera dorsalis.}, journal = {BMC genetics}, volume = {21}, number = {Suppl 2}, pages = {138}, pmid = {33339499}, issn = {1471-2156}, mesh = {Animals ; Comparative Genomic Hybridization ; Gastrointestinal Microbiome ; *Genome, Bacterial ; Klebsiella/*genetics ; Phenotype ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Symbiosis ; Tephritidae/*microbiology ; }, abstract = {BACKGROUND: Bactrocera dorsalis is a destructive polyphagous and highly invasive insect pest of tropical and subtropical species of fruit and vegetable crops. The sterile insect technique (SIT) has been used for decades to control insect pests of agricultural, veterinary, and human health importance. Irradiation of pupae in SIT can reduce the ecological fitness of the sterile insects. Our previous study has shown that a gut bacterial strain, BD177, could restore ecological fitness by promoting host food intake and metabolic activities.

RESULTS: Using long-read sequence technologies, we assembled the complete genome of K. michiganensis BD177 strain. The complete genome of K. michiganensis BD177 comprises one circular chromosome and four plasmids with a GC content of 55.03%. The pan-genome analysis was performed on 119 genomes (strain BD177 genome and 118 out of 128 published Klebsiella sp. genomes since ten were discarded). The pan-genome includes a total of 49305 gene clusters, a small number of 858 core genes, and a high number of accessory (10566) genes. Pan-genome and average nucleotide identity (ANI) analysis showed that BD177 is more similar to the type strain K. michiganensis DSM2544, while away from the type strain K. oxytoca ATCC13182. Comparative genome analysis with 21 K. oxytoca and 12 K. michiganensis strains, identified 213 unique genes, several of them related to amino acid metabolism, metabolism of cofactors and vitamins, and xenobiotics biodegradation and metabolism in BD177 genome.

CONCLUSIONS: Phylogenomics analysis reclassified strain BD177 as a member of the species K. michiganensis. Comparative genome analysis suggested that K. michiganensis BD177 has the strain-specific ability to provide three essential amino acids (phenylalanine, tryptophan and methionine) and two vitamins B (folate and riboflavin) to B. dorsalis. The clear classification status of BD177 strain and identification of unique genetic characteristics may contribute to expanding our understanding of the symbiotic relationship of gut microbiota and B. dorsalis.}, } @article {pmid33339176, year = {2020}, author = {Ramsamy, Y and Mlisana, KP and Amoako, DG and Abia, ALK and Allam, M and Ismail, A and Singh, R and Essack, SY}, title = {Comparative Pathogenomics of Aeromonas veronii from Pigs in South Africa: Dominance of the Novel ST657 Clone.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, pmid = {33339176}, issn = {2076-2607}, support = {Grant no. 106063//National Research Foundation/ ; }, abstract = {The pathogenomics of carbapenem-resistant Aeromonas veronii (A. veronii) isolates recovered from pigs in KwaZulu-Natal, South Africa, was explored by whole genome sequencing on the Illumina MiSeq platform. Genomic functional annotation revealed a vast array of similar central networks (metabolic, cellular, and biochemical). The pan-genome analysis showed that the isolates formed a total of 4349 orthologous gene clusters, 4296 of which were shared; no unique clusters were observed. All the isolates had similar resistance phenotypes, which corroborated their chromosomally mediated resistome (blaCPHA3 and blaOXA-12) and belonged to a novel sequence type, ST657 (a satellite clone). Isolates in the same sub-clades clustered according to their clonal lineages and host. Mobilome analysis revealed the presence of chromosome-borne insertion sequence families. The estimated pathogenicity score (Pscore ≈ 0.60) indicated their potential pathogenicity in humans. 
Furthermore, these isolates carried several virulence factors (adherence factors, toxins, and immune evasion), in different permutations and combinations, indicating a differential ability to establish infection. Phylogenomic and metadata analyses revealed a predilection for water environments and aquatic animals, with more recent reports in humans and food animals across geographies, making A. veronii a potential One Health indicator bacterium.}, } @article {pmid33329454, year = {2020}, author = {Park, S and Steinegger, M and Cho, HS and Chun, J}, title = {Metagenomic Association Analysis of Gut Symbiont Limosilactobacillus reuteri Without Host-Specific Genome Isolation.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {585622}, pmid = {33329454}, issn = {1664-302X}, abstract = {Limosilactobacillus reuteri is a model symbiont that colonizes the guts of vertebrates in studies on host adaptation of the gut symbiont. Previous studies have investigated host-specific phylogenetic and functional properties by isolating the genomic sequence. This dependency on genome isolation is a significant bottleneck. Here, we propose a method to study the association between L. reuteri and its hosts directly from metagenomic reads without strain isolation using pan-genomes. We characterized the host-specificity of L. reuteri in metagenomic samples, not only in previously studied organisms (mice and pigs) but also in dogs. For each sample, two types of profiles were generated: (1) genome-based strain type abundance profiles and (2) gene composition profiles. Our profiles showed host-association of L. reuteri in both phylogenetic and functional aspects without depending on host-specific genome isolation. We observed not only the presence of host-specific lineages, but also the dominant lineages associated with the different hosts. Furthermore, we showed that metagenome-assembled genomes provide detailed insights into the host-specificity of L. reuteri. 
We inferred evolutionary trajectories of host-associative L. reuteri strains in the metagenomic samples by placing the metagenome-assembled genomes into a phylogenetic tree and identified novel host-specific genes that were unannotated in existing pan-genome databases. Our pan-genomic approach reduces the need for time-consuming and expensive host-specific genome isolation, while producing consistent results with previous host-association findings in mice and pigs. Additionally, we predicted associations that have not yet been studied in dogs.}, } @article {pmid33326691, year = {2021}, author = {Whibley, A and Kelley, JL and Narum, SR}, title = {The changing face of genome assemblies: Guidance on achieving high-quality reference genomes.}, journal = {Molecular ecology resources}, volume = {21}, number = {3}, pages = {641-652}, doi = {10.1111/1755-0998.13312}, pmid = {33326691}, issn = {1755-0998}, mesh = {*Genome ; *Genomics/trends ; *Sequence Analysis, DNA ; }, abstract = {The quality of genome assemblies has improved rapidly in recent years due to continual advances in sequencing technology, assembly approaches, and quality control. In the field of molecular ecology, this has led to the development of exceptional quality genome assemblies that will be important long-term resources for broader studies into ecological, conservation, evolutionary, and population genomics of naturally occurring species. Moreover, the extent to which a single reference genome represents the diversity within a species varies: pan-genomes will become increasingly important ecological genomics resources, particularly in systems found to have considerable presence-absence variation in their functional content. Here, we highlight advances in technology that have raised the bar for genome assembly and provide guidance on standards to achieve exceptional quality reference genomes. 
Key recommendations include the following: (a) Genome assemblies should include long-read sequencing except in rare cases where it is effectively impossible to acquire adequately preserved samples needed for high molecular weight DNA standards. (b) At least one scaffolding approach should be included with genome assembly such as Hi-C or optical mapping. (c) Genome assemblies should be carefully evaluated, this may involve utilising short read data for genome polishing, error correction, k-mer analyses, and estimating the percent of reads that map back to an assembly. Finally, a genome assembly is most valuable if all data and methods are made publicly available and the utility of a genome for further studies is verified through examples. While these recommendations are based on current technology, we anticipate that future advances will push the field further and the molecular ecology community should continue to adopt new approaches that attain the highest quality genome assemblies.}, } @article {pmid33323129, year = {2020}, author = {Utter, DR and Borisy, GG and Eren, AM and Cavanaugh, CM and Mark Welch, JL}, title = {Metapangenomics of the oral microbiome provides insights into habitat adaptation and cultivar diversity.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {293}, pmid = {33323129}, issn = {1474-760X}, support = {P30 DK042086/DK/NIDDK NIH HHS/United States ; R01 DE022586/DE/NIDCR NIH HHS/United States ; UL1 TR001102/TR/NCATS NIH HHS/United States ; }, mesh = {Bacteria/*genetics ; Chromosome Mapping ; Haemophilus parainfluenzae/genetics ; Humans ; *Metagenome ; Microbiota/*genetics ; Micrococcaceae/genetics ; Mouth/*microbiology ; RNA, Ribosomal, 16S/genetics ; }, abstract = {BACKGROUND: The increasing availability of microbial genomes and environmental shotgun metagenomes provides unprecedented access to the genomic differences within related bacteria. 
The human oral microbiome with its diverse habitats and abundant, relatively well-characterized microbial inhabitants presents an opportunity to investigate bacterial population structures at an ecosystem scale.

RESULTS: Here, we employ a metapangenomic approach that combines public genomes with Human Microbiome Project (HMP) metagenomes to study the diversity of microbial residents of three oral habitats: tongue dorsum, buccal mucosa, and supragingival plaque. For two exemplar taxa, Haemophilus parainfluenzae and the genus Rothia, metapangenomes reveal distinct genomic groups based on shared genome content. H. parainfluenzae genomes separate into three distinct subgroups with differential abundance between oral habitats. Functional enrichment analyses identify an operon encoding oxaloacetate decarboxylase as diagnostic for the tongue-abundant subgroup. For the genus Rothia, grouping by shared genome content recapitulates species-level taxonomy and habitat preferences. However, while most R. mucilaginosa are restricted to the tongue as expected, two genomes represent a cryptic population of R. mucilaginosa in many buccal mucosa samples. For both H. parainfluenzae and the genus Rothia, we identify not only limitations in the ability of cultivated organisms to represent populations in their native environment, but also specifically which cultivar gene sequences are absent or ubiquitous.

CONCLUSIONS: Our findings provide insights into population structure and biogeography in the mouth and form specific hypotheses about habitat adaptation. These results illustrate the power of combining metagenomes and pangenomes to investigate the ecology and evolution of bacteria across analytical scales.}, } @article {pmid33323122, year = {2020}, author = {Shaiber, A and Willis, AD and Delmont, TO and Roux, S and Chen, LX and Schmid, AC and Yousef, M and Watson, AR and Lolans, K and Esen, ÖC and Lee, STM and Downey, N and Morrison, HG and Dewhirst, FE and Mark Welch, JL and Eren, AM}, title = {Functional and genetic markers of niche partitioning among enigmatic members of the human oral microbiome.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {292}, pmid = {33323122}, issn = {1474-760X}, support = {R35 GM133420/GM/NIGMS NIH HHS/United States ; R01 DE016937/DE/NIDCR NIH HHS/United States ; R35GM133420/NH/NIH HHS/United States ; R01 DE022586/DE/NIDCR NIH HHS/United States ; R01 DE024468/DE/NIDCR NIH HHS/United States ; P30 DK042086/DK/NIDDK NIH HHS/United States ; DE016937/DE/NIDCR NIH HHS/United States ; DE024468/DE/NIDCR NIH HHS/United States ; }, mesh = {Adaptation, Physiological ; Adult ; Bacteria/genetics ; Female ; *Genetic Markers ; Genome, Bacterial ; Humans ; Interspersed Repetitive Sequences ; Male ; *Metagenome ; Metagenomics ; Microbiota/*genetics ; Middle Aged ; Mouth/*microbiology ; Phylogeny ; RNA, Ribosomal, 16S ; }, abstract = {INTRODUCTION: Microbial residents of the human oral cavity have long been a major focus of microbiology due to their influence on host health and intriguing patterns of site specificity amidst the lack of dispersal limitation. However, the determinants of niche partitioning in this habitat are yet to be fully understood, especially among taxa that belong to recently discovered branches of microbial life.

RESULTS: Here, we assemble metagenomes from tongue and dental plaque samples from multiple individuals and reconstruct 790 non-redundant genomes, 43 of which resolve to TM7, a member of the Candidate Phyla Radiation, forming six monophyletic clades that distinctly associate with either plaque or tongue. Both pangenomic and phylogenomic analyses group tongue-specific clades with other host-associated TM7 genomes. In contrast, plaque-specific TM7 group with environmental TM7 genomes. Besides offering deeper insights into the ecology, evolution, and mobilome of cryptic members of the oral microbiome, our study reveals an intriguing resemblance between dental plaque and non-host environments indicated by the TM7 evolution, suggesting that plaque may have served as a stepping stone for environmental microbes to adapt to host environments for some clades of microbes. Additionally, we report that prophages are widespread among oral-associated TM7, while absent from environmental TM7, suggesting that prophages may have played a role in adaptation of TM7 to the host environment.

CONCLUSIONS: Our data illuminate niche partitioning of enigmatic members of the oral cavity, including TM7, SR1, and GN02, and provide genomes for poorly characterized yet prevalent members of this biome, such as uncultivated Flavobacteriaceae.}, } @article {pmid33321204, year = {2021}, author = {Basharat, Z and Jahanzaib, M and Yasmin, A and Khan, IA}, title = {Pan-genomics, drug candidate mining and ADMET profiling of natural product inhibitors screened against Yersinia pseudotuberculosis.}, journal = {Genomics}, volume = {113}, number = {1 Pt 1}, pages = {238-244}, doi = {10.1016/j.ygeno.2020.12.015}, pmid = {33321204}, issn = {1089-8646}, mesh = {Anti-Bacterial Agents/*pharmacology ; Bacterial Proteins/antagonists & inhibitors/genetics/metabolism ; Binding Sites ; Drug Resistance, Bacterial ; Genome, Bacterial ; Genomics ; *Molecular Docking Simulation ; Network Pharmacology ; Phytochemicals/*pharmacology ; Protein Binding ; Yersinia pseudotuberculosis/drug effects/*genetics/metabolism ; }, abstract = {Yersinia pseudotuberculosis belongs to the family Enterobacteriaceae and is responsible for scarlatinoid fever, food poisoning, post-infectious complications like erythema nodosum/reactive arthritis as well as pseudoappendicitis in children. Genome sequences of the 23 whole genomes from NCBI were utilized for conducting the pan-genomic analysis. Essential proteins from the core region were obtained and drug targets were identified using a hierarchal in silico approach. Among these, multidrug resistance protein sub-unit mdtC was chosen for further analysis. This protein unit confers resistance to antibiotics upon forming a tripartite complex with units A and B in Escherichia coli. Details of the function have not yet been elucidated experimentally in Yersinia spp. 
Computational structure modeling and validation were followed by screening against phytochemical libraries of traditional Indian (Ayurveda), North African, and traditional Chinese flora using Molecular Operating Environment software version 2019.0102. ADMET profiling and descriptor study of best docked compounds was studied. Since phytotherapy is the best resort to antibiotic resistance so these compounds should be tested experimentally to further validate the results. The obtained information could aid wet-lab scientists to work on the scaffold of screened drug-like compounds from natural resources. This could be useful in our quest for antibiotic-resistant therapy against Y. pseudotuberculosis.}, } @article {pmid33310406, year = {2021}, author = {Wolter, LA and Wietz, M and Ziesche, L and Breider, S and Leinberger, J and Poehlein, A and Daniel, R and Schulz, S and Brinkhoff, T}, title = {Pseudooceanicola algae sp. nov., isolated from the marine macroalga Fucus spiralis, shows genomic and physiological adaptations for an algae-associated lifestyle.}, journal = {Systematic and applied microbiology}, volume = {44}, number = {1}, pages = {126166}, doi = {10.1016/j.syapm.2020.126166}, pmid = {33310406}, issn = {1618-0984}, mesh = {Adaptation, Physiological ; Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fucus/*microbiology ; Germany ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Rhodobacteraceae/*classification/isolation & purification ; Seawater ; Seaweed/microbiology ; Sequence Analysis, DNA ; }, abstract = {The genus Pseudooceanicola from the alphaproteobacterial Roseobacter group currently includes ten validated species. We herein describe strain Lw-13e[T], the first Pseudooceanicola species from marine macroalgae, isolated from the brown alga Fucus spiralis abundant at European and North American coasts. Physiological and pangenome analyses of Lw-13e[T] showed corresponding adaptive features. 
Adaptations to the tidal environment include a broad salinity tolerance, degradation of macroalgae-derived substrates (mannitol, mannose, proline), and resistance to several antibiotics and heavy metals. Notably, Lw-13e[T] can degrade oligomeric alginate via PL15 alginate lyase encoded in a polysaccharide utilization locus (PUL), rarely described for roseobacters to date. Plasmid localization of the PUL strengthens the importance of mobile genetic elements for evolutionary adaptations within the Roseobacter group. PL15 homologs were primarily detected in marine plant-associated metagenomes from coastal environments but not in the open ocean, corroborating its adaptive role in algae-rich habitats. Exceptional is the tolerance of Lw-13e[T] against the broad-spectrum antibiotic tropodithietic acid, produced by Phaeobacter spp. co-occurring in coastal habitats. Furthermore, Lw-13e[T] exhibits features resembling terrestrial plant-bacteria associations, i.e. biosynthesis of siderophores, terpenes and volatiles, which may contribute to mutual bacteria-algae interactions. Closest described relative of Lw-13e[T] is Pseudopuniceibacterium sediminis CY03[T] with 98.4% 16S rRNA gene sequence similarity. However, protein sequence-based core genome phylogeny and average nucleotide identity indicate affiliation of Lw-13e[T] with the genus Pseudooceanicola. 
Based on phylogenetic, physiological and (chemo)taxonomic distinctions, we propose strain Lw-13e[T] (=DSM 29013[T]=LMG 30557[T]) as a novel species with the name Pseudooceanicola algae.}, } @article {pmid33304460, year = {2020}, author = {Jiao, J and Tian, CF}, title = {Ancestral zinc-finger bearing protein MucR in alpha-proteobacteria: A novel xenogeneic silencer?}, journal = {Computational and structural biotechnology journal}, volume = {18}, number = {}, pages = {3623-3631}, pmid = {33304460}, issn = {2001-0370}, abstract = {The MucR/Ros family protein is conserved in alpha-proteobacteria and characterized by its zinc-finger motif that has been proposed as the ancestral domain from which the eukaryotic C2H2 zinc-finger structure evolved. In the past decades, accumulated evidence has revealed MucR as a pleiotropic transcriptional regulator that integrates multiple functions such as virulence, symbiosis, cell cycle and various physiological processes. Scattered reports indicate that MucR mainly acts as a repressor, through oligomerization and binding to multiple sites of AT-rich target promoters. The N-terminal region and zinc-finger bearing C-terminal region of MucR mediate oligomerization and DNA-binding, respectively. These features are convergent to those of xenogeneic silencers such as H-NS, MvaT, Lsr2 and Rok, which are mainly found in other lineages. Phylogenetic analysis of MucR homologs suggests an ancestral origin of MucR in alpha- and delta-proteobacteria. Multiple independent duplication and lateral gene transfer events contribute to the diversity and phyletic distribution of MucR. 
Finally, we posed questions which remain unexplored regarding the putative roles of MucR as a xenogeneic silencer and a general manager in balancing adaptation and regulatory integration in the pangenome context.}, } @article {pmid33303031, year = {2020}, author = {Zhou, G and Liang, H and Gu, Y and Ju, C and He, L and Guo, P and Shao, Z and Zhang, J and Zhang, M}, title = {Comparative genomics of Helicobacter pullorum from different countries.}, journal = {Gut pathogens}, volume = {12}, number = {1}, pages = {56}, pmid = {33303031}, issn = {1757-4749}, support = {SZSM201803081//the Sanming Project of Medicine in Shenzhen/ ; 2018ZX10712-001//the National Key Program of China/ ; }, abstract = {BACKGROUND: Helicobacter pullorum commonly colonized in the gastrointestinal tract of poultry and caused gastroenteritis. This bacterium could be transmitted to humans through contaminated food and caused colitis and hepatitis. Currently, the genetic characteristics of the H. pullorum were not recognized enough. In this study, the genomes of 23 H. pullorum strains from different countries were comparatively analyzed. Among them, H. pullorum 2013BJHL was the first isolated and reported in China.

RESULTS: The genomes of the studied strains were estimated to vary from 1.55 to 2.03 Mb, with a GC content of ~ 34%. 4064 pan genes and 1267 core genes were obtained from the core-pan genome analysis using the Roary pipeline. Core genome SNPs (cg-SNPs) were obtained using Snippy4 software. Two groups were identified with the phylogenetic analysis based on the cg-SNPs. Some adhesion-related, immune regulation, motility-related, antiphagocytosis-related, toxin-related and quorum sensing related genes were identified as virulence factors. APH(3')-IIIa, APH(2'')-If, and AAC(6')-Ie-APH(2'')-Ia were identified as antibiotic resistance genes among the H. pullorum genomes. cat, SAT-4 and tetO genes were only identified in 2013BJHL, and tet(C) was identified in MIT98-5489. MIC determination revealed that the 2013BJHL showed acquired resistance to ciprofloxacin, nalidixic acid, tetracycline, gentamicin, streptomycin and erythromycin, only sensitive to ampicillin. The antibiotic resistance genetic determinants on the 2013BJHL genome correlate well with observed antimicrobial susceptibility patterns. Two types of type VI secretion system (T6SS) were identified in 52.2% (12/23) of the studied strains.

CONCLUSION: In this study, we obtained the genetic characteristics of H. pullorum from different sources in the world. The comprehensive genetic characteristics of H. pullorum were first described. H. pullorum showed highly genetic diversity and two sub-types of T6SSs were first identified in H. pullorum. 2013BJHL was found to be multidrug resistant as it was resistant to at least three different antibiotic classes.}, } @article {pmid33302542, year = {2020}, author = {Webster, J and Bogema, D and Chapman, TA}, title = {Comparative Genomics of Xanthomonas citri pv. citri A* Pathotype Reveals Three Distinct Clades with Varying Plasmid Distribution.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, pmid = {33302542}, issn = {2076-2607}, support = {PBCRC2002, PBCRC2156//Plant Biosecurity CRC/ ; }, abstract = {Citrus bacterial canker (CBC) is an important disease of citrus cultivars worldwide that causes blister-like lesions on host plants and leads to more severe symptoms such as plant defoliation and premature fruit drop. The causative agent, Xanthomonas citri pv. citri, exists as three pathotypes-A, A*, and A[w]-which differ in their host range and elicited host response. To date, comparative analyses have been hampered by the lack of closed genomes for the A* pathotype. In this study, we sequenced and assembled six CBC isolates of pathotype A* using second- and third-generation sequencing technologies to produce complete, closed assemblies. Analysis of these genomes and reference A, A*, and A[w] sequences revealed genetic groups within the A* pathotype. Investigation of accessory genomes revealed virulence factors, including type IV secretion systems and heavy metal resistance genes, differentiating the genetic groups. Genomic comparisons of closed genome assemblies also provided plasmid distribution information for the three genetic groups of A*. 
The genomes presented here complement existing closed genomes of A and A[w] pathotypes that are publicly available and open opportunities to investigate the evolution of X. citri pv. citri and the virulence factors that contribute to this serious pathogen.}, } @article {pmid33301093, year = {2021}, author = {Danilevicz, MF and Tay Fernandez, CG and Marsh, JI and Bayer, PE and Edwards, D}, title = {High-Throughput Genotyping Technologies in Plant Taxonomy.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2222}, number = {}, pages = {149-166}, pmid = {33301093}, issn = {1940-6029}, mesh = {Computational Biology/methods ; *DNA Barcoding, Taxonomic/methods/standards ; DNA Contamination ; Evolution, Molecular ; Genetic Markers ; Genome, Plant ; Genomics/methods ; Genotype ; *Genotyping Techniques ; *High-Throughput Screening Assays/standards ; Phylogeny ; Plants/*classification/*genetics ; Polymorphism, Single Nucleotide ; }, abstract = {Molecular markers provide researchers with a powerful tool for variation analysis between plant genomes. They are heritable and widely distributed across the genome and for this reason have many applications in plant taxonomy and genotyping. Over the last decade, molecular marker technology has developed rapidly and is now a crucial component for genetic linkage analysis, trait mapping, diversity analysis, and association studies. This chapter focuses on molecular marker discovery, its application, and future perspectives for plant genotyping through pangenome assemblies. 
Included are descriptions of automated methods for genome and sequence distance estimation, genome contaminant analysis in sequence reads, genome structural variation, and SNP discovery methods.}, } @article {pmid33295861, year = {2020}, author = {Choo, SW and Rishik, S and Wee, WY}, title = {Comparative genome analyses of Mycobacteroides immunogenum reveals two potential novel subspecies.}, journal = {Microbial genomics}, volume = {6}, number = {12}, pages = {}, pmid = {33295861}, issn = {2057-5858}, mesh = {Genome, Bacterial ; Genomic Islands ; Genomics/*methods ; Multilocus Sequence Typing ; Mycobacteriaceae/*classification/genetics/pathogenicity ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Selection, Genetic ; Species Specificity ; Virulence Factors/*genetics ; }, abstract = {Mycobacteroides immunogenum is an emerging opportunistic pathogen implicated in nosocomial infections. Comparative genome analyses may provide better insights into its genomic structure, functions and evolution. The present analysis showed that M. immunogenum has an open pan-genome. Approximately 36.8% of putative virulence genes were identified in the accessory regions of M. immunogenum. Phylogenetic analyses revealed two potential novel subspecies of M. immunogenum, supported by evidence from ANIb (average nucleotide identity using blast) and GGDC (Genome to Genome Distance Calculator) analyses. We identified 74 genomic islands (GIs) in Subspecies 1 and 23 GIs in Subspecies 2. All Subspecies 2-harboured GIs were not found in Subspecies 1, indicating that they might have been acquired by Subspecies 2 after their divergence. Subspecies 2 has more defence genes than Subspecies 1, suggesting that it might be more resistant to the insertion of foreign DNA and probably explaining why Subspecies 2 has fewer GIs. Positive selection analysis suggest that M. immunogenum has a lower selection pressure compared to non-pathogenic mycobacteria. 
Thirteen genes were positively selected and many were involved in virulence.}, } @article {pmid33291832, year = {2020}, author = {Choi, HJ and Shin, D and Shin, M and Yun, B and Kang, M and Yang, HJ and Jeong, DY and Kim, Y and Oh, S}, title = {Comparative Genomic and Functional Evaluations of Bacillus subtilis Newly Isolated from Korean Traditional Fermented Foods.}, journal = {Foods (Basel, Switzerland)}, volume = {9}, number = {12}, pages = {}, pmid = {33291832}, issn = {2304-8158}, support = {2018R1D1A1A02085907//National Research Foundation of Korea/ ; 2016M3C1B5907057//Ministry of Science, Information and Communications Technology (ICT) and Future Planning/ ; }, abstract = {Many fermented foods are known to have beneficial effects on human and animal health, offering anti-aging and immunomodulatory benefits to the host. Microorganisms contained in the fermented foods are known to provide metabolic products possibly improving host health. However, despite a number of studies on the functional effects of the fermented foods, isolation and identification of the effective bacterial strains in the products are still in progress. The objective of this study was to isolate candidate functional strains in various Korean traditional fermented foods, including ganjang, gochujang, doenjang, and jeotgal, and evaluate their beneficial effects on the host, using Caenorhabditis elegans as a surrogate animal model. Among the 30 strains isolated, five Bacillus spp. were selected that increased the expression level of pmk-1, an innate immune gene of C. elegans. These strains extended the nematode lifespan and showed intestinal adhesion to the host. Based on the bioinformatic analyses of whole genome sequences and pangenomes, the five strains of Bacillus subtilis were genetically different from the strains found in East Asian countries and previously reported strains isolated from Korean fermented foods. Our findings suggest that the newly isolated B. 
subtilis strains can be a good candidate for probiotic with further in-depth investigation on health benefits and safety.}, } @article {pmid33283866, year = {2020}, author = {Valero-Jiménez, CA and Steentjes, MBF and Slot, JC and Shi-Kunne, X and Scholten, OE and van Kan, JAL}, title = {Dynamics in Secondary Metabolite Gene Clusters in Otherwise Highly Syntenic and Stable Genomes in the Fungal Genus Botrytis.}, journal = {Genome biology and evolution}, volume = {12}, number = {12}, pages = {2491-2507}, pmid = {33283866}, issn = {1759-6653}, mesh = {Aldehydes/metabolism ; Allium/*microbiology ; Botrytis/*genetics/metabolism ; Bridged Bicyclo Compounds/metabolism ; *Genome, Fungal ; Host Specificity/*genetics ; Multigene Family ; *Phylogeny ; Polyketides/metabolism ; Secondary Metabolism/genetics ; Synteny ; }, abstract = {Fungi of the genus Botrytis infect >1,400 plant species and cause losses in many crops. Besides the broad host range pathogen Botrytis cinerea, most other species are restricted to a single host. Long-read technology was used to sequence genomes of eight Botrytis species, mostly pathogenic on Allium species, and the related onion white rot fungus, Sclerotium cepivorum. Most assemblies contained <100 contigs, with the Botrytis aclada genome assembled in 16 gapless chromosomes. The core genome and pan-genome of 16 Botrytis species were defined and the secretome, effector, and secondary metabolite repertoires analyzed. Among those genes, none is shared among all Allium pathogens and absent from non-Allium pathogens. The genome of each of the Allium pathogens contains 8-39 predicted effector genes that are unique for that single species, none stood out as potential determinant for host specificity. Chromosome configurations of common ancestors of the genus Botrytis and family Sclerotiniaceae were reconstructed. The genomes of B. cinerea and B. aclada were highly syntenic with only 19 rearrangements between them. 
Genomes of Allium pathogens were compared with ten other Botrytis species (nonpathogenic on Allium) and with 25 Leotiomycetes for their repertoire of secondary metabolite gene clusters. The pattern was complex, with several clusters displaying patchy distribution. Two clusters involved in the synthesis of phytotoxic metabolites are at distinct genomic locations in different Botrytis species. We provide evidence that the clusters for botcinic acid production in B. cinerea and Botrytis sinoallii were acquired by horizontal transfer from taxa within the same genus.}, } @article {pmid33283865, year = {2020}, author = {Fagorzi, C and Ilie, A and Decorosi, F and Cangioli, L and Viti, C and Mengoni, A and diCenzo, GC}, title = {Symbiotic and Nonsymbiotic Members of the Genus Ensifer (syn. Sinorhizobium) Are Separated into Two Clades Based on Comparative Genomics and High-Throughput Phenotyping.}, journal = {Genome biology and evolution}, volume = {12}, number = {12}, pages = {2521-2534}, pmid = {33283865}, issn = {1759-6653}, mesh = {Fabaceae/microbiology ; Gene Transfer, Horizontal ; Genome, Bacterial ; Genomics ; Microarray Analysis ; Nitrogen Fixation/*genetics ; *Phylogeny ; Sinorhizobium/classification/*genetics ; Symbiosis/genetics ; }, abstract = {Rhizobium-legume symbioses serve as paradigmatic examples for the study of mutualism evolution. The genus Ensifer (syn. Sinorhizobium) contains diverse plant-associated bacteria, a subset of which can fix nitrogen in symbiosis with legumes. To gain insights into the evolution of symbiotic nitrogen fixation (SNF), and interkingdom mutualisms more generally, we performed extensive phenotypic, genomic, and phylogenetic analyses of the genus Ensifer. The data suggest that SNF likely emerged several times within the genus Ensifer through independent horizontal gene transfer events. 
Yet, the majority (105 of 106) of the Ensifer strains with the nodABC and nifHDK nodulation and nitrogen fixation genes were found within a single, monophyletic clade. Comparative genomics highlighted several differences between the "symbiotic" and "nonsymbiotic" clades, including divergences in their pangenome content. Additionally, strains of the symbiotic clade carried 325 fewer genes, on average, and appeared to have fewer rRNA operons than strains of the nonsymbiotic clade. Initial characterization of a subset of ten Ensifer strains identified several putative phenotypic differences between the clades. Tested strains of the nonsymbiotic clade could catabolize 25% more carbon sources, on average, than strains of the symbiotic clade, and they were better able to grow in LB medium and tolerate alkaline conditions. On the other hand, the tested strains of the symbiotic clade were better able to tolerate heat stress and acidic conditions. We suggest that these data support the division of the genus Ensifer into two main subgroups, as well as the hypothesis that pre-existing genetic features are required to facilitate the evolution of SNF in bacteria.}, } @article {pmid33279475, year = {2021}, author = {Amodru, V and Garcia, ME and Libe, R and Brue, T and Reznik, Y and Castinetti, F}, title = {Medical management of adrenocortical carcinoma: Current recommendations, new therapeutic options and future perspectives.}, journal = {Annales d'endocrinologie}, volume = {82}, number = {1}, pages = {52-58}, doi = {10.1016/j.ando.2020.12.003}, pmid = {33279475}, issn = {2213-3941}, mesh = {Adrenal Cortex Neoplasms/*drug therapy ; Adrenocortical Carcinoma/*drug therapy ; Antineoplastic Combined Chemotherapy Protocols/classification/*therapeutic use ; Drugs, Investigational/therapeutic use ; Endocrinology/methods/standards/trends ; Humans ; Medical Oncology/methods/standards/trends ; Practice Guidelines as Topic ; Therapies, Investigational/methods/standards/trends ; }, abstract 
= {Adrenocortical carcinoma is a rare malignant tumor of poor prognosis, frequently requiring additional treatments after initial surgery. Due to its adrenolytic action, mitotane has become the first-line medical treatment in patients with aggressive adrenocortical carcinoma. Over the last 2 years, apart from the classical chemotherapy based on etoposide and platinum salts, several studies reported the use of drugs such as temozolomide, tyrosine kinase inhibitors or immunotherapy, with more or less convincing results. The aim of this review is to give further insights in the use of these drugs, and to describe potential therapeutic perspectives based on recent pangenomic studies, for the future management of these still difficult to treat tumors.}, } @article {pmid33273480, year = {2020}, author = {Gaba, S and Kumari, A and Medema, M and Kaushik, R}, title = {Pan-genome analysis and ancestral state reconstruction of class halobacteria: probability of a new super-order.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {21205}, pmid = {33273480}, issn = {2045-2322}, mesh = {Datasets as Topic ; Euryarchaeota/classification/*genetics ; *Genome, Archaeal ; *Phylogeny ; Probability ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Halobacteria, a class of Euryarchaeota, are extremely halophilic archaea that can adapt to a wide range of salt concentration generally from 10% NaCl to saturated salt concentration of 32% NaCl. It consists of the orders: Halobacteriales, Haloferacales and Natrialbales. Pan-genome analysis of class Halobacteria was done to explore the core (300) and variable components (Softcore: 998, Cloud: 36531, Shell: 11784). The core component revealed genes of replication, transcription, translation and repair, whereas the variable component had a major portion of environmental information processing. 
The pan-gene matrix was mapped onto the core-gene tree to find the ancestral (44.8%) and derived genes (55.1%) of the Last Common Ancestor of Halobacteria. A high percentage of derived genes along with the presence of transformation and conjugation genes indicate the occurrence of horizontal gene transfer during the evolution of Halobacteria. A core and pan-gene tree were also constructed to infer a phylogeny which implied a new super-order comprising Natrialbales and Halobacteriales.}, } @article {pmid33271875, year = {2020}, author = {Chen, Z and Erickson, DL and Meng, J}, title = {Benchmarking Long-Read Assemblers for Genomic Analyses of Bacterial Pathogens Using Oxford Nanopore Sequencing.}, journal = {International journal of molecular sciences}, volume = {21}, number = {23}, pages = {}, pmid = {33271875}, issn = {1422-0067}, support = {U01FD001418//U.S. Food and Drug Administration/ ; }, mesh = {Bacteria/classification/drug effects/*genetics/pathogenicity ; Computational Biology/methods ; Drug Resistance, Bacterial ; *Genome, Bacterial ; Genomics/*methods ; High-Throughput Nucleotide Sequencing/*methods ; Multilocus Sequence Typing ; Phylogeny ; Reproducibility of Results ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Oxford Nanopore sequencing can be used to achieve complete bacterial genomes. However, the error rates of Oxford Nanopore long reads are greater compared to Illumina short reads. Long-read assemblers using a variety of assembly algorithms have been developed to overcome this deficiency, which have not been benchmarked for genomic analyses of bacterial pathogens using Oxford Nanopore long reads. In this study, long-read assemblers, namely Canu, Flye, Miniasm/Racon, Raven, Redbean, and Shasta, were thus benchmarked using Oxford Nanopore long reads of bacterial pathogens. Ten species were tested for mediocre- and low-quality simulated reads, and 10 species were tested for real reads. 
Raven was the most robust assembler, obtaining complete and accurate genomes. All Miniasm/Racon and Raven assemblies of mediocre-quality reads provided accurate antimicrobial resistance (AMR) profiles, while the Raven assembly of Klebsiella variicola with low-quality reads was the only assembly with an accurate AMR profile among all assemblers and species. All assemblers functioned well for predicting virulence genes using mediocre-quality and real reads, whereas only the Raven assemblies of low-quality reads had accurate numbers of virulence genes. Regarding multilocus sequence typing (MLST), Miniasm/Racon was the most effective assembler for mediocre-quality reads, while only the Raven assemblies of Escherichia coli O157:H7 and K. variicola with low-quality reads showed positive MLST results. Miniasm/Racon and Raven were the best performers for MLST using real reads. The Miniasm/Racon and Raven assemblies showed accurate phylogenetic inference. For the pan-genome analyses, Raven was the strongest assembler for simulated reads, whereas Miniasm/Racon and Raven performed the best for real reads. 
Overall, the most robust and accurate assembler was Raven, closely followed by Miniasm/Racon.}, } @article {pmid33264401, year = {2021}, author = {Brown, AV and Conners, SI and Huang, W and Wilkey, AP and Grant, D and Weeks, NT and Cannon, SB and Graham, MA and Nelson, RT}, title = {A new decade and new data at SoyBase, the USDA-ARS soybean genetics and genomics database.}, journal = {Nucleic acids research}, volume = {49}, number = {D1}, pages = {D1496-D1501}, pmid = {33264401}, issn = {1362-4962}, mesh = {Chromosome Mapping ; Crops, Agricultural ; *Databases, Genetic ; Epigenesis, Genetic ; *Gene Expression Regulation, Plant ; Genetic Association Studies ; *Genome, Plant ; *Genotype ; Internet ; Molecular Sequence Annotation ; Phylogeny ; Plant Breeding/methods ; Plant Proteins/*genetics/metabolism ; Polymorphism, Single Nucleotide ; Quantitative Trait Loci ; Quantitative Trait, Heritable ; Reference Standards ; Software ; Soybeans/classification/*genetics/metabolism ; United States ; United States Department of Agriculture ; }, abstract = {SoyBase, a USDA genetic and genomics database, holds professionally curated soybean genetic and genomic data, which is integrated and made accessible to researchers and breeders. The site holds several reference genome assemblies, as well as genetic maps, thousands of mapped traits, expression and epigenetic data, pedigree information, and extensive variant and genotyping data sets. SoyBase displays include genetic, genomic, and epigenetic maps of the soybean genome. Gene expression data is presented in the genome viewer as heat maps and pictorial and tabular displays in gene report pages. Millions of sequence variants have been added, representing variations across various collections of cultivars. This variant data is explorable using new interactive tools to visualize the distribution of those variants across the genome, between selected accessions. 
SoyBase holds several reference-quality soybean genome assemblies, accessible via various query tools and browsers, including a new visualization system for exploring the soybean pan-genome. SoyBase also serves as a nexus of announcements pertinent to the greater soybean research community. The database also includes a soybean-specific anatomic and biochemical trait ontology. The database can be accessed at https://soybase.org.}, } @article {pmid33261004, year = {2020}, author = {Khadke, S and Mandave, P and Kuvalekar, A and Pandit, V and Karandikar, M and Mantri, N}, title = {Synergistic Effect of Omega-3 Fatty Acids and Oral-Hypoglycemic Drug on Lipid Normalization through Modulation of Hepatic Gene Expression in High Fat Diet with Low Streptozotocin-Induced Diabetic Rats.}, journal = {Nutrients}, volume = {12}, number = {12}, pages = {}, pmid = {33261004}, issn = {2072-6643}, mesh = {Animals ; Blood Glucose ; Diabetes Mellitus, Experimental ; Diabetes Mellitus, Type 2/chemically induced/*drug therapy ; Diet, High-Fat/*adverse effects ; Drug Synergism ; Fatty Acids, Omega-3/chemistry/*pharmacology ; Fish Oils/chemistry/pharmacology ; Gene Expression Regulation/*drug effects ; Glyburide/administration & dosage/*therapeutic use ; Hypoglycemic Agents/administration & dosage/therapeutic use ; Linseed Oil/pharmacology ; Lipids/*blood ; Liver/metabolism ; Male ; Rats ; Rats, Wistar ; }, abstract = {Type 2 diabetes mellitus, which is an outcome of impaired insulin action and its secretion, is concomitantly associated with lipid abnormalities. The study was designed to evaluate the combinational effect of omega-3 fatty acids (flax and fish oil) and glibenclamide on abnormal lipid profiles, increased blood glucose, and impaired liver and kidney functions in a high fat diet with low streptozotocin (STZ)-induced diabetic rats, including its probable mechanism of action. The male Wistar rats (n = 48) were distributed into eight groups. 
All animal groups except the healthy received a high fat diet (HFD) for 90 days. Further, diabetes was developed by low dose STZ (35 mg/kg). Diabetic animals received omega-3 fatty acids (500 mg/kg), along with glibenclamide (0.25 mg/kg). Both flax and fish oil intervention decreased (p ≤ 0.001) serum triglycerides and very low density lipoprotein and elevated (p ≤ 0.001) high density lipoprotein levels in diabetic rats. Total cholesterol and low-density lipoprotein level was decreased (p ≤ 0.001) in fish oil-treated rats. However, it remained unaffected in the flax oil treatment group. Both flax and fish oil intervention downregulate the expression of fatty acid metabolism genes, transcription factors (sterol regulatory element-binding proteins-1c and nuclear factor-κβ), and their regulatory genes i.e., acetyl-coA carboxylase alpha, fatty acid synthase, and tumor necrosis factors-α. The peroxisome proliferator-activated receptor gamma gene expression was upregulated (p ≤ 0.001) in the fish oil treatment group. 
Whereas, carnitine palmitoyltransferase 1 and fatty acid binding protein gene expression were upregulated (p ≤ 0.001) in both flax and fish oil intervention group.}, } @article {pmid33255840, year = {2020}, author = {Zhang, Y and Thomas, W and Bayer, PE and Edwards, D and Batley, J}, title = {Frontiers in Dissecting and Managing Brassica Diseases: From Reference-Based RGA Candidate Identification to Building Pan-RGAomes.}, journal = {International journal of molecular sciences}, volume = {21}, number = {23}, pages = {}, pmid = {33255840}, issn = {1422-0067}, support = {FT130100604//Australian Research Council/ ; DP160104497//Australian Research Council/ ; }, mesh = {Brassica napus/*genetics/microbiology/virology ; Chromosome Mapping ; Disease Resistance/*genetics ; Genes, Plant/genetics ; Genome, Plant/genetics ; Plant Diseases/*genetics/microbiology/virology ; Quantitative Trait Loci/*genetics ; }, abstract = {The Brassica genus contains abundant economically important vegetable and oilseed crops, which are under threat of diseases caused by fungal, bacterial and viral pathogens. Resistance gene analogues (RGAs) are associated with quantitative and qualitative disease resistance and the identification of candidate RGAs associated with disease resistance is crucial for understanding the mechanism and management of diseases through breeding. The availability of Brassica genome assemblies has greatly facilitated reference-based quantitative trait loci (QTL) mapping for disease resistance. In addition, pangenomes, which characterise both core and variable genes, have been constructed for B. rapa, B. oleracea and B. napus. Genome-wide characterisation of RGAs using conserved domains and motifs in reference genomes and pangenomes reveals their clustered arrangements and presence of structural variations. Here, we comprehensively review RGA identification in important Brassica genome and pangenome assemblies. 
Comparison of the RGAs in QTL between resistant and susceptible individuals allows for efficient identification of candidate disease resistance genes. However, the reference-based QTL mapping and RGA candidate identification approach is restricted by the under-represented RGA diversity characterised in the limited number of Brassica assemblies. The species-wide repertoire of RGAs make up the pan-resistance gene analogue genome (pan-RGAome). Building a pan-RGAome, through either whole genome resequencing or resistance gene enrichment sequencing, would effectively capture RGA diversity, greatly expanding breeding resources that can be utilised for crop improvement.}, } @article {pmid33253207, year = {2020}, author = {Yahara, H and Hiraki, A and Maruoka, Y and Hirabayashi, A and Suzuki, M and Yahara, K}, title = {Shotgun metagenome sequencing identification of a set of genes encoded by Actinomyces associated with medication-related osteonecrosis of the jaw.}, journal = {PloS one}, volume = {15}, number = {11}, pages = {e0241676}, pmid = {33253207}, issn = {1932-6203}, mesh = {Actinomyces/*genetics ; Adult ; Aged ; Aged, 80 and over ; Female ; Humans ; Jaw Diseases/*metabolism/*pathology ; Metagenome/*genetics ; Middle Aged ; Models, Biological ; Osteonecrosis/genetics/*metabolism ; Phylogeny ; }, abstract = {Medication-related osteonecrosis of the jaw (MRONJ) is intractable and severely affects a patient's quality of life. Although many cases of MRONJ have been reported in the past decade, the disease pathophysiology is unclear and there are no evidence-based therapeutic strategies. MRONJ usually features bone inflammation and infection. 
Prior studies that explored the association between MRONJ and microbial infection used the culture-based approach, which is not applicable to hundreds of unculturable taxa in the human oral microbiome, or 16S ribosomal RNA gene sequencing, which does not provide quantitative information of the abundance of specific taxa, and information of the presence, abundance, and function of specific genes in the microbiome. Here, deep shotgun metagenome sequencing (>10 Gb per sample) of bulk DNA extracted from saliva of MRONJ patients and healthy controls was performed to overcome these limitations. Comparative quantitative analyses of taxonomic and functional composition of these deep metagenomes (initially of 5 patients and 5 healthy controls) revealed an average 10.1% increase of genus Actinomyces and a 33.2% decrease in genus Streptococcus normally predominant in the human oral microbiota. Pan-genome analysis identified genes present exclusively in the MRONJ samples. Further analysis of the reads mapping to the genes in the extended dataset comprising five additional MRONJ samples and publicly available dataset of nine healthy controls resulted in the identification of 31 genes significantly associated with MRONJ. All these genes were encoded by Actinomyces genomic regions. Of these, the top two abundant genes were almost exclusively encoded by Actinomyces among usual taxa in the human oral microbiota. The potential relationships of these key genes with the disease are discussed at molecular level based on the literature. 
Although the sample size was small, this study will aid future studies to verify the data and characterize these genes in vitro and in vivo to understand the disease mechanisms, develop molecular targeted drugs, and for early stage screening and prognosis prediction.}, } @article {pmid33245329, year = {2020}, author = {Hammond, JA and Gordon, EA and Socarras, KM and Chang Mell, J and Ehrlich, GD}, title = {Beyond the pan-genome: current perspectives on the functional and practical outcomes of the distributed genome hypothesis.}, journal = {Biochemical Society transactions}, volume = {48}, number = {6}, pages = {2437-2455}, pmid = {33245329}, issn = {1470-8752}, support = {P41 RR006009/RR/NCRR NIH HHS/United States ; R01 AI080935/AI/NIAID NIH HHS/United States ; R01 DC002148/DC/NIDCD NIH HHS/United States ; U01 DK082316/DK/NIDDK NIH HHS/United States ; }, mesh = {Algorithms ; Animals ; Bacterial Infections/microbiology ; Bacterial Physiological Phenomena ; Biodiversity ; Ecology ; Evolution, Molecular ; *Genes, Bacterial ; Genetic Variation ; *Genome, Bacterial ; Genome-Wide Association Study ; Genomics ; Genotype ; Humans ; Mice ; Molecular Biology ; Multigene Family ; Phenotype ; Phylogeny ; Symbiosis ; Whole Genome Sequencing ; }, abstract = {The principle of monoclonality with regard to bacterial infections was considered immutable prior to 30 years ago. This view, espoused by Koch for acute infections, has proven inadequate regarding chronic infections as persistence requires multiple forms of heterogeneity among the bacterial population. This understanding of bacterial plurality emerged from a synthesis of what-were-then novel technologies in molecular biology and imaging science. These technologies demonstrated that bacteria have complex life cycles, polymicrobial ecologies, and evolve in situ via the horizontal exchange of genic characters. 
Thus, there is an ongoing generation of diversity during infection that results in far more highly complex microbial communities than previously envisioned. This perspective is based on the fundamental tenet that the bacteria within an infecting population display genotypic diversity, including gene possession differences, which result from horizontal gene transfer mechanisms including transformation, conjugation, and transduction. This understanding is embodied in the concepts of the supragenome/pan-genome and the distributed genome hypothesis (DGH). These paradigms have fostered multiple researches in diverse areas of bacterial ecology including host-bacterial interactions covering the gamut of symbiotic relationships including mutualism, commensalism, and parasitism. With regard to the human host, within each of these symbiotic relationships all bacterial species possess attributes that contribute to colonization and persistence; those species/strains that are pathogenic also encode traits for invasion and metastases. Herein we provide an update on our understanding of bacterial plurality and discuss potential applications in diagnostics, therapeutics, and vaccinology based on perspectives provided by the DGH with regard to the evolution of pathogenicity.}, } @article {pmid33240617, year = {2020}, author = {Hudson, LK and Constantine-Renna, L and Thomas, L and Moore, C and Qian, X and Garman, K and Dunn, JR and Denes, TG}, title = {Genomic characterization and phylogenetic analysis of Salmonella enterica serovar Javiana.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e10256}, pmid = {33240617}, issn = {2167-8359}, abstract = {Salmonella enterica serovar Javiana is the fourth most reported serovar of laboratory-confirmed human Salmonella infections in the U.S. and in Tennessee (TN). Although Salmonella ser. Javiana is a common cause of human infection, the majority of cases are sporadic in nature rather than outbreak-associated. 
To better understand Salmonella ser. Javiana microbial population structure in TN, we completed a phylogenetic analysis of 111 Salmonella ser. Javiana clinical isolates from TN collected from Jan. 2017 to Oct. 2018. We identified mobile genetic elements and genes known to confer antibiotic resistance present in the isolates, and performed a pan-genome-wide association study (pan-GWAS) to compare gene content between clades identified in this study. The population structure of TN Salmonella ser. Javiana clinical isolates consisted of three genetic clades: TN clade I (n = 54), TN clade II (n = 4), and TN clade III (n = 48). Using a 5, 10, and 25 hqSNP distance threshold for cluster identification, nine, 12, and 10 potential epidemiologically-relevant clusters were identified, respectively. The majority of genes that were found to be over-represented in specific clades were located in mobile genetic element (MGE) regions, including genes encoding integrases and phage structures (91.5%). Additionally, a large portion of the over-represented genes from TN clade II (44.9%) were located on an 87.5 kb plasmid containing genes encoding a toxin/antitoxin system (ccdAB). Additionally, we completed phylogenetic analyses of global Salmonella ser. Javiana datasets to gain a broader insight into the population structure of this serovar. We found that the global phylogeny consisted of three major clades (one of which all of the TN isolates belonged to) and two cgMLST eBurstGroups (ceBGs) and that the branch length between the two Salmonella ser. Javiana ceBGs (1,423 allelic differences) was comparable to those from other serovars that have been reported as polyphyletic (929-2,850 allelic differences). This study demonstrates the population structure of TN and global Salmonella ser. 
Javiana isolates, a clinically important Salmonella serovar and can provide guidance for phylogenetic cluster analyses for public health surveillance and response.}, } @article {pmid33240320, year = {2020}, author = {Su, F and Tian, R and Yang, Y and Li, H and Sun, G and Li, Y and Han, B and Xu, X and Chen, X and Zhao, G and Cui, H and Xu, H}, title = {Comparative Genome Analysis Reveals the Molecular Basis of Niche Adaptation of Staphylococcus epidermidis Strains.}, journal = {Frontiers in genetics}, volume = {11}, number = {}, pages = {566080}, pmid = {33240320}, issn = {1664-8021}, abstract = {Staphylococcus epidermidis is one of the most commonly isolated species from human skin and the second leading cause of bloodstream infections. Here, we performed a large-scale comparative study without any pre-assigned reference to identify genomic determinants associated with the diversity and adaptation of S. epidermidis strains to various environments. Pan-genome of S. epidermidis was open with 435 core proteins and had a pan-genome size of 8,034 proteins. Genome-wide phylogenetic tree showed high heterogeneity and suggested that routine whole genome sequencing was a powerful tool for analyzing the complex evolution of S. epidermidis and for investigating the infection sources. Comparative genome analyses demonstrated a range of antimicrobial resistance (AMR) genes, especially those within mobile genetic elements. The complicated host-bacterium and bacterium-bacterium relationships help S. epidermidis to play a vital role in balancing the epithelial microflora. The highly variable and dynamic nature of the S. epidermidis genome may contribute to its success in adapting to broad habitats. Genes related to biofilm formation and cell toxicity were significantly enriched in the blood and skin, demonstrating their potentials in identifying risk genotypes. This study gave a general landscape of S. 
epidermidis pan-genome and provided valuable insights into mechanisms for genome evolution and lifestyle adaptation of this ecologically flexible species.}, } @article {pmid33239781, year = {2020}, author = {Jayakodi, M and Padmarasu, S and Haberer, G and Bonthala, VS and Gundlach, H and Monat, C and Lux, T and Kamal, N and Lang, D and Himmelbach, A and Ens, J and Zhang, XQ and Angessa, TT and Zhou, G and Tan, C and Hill, C and Wang, P and Schreiber, M and Boston, LB and Plott, C and Jenkins, J and Guo, Y and Fiebig, A and Budak, H and Xu, D and Zhang, J and Wang, C and Grimwood, J and Schmutz, J and Guo, G and Zhang, G and Mochida, K and Hirayama, T and Sato, K and Chalmers, KJ and Langridge, P and Waugh, R and Pozniak, CJ and Scholz, U and Mayer, KFX and Spannagl, M and Li, C and Mascher, M and Stein, N}, title = {The barley pan-genome reveals the hidden legacy of mutation breeding.}, journal = {Nature}, volume = {588}, number = {7837}, pages = {284-289}, pmid = {33239781}, issn = {1476-4687}, support = {/ERC_/European Research Council/International ; }, mesh = {Chromosome Inversion/genetics ; Chromosome Mapping ; Chromosomes, Plant/*genetics ; Genetic Loci/genetics ; Genome, Plant/*genetics ; Genotype ; Hordeum/classification/*genetics ; *Internationality ; *Mutation ; *Plant Breeding ; Polymorphism, Genetic/genetics ; Reference Standards ; Seed Bank ; Sequence Inversion ; Whole Genome Sequencing ; }, abstract = {Genetic diversity is key to crop improvement. Owing to pervasive genomic structural variation, a single reference genome assembly cannot capture the full complement of sequence diversity of a crop species (known as the 'pan-genome'[1]). Multiple high-quality sequence assemblies are an indispensable component of a pan-genome infrastructure. Barley (Hordeum vulgare L.) is an important cereal crop with a long history of cultivation that is adapted to a wide range of agro-climatic conditions[2]. 
Here we report the construction of chromosome-scale sequence assemblies for the genotypes of 20 varieties of barley—comprising landraces, cultivars and a wild barley—that were selected as representatives of global barley diversity. We catalogued genomic presence/absence variants and explored the use of structural variants for quantitative genetic analysis through whole-genome shotgun sequencing of 300 gene bank accessions. We discovered abundant large inversion polymorphisms and analysed in detail two inversions that are frequently found in current elite barley germplasm; one is probably the product of mutation breeding and the other is tightly linked to a locus that is involved in the expansion of geographical range. This first-generation barley pan-genome makes previously hidden genetic variation accessible to genetic studies and breeding.}, } @article {pmid33239396, year = {2021}, author = {Baker, JL and Morton, JT and Dinis, M and Alvarez, R and Tran, NC and Knight, R and Edlund, A}, title = {Deep metagenomics examines the oral microbiome during dental caries, revealing novel taxa and co-occurrences with host molecules.}, journal = {Genome research}, volume = {31}, number = {1}, pages = {64-74}, pmid = {33239396}, issn = {1549-5469}, support = {F32 DE026947/DE/NIDCR NIH HHS/United States ; K99 DE029228/DE/NIDCR NIH HHS/United States ; R00 DE024543/DE/NIDCR NIH HHS/United States ; }, mesh = {Bacteria ; *Dental Caries ; Epstein-Barr Virus Infections ; Herpesvirus 4, Human ; Humans ; *Metagenomics ; *Microbiota/genetics ; }, abstract = {Dental caries, the most common chronic infectious disease worldwide, has a complex etiology involving the interplay of microbial and host factors that are not completely understood. In this study, the oral microbiome and 38 host cytokines and chemokines were analyzed across 23 children with caries and 24 children with healthy dentition. 
De novo assembly of metagenomic sequencing obtained 527 metagenome-assembled genomes (MAGs), representing 150 bacterial species. Forty-two of these species had no genomes in public repositories, thereby representing novel taxa. These new genomes greatly expanded the known pangenomes of many oral clades, including the enigmatic Saccharibacteria clades G3 and G6, which had distinct functional repertoires compared to other oral Saccharibacteria. Saccharibacteria are understood to be obligate epibionts, which are dependent on host bacteria. These data suggest that the various Saccharibacteria clades may rely on their hosts for highly distinct metabolic requirements, which would have significant evolutionary and ecological implications. Across the study group, Rothia, Neisseria, and Haemophilus spp. were associated with good dental health, whereas Prevotella spp., Streptococcus mutans, and Human herpesvirus 4 (Epstein-Barr virus [EBV]) were more prevalent in children with caries. Finally, 10 of the host immunological markers were significantly elevated in the caries group, and co-occurrence analysis provided an atlas of potential relationships between microbes and host immunological molecules. 
Overall, this study illustrated the oral microbiome at an unprecedented resolution and contributed several leads for further study that will increase the understanding of caries pathogenesis and guide therapeutic development.}, } @article {pmid33237299, year = {2021}, author = {Guignon, V and Toure, A and Droc, G and Dufayard, JF and Conte, M and Rouard, M}, title = {GreenPhylDB v5: a comparative pangenomic database for plant genomes.}, journal = {Nucleic acids research}, volume = {49}, number = {D1}, pages = {D1464-D1471}, pmid = {33237299}, issn = {1362-4962}, mesh = {Amino Acid Sequence ; Arabidopsis/genetics/metabolism ; Crops, Agricultural ; *Databases, Genetic ; *Gene Expression Regulation, Plant ; Gene Ontology ; *Genome, Plant ; Genomics/*methods ; Internet ; Molecular Sequence Annotation ; Multigene Family ; Oryza/genetics/metabolism ; Phylogeny ; Plant Proteins/*genetics/metabolism ; Plants/classification/*genetics/metabolism ; Protein Interaction Mapping ; Sequence Alignment ; Sequence Homology, Amino Acid ; Software ; }, abstract = {Comparative genomics is the analysis of genomic relationships among different species and serves as a significant base for evolutionary and functional genomic studies. GreenPhylDB (https://www.greenphyl.org) is a database designed to facilitate the exploration of gene families and homologous relationships among plant genomes, including staple crops critically important for global food security. GreenPhylDB has been available since 2007, after the release of the Arabidopsis thaliana and Oryza sativa genomes and has undergone multiple releases. With the number of plant genomes currently available, it becomes challenging to select a single reference for comparative genomics studies but there is still a lack of databases taking advantage of several genomes by species for orthology detection. GreenPhylDBv5 introduces the concept of comparative pangenomics by harnessing multiple genome sequences by species. 
We created 19 pangenes and processed them with other species still relying on one genome. In total, 46 plant species were considered to build gene families and predict their homologous relationships through phylogenetic-based analyses. In addition, since the previous publication, we rejuvenated the website and included a new set of original tools including protein-domain combination, tree topologies searches and a section for users to store their own results in order to support community curation efforts.}, } @article {pmid33219399, year = {2021}, author = {Khan, S and Vancuren, SJ and Hill, JE}, title = {A Generalist Lifestyle Allows Rare Gardnerella spp. to Persist at Low Levels in the Vaginal Microbiome.}, journal = {Microbial ecology}, volume = {82}, number = {4}, pages = {1048-1060}, pmid = {33219399}, issn = {1432-184X}, mesh = {Female ; Gardnerella ; Humans ; Life Style ; *Microbiota/genetics ; Vagina ; *Vaginosis, Bacterial ; }, abstract = {Gardnerella spp. are considered a hallmark of bacterial vaginosis, a dysbiosis of the vaginal microbiome. There are four cpn60 sequence-based subgroups within the genus (A, B, C and D), and thirteen genome species have been defined recently. Gardnerella spp. co-occur in the vaginal microbiome with varying abundance, and these patterns are shaped by a resource-dependent, exploitative competition, which affects the growth rate of subgroups A, B and C negatively. The growth rate of rarely abundant subgroup D, however, increases with the increasing number of competitors, negatively affecting the growth rate of others. We hypothesized that a nutritional generalist lifestyle and minimal niche overlap with the other more abundant Gardnerella spp. facilitate the maintenance of subgroup D in the vaginal microbiome through negative frequency-dependent selection. Using 40 whole-genome sequences from isolates representing all four subgroups, we found that they could be distinguished based on the content of their predicted proteomes. 
Proteins associated with carbohydrate and amino acid uptake and metabolism were significant contributors to the separation of subgroups. Subgroup D isolates had significantly more of their proteins assigned to amino acid metabolism than the other subgroups. Subgroup D isolates were also significantly different from others in terms of number and type of carbon sources utilized in a phenotypic assay, while the other three could not be distinguished. Overall, the results suggest that a generalist lifestyle and lack of niche overlap with other Gardnerella spp. leads to subgroup D being favoured by negative frequency-dependent selection in the vaginal microbiome.}, } @article {pmid33217199, year = {2020}, author = {Tahir Ul Qamar, M and Zhu, X and Khan, MS and Xing, F and Chen, LL}, title = {Pan-genome: A promising resource for noncoding RNA discovery in plants.}, journal = {The plant genome}, volume = {13}, number = {3}, pages = {e20046}, doi = {10.1002/tpg2.20046}, pmid = {33217199}, issn = {1940-3372}, mesh = {Base Sequence ; DNA Transposable Elements ; *Genome, Plant ; *RNA, Untranslated/genetics ; }, abstract = {Plant genomes contain both protein-coding and noncoding sequences including transposable elements (TEs) and noncoding RNAs (ncRNAs). The ncRNAs are recognized as important elements that play fundamental roles in the structural organization and function of plant genomes. Despite various hypotheses, TEs are believed to be a major precursor of ncRNAs. Transposable elements are also prime factors that cause genomic variation among members of a species. Hence, TEs pose a major challenge in the discovery and analysis of ncRNAs. With the increase in the number of sequenced plant genomes, it is now accepted that a single reference genome is insufficient to represent the complete genomic diversity and contents of a species, and exploring the pan-genome of a species is critical. In this review, we summarize the recent progress in the field of plant pan-genomes. 
We also discuss TEs and their roles in ncRNA biogenesis and present our perspectives on the application of pan-genomes for the discovery of ncRNAs to fully explore and exploit their biological roles in plants.}, } @article {pmid33213029, year = {2020}, author = {Dar, HA and Zaheer, T and Ullah, N and Bakhtiar, SM and Zhang, T and Yasir, M and Azhar, EI and Ali, A}, title = {Pangenome Analysis of Mycobacterium tuberculosis Reveals Core-Drug Targets and Screening of Promising Lead Compounds for Drug Discovery.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {9}, number = {11}, pages = {}, pmid = {33213029}, issn = {2079-6382}, support = {FP-1-42//THE DEANSHIP OF SCIENTIFIC RESEARCH (DSR) AT KING ABDULAZIZ UNIVERSITY, JEDDAH, SAUDI ARABIA/ ; }, abstract = {Tuberculosis, caused by Mycobacterium tuberculosis (M. tuberculosis), is one of the leading causes of human deaths globally according to the WHO TB 2019 report. The continuous rise in multi- and extensive-drug resistance in M. tuberculosis broadens the challenges to control tuberculosis. The availability of a large number of completely sequenced genomes of M. tuberculosis has provided an opportunity to explore the pangenome of the species along with the pan-phylogeny and to identify potential novel drug targets leading to drug discovery. We attempted to calculate the pangenome of M. tuberculosis that comprises a total of 150 complete genomes and performed the phylo-genomic classification and analysis. Further, the conserved core genome (1251 proteins) is subjected to various sequential filters (non-human homology, essentiality, virulence, physicochemical parameters, and pathway analysis) resulting in identification of eight putative broad-spectrum drug targets. 
Upon molecular docking analyses of these targets with ligands available at the DrugBank database shortlisted a total of five promising ligands with projected inhibitory potential; namely, 2'deoxy-thymidine-5'-diphospho-alpha-d-glucose, uridine diphosphate glucose, 2'-deoxy-thymidine-beta-l-rhamnose, thymidine-5'-triphosphate, and citicoline. We are confident that with further lead optimization and experimental validation, these lead compounds may provide a sound basis to develop safe and effective drugs against tuberculosis disease in humans.}, } @article {pmid33204772, year = {2020}, author = {Korzhenkov, AA and Toshchakov, SV and Podosokorskaya, OA and Patrushev, MV and Kublanov, IV}, title = {Data on draft genome sequence of Caldanaerobacter sp. strain 1523vc, a thermophilic bacterium, isolated from a hot spring of Uzon Caldera, (Kamchatka, Russia).}, journal = {Data in brief}, volume = {33}, number = {}, pages = {106336}, pmid = {33204772}, issn = {2352-3409}, abstract = {The draft genome sequence of Caldanaerobacter sp. strain 1523vc, a thermophilic bacterium, isolated from a hot spring of Uzon Caldera, (Kamchatka, Russia) is presented. The complete genome assembly was of 2 713 207 bp with predicted completeness of 99.38%. Genome structural annotation revealed 2674 protein-coding genes, 127 pseudogenes and 77 RNA genes. Pangenome analysis of 7 currently available high quality Caldanaerobacter spp. genomes including 1523vc revealed 4673 gene clusters. Of them, 1130 clusters formed a core genome of genus Caldanaerobacter. Of the rest 3543 Caldanaerobacter pangenome genes, 385 were exclusively represented in 1523vc genome. 101 of 2801 Caldanaerobacter CDS were found to be encoding carbohydrate-active enzymes (CAZymes). The majority of CAZymes were predicted to be involved in degradation of beta-linked polysaccharides as chitin, cellulose and hemicelluloses, reflecting the metabolism of strain 1523vc, isolated on cellulose. 
5 of 101 CAZyme genes were found to be unique for the strain 1523vc and belonged to GH23, GT56, GH15 and two CE9 family proteins. The draft genome of strain 1523vc was deposited at DDBJ/EMBL/GenBank under the accessions JABEQB000000000, PRJNA629090 and SAMN14766777 for Genome, Bioproject and Biosample, respectively.}, } @article {pmid33202901, year = {2020}, author = {Kim, J and Sung, J and Han, K and Lee, W and Mun, S and Lee, J and Bahk, K and Yang, I and Bae, YK and Kim, C and Kim, JI and Seo, JS}, title = {A High Quality Asian Genome Assembly Identifies Features of Common Missing Regions.}, journal = {Genes}, volume = {11}, number = {11}, pages = {}, pmid = {33202901}, issn = {2073-4425}, mesh = {Asian People/*genetics ; *Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; Polymerase Chain Reaction ; Repetitive Sequences, Nucleic Acid ; }, abstract = {The current human reference genome (GRCh38), with its superior quality, has contributed significantly to genome analysis. However, GRCh38 may still underrepresent the ethnic genome, specifically for Asians, though exactly what we are missing is still elusive. Here, we juxtaposed GRCh38 with a high-contiguity genome assembly of one Korean (AK1) to show that a part of AK1 genome is missing in GRCh38 and that the missing regions harbored ~1390 putative coding elements. Furthermore, we found that multiple populations shared some certain parts in the missing genome when we analyzed the "unmapped" (to GRCh38) reads of fourteen individuals (five East-Asians, four Europeans, and five Africans), amounting to ~5.3 Mb (~0.2% of AK1) of the total genomic regions. The recovered AK1 regions from the "unmapped reads", which were the estimated missing regions that did not exist in GRCh38, harbored candidate coding elements. We verified that most of the common (shared by ≥7 individuals) missing regions exist in human and chimpanzee DNA. 
Moreover, we further identified the occurrence mechanism and ethnic heterogeneity as well as the presence of the common missing regions. This study illuminates a potential advantage of using a pangenome reference and brings up the need for further investigations on the various features of regions globally missed in GRCh38.}, } @article {pmid33193209, year = {2020}, author = {Kalam, S and Basu, A and Ahmad, I and Sayyed, RZ and El-Enshasy, HA and Dailin, DJ and Suriani, NL}, title = {Recent Understanding of Soil Acidobacteria and Their Ecological Significance: A Critical Review.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {580024}, pmid = {33193209}, issn = {1664-302X}, abstract = {Acidobacteria represents an underrepresented soil bacterial phylum whose members are pervasive and copiously distributed across nearly all ecosystems. Acidobacterial sequences are abundant in soils and represent a significant fraction of soil microbial community. Being recalcitrant and difficult-to-cultivate under laboratory conditions, holistic, polyphasic approaches are required to study these refractive bacteria extensively. Acidobacteria possesses an inventory of genes involved in diverse metabolic pathways, as evidenced by their pan-genomic profiles. Because of their preponderance and ubiquity in the soil, speculations have been made regarding their dynamic roles in vital ecological processes viz., regulation of biogeochemical cycles, decomposition of biopolymers, exopolysaccharide secretion, and plant growth promotion. These bacteria are expected to have genes that might help in survival and competitive colonization in the rhizosphere, leading to the establishment of beneficial relationships with plants. Exploration of these genetic attributes and more in-depth insights into the belowground mechanics and dynamics would lead to a better understanding of the functions and ecological significance of this enigmatic phylum in the soil-plant environment. 
This review is an effort to provide a recent update into the diversity of genes in Acidobacteria useful for characterization, understanding ecological roles, and future biotechnological perspectives.}, } @article {pmid33193203, year = {2020}, author = {Li, X and Lin, J and Hu, Y and Zhou, J}, title = {PARMAP: A Pan-Genome-Based Computational Framework for Predicting Antimicrobial Resistance.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {578795}, pmid = {33193203}, issn = {1664-302X}, abstract = {Antimicrobial resistance (AMR) has emerged as one of the most urgent global threats to public health. Accurate detection of AMR phenotypes is critical for reducing the spread of AMR strains. Here, we developed PARMAP (Prediction of Antimicrobial Resistance by MAPping genetic alterations in pan-genome) to predict AMR phenotypes and to identify AMR-associated genetic alterations based on the pan-genome of bacteria by utilizing machine learning algorithms. When we applied PARMAP to 1,597 Neisseria gonorrhoeae strains, it successfully predicted their AMR phenotypes based on a pan-genome analysis. Furthermore, it identified 328 genetic alterations in 23 known AMR genes and discovered many new AMR-associated genetic alterations in ciprofloxacin-resistant N. gonorrhoeae, and it clearly indicated the genetic heterogeneity of AMR genes in different subtypes of resistant N. gonorrhoeae. Additionally, PARMAP performed well in predicting the AMR phenotypes of Mycobacterium tuberculosis and Escherichia coli, indicating the robustness of the PARMAP framework. In conclusion, PARMAP not only precisely predicts the AMR of a population of strains of a given species but also uses whole-genome sequencing data to prioritize candidate AMR-associated genetic alterations based on their likelihood of contributing to AMR. 
Thus, we believe that PARMAP will accelerate investigations into AMR mechanisms in other human pathogens.}, } @article {pmid33193173, year = {2020}, author = {Yuan, C and Wei, Y and Zhang, S and Cheng, J and Cheng, X and Qian, C and Wang, Y and Zhang, Y and Yin, Z and Chen, H}, title = {Comparative Genomic Analysis Reveals Genetic Mechanisms of the Variety of Pathogenicity, Antibiotic Resistance, and Environmental Adaptation of Providencia Genus.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {572642}, pmid = {33193173}, issn = {1664-302X}, abstract = {The bacterial genus Providencia comprises Gram-negative opportunistic pathogens, which have been isolated from a variety of environments and organisms, ranging from humans to animals. Providencia alcalifaciens, Providencia rettgeri, and Providencia stuartii are the most common clinical isolates; however, these three species differ in their pathogenicity, antibiotic resistance and environmental adaptation. Genomes of 91 isolates of the genus Providencia were investigated to clarify their genetic diversity, focusing on virulence factors, antibiotic resistance genes, and environmental adaptation genes. Our study revealed an open pan-genome for the genus Providencia containing 14,720 gene families. Species of the genus Providencia exhibited different functional constraints, with the core genes, accessory genes, and unique genes. A maximum-likelihood phylogeny reconstructed with concatenated single-copy core genes classified all Providencia isolates into 11 distant groups. Comprehensive and systematic comparative genomic analyses revealed that specific distributions of virulence genes, which were highly homologous to virulence genes of the genus Proteus, contributed to diversity in pathogenicity of Providencia alcalifaciens, Providencia rettgeri, and Providencia stuartii. 
Furthermore, multidrug resistance (MDR) phenotypes of isolates of Providencia rettgeri and Providencia stuartii were predominantly due to resistance genes from class 1 and 2 integrons. In addition, Providencia rettgeri and Providencia stuartii harbored more genes related to material transport and energy metabolism, which conferred a stronger ability to adapt to diverse environments. Overall, our study provided valuable insights into the genetic diversity and functional features of the genus Providencia, and revealed genetic mechanisms underlying diversity in pathogenicity, antibiotic resistance and environmental adaptation of members of this genus.}, } @article {pmid33184704, year = {2021}, author = {Gao, L and Koo, DH and Juliana, P and Rife, T and Singh, D and Lemes da Silva, C and Lux, T and Dorn, KM and Clinesmith, M and Silva, P and Wang, X and Spannagl, M and Monat, C and Friebe, B and Steuernagel, B and Muehlbauer, GJ and Walkowiak, S and Pozniak, C and Singh, R and Stein, N and Mascher, M and Fritz, A and Poland, J}, title = {The Aegilops ventricosa 2N[v]S segment in bread wheat: cytology, genomics and breeding.}, journal = {TAG. Theoretical and applied genetics. 
Theoretische und angewandte Genetik}, volume = {134}, number = {2}, pages = {529-542}, pmid = {33184704}, issn = {1432-2242}, support = {1339389//National Science Foundation/ ; AID-OAA-A-13-00051//United States Agency for International Development/ ; }, mesh = {Aegilops/genetics/*growth & development/microbiology ; Basidiomycota/*physiology ; Bread ; Chromosome Mapping ; Chromosomes, Plant/genetics ; *Gene Expression Regulation, Plant ; Genetic Markers ; *Plant Breeding ; Plant Diseases/*genetics/microbiology ; Plant Proteins/genetics/*metabolism ; Triticum/genetics/*growth & development/microbiology ; }, abstract = {The first cytological characterization of the 2N[v]S segment in hexaploid wheat; complete de novo assembly and annotation of 2N[v]S segment; 2N[v]S frequency is increasing 2N[v]S and is associated with higher yield. The Aegilops ventricosa 2N[v]S translocation segment has been utilized in breeding disease-resistant wheat crops since the early 1990s. This segment is known to possess several important resistance genes against multiple wheat diseases including root knot nematode, stripe rust, leaf rust and stem rust. More recently, this segment has been associated with resistance to wheat blast, an emerging and devastating wheat disease in South America and Asia. To date, full characterization of the segment including its size, gene content and its association with grain yield is lacking. Here, we present a complete cytological and physical characterization of this agronomically important translocation in bread wheat. We de novo assembled the 2N[v]S segment in two wheat varieties, 'Jagger' and 'CDC Stanley,' and delineated the segment to be approximately 33 Mb. A total of 535 high-confidence genes were annotated within the 2N[v]S region, with > 10% belonging to the nucleotide-binding leucine-rich repeat (NLR) gene families. 
Identification of groups of NLR genes that are potentially N genome-specific and expressed in specific tissues can fast-track testing of candidate genes playing roles in various disease resistances. We also show the increasing frequency of 2N[v]S among spring and winter wheat breeding programs over two and a half decades, and the positive impact of 2N[v]S on wheat grain yield based on historical datasets. The significance of the 2N[v]S segment in wheat breeding due to resistance to multiple diseases and a positive impact on yield highlights the importance of understanding and characterizing the wheat pan-genome for better insights into molecular breeding for wheat improvement.}, } @article {pmid33183231, year = {2020}, author = {Piza-Buitrago, A and Rincón, V and Donato, J and Saavedra, SY and Duarte, C and Morero, J and Falquet, L and Reguero, MT and Barreto-Hernández, E}, title = {Genome-based characterization of two Colombian clinical Providencia rettgeri isolates co-harboring NDM-1, VIM-2, and other β-lactamases.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {345}, pmid = {33183231}, issn = {1471-2180}, support = {FP44842-155-2015//COLCIENCIAS/ ; 35030//DIEB, Universidad Nacional de Colombia CO)/ ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/*genetics ; Colombia ; Drug Resistance, Multiple, Bacterial/genetics ; Enterobacteriaceae Infections/*microbiology ; Genome, Bacterial/genetics ; Humans ; Male ; Microbial Sensitivity Tests ; Providencia/drug effects/*genetics/isolation & purification ; beta-Lactam Resistance/genetics ; beta-Lactamases/*genetics ; }, abstract = {BACKGROUND: Providencia rettgeri is a nosocomial pathogen associated with urinary tract infections and related to Healthcare-Associated Infection (HAI). In recent years isolates producing New Delhi Metallo-β-lactamase (NDM) and other β-lactamases have been reported that reduce the efficiency of clinical antimicrobial treatments. 
In this study, we analyzed antibiotic resistance, the presence of resistance genes and the clonal relationship of two P. rettgeri isolates obtained from male patients admitted to the same hospital in Bogotá - Colombia, 2015.

RESULTS: Antibiotic susceptibility profile evaluated by the Kirby-Bauer method revealed that both isolates were resistant to third-generation carbapenems and cephalosporins. Whole-genome sequencing (Illumina HiSeq) followed by SPAdes assembling, Prokka annotation in combination with an in-house Python program and resistance gene detection by ResFinder identified the same six β-lactamase genes in both isolates: blaNDM-1, blaVIM-2, blaCTX-M-15, blaOXA-10, blaCMY-2 and blaTEM-1. Additionally, various resistance genes associated with antibiotic target alteration (arnA, PmrE, PmrF, LpxA, LpxC, gyrB, folP, murA, rpoB, rpsL, tet34) were found and four efflux pumps (RosAB, EmrD, mdtH and cmlA). The additional resistance to gentamicin in one of the two isolates could be explained by a detected SNP in CpxA (Cys191Arg) which is involved in the stress response of the bacterial envelope. Genome BLAST comparison using CGView, the ANI value (99.99%) and the pangenome (using Roary) phylogenetic tree (same clade, small distance) showed high similarity between the isolates. The rMLST analysis indicated that both isolates were typed as rST-61,696, same as the RB151 isolate previously isolated in Bucaramanga, Colombia, 2013, and the FDAARGOS_330 isolate isolated in the USA, 2015.

CONCLUSIONS: We report the coexistence of the carbapenemase genes blaNDM-1, and blaVIM-2, together with the β-lactamase genes blaCTX-M-15, blaOXA-10, blaCMY-2 and blaTEM-1, in P. rettgeri isolates from two patients in Colombia. Whole-genome sequence analysis indicated a circulation of P. rettgeri rST-61,696 strains in America that needs to be investigated further.}, } @article {pmid33174833, year = {2020}, author = {Pandey, A and Humbert, MV and Jackson, A and Passey, JL and Hampson, DJ and Cleary, DW and La Ragione, RM and Christodoulides, M}, title = {Evidence of homologous recombination as a driver of diversity in Brachyspira pilosicoli.}, journal = {Microbial genomics}, volume = {6}, number = {12}, pages = {}, pmid = {33174833}, issn = {2057-5858}, mesh = {Animals ; Australia ; Brachyspira/*classification/genetics ; Chickens/*microbiology ; Computational Biology/*methods ; Computer Simulation ; Evolution, Molecular ; Genomics ; *Homologous Recombination ; Phylogeny ; Phylogeography ; Sequence Analysis, DNA ; United Kingdom ; }, abstract = {The enteric, pathogenic spirochaete Brachyspira pilosicoli colonizes and infects a variety of birds and mammals, including humans. However, there is a paucity of genomic data available for this organism. This study introduces 12 newly sequenced draft genome assemblies, boosting the cohort of examined isolates by fourfold and cataloguing the intraspecific genomic diversity of the organism more comprehensively. We used several in silico techniques to define a core genome of 1751 genes and qualitatively and quantitatively examined the intraspecific species boundary using phylogenetic analysis and average nucleotide identity, before contextualizing this diversity against other members of the genus Brachyspira. Our study revealed that an additional isolate that was unable to be species typed against any other Brachyspira lacked putative virulence factors present in all other isolates. 
Finally, we quantified that homologous recombination has as great an effect on the evolution of the core genome of the B. pilosicoli as random mutation (r/m=1.02). Comparative genomics has informed Brachyspira diversity, population structure, host specificity and virulence. The data presented here can be used to contribute to developing advanced screening methods, diagnostic assays and prophylactic vaccines against this zoonotic pathogen.}, } @article {pmid33173909, year = {2020}, author = {Lau, BT and Pavlichin, D and Hooker, AC and Almeda, A and Shin, G and Chen, J and Sahoo, MK and Huang, C and Pinsky, BA and Lee, H and Ji, HP}, title = {Profiling SARS-CoV-2 mutation fingerprints that range from the viral pangenome to individual infection quasispecies.}, journal = {medRxiv : the preprint server for health sciences}, volume = {}, number = {}, pages = {}, pmid = {33173909}, support = {P01 HG000205/HG/NHGRI NIH HHS/United States ; R01 HG006137/HG/NHGRI NIH HHS/United States ; R35 HG011292/HG/NHGRI NIH HHS/United States ; U01 HG010963/HG/NHGRI NIH HHS/United States ; }, abstract = {BACKGROUND: The genome of SARS-CoV-2 is susceptible to mutations during viral replication due to the errors generated by RNA-dependent RNA polymerases. These mutations enable the SARS-CoV-2 to evolve into new strains. Viral quasispecies emerge from de novo mutations that occur in individual patients. In combination, these sets of viral mutations provide distinct genetic fingerprints that reveal the patterns of transmission and have utility in contact tracing.

METHODS: Leveraging thousands of sequenced SARS-CoV-2 genomes, we performed a viral pangenome analysis to identify conserved genomic sequences. We used a rapid and highly efficient computational approach that relies on k-mers, short tracts of sequence, instead of conventional sequence alignment. Using this method, we annotated viral mutation signatures that were associated with specific strains. Based on these highly conserved viral sequences, we developed a rapid and highly scalable targeted sequencing assay to identify mutations, detect quasispecies and identify mutation signatures from patients. These results were compared to the pangenome genetic fingerprints.

RESULTS: We built a k-mer index for thousands of SARS-CoV-2 genomes and identified conserved genomics regions and landscape of mutations across thousands of virus genomes. We delineated mutation profiles spanning common genetic fingerprints (the combination of mutations in a viral assembly) and rare ones that occur in only small fraction of patients. We developed a targeted sequencing assay by selecting primers from the conserved viral genome regions to flank frequent mutations. Using a cohort of SARS-CoV-2 clinical samples, we identified genetic fingerprints consisting of strain-specific mutations seen across populations and de novo quasispecies mutations localized to individual infections. We compared the mutation profiles of viral samples undergoing analysis with the features of the pangenome.

CONCLUSIONS: We conducted an analysis for viral mutation profiles that provide the basis of genetic fingerprints. Our study linked pangenome analysis with targeted deep sequenced SARS-CoV-2 clinical samples. We identified quasispecies mutations occurring within individual patients, mutations demarcating dominant species and the prevalence of mutation signatures, of which a significant number were relatively unique. Analysis of these genetic fingerprints may provide a way of conducting molecular contact tracing.}, } @article {pmid33171694, year = {2020}, author = {Drijver, EPMD and Stohr, JJJM and Verweij, JJ and Verhulst, C and Velkers, FC and Stegeman, A and Bergh, MFQKD and Kluytmans, JAJW and Group, IS}, title = {Limited Genetic Diversity of blaCMY-2-Containing IncI1-pST12 Plasmids from Enterobacteriaceae of Human and Broiler Chicken Origin in The Netherlands.}, journal = {Microorganisms}, volume = {8}, number = {11}, pages = {}, pmid = {33171694}, issn = {2076-2607}, support = {Interreg V Flanders-The Netherlands program//European Regional Development Fund/ ; }, abstract = {Distinguishing epidemiologically related and unrelated plasmids is essential to confirm plasmid transmission. We compared IncI1-pST12 plasmids from both human and livestock origin and explored the degree of sequence similarity between plasmids from Enterobacteriaceae with different epidemiological links. Short-read sequence data of Enterobacteriaceae cultured from humans and broilers were screened for the presence of both a blaCMY-2 gene and an IncI1-pST12 replicon. Isolates were long-read sequenced on a MinION sequencer (OxfordNanopore Technologies). After plasmid reconstruction using hybrid assembly, pairwise single nucleotide polymorphisms (SNPs) were determined. The plasmids were annotated, and a pan-genome was constructed to compare genes variably present between the different plasmids. 
Nine Escherichia coli sequences of broiler origin, four Escherichia coli sequences, and one Salmonella enterica sequence of human origin were selected for the current analysis. A circular contig with the IncI1-pST12 replicon and blaCMY-2 gene was extracted from the assembly graph of all fourteen isolates. Analysis of the IncI1-pST12 plasmids revealed a low number of SNP differences (range of 0-9 SNPs). The range of SNP differences overlapped in isolates with different epidemiological links. One-hundred and twelve from a total of 113 genes of the pan-genome were present in all plasmid constructs. Next generation sequencing analysis of blaCMY-2-containing IncI1-pST12 plasmids isolated from Enterobacteriaceae with different epidemiological links show a high degree of sequence similarity in terms of SNP differences and the number of shared genes. Therefore, statements on the horizontal transfer of these plasmids based on genetic identity should be made with caution.}, } @article {pmid33168033, year = {2020}, author = {Gerdol, M and Moreira, R and Cruz, F and Gómez-Garrido, J and Vlasova, A and Rosani, U and Venier, P and Naranjo-Ortiz, MA and Murgarella, M and Greco, S and Balseiro, P and Corvelo, A and Frias, L and Gut, M and Gabaldón, T and Pallavicini, A and Canchaya, C and Novoa, B and Alioto, TS and Posada, D and Figueras, A}, title = {Massive gene presence-absence variation shapes an open pan-genome in the Mediterranean mussel.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {275}, pmid = {33168033}, issn = {1474-760X}, mesh = {Animals ; Base Sequence ; Biological Evolution ; Female ; *Genome ; Genomics ; Humans ; Immunity, Innate ; Male ; Mytilus/anatomy & histology/*genetics ; Peptide Elongation Factor 1 ; Pore Forming Cytotoxic Proteins ; }, abstract = {BACKGROUND: The Mediterranean mussel Mytilus galloprovincialis is an ecologically and economically relevant edible marine bivalve, highly invasive and resilient to biotic and abiotic 
stressors causing recurrent massive mortalities in other bivalves. Although these traits have been recently linked with the maintenance of a high genetic variation within natural populations, the factors underlying the evolutionary success of this species remain unclear.

RESULTS: Here, after the assembly of a 1.28-Gb reference genome and the resequencing of 14 individuals from two independent populations, we reveal a complex pan-genomic architecture in M. galloprovincialis, with a core set of 45,000 genes plus a strikingly high number of dispensable genes (20,000) subject to presence-absence variation, which may be entirely missing in several individuals. We show that dispensable genes are associated with hemizygous genomic regions affected by structural variants, which overall account for nearly 580 Mb of DNA sequence not included in the reference genome assembly. As such, this is the first study to report the widespread occurrence of gene presence-absence variation at a whole-genome scale in the animal kingdom.

CONCLUSIONS: Dispensable genes usually belong to young and recently expanded gene families enriched in survival functions, which might be the key to explain the resilience and invasiveness of this species. This unique pan-genome architecture is characterized by dispensable genes in accessory genomic regions that exceed by orders of magnitude those observed in other metazoans, including humans, and closely mirror the open pan-genomes found in prokaryotes and in a few non-metazoan eukaryotes.}, } @article {pmid33163077, year = {2020}, author = {Vasilyev, IY and Nikolaeva, IV and Siniagina, MN and Kharchenko, AM and Shaikhieva, GS}, title = {Multidrug-Resistant Hypervirulent Klebsiella pneumoniae Found Persisting Silently in Infant Gut Microbiota.}, journal = {International journal of microbiology}, volume = {2020}, number = {}, pages = {4054393}, pmid = {33163077}, issn = {1687-918X}, abstract = {Since the spread of multidrug-resistant Klebsiella pneumoniae (MDRKP) strains is considered as a challenge for patients with weakened or suppressed immunity, the emergence of isolates carrying determinants of hypervirulent phenotypes in addition may become a serious problem even for healthy individuals. The aim of this study is an investigation of the nonoutbreak K. pneumoniae emergence occurred in early 2017 at a maternity hospital of Kazan, Russia. Ten bacterial isolates demonstrating multiple drug resistance phenotypes were collected from eight healthy full-term breastfed neonates, observed at the maternity hospital of Kazan, Russia. All the infants and their mothers were dismissed without symptoms or complaints, in a satisfactory condition. Whole-genome shotgun (WGS) sequencing was performed with the purpose to track down a possible spread source(s) and obtain detailed information about resistance determinants and pathogenic potential of the collected isolates. 
Microdilution tests have confirmed production of extended-spectrum β-lactamases (ESBL) and their resistance to aminoglycoside, β-lactam, fluoroquinolone, sulfonamide, nitrofurantoin, trimethoprim, and fosfomycin antibiotics and Klebsiella phage. The WGS analysis has revealed the genes that are resistant to aminoglycosides, fluoroquinolones, macrolides, sulfonamides, chloramphenicols, tetracyclines, and trimethoprim and ESBL determinants. The pangenome analysis had split the isolates into two phylogenetic clades. The first group, a more heterogeneous clade, was represented by 5 isolates with 4 different in silico multilocus sequence types (MLSTs). The second group contained 5 isolates from infants born vaginally with the single MLST ST23, positive for genes corresponding to hypervirulent phenotypes: yersiniabactin, aerobactin, salmochelin, colibactin, hypermucoid determinants, and specific alleles of K- and O-antigens. The source of the MDRKP spread was not defined. Infected infants have shown no developed disease symptoms.}, } @article {pmid33152994, year = {2020}, author = {Lugli, GA and Tarracchini, C and Alessandri, G and Milani, C and Mancabelli, L and Turroni, F and Neuzil-Bunesova, V and Ruiz, L and Margolles, A and Ventura, M}, title = {Decoding the Genomic Variability among Members of the Bifidobacterium dentium Species.}, journal = {Microorganisms}, volume = {8}, number = {11}, pages = {}, pmid = {33152994}, issn = {2076-2607}, abstract = {Members of the Bifidobacterium dentium species are usually identified in the oral cavity of humans and associated with the development of plaque and dental caries. Nevertheless, they have also been detected from fecal samples, highlighting a widespread distribution among mammals. To explore the genetic variability of this species, we isolated and sequenced the genomes of 18 different B. dentium strains collected from fecal samples of several primate species and an Ursus arctos. 
Thus, we investigated the genomic variability and metabolic abilities of the new B. dentium isolates together with 20 public genome sequences. Comparative genomic analyses provided insights into the vast metabolic repertoire of the species, highlighting 19 glycosyl hydrolases families shared between each analyzed strain. Phylogenetic analysis of the B. dentium taxon, involving 1140 conserved genes, revealed a very close phylogenetic relatedness among members of this species. Furthermore, low genomic variability between strains was also confirmed by an average nucleotide identity analysis showing values higher than 98.2%. Investigating the genetic features of each strain, few putative functional mobile elements were identified. Besides, a consistent occurrence of defense mechanisms such as CRISPR-Cas and restriction-modification systems may be responsible for the high genome synteny identified among members of this taxon.}, } @article {pmid33151900, year = {2021}, author = {Faillot, S and Foulonneau, T and Néou, M and Espiard, S and Garinet, S and Vaczlavik, A and Jouinot, A and Rondof, W and Septier, A and Drougat, L and Hécale-Perlemoine, K and Ragazzon, B and Rizk-Rabin, M and Sibony, M and Bonnet-Serrano, F and Guibourdenche, J and Libé, R and Groussin, L and Dousset, B and de Reyniès, A and Bertherat, J and Assié, G}, title = {Genomic classification of benign adrenocortical lesions.}, journal = {Endocrine-related cancer}, volume = {28}, number = {1}, pages = {79-95}, doi = {10.1530/ERC-20-0128}, pmid = {33151900}, issn = {1479-6821}, mesh = {Adrenocortical Adenoma/*genetics ; Genomics/*methods ; Humans ; }, abstract = {Benign adrenal tumors cover a spectrum of lesions with distinct morphology and steroid secretion. Current classification is empirical. Beyond a few driver mutations, pathophysiology is not well understood. 
Here, a pangenomic characterization of benign adrenocortical tumors is proposed, aiming at unbiased classification and new pathophysiological insights. Benign adrenocortical tumors (n = 146) were analyzed by transcriptome, methylome, miRNome, chromosomal alterations and mutational status, using expression arrays, methylation arrays, miRNA sequencing, SNP arrays, and exome or targeted next-generation sequencing respectively. Pathological and hormonal data were collected for all tumors. Pangenomic analysis identifies four distinct molecular categories: (1) tumors responsible for overt Cushing, gathering distinct tumor types, sharing a common cAMP/PKA pathway activation by distinct mechanisms; (2) adenomas with mild autonomous cortisol excess and non-functioning adenomas, associated with beta-catenin mutations; (3) primary macronodular hyperplasia with ARMC5 mutations, showing an ovarian expression signature; (4) aldosterone-producing adrenocortical adenomas, apart from other benign tumors. Epigenetic alterations and steroidogenesis seem associated, including CpG island hypomethylation in tumors with no or mild cortisol secretion, miRNA patterns defining specific molecular groups, and direct regulation of steroidogenic enzyme expression by methylation. Chromosomal alterations and somatic mutations are subclonal, found in less than 2/3 of cells. New pathophysiological insights, including distinct molecular signatures supporting the difference between mild autonomous cortisol excess and overt Cushing, ARMC5 implication into the adreno-gonadal differentiation fate, and the subclonal nature of driver alterations in benign tumors, will orient future research. 
This first genomic classification provides a large amount of data as a starting point.}, } @article {pmid33150080, year = {2020}, author = {Dahlhausen, KE and Jospin, G and Coil, DA and Eisen, JA and Wilkins, LGE}, title = {Isolation and sequence-based characterization of a koala symbiont: Lonepinella koalarum.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e10177}, pmid = {33150080}, issn = {2167-8359}, abstract = {Koalas (Phascolarctos cinereus) are highly specialized herbivorous marsupials that feed almost exclusively on Eucalyptus leaves, which are known to contain varying concentrations of many different toxic chemical compounds. The literature suggests that Lonepinella koalarum, a bacterium in the Pasteurellaceae family, can break down some of these toxic chemical compounds. Furthermore, in a previous study, we identified L. koalarum as the most predictive taxon of koala survival during antibiotic treatment. Therefore, we believe that this bacterium may be important for koala health. Here, we isolated a strain of L. koalarum from a healthy koala female and sequenced its genome using a combination of short-read and long-read sequencing. We placed the genome assembly into a phylogenetic tree based on 120 genome markers using the Genome Taxonomy Database (GTDB), which currently does not include any L. koalarum assemblies. Our genome assembly fell in the middle of a group of Haemophilus, Pasteurella and Basfia species. According to average nucleotide identity and a 16S rRNA gene tree, the closest relative of our isolate is L. koalarum strain Y17189. Then, we annotated the gene sequences and compared them to 55 closely related, publicly available genomes. Several genes that are known to be involved in carbohydrate metabolism could exclusively be found in L. koalarum relative to the other taxa in the pangenome, including glycoside hydrolase families GH2, GH31, GH32, GH43 and GH77. Among the predicted genes of L. 
koalarum were 79 candidates putatively involved in the degradation of plant secondary metabolites. Additionally, several genes coding for amino acid variants were found that had been shown to confer antibiotic resistance in other bacterial species against pulvomycin, beta-lactam antibiotics and the antibiotic efflux pump KpnH. In summary, this genetic characterization allows us to build hypotheses to explore the potentially beneficial role that L. koalarum might play in the koala intestinal microbiome. Characterizing and understanding beneficial symbionts at the whole genome level is important for the development of anti- and probiotic treatments for koalas, a highly threatened species due to habitat loss, wildfires, and high prevalence of Chlamydia infections.}, } @article {pmid33144553, year = {2021}, author = {Kim, E and Cho, EJ and Yang, SM and Kim, HY}, title = {Identification and Monitoring of Lactobacillus delbrueckii Subspecies Using Pangenomic-Based Novel Genetic Markers.}, journal = {Journal of microbiology and biotechnology}, volume = {31}, number = {2}, pages = {280-289}, pmid = {33144553}, issn = {1738-8872}, mesh = {Bacterial Typing Techniques ; Genetic Markers ; Genome, Bacterial ; Lactobacillus delbrueckii/*classification/*genetics/isolation & purification/metabolism ; Polymerase Chain Reaction ; Species Specificity ; }, abstract = {Genetic markers currently used for the discrimination of Lactobacillus delbrueckii subspecies have low efficiency for identification at subspecies level. Therefore, our objective in this study was to select novel genetic markers for accurate identification and discrimination of six L. delbrueckii subspecies based on pangenome analysis. We evaluated L. delbrueckii genomes to avoid making incorrect conclusions in the process of selecting genetic markers due to mislabeled genomes. Genome analysis showed that two genomes of L. delbrueckii subspecies deposited at NCBI were misidentified. 
Based on these results, subspecies-specific genetic markers were selected by comparing the core and pangenomes. Genetic markers were confirmed to be specific for 59,196,562 genome sequences via in silico analysis. They were found in all strains of the same subspecies, but not in other subspecies or bacterial strains. These genetic markers also could be used to accurately identify genomes at the subspecies level for genomes known at the species level. A real-time PCR method for detecting three main subspecies (L. delbrueckii subsp. delbrueckii, lactis, and bulgaricus) was developed to cost-effectively identify them using genetic markers. Results showed 100% specificity for each subspecies. These genetic markers could differentiate each subspecies from 44 other lactic acid bacteria. This real-time PCR method was then applied to monitor 26 probiotics and dairy products. It was also used to identify 64 unknown strains isolated from raw milk samples and dairy products. Results confirmed that unknown isolates and subspecies contained in the product could be accurately identified using this real-time PCR method.}, } @article {pmid33139952, year = {2020}, author = {Sun, X and Jiao, C and Schwaninger, H and Chao, CT and Ma, Y and Duan, N and Khan, A and Ban, S and Xu, K and Cheng, L and Zhong, GY and Fei, Z}, title = {Phased diploid genome assemblies and pan-genomes provide insights into the genetic history of apple domestication.}, journal = {Nature genetics}, volume = {52}, number = {12}, pages = {1423-1432}, pmid = {33139952}, issn = {1546-1718}, mesh = {*Domestication ; Evolution, Molecular ; Fruit/genetics ; Genome, Plant/genetics ; Hybridization, Genetic/*genetics ; Malus/*classification/*genetics ; }, abstract = {Domestication of the apple was mainly driven by interspecific hybridization. In the present study, we report the haplotype-resolved genomes of the cultivated apple (Malus domestica cv. Gala) and its two major wild progenitors, M. sieversii and M. sylvestris. 
Substantial variations are identified between the two haplotypes of each genome. Inference of genome ancestry identifies ~23% of the Gala genome as of hybrid origin. Deep sequencing of 91 accessions identifies selective sweeps in cultivated apples that originated from either of the two progenitors and are associated with important domestication traits. Construction and analyses of apple pan-genomes uncover thousands of new genes, with hundreds of them being selected from one of the progenitors and largely fixed in cultivated apples, revealing that introgression of new genes/alleles is a hallmark of apple domestication through hybridization. Finally, transcriptome profiles of Gala fruits at 13 developmental stages unravel ~19% of genes displaying allele-specific expression, including many associated with fruit quality.}, } @article {pmid33139862, year = {2020}, author = {Bayer, PE and Golicz, AA and Scheben, A and Batley, J and Edwards, D}, title = {Author Correction: Plant pan-genomes are the new reference.}, journal = {Nature plants}, volume = {6}, number = {11}, pages = {1389}, doi = {10.1038/s41477-020-00776-y}, pmid = {33139862}, issn = {2055-0278}, abstract = {An amendment to this paper has been published and can be accessed via a link at the top of the paper.}, } @article {pmid33135163, year = {2021}, author = {Zilch, TJ and Lee, JJ and Bressan, GC and McDonough, SP and Mohammed, HO and Divers, TJ and Chang, YF}, title = {Evaluation of new leptospiral antigens for the diagnosis of equine leptospirosis: An approach using pan-genomic analysis, reverse vaccinology and antigenic selection.}, journal = {Equine veterinary journal}, volume = {53}, number = {5}, pages = {1025-1035}, doi = {10.1111/evj.13380}, pmid = {33135163}, issn = {2042-3306}, mesh = {Agglutination Tests/veterinary ; Animals ; Antibodies, Bacterial ; Antigens, Bacterial ; Cross-Sectional Studies ; Genomics ; *Horse Diseases/diagnosis ; Horses ; *Leptospira/genetics ; 
*Leptospirosis/diagnosis/veterinary ; Vaccinology ; }, abstract = {BACKGROUND: The current gold standard diagnostic test for leptospirosis is the microscopic agglutination test (MAT), which has many drawbacks; therefore, the development of a better and easier serological test for leptospirosis is needed.

OBJECTIVES: To apply reverse vaccinology (RV) and antigenic selection on the assortment of leptospiral targets and evaluate their potential for use as reagents for the diagnosis of equine leptospirosis.

STUDY DESIGN: Cross-sectional study.

METHODS: The antigenic selection parameters were: proteins with antigenicity score ≥0.5 (VaxiJen), at least one B cell epitope and size between 10 and 275 kDa. New leptospiral proteins were cloned, expressed and serologically screened against equine sera (n = 128) on a single analysis and comparative combinations. Sensitivity (Se) and specificity (Sp), accuracy, positive predictive value (PPV) and negative predictive value (NPV) were calculated. A BLAST with nucleotide and protein sequences was used to identify the serovar or species specificity.

MAIN LIMITATIONS: This cross-sectional analysis had three main limitations: (a) The equine sera used in these tests were limited to sera submitted to the Animal Health Diagnosis Center and were only tested against seven serovars; (b) MAT results were considered to be 'perfect', and the highest titre presented was considered to be the infecting serovar, which may not hold true; (c) The strains used to represent the serovars and the limited number of different serovars and species included in the genetic analysis, which leads to the possibility that these proteins might be present in different species or serovars that perhaps would be seroprevalent in another geographic region.

CONCLUSIONS: The new leptospiral antigens described in this research could increase the sensitivity and specificity of ELISA for detection of Leptospira exposure and the detection of leptospirosis in horses along with support from other clinical signs. Some of these new antigens might be used to improve the detection of infecting serovar.}, } @article {pmid33129664, year = {2021}, author = {Rogalski, E and Ehrmann, MA and Vogel, RF}, title = {Intraspecies diversity and genome-phenotype-associations in Fructilactobacillus sanfranciscensis.}, journal = {Microbiological research}, volume = {243}, number = {}, pages = {126625}, doi = {10.1016/j.micres.2020.126625}, pmid = {33129664}, issn = {1618-0623}, mesh = {Bacterial Proteins/genetics/metabolism ; Biodiversity ; Bread/analysis/microbiology ; Genome Size ; *Genome, Bacterial ; Lactobacillaceae/classification/*genetics/isolation & purification/metabolism ; Multienzyme Complexes ; NADH, NADPH Oxidoreductases ; Phylogeny ; Sucrose/metabolism ; Triticum/microbiology ; Xylose/metabolism ; }, abstract = {In this study the intraspecies diversity of Fructilactobacillus (F.) sanfranciscensis (formerly Lactobacillus sanfranciscensis) was characterized by comparative genomics supported by physiological data. Twenty-four strains of F. sanfranciscensis were analyzed and sorted into six different genomic clusters. The core genome comprised only 43.14% of the pan genome, i.e. 0.87 Mbp of 2.04 Mbp. The main annotated genomic differences reside in maltose, fructose and sucrose as well as nucleotide metabolism, use of electron acceptors, and exopolysaccharide formation. Furthermore, all strains are well equipped to cope with oxidative stress via NADH oxidase and a distinct thiol metabolism. Only ten of 24 genomes contain two maltose phosphorylase genes (mapA and mapB). In F. sanfranciscensis TMW 1.897 only mapA was found. 
All strains except those from genomic cluster 2 contained the mannitol dehydrogenase and should therefore be able to use fructose as external electron acceptor. Moreover, six strains were able to grow on fructose as sole carbon source, as they contained a functional fructokinase gene. No growth was observed on pentoses, i.e. xylose, arabinose or ribose, as sole carbon source. This can be referred to the absence of ribose pyranase rbsD in all genomes, and absence of or mutations in numerous other genes, which are essential for arabinose and xylose metabolism. Seven strains were able to produce exopolysaccharides (EPS) from sucrose. In addition, the strains containing levS were able to grow on sucrose as sole carbon source. Strains of one cluster exhibit auxotrophies for purine nucleotides. The physiological and genomic analyses suggest that the biodiversity of F. sanfranciscensis is larger than anticipated. Consequently, "original" habitats and lifestyles of F. sanfranciscensis may vary but can generally be referred to an adaptation to sugary (maltose/sucrose/fructose-rich) and aerobic environments as found in plants and insects. It can dominate sourdoughs as a result of reductive evolution and cooperation with fructose-delivering, acetate-tolerant yeasts.}, } @article {pmid33128615, year = {2020}, author = {Huang, WC and Hu, Y and Zhang, G and Li, M}, title = {Comparative genomic analysis reveals metabolic diversity of different Paenibacillus groups.}, journal = {Applied microbiology and biotechnology}, volume = {104}, number = {23}, pages = {10133-10143}, pmid = {33128615}, issn = {1432-0614}, support = {91851105//National Natural Science Foundation of China (CN)/ ; }, mesh = {DNA, Bacterial/genetics ; Genomics ; Humans ; *Paenibacillus/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {The genus Paenibacillus was originally recognized based on the 16S rRNA gene phylogeny. 
Recently, a standardized bacterial taxonomy approach based on a genome phylogeny has substantially revised the classification of Paenibacillus, dividing it into 23 genera. However, the metabolic differences among these groups remain undescribed. Here, genomes of 41 Paenibacillus strains comprising 25 species were sequenced, and a comparative genomic analysis was performed considering these and 187 publicly available Paenibacillus genomes to understand their phylogeny and metabolic differences. Phylogenetic analysis indicated that Paenibacillus clustered into 10 subgroups. Core genome and pan-genome analyses revealed similar functional categories among the different Paenibacillus subgroups; however, each group tended to harbor specific gene families. A large proportion of genes in the subgroups A, E, and G are related to carbohydrate metabolism. Among them, genes related to the glycoside hydrolase family were most abundant. Metabolic reconstruction of the newly sequenced genomes showed that the Embden-Meyerhof-Parnas pathway, pentose phosphate pathway, and citric acid cycle are central pathways of carbohydrate metabolism in Paenibacillus. Further, the genomes of the subgroups A and G lack genes involved in glyoxylate cycle and D-galacturonate degradation, respectively. The current study revealed the metabolic diversity of Paenibacillus subgroups assigned based on a genomic phylogeny and could inform the taxonomy of Paenibacillus. KEY POINTS: • Paenibacillus clustered into 10 subgroups. • Genomic content variation and metabolic diversity in the subgroup A, E, and G were described. 
• Carbohydrate transport and metabolism is important for Paenibacillus survival.}, } @article {pmid33121037, year = {2020}, author = {García-Crespo, C and Soria, ME and Gallego, I and Ávila, AI and Martínez-González, B and Vázquez-Sirvent, L and Gómez, J and Briones, C and Gregori, J and Quer, J and Perales, C and Domingo, E}, title = {Dissimilar Conservation Pattern in Hepatitis C Virus Mutant Spectra, Consensus Sequences, and Data Banks.}, journal = {Journal of clinical medicine}, volume = {9}, number = {11}, pages = {}, pmid = {33121037}, issn = {2077-0383}, support = {SAF2014-52400-R//Ministerio de Economía, Industria y Competitividad, Gobierno de España/ ; SAF2017-87846-R//Ministerio de Economía, Industria y Competitividad, Gobierno de España/ ; BFU2017-91384-EXP//Ministerio de Ciencia, Innovación y Universidades/ ; PI18/00210//Instituto de Salud Carlos III/ ; S2013/ABI-2906//Comunidad de Madrid/FEDER/ ; S2018/BAA-4370//Comunidad de Madrid/FEDER/ ; CP14/00121//Instituto de Salud Carlos III/ ; CPII19/00001//Instituto de Salud Carlos III/ ; PI19/00301//Instituto de Salud Carlos III/ ; IDI-20151125//Ministerio de Ciencia, Innovación y Universidades/ ; BIO2016-79618R//Ministerio de Economía y Competitividad/ ; PID2019-104903RB-I00//Ministerio de Economía y Competitividad/ ; MDM-2017-0737//Spanish State research agency/ ; PRE2018-083422//Ministerio de Ciencia, Innovación y Universidades/ ; PFIS FI19/00119//Instituto de Salud Carlos III/ ; }, abstract = {The influence of quasispecies dynamics on long-term virus diversification in nature is a largely unexplored question. Specifically, whether intra-host nucleotide and amino acid variation in quasispecies fit the variation observed in consensus sequences or data bank alignments is unknown. Genome conservation and dynamics simulations are used for the computational design of universal vaccines, therapeutic antibodies and pan-genomic antiviral agents. 
The expectation is that selection of escape mutants will be limited when mutations at conserved residues are required. This strategy assumes long-term (epidemiologically relevant) conservation but, critically, does not consider short-term (quasispecies-dictated) residue conservation. We calculated mutant frequencies of individual loci from mutant spectra of hepatitis C virus (HCV) populations passaged in cell culture and from infected patients. Nucleotide or amino acid conservation in consensus sequences of the same populations, or in the Los Alamos HCV data bank did not match residue conservation in mutant spectra. The results relativize the concept of sequence conservation in viral genetics and suggest that residue invariance in data banks is an insufficient basis for the design of universal viral ligands for clinical purposes. Our calculations suggest relaxed mutational restrictions during quasispecies dynamics, which may contribute to higher calculated short-term than long-term viral evolutionary rates.}, } @article {pmid33115833, year = {2020}, author = {Zukancic, A and Khan, MA and Gurmen, SJ and Gliniecki, QM and Moritz-Kinkade, DL and Maddox, CW and Alam, MT}, title = {Staphylococcal Protein A (spa) Locus Is a Hot Spot for Recombination and Horizontal Gene Transfer in Staphylococcus pseudintermedius.}, journal = {mSphere}, volume = {5}, number = {5}, pages = {}, pmid = {33115833}, issn = {2379-5042}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Dogs ; *Gene Transfer, Horizontal ; Genome, Bacterial ; Genomics ; Microbial Sensitivity Tests ; *Recombination, Genetic ; Staphylococcal Infections/microbiology/*veterinary ; Staphylococcal Protein A/*genetics ; Staphylococcus/drug effects/*genetics ; Virulence Factors/*genetics ; Whole Genome Sequencing ; }, abstract = {Staphylococcus pseudintermedius is a major canine pathogen but also occasionally colonizes and infects humans. Multidrug-resistant methicillin-resistant S. 
pseudintermedius (MDR MRSP) strains have emerged globally, making treatment and control of this pathogen challenging. Sequence type 71 (ST71), ST68, and ST45 are the most widespread and successful MDR MRSP clones. The potential genetic factors underlying the clonal success of these and other predominant clones remain unknown. Characterization of the pangenome, lineage-associated accessory genes, and genes acquired through horizontal gene transfer from other bacteria is important for identifying such factors. Here, we analyzed genome sequence data from 622 S. pseudintermedius isolates to investigate the evolution of pathogenicity across lineages. We show that the predominant clones carry one or more lineage-associated virulence genes. The gene encoding staphylococcal protein A (SpA), a key virulence factor involved in immune evasion and a potential vaccine antigen, is deleted in 62% of isolates. Most importantly, we have discovered that the spa locus is a hot spot for recombination and horizontal gene transfer in S. pseudintermedius, where genes related to restriction modification, prophage immunity, mercury resistance, and nucleotide and carbohydrate metabolism have been acquired in different lineages. Our study also establishes that ST45 is composed of two distinct sublineages that differ in their accessory gene content and virulence potential. Collectively, this study reports several previously undetected lineage-associated genetic factors that may have a role in the clonal success of the major MDR MRSP clones. These data provide a framework for future experimental studies on S. pseudintermedius pathogenesis and for developing novel therapeutics against this pathogen. IMPORTANCE: Staphylococcus pseudintermedius is a major canine pathogen but can also occasionally infect humans. Identification of genetic factors contributing to the virulence and clonal success of multidrug-resistant S. 
pseudintermedius clones is critical for the development of therapeutics against this pathogen. Here, we characterized the genome sequences of a global collection of 622 S. pseudintermedius isolates. We show that all major clones, besides carrying core virulence genes, which are present in all strains, carry one or more lineage-specific genes. Many of these genes have been acquired from other bacterial species through a horizontal gene transfer mechanism. Importantly, we have discovered that the staphylococcal protein A gene (spa), a widely used marker for molecular typing of S. pseudintermedius strains and a potential vaccine candidate antigen, is deleted in 62% of strains. Furthermore, the spa locus in S. pseudintermedius acts as a reservoir to accumulate lineage-associated genes with adaptive functions.}, } @article {pmid33106639, year = {2020}, author = {Ding, Y and Weckwerth, PR and Poretsky, E and Murphy, KM and Sims, J and Saldivar, E and Christensen, SA and Char, SN and Yang, B and Tong, AD and Shen, Z and Kremling, KA and Buckler, ES and Kono, T and Nelson, DR and Bohlmann, J and Bakker, MG and Vaughan, MM and Khalil, AS and Betsiashvili, M and Dressano, K and Köllner, TG and Briggs, SP and Zerbe, P and Schmelz, EA and Huffaker, A}, title = {Genetic elucidation of interconnected antibiotic pathways mediating maize innate immunity.}, journal = {Nature plants}, volume = {6}, number = {11}, pages = {1375-1388}, doi = {10.1038/s41477-020-00787-9}, pmid = {33106639}, issn = {2055-0278}, mesh = {Anti-Bacterial Agents/*biosynthesis ; Disease Resistance/*genetics/physiology ; Gene Expression Profiling ; Genes, Plant/genetics/physiology ; Immunity, Innate/*genetics ; Metabolic Networks and Pathways/*genetics ; Metabolomics ; Multigene Family/genetics/physiology ; Proteomics ; Zea mays/*genetics/immunology/metabolism/microbiology ; }, abstract = {Specialized metabolites constitute key layers of immunity that underlie disease resistance in crops; however, challenges 
in resolving pathways limit our understanding of the functions and applications of these metabolites. In maize (Zea mays), the inducible accumulation of acidic terpenoids is increasingly considered to be a defence mechanism that contributes to disease resistance. Here, to understand maize antibiotic biosynthesis, we integrated association mapping, pan-genome multi-omic correlations, enzyme structure-function studies and targeted mutagenesis. We define ten genes in three zealexin (Zx) gene clusters that encode four sesquiterpene synthases and six cytochrome P450 proteins that collectively drive the production of diverse antibiotic cocktails. Quadruple mutants in which the ability to produce zealexins (ZXs) is blocked exhibit a broad-spectrum loss of disease resistance. Genetic redundancies ensuring pathway resiliency to single null mutations are combined with enzyme substrate promiscuity, creating a biosynthetic hourglass pathway that uses diverse substrates and in vivo combinatorial chemistry to yield complex antibiotic blends. 
The elucidated genetic basis of biochemical phenotypes that underlie disease resistance demonstrates a predominant maize defence pathway and informs innovative strategies for transferring chemical immunity between crops.}, } @article {pmid33105850, year = {2020}, author = {Slizen, MV and Galzitskaya, OV}, title = {Comparative Analysis of Proteomes of a Number of Nosocomial Pathogens by KEGG Modules and KEGG Pathways.}, journal = {International journal of molecular sciences}, volume = {21}, number = {21}, pages = {}, pmid = {33105850}, issn = {1422-0067}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/genetics/*metabolism ; Cross Infection/microbiology ; Drug Resistance, Bacterial/drug effects/*physiology ; Enterobacter cloacae/genetics/isolation & purification ; Genome, Bacterial ; Humans ; Mycoplasma/genetics/isolation & purification ; Proteome/genetics/metabolism ; Pseudomonas aeruginosa/genetics/isolation & purification ; Staphylococcus aureus/genetics/isolation & purification ; }, abstract = {Nosocomial (hospital-acquired) infections remain a serious challenge for health systems. The reason for this lies not only in the local imperfection of medical practices and protocols. The frequency of infection with antibiotic-resistant strains of bacteria is growing every year, both in developed and developing countries. In this work, a pangenome and comparative analysis of 201 genomes of Staphylococcus aureus, Enterobacter spp., Pseudomonas aeruginosa, and Mycoplasma spp. was performed on the basis of high-level functional annotations: KEGG pathways and KEGG modules. The first three organisms are serious nosocomial pathogens, often exhibiting multidrug resistance. Analysis of KEGG modules revealed methicillin resistance in 25% of S. aureus strains and resistance to carbapenems in 21% of Enterobacter spp. strains. P. aeruginosa has a wide range of unique efflux systems. 
One hundred percent of the analyzed strains have at least two drug resistance systems, and 75% of the strains have seven. Each of the organisms has a characteristic set of metabolic features, whose impact on drug resistance can be considered in future studies. Comparing the genomes of nosocomial pathogens with each other and with Mycoplasma genomes can expand our understanding of the versatility of certain metabolic features and mechanisms of drug resistance.}, } @article {pmid33105087, year = {2021}, author = {Zou, W and Ye, G and Zhang, K and Yang, H and Yang, J}, title = {Analysis of the core genome and pangenome of Clostridium butyricum.}, journal = {Genome}, volume = {64}, number = {1}, pages = {51-61}, doi = {10.1139/gen-2020-0072}, pmid = {33105087}, issn = {1480-3321}, mesh = {Butyrates/metabolism ; Carbohydrate Metabolism ; Clostridium butyricum/classification/*genetics/*metabolism ; Fermentation ; Genes, Bacterial/genetics ; Genome Size ; Metabolic Networks and Pathways/genetics ; Operon ; Phylogeny ; Propylene Glycols ; Sequence Analysis, DNA ; }, abstract = {Clostridium butyricum is an anaerobic bacterium that inhabits broad niches. Clostridium butyricum is known for its production of butyrate, 1,3-propanediol, and hydrogen. This study aimed to present a comparative pangenome analysis of 24 strains isolated from different niches. We sequenced and annotated the genome of C. butyricum 3-3 isolated from the Chinese baijiu ecosystem. The pangenome of C. butyricum was open. The core genome, accessory genome, and strain-specific genes comprised 1011, 4543, and 1473 genes, respectively. In the core genome, Carbohydrate metabolism was the largest category, and genes in the biosynthetic pathway of butyrate and glycerol metabolism were conserved (in the core or soft-core genome). Furthermore, the 1,3-propanediol operon existed in 20 strains. 
In the accessory genome, numerous mobile genetic elements belonging to the Replication, recombination, and repair (L) category were identified. In addition, genome islands were identified in all 24 strains, ranging from 2 (strain KNU-L09) to 53 (strain SU1), and phage sequences were found in 17 of the 24 strains. This study provides an important genomic framework that could pave the way for the exploration of C. butyricum and future studies on the genetic diversification of C. butyricum.}, } @article {pmid33068485, year = {2021}, author = {Song, JM and Liu, DX and Xie, WZ and Yang, Z and Guo, L and Liu, K and Yang, QY and Chen, LL}, title = {BnPIR: Brassica napus pan-genome information resource for 1689 accessions.}, journal = {Plant biotechnology journal}, volume = {19}, number = {3}, pages = {412-414}, pmid = {33068485}, issn = {1467-7652}, mesh = {*Brassica napus/genetics ; *Brassica rapa/genetics ; Genome, Plant/genetics ; }, } @article {pmid33066802, year = {2020}, author = {Li, H and Feng, X and Chu, C}, title = {The design and construction of reference pangenome graphs with minigraph.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {265}, pmid = {33066802}, issn = {1474-760X}, support = {R01 HG010040/HG/NHGRI NIH HHS/United States ; U01 HG010961/HG/NHGRI NIH HHS/United States ; }, mesh = {Animals ; *Genome, Human ; *Genomic Structural Variation ; Genomics/*methods/standards ; Humans ; Reference Standards ; }, abstract = {The recent advances in sequencing technologies enable the assembly of individual genomes to the quality of the reference genome. How to integrate multiple genomes from the same species and make the integrated representation accessible to biologists remains an open challenge. Here, we propose a graph-based data model and associated formats to represent multiple genomes while preserving the coordinate of the linear reference genome. 
We implement our ideas in the minigraph toolkit and demonstrate that we can efficiently construct a pangenome graph and compactly encode tens of thousands of structural variants missing from the current reference genome.}, } @article {pmid33065016, year = {2020}, author = {De Filippis, F and Pasolli, E and Ercolini, D}, title = {Newly Explored Faecalibacterium Diversity Is Connected to Age, Lifestyle, Geography, and Disease.}, journal = {Current biology : CB}, volume = {30}, number = {24}, pages = {4932-4943.e4}, doi = {10.1016/j.cub.2020.09.063}, pmid = {33065016}, issn = {1879-0445}, mesh = {Adolescent ; Adult ; Age Factors ; Aged ; Animals ; Child ; Child, Preschool ; Datasets as Topic ; Dysbiosis/*microbiology ; Faecalibacterium/genetics/*isolation & purification ; Feces/microbiology ; Gastrointestinal Microbiome/*genetics ; Geography ; Humans ; Infant ; Life Style ; Macaca ; Metagenome ; Metagenomics ; Middle Aged ; Phylogeny ; *Probiotics ; Young Adult ; }, abstract = {Faecalibacterium is prevalent in the human gut and a promising microbe for the development of next-generation probiotics (NGPs) or biotherapeutics. Analyzing reference Faecalibacterium genomes and almost 3,000 Faecalibacterium-like metagenome-assembled genomes (MAGs) reconstructed from 7,907 human and 203 non-human primate gut metagenomes, we identified the presence of 22 different Faecalibacterium-like species-level genome bins (SGBs), some further divided in different strains according to the subject geographical origin. Twelve SGBs are globally spread in the human gut and show different genomic potential in the utilization of complex polysaccharides, suggesting that higher SGB diversity may be related with increased utilization of plant-based foods. Moreover, up to 11 different species may co-occur in the same subject, with lower diversity in Western populations, as well as intestinal inflammatory states and obesity. 
The newly explored Faecalibacterium diversity will be able to support the choice of strains suitable as NGPs, guided by the consideration of the differences existing in their functional potential.}, } @article {pmid33055096, year = {2020}, author = {Zhou, Z and Charlesworth, J and Achtman, M}, title = {Accurate reconstruction of bacterial pan- and core genomes with PEPPAN.}, journal = {Genome research}, volume = {30}, number = {11}, pages = {1667-1679}, pmid = {33055096}, issn = {1549-5469}, support = {//Wellcome Trust/United Kingdom ; 202792/Z/16/Z//Wellcome Trust/United Kingdom ; BB/L020319/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Algorithms ; Bacteria/*classification ; Genes, Bacterial ; *Genome, Bacterial ; Genomics/*methods ; *Phylogeny ; Pseudogenes ; Software ; Streptococcus/classification/genetics ; }, abstract = {Bacterial genomes can contain traces of a complex evolutionary history, including extensive homologous recombination, gene loss, gene duplications, and horizontal gene transfer. To reconstruct the phylogenetic and population history of a set of multiple bacteria, it is necessary to examine their pangenome, the composite of all the genes in the set. Here we introduce PEPPAN, a novel pipeline that can reliably construct pangenomes from thousands of genetically diverse bacterial genomes that represent the diversity of an entire genus. PEPPAN outperforms existing pangenome methods by providing consistent gene and pseudogene annotations extended by similarity-based gene predictions, and identifying and excluding paralogs by combining tree- and synteny-based approaches. The PEPPAN package additionally includes PEPPAN_parser, which implements additional downstream analyses, including the calculation of trees based on accessory gene content or allelic differences between core genes. To test the accuracy of PEPPAN, we implemented SimPan, a novel pipeline for simulating the evolution of bacterial pangenomes. 
We compared the accuracy and speed of PEPPAN with four state-of-the-art pangenome pipelines using both empirical and simulated data sets. PEPPAN was more accurate and more specific than any of the other pipelines and was almost as fast as any of them. As a case study, we used PEPPAN to construct a pangenome of approximately 40,000 genes from 3052 representative genomes spanning at least 80 species of Streptococcus. The resulting gene and allelic trees provide an unprecedented overview of the genomic diversity of the entire Streptococcus genus.}, } @article {pmid33050509, year = {2020}, author = {Neik, TX and Amas, J and Barbetti, M and Edwards, D and Batley, J}, title = {Understanding Host-Pathogen Interactions in Brassica napus in the Omics Era.}, journal = {Plants (Basel, Switzerland)}, volume = {9}, number = {10}, pages = {}, pmid = {33050509}, issn = {2223-7747}, support = {DP1601004497, DP200100762//Australian Research Council/ ; }, abstract = {Brassica napus (canola/oilseed rape/rapeseed) is an economically important crop, mostly found in temperate and sub-tropical regions, that is cultivated widely for its edible oil. Major diseases of Brassica crops such as Blackleg, Clubroot, Sclerotinia Stem Rot, Downy Mildew, Alternaria Leaf Spot and White Rust have caused significant yield and economic losses in rapeseed-producing countries worldwide, exacerbated by global climate change, and, if not remedied effectively, will threaten global food security. To gain further insights into the host-pathogen interactions in relation to Brassica diseases, it is critical that we review current knowledge in this area and discuss how omics technologies can offer promising results and help to push boundaries in our understanding of the resistance mechanisms. Omics technologies, such as genomics, proteomics, transcriptomics and metabolomics approaches, allow us to understand the host and pathogen, as well as the interaction between the two species at a deeper level. 
With these integrated data in multi-omics and systems biology, we are able to breed high-quality disease-resistant Brassica crops in a more holistic, targeted and accurate way.}, } @article {pmid33050495, year = {2020}, author = {Kumar, R and Register, K and Christopher-Hennings, J and Moroni, P and Gioia, G and Garcia-Fernandez, N and Nelson, J and Jelinski, MD and Lysnyansky, I and Bayles, D and Alt, D and Scaria, J}, title = {Population Genomic Analysis of Mycoplasma bovis Elucidates Geographical Variations and Genes associated with Host-Types.}, journal = {Microorganisms}, volume = {8}, number = {10}, pages = {}, pmid = {33050495}, issn = {2076-2607}, abstract = {Among more than twenty species belonging to the class Mollicutes, Mycoplasma bovis is the most common cause of bovine mycoplasmosis in North America and Europe. Bovine mycoplasmosis causes significant economic loss in the cattle industry. The number of M. bovis positive herds recently has increased in North America and Europe. Since antibiotic treatment is ineffective and no efficient vaccine is available, M. bovis induced mycoplasmosis is primarily controlled by herd management measures such as the restriction of moving infected animals out of the herds and culling of infected or shedders of M. bovis. To better understand the population structure and genomic factors that may contribute to its transmission, we sequenced 147 M. bovis strains isolated from four different countries viz. USA (n = 121), Canada (n = 22), Israel (n = 3) and Lithuania (n = 1). All except two of the isolates (KRB1 and KRB8) were isolated from two host types i.e., bovine (n = 75) and bison (n = 70). We performed a large-scale comparative analysis of M. bovis genomes by integrating 103 publicly available genomes and our dataset (250 total genomes). Whole genome single nucleotide polymorphism (SNP) based phylogeny using M. agalactiae as an outgroup revealed that M. bovis population structure is composed of five different clades. 
USA isolates showed a high degree of genomic divergence in comparison to the Australian isolates. Based on host of origin, all the isolates in clade IV was of bovine origin, whereas majority of the isolates in clades III and V was of bison origin. Our comparative genome analysis also revealed that M. bovis has an open pangenome with a large breadth of unexplored diversity of genes. The function based analysis of autogenous vaccine candidates (n = 10) included in this study revealed that their functional diversity does not span the genomic diversity observed in all five clades identified in this study. Our study also found that M. bovis genome harbors a large number of IS elements and their number increases significantly (p = 7.8x10[-6]) as the genome size increases. Collectively, the genome data and the whole genome-based population analysis in this study may help to develop better understanding of M. bovis induced mycoplasmosis in cattle.}, } @article {pmid33049910, year = {2020}, author = {Vacher, S and Suybeng, V and Girard, E and Masliah Planchon, J and Thomson, G and Le Goux, C and Garinet, S and Schnitzler, A and Chemlali, W and Firlej, V and Damotte, D and Allory, Y and Kamal, M and Pignot, G and Bieche, I}, title = {Genomic Instability Signature of Palindromic Non-Coding Somatic Mutations in Bladder Cancer.}, journal = {Cancers}, volume = {12}, number = {10}, pages = {}, pmid = {33049910}, issn = {2072-6694}, abstract = {Numerous pan-genomic studies identified alterations in protein-coding genes and signaling pathways involved in bladder carcinogenesis, while non-coding somatic alterations remain weakly explored. The goal of this study was to identify clinical biomarkers in non-coding regions for bladder cancer patients. We have previously identified in bladder tumors two non-coding mutational hotspots occurring at high frequencies (≥30%). 
These mutations are located close to the GPR126 and PLEKHS1 genes, at the guanine or the cytosine of a TGAACA core motif flanked, on both sides, by a stretch of palindromic sequences. Here, we hypothesize that such a pattern of recurrent non-coding mutations could be a signature of somatic genomic instability specifically involved in bladder cancer. We analyzed 26 additional mutable non-coding sites with the same core motif in a cohort of 103 bladder cancers composed of 44 NMIBC cases and 59 MIBC cases using high-resolution melting (HRM) and Sanger sequencing. Five bladder cancers were additionally analyzed for protein-coding gene mutations using a targeted NGS panel composed of 571 genes. Expression levels of three members of the APOBEC3 family genes were assessed using real-time quantitative RT-PCR. Non-coding somatic mutations were observed for at least one TGAACA core motif locus in 62.1% (64/103) of bladder tumor samples. These non-coding mutations co-occurred in the bladder tumors but were absent in prostate tumor, HPV-positive Head and Neck Squamous Cell Carcinoma, and high microsatellite instability (MSI-H) colorectal tumor series. This signature of palindromic non-coding somatic mutations, specific to bladder tumors, was not associated with patients' outcome and was more frequent in females. Interestingly, this signature was associated with high tumor mutational burden (TMB) and high expression levels of APOBEC3B and interferon inducible genes. We identified a new type of somatic genomic instability targeting the TGAACA core motif loci flanked by palindromic sequences in bladder cancer. 
This mutational signature is a promising candidate clinical biomarker for the early detection of relapse and a major low-cost alternative to the TMB to monitor the response to immunotherapy for bladder cancer patients.}, } @article {pmid33040146, year = {2021}, author = {Eizenga, JM and Novak, AM and Kobayashi, E and Villani, F and Cisar, C and Heumos, S and Hickey, G and Colonna, V and Paten, B and Garrison, E}, title = {Efficient dynamic variation graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {36}, number = {21}, pages = {5139-5144}, pmid = {33040146}, issn = {1367-4811}, support = {U01 HL137183/HL/NHLBI NIH HHS/United States ; //Federal Ministry for Economic Affairs and Energy of Germany/ ; DT06172015//W. M. Keck Foundation/ ; T32 HG008345/HG/NHGRI NIH HHS/United States ; //Central Innovation Programme/ ; R01 HG010485/HG/NHGRI NIH HHS/United States ; }, mesh = {Genome ; Genomics ; *Libraries ; *Software ; }, abstract = {MOTIVATION: Pangenomics is a growing field within computational genomics. Many pangenomic analyses use bidirected sequence graphs as their core data model. However, implementing and correctly using this data model can be difficult, and the scale of pangenomic datasets can be challenging to work at. These challenges have impeded progress in this field.

RESULTS: Here, we present a stack of two C++ libraries, libbdsg and libhandlegraph, which use a simple, field-proven interface, designed to expose elementary features of these graphs while preventing common graph manipulation mistakes. The libraries also provide a Python binding. Using a diverse collection of pangenome graphs, we demonstrate that these tools allow for efficient construction and manipulation of large genome graphs with dense variation. For instance, the speed and memory usage are up to an order of magnitude better than the prior graph implementation in the VG toolkit, which has now transitioned to using libbdsg's implementations.

libhandlegraph and libbdsg are available under an MIT License from https://github.com/vgteam/libhandlegraph and https://github.com/vgteam/libbdsg.}, } @article {pmid33038732, year = {2020}, author = {Summers, ZM and Belahbib, H and Pradel, N and Bartoli, M and Mishra, P and Tamburini, C and Dolla, A and Ollivier, B and Armougom, F}, title = {A novel Thermotoga strain TFO isolated from a Californian petroleum reservoir phylogenetically related to Thermotoga petrophila and T. naphthophila, two thermophilic anaerobic isolates from a Japanese reservoir: Taxonomic and genomic considerations.}, journal = {Systematic and applied microbiology}, volume = {43}, number = {6}, pages = {126132}, doi = {10.1016/j.syapm.2020.126132}, pmid = {33038732}, issn = {1618-0984}, mesh = {Anaerobiosis ; Bacterial Typing Techniques ; California ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Glycolipids/chemistry ; Nucleic Acid Hybridization ; Petroleum/*microbiology ; Phospholipids/chemistry ; *Phylogeny ; Sequence Analysis, DNA ; Thermotoga/*classification/isolation & purification ; }, abstract = {Hot oil reservoirs harbor diverse microbial communities, with many of them inhabiting thermophilic or hyperthermophilic fermentative Thermotogae species. A new Thermotoga sp. strain TFO was isolated from a Californian offshore oil reservoir which is phylogenetically related to thermophilic species T. petrophila RKU-1[T] and T. naphthophila RKU-10[T], isolated from the Kubiki oil reservoir in Japan. The average nucleotide identity and DNA-DNA hybridization measures provide evidence that the novel strain TFO is closely related to T. naphthophila RKU-10[T], T. petrophila RKU-1[T] and cannot be differentiated at the species level. In the light of these results, the reclassification of T. naphthophila RKU-10 and strain TFO as heterotypic synonyms of T. petrophila is proposed. 
A pangenomic survey of closely related species revealed 55 TFO strain-specific proteins, many of which are linked to glycosyltransferases and mobile genetic elements such as recombinases, transposases and prophage, which can contribute to genome evolution and plasticity, promoting bacterial diversification and adaptation to environmental changes. The discovery of a TFO-specific transport system dctPQM, encoding a tripartite ATP-independent periplasmic transporter (TRAP), has to be highlighted. The presence of this TRAP system assumes that it could assist in anaerobic n-alkane degradation by addition of fumarate dicarboxylic acid, suggesting a niche-specific gene pool which correlates with the oil reservoir that T. petrophila TFO inhabits. Finally, T. naphthophila RKU-10, T. petrophila RKU-1[T], T. petrophila TFO form a distinct phylogenetic lineage with different geographic origins, share the same type of ecological niche including the burial history of fields. These findings might support the indigenous character of this species in oil reservoirs.}, } @article {pmid33037962, year = {2020}, author = {Kumar, J and Sen Gupta, D}, title = {Prospects of next generation sequencing in lentil breeding.}, journal = {Molecular biology reports}, volume = {47}, number = {11}, pages = {9043-9053}, pmid = {33037962}, issn = {1573-4978}, mesh = {Crops, Agricultural/*genetics ; Gene Expression Profiling/methods ; Gene Expression Regulation, Plant ; Genes, Plant/*genetics ; Genomics/methods ; High-Throughput Nucleotide Sequencing/*methods ; Lens Plant/*genetics ; Plant Breeding/*methods ; *Polymorphism, Single Nucleotide ; Quantitative Trait Loci/genetics ; }, abstract = {Lentil is an important food legume crop that has a large and complex genome. 
During the past years, considerable attention has been given to the use of next generation sequencing for enriching the genomic resources including identification of SSR and SNP markers, development of unigenes, transcripts, and identification of candidate genes for biotic and abiotic stresses, analysis of genetic diversity and identification of genes/QTLs for agronomically important traits. However, in other crops including pulses, next generation sequencing has revolutionized the genomic research and helped in genomic assisted breeding rapidly and cost effectively. The present review discusses the current status and future prospects of the use of NGS-based breeding in lentil.}, } @article {pmid33028681, year = {2020}, author = {Muthuirulandi Sethuvel, DP and Mutreja, A and Pragasam, AK and Vasudevan, K and Murugan, D and Anandan, S and Michael, JS and Walia, K and Veeraraghavan, B}, title = {Phylogenetic and Evolutionary Analysis Reveals the Recent Dominance of Ciprofloxacin-Resistant Shigella sonnei and Local Persistence of S. flexneri Clones in India.}, journal = {mSphere}, volume = {5}, number = {5}, pages = {}, pmid = {33028681}, issn = {2379-5042}, support = {/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Anti-Bacterial Agents/*pharmacology ; Ciprofloxacin/*pharmacology ; Drug Resistance, Multiple, Bacterial/*genetics ; Dysentery, Bacillary/epidemiology/microbiology ; *Evolution, Molecular ; Feces/microbiology ; Genome, Bacterial ; Humans ; India/epidemiology ; *Phylogeny ; Serogroup ; Shigella flexneri/genetics ; Shigella sonnei/classification/*drug effects ; Whole Genome Sequencing ; }, abstract = {Shigella is the second leading cause of bacterial diarrhea worldwide. Recently, Shigella sonnei seems to be replacing Shigella flexneri in low- and middle-income countries undergoing economic development. Despite this, studies focusing on these species at the genomic level remain largely unexplored. Here, we compared the genome sequences of S. flexneri and S. 
sonnei isolates from India with the publicly available genomes of global strains. Our analysis provides evidence for the long-term persistence of all phylogenetic groups (PGs) of S. flexneri and the recent dominance of the ciprofloxacin-resistant S. sonnei lineage in India. Within S. flexneri PGs, the majority of the study isolates belonged to PG3 within the predominance of serotype 2. For S. sonnei, the current pandemic involves globally distributed multidrug-resistant (MDR) clones that belong to Central Asia lineage III. The presence of such epidemiologically dominant lineages in association with stable antimicrobial resistance (AMR) determinants results in successful survival in the community. IMPORTANCE: Shigella is the second leading cause of bacterial diarrhea worldwide. This has been categorized as a priority pathogen among enteric bacteria by the Global Antimicrobial Resistance Surveillance System (GLASS) of the World Health Organization (WHO). Recently, S. sonnei seems to be replacing S. flexneri in low- and middle-income countries undergoing economic development. Antimicrobial resistance in S. flexneri and S. sonnei is a growing international concern, specifically with the international dominance of the multidrug-resistant (MDR) lineage. Genomic studies focusing on S. flexneri and S. sonnei in India remain largely unexplored. This study provides information on the introduction and expansion of drug-resistant Shigella strains in India for the first time by comparing the genome sequences of S. flexneri and S. sonnei isolates from India with the publicly available genomes of global strains. The study discusses the key differences between the two dominant species of Shigella at the genomic level to understand the evolutionary trends and genome dynamics of emerging and existing resistance clones. The present work demonstrates evidence for the long-term persistence of all PGs of S. flexneri and the recent dominance of a ciprofloxacin-resistant S. 
sonnei lineage in India.}, } @article {pmid33025059, year = {2021}, author = {Khilyas, IV and Sorokina, AV and Markelova, MI and Belenikin, M and Shafigullina, L and Tukhbatova, RI and Shagimardanova, EI and Blom, J and Sharipova, MR and Cohen, MF}, title = {Genomic and phenotypic analysis of siderophore-producing Rhodococcus qingshengii strain S10 isolated from an arid weathered serpentine rock environment.}, journal = {Archives of microbiology}, volume = {203}, number = {2}, pages = {855-860}, pmid = {33025059}, issn = {1432-072X}, support = {19-74-00062//Russian Science Foundation/ ; }, mesh = {Desert Climate ; Environment ; Genome, Bacterial/genetics ; Iron/metabolism ; Peptide Synthases/genetics ; Prophages/genetics ; Rhodococcus/*enzymology/*genetics ; Russia ; Siderophores/*metabolism ; }, abstract = {The success of members of the genus Rhodococcus in colonizing arid rocky environments is owed in part to desiccation tolerance and an ability to extract iron through the secretion and uptake of siderophores. Here, we report a comprehensive genomic and taxonomic analysis of Rhodococcus qingshengii strain S10 isolated from weathered serpentine rock at the arid Khalilovsky massif, Russia. Sequence comparisons of whole genomes and of selected marker genes clearly showed strain S10 to belong to the R. qingshengii species. Four prophage sequences within the R. qingshengii S10 genome were identified, one of which encodes for a putative siderophore-interacting protein. Among the ten non-ribosomal peptides synthase (NRPS) clusters identified in the strain S10 genome, two show high homology to those responsible for siderophore synthesis. Phenotypic analyses demonstrated that R. 
qingshengii S10 secretes siderophores and possesses adaptive features (tolerance of up to 8% NaCl and pH 9) that should enable survival in its native habitat within dry serpentine rock.}, } @article {pmid33023476, year = {2020}, author = {Sonnenberg, CB and Kahlke, T and Haugen, P}, title = {Vibrionaceae core, shell and cloud genes are non-randomly distributed on Chr 1: An hypothesis that links the genomic location of genes with their intracellular placement.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {695}, pmid = {33023476}, issn = {1471-2164}, mesh = {Chromosome Mapping/methods ; Chromosomes, Bacterial/*genetics ; *Genes, Bacterial ; Vibrionaceae/cytology/*genetics ; }, abstract = {BACKGROUND: The genome of Vibrionaceae bacteria, which consists of two circular chromosomes, is replicated in a highly ordered fashion. In fast-growing bacteria, multifork replication results in higher gene copy numbers and increased expression of genes located close to the origin of replication of Chr 1 (ori1). This is believed to be a growth optimization strategy to satisfy the high demand of essential growth factors during fast growth. The relationship between ori1-proximate growth-related genes and gene expression during fast growth has been investigated by many researchers. However, it remains unclear which other gene categories that are present close to ori1 and if expression of all ori1-proximate genes is increased during fast growth, or if expression is selectively elevated for certain gene categories.

RESULTS: We calculated the pangenome of all complete genomes from the Vibrionaceae family and mapped the four pangene categories, core, softcore, shell and cloud, to their chromosomal positions. This revealed that core and softcore genes were found heavily biased towards ori1, while shell genes were overrepresented at the opposite part of Chr 1 (i.e., close to ter1). RNA-seq of Aliivibrio salmonicida and Vibrio natriegens showed global gene expression patterns that consistently correlated with chromosomal distance to ori1. Despite a biased gene distribution pattern, all pangene categories contributed to a skewed expression pattern at fast-growing conditions, whereas at slow-growing conditions, softcore, shell and cloud genes were responsible for elevated expression.

CONCLUSION: The pangene categories were non-randomly organized on Chr 1, with an overrepresentation of core and softcore genes around ori1, and overrepresentation of shell and cloud genes around ter1. Furthermore, we mapped our gene distribution data on to the intracellular positioning of chromatin described for V. cholerae, and found that core/softcore and shell/cloud genes appear enriched at two spatially separated intracellular regions. Based on these observations, we hypothesize that there is a link between the genomic location of genes and their cellular placement.}, } @article {pmid33022985, year = {2020}, author = {Malik, A and Kim, YR and Kim, SB}, title = {Genome Mining of the Genus Streptacidiphilus for Biosynthetic and Biodegradation Potential.}, journal = {Genes}, volume = {11}, number = {10}, pages = {}, pmid = {33022985}, issn = {2073-4425}, mesh = {Bacterial Proteins/genetics/*metabolism ; *Biodegradation, Environmental ; Computational Biology ; DNA, Bacterial/analysis/*genetics ; *Genome, Bacterial ; *Multigene Family ; Phylogeny ; Sequence Analysis, DNA/*methods ; Streptomycetaceae/*genetics ; }, abstract = {The genus Streptacidiphilus represents a group of acidophilic actinobacteria within the family Streptomycetaceae, and currently encompasses 15 validly named species, which include five recent additions within the last two years. Considering the potential of the related genera within the family, namely Streptomyces and Kitasatospora, these relatively new members of the family can also be a promising source for novel secondary metabolites. At present, 15 genome data for 11 species from this genus are available, which can provide valuable information on their biology including the potential for metabolite production as well as enzymatic activities in comparison to the neighboring taxa. 
In this study, the genome sequences of 11 Streptacidiphilus species were subjected to the comparative analysis together with selected Streptomyces and Kitasatospora genomes. This study represents the first comprehensive comparative genomic analysis of the genus Streptacidiphilus. The results indicate that the genomes of Streptacidiphilus contained various secondary metabolite (SM) producing biosynthetic gene clusters (BGCs), some of them exclusively identified in Streptacidiphilus only. Several of these clusters may potentially code for SMs that may have a broad range of bioactivities, such as antibacterial, antifungal, antimalarial and antitumor activities. The biodegradation capabilities of Streptacidiphilus were also explored by investigating the hydrolytic enzymes for complex carbohydrates. Although all genomes were enriched with carbohydrate-active enzymes (CAZymes), their numbers in the genomes of some strains such as Streptacidiphilus carbonis NBRC 100919[T] were higher as compared to well-known carbohydrate degrading organisms. These distinctive features of each Streptacidiphilus species make them interesting candidates for future studies with respect to their potential for SM production and enzymatic activities.}, } @article {pmid33022031, year = {2020}, author = {Chambers, J and Sparks, N and Sydney, N and Livingstone, PG and Cookson, AR and Whitworth, DE}, title = {Comparative Genomics and Pan-Genomics of the Myxococcaceae, including a Description of Five Novel Species: Myxococcus eversor sp. nov., Myxococcus llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogochensis sp. nov., Myxococcus vastator sp. nov., Pyxidicoccus caerfyrddinensis sp. nov., and Pyxidicoccus trucidator sp. 
nov.}, journal = {Genome biology and evolution}, volume = {12}, number = {12}, pages = {2289-2302}, pmid = {33022031}, issn = {1759-6653}, mesh = {*Genome, Bacterial ; Genomics ; Myxococcales/*genetics ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Members of the predatory Myxococcales (myxobacteria) possess large genomes, undergo multicellular development, and produce diverse secondary metabolites, which are being actively prospected for novel drug discovery. To direct such efforts, it is important to understand the relationships between myxobacterial ecology, evolution, taxonomy, and genomic variation. This study investigated the genomes and pan-genomes of organisms within the Myxococcaceae, including the genera Myxococcus and Corallococcus, the most abundant myxobacteria isolated from soils. Previously, ten species of Corallococcus were known, whereas six species of Myxococcus phylogenetically surrounded a third genus (Pyxidicoccus) composed of a single species. Here, we describe draft genome sequences of five novel species within the Myxococcaceae (Myxococcus eversor, Myxococcus llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogochensis, Myxococcus vastator, Pyxidicoccus caerfyrddinensis, and Pyxidicoccus trucidator) and for the Pyxidicoccus type species strain, Pyxidicoccus fallax DSM 14698T. Genomic and physiological comparisons demonstrated clear differences between the five novel species and every other Myxococcus or Pyxidicoccus spp. type strain. Subsequent analyses of type strain genomes showed that both the Corallococcus pan-genome and the combined Myxococcus and Pyxidicoccus (Myxococcus/Pyxidicoccus) pan-genome are large and open, but with clear differences. Genomes of Corallococcus spp. are generally smaller than those of Myxococcus/Pyxidicoccus spp. but have core genomes three times larger. Myxococcus/Pyxidicoccus spp. 
genomes are more variable in size, with larger and more unique sets of accessory genes than those of Corallococcus species. In both genera, biosynthetic gene clusters are relatively enriched in the shell pan-genomes, implying they grant a greater evolutionary benefit than other shell genes, presumably by conferring selective advantages during predation.}, } @article {pmid33016627, year = {2020}, author = {Jensen, SE and Charles, JR and Muleta, K and Bradbury, PJ and Casstevens, T and Deshpande, SP and Gore, MA and Gupta, R and Ilut, DC and Johnson, L and Lozano, R and Miller, Z and Ramu, P and Rathore, A and Romay, MC and Upadhyaya, HD and Varshney, RK and Morris, GP and Pressoir, G and Buckler, ES and Ramstein, GP}, title = {A sorghum practical haplotype graph facilitates genome-wide imputation and cost-effective genomic prediction.}, journal = {The plant genome}, volume = {13}, number = {1}, pages = {e20009}, doi = {10.1002/tpg2.20009}, pmid = {33016627}, issn = {1940-3372}, support = {/GATES/Bill & Melinda Gates Foundation/United States ; }, mesh = {Cost-Benefit Analysis ; Genome ; Genomics ; Haplotypes ; *Sorghum/genetics ; }, abstract = {Successful management and utilization of increasingly large genomic datasets is essential for breeding programs to accelerate cultivar development. To help with this, we developed a Sorghum bicolor Practical Haplotype Graph (PHG) pangenome database that stores haplotypes and variant information. We developed two PHGs in sorghum that were used to identify genome-wide variants for 24 founders of the Chibas sorghum breeding program from 0.01x sequence coverage. The PHG called single nucleotide polymorphisms (SNPs) with 5.9% error at 0.01x coverage-only 3% higher than PHG error when calling SNPs from 8x coverage sequence. Additionally, 207 progenies from the Chibas genomic selection (GS) training population were sequenced and processed through the PHG. 
Missing genotypes were imputed from PHG parental haplotypes and used for genomic prediction. Mean prediction accuracies with PHG SNP calls range from .57-.73 and are similar to prediction accuracies obtained with genotyping-by-sequencing or targeted amplicon sequencing (rhAmpSeq) markers. This study demonstrates the use of a sorghum PHG to impute SNPs from low-coverage sequence data and shows that the PHG can unify genotype calls across multiple sequencing platforms. By reducing input sequence requirements, the PHG can decrease the cost of genotyping, make GS more feasible, and facilitate larger breeding populations. Our results demonstrate that the PHG is a useful research and breeding tool that maintains variant information from a diverse group of taxa, stores sequence data in a condensed but readily accessible format, unifies genotypes across genotyping platforms, and provides a cost-effective option for genomic selection.}, } @article {pmid33014966, year = {2020}, author = {Roe, C and Williamson, CHD and Vazquez, AJ and Kyger, K and Valentine, M and Bowers, JR and Phillips, PD and Harrison, V and Driebe, E and Engelthaler, DM and Sahl, JW}, title = {Bacterial Genome Wide Association Studies (bGWAS) and Transcriptomics Identifies Cryptic Antimicrobial Resistance Mechanisms in Acinetobacter baumannii.}, journal = {Frontiers in public health}, volume = {8}, number = {}, pages = {451}, pmid = {33014966}, issn = {2296-2565}, support = {R21 AI121738/AI/NIAID NIH HHS/United States ; }, mesh = {*Acinetobacter Infections/drug therapy ; *Acinetobacter baumannii/genetics ; Anti-Bacterial Agents/pharmacology ; Arizona ; Drug Resistance, Bacterial/genetics ; Genome-Wide Association Study ; Humans ; Transcriptome ; }, abstract = {Antimicrobial resistance (AMR) in the nosocomial pathogen, Acinetobacter baumannii, is becoming a serious public health threat. 
While some mechanisms of AMR have been reported, understanding novel mechanisms of resistance is critical for identifying emerging resistance. One of the first steps in identifying novel AMR mechanisms is performing genotype/phenotype association studies; however, performing these studies is complicated by the plastic nature of the A. baumannii pan-genome. In this study, we compared the antibiograms of 12 antimicrobials associated with multiple drug families for 84 A. baumannii isolates, many isolated in Arizona, USA. in silico screening of these genomes for known AMR mechanisms failed to identify clear correlations for most drugs. We then performed a bacterial genome wide association study (bGWAS) looking for associations between all possible 21-mers; this approach generally failed to identify mechanisms that explained the resistance phenotype. In order to decrease the genomic noise associated with population stratification, we compared four phylogenetically-related pairs of isolates with differing susceptibility profiles. RNA-Sequencing (RNA-Seq) was performed on paired isolates and differentially-expressed genes were identified. In these isolate pairs, five different potential mechanisms were identified, highlighting the difficulty of broad AMR surveillance in this species. To verify and validate differential expression, amplicon sequencing was performed. These results suggest that a diagnostic platform based on gene expression rather than genomics alone may be beneficial in certain surveillance efforts. The implementation of such advanced diagnostics coupled with increased AMR surveillance will potentially improve A. 
baumannii infection treatment and patient outcomes.}, } @article {pmid33013780, year = {2020}, author = {Yang, Y and Zhang, Y and Cápiro, NL and Yan, J}, title = {Genomic Characteristics Distinguish Geographically Distributed Dehalococcoidia.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {546063}, pmid = {33013780}, issn = {1664-302X}, abstract = {Dehalococcoidia (Dia) class microorganisms are frequently found in various pristine and contaminated environments. Metagenome-assembled genomes (MAGs) and single-cell amplified genomes (SAGs) studies have substantially improved the understanding of Dia microbial ecology and evolution; however, an updated thorough investigation on the genomic and evolutionary characteristics of Dia microorganisms distributed in geographically distinct environments has not been implemented. In this study, we analyzed available genomic data to unravel Dia evolutionary and metabolic traits. Based on the phylogeny of 16S rRNA genes retrieved from sixty-seven genomes, Dia microorganisms can be categorized into three groups, the terrestrial cluster that contains all Dehalococcoides and Dehalogenimonas strains, the marine cluster I, and the marine cluster II. These results reveal that a higher ratio of horizontally transferred genetic materials was found in the Dia marine clusters compared to that of the Dia terrestrial cluster. Pangenome analysis further suggests that Dia microorganisms have evolved cluster-specific enzymes (e.g., dehalogenase in terrestrial Dia, sulfite reductase in marine Dia) and biosynthesis capabilities (e.g., siroheme biosynthesis in marine Dia). Marine Dia microorganisms are likely adapted to versatile metabolisms for energy conservation besides organohalide respiration. 
The genomic differences between marine and terrestrial Dia may suggest distinct functions and roles in element cycling (e.g., carbon, sulfur, chlorine), which require interdisciplinary approaches to unravel the physiology and evolution of Dia in various environments.}, } @article {pmid33013760, year = {2020}, author = {Kim, HB and Kim, E and Yang, SM and Lee, S and Kim, MJ and Kim, HY}, title = {Development of Real-Time PCR Assay to Specifically Detect 22 Bifidobacterium Species and Subspecies Using Comparative Genomics.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {2087}, pmid = {33013760}, issn = {1664-302X}, abstract = {Bifidobacterium species are used as probiotics to provide beneficial effects to humans. These effects are specific to some species or subspecies of Bifidobacterium. However, some Bifidobacterium species or subspecies are not distinguished because similarity of 16S rRNA and housekeeping gene sequences within Bifidobacterium species is very high. In this study, we developed a real-time polymerase chain reaction (PCR) assay to rapidly and accurately detect 22 Bifidobacterium species by selecting genetic markers using comparative genomic analysis. A total of 210 Bifidobacterium genome sequences were compared to select species- or subspecies-specific genetic markers. A phylogenetic tree based on pan-genomes generated clusters according to Bifidobacterium species or subspecies except that two strains were not grouped with their subspecies. Based on pan-genomes constructed, species- or subspecies-specific genetic markers were selected. The specificity of these markers was confirmed by aligning these genes against 210 genome sequences. Real-time PCR could detect 22 Bifidobacterium specifically. We constructed the criterion for quantification by standard curves. To further test the developed assay for commercial food products, we monitored 26 probiotic products and 7 dairy products. 
Real-time PCR results and labeling data were then compared. Most of these products (21/33, 63.6%) were consistent with their label claims. Some products labeled at species level only can be detected up to subspecies level through our developed assay.}, } @article {pmid33008825, year = {2020}, author = {Akob, DM and Hallenbeck, M and Beulig, F and Fabisch, M and Küsel, K and Keffer, JL and Woyke, T and Shapiro, N and Lapidus, A and Klenk, HP and Chan, CS}, title = {Mixotrophic Iron-Oxidizing Thiomonas Isolates from an Acid Mine Drainage-Affected Creek.}, journal = {Applied and environmental microbiology}, volume = {86}, number = {24}, pages = {}, pmid = {33008825}, issn = {1098-5336}, mesh = {Burkholderiales/*metabolism ; Germany ; Iron/*metabolism ; Mining ; Oxidation-Reduction ; Rivers/*microbiology ; Wastewater/*microbiology ; }, abstract = {Natural attenuation of heavy metals occurs via coupled microbial iron cycling and metal precipitation in creeks impacted by acid mine drainage (AMD). Here, we describe the isolation, characterization, and genomic sequencing of two iron-oxidizing bacteria (FeOB) species: Thiomonas ferrovorans FB-6 and Thiomonas metallidurans FB-Cd, isolated from slightly acidic (pH 6.3), Fe-rich, AMD-impacted creek sediments. These strains precipitated amorphous iron oxides, lepidocrocite, goethite, and magnetite or maghemite and grew at a pH optimum of 5.5. While Thiomonas spp. are known as mixotrophic sulfur oxidizers and As oxidizers, the FB strains oxidized Fe, which suggests they can efficiently remove Fe and other metals via coprecipitation. Previous evidence for Thiomonas sp. Fe oxidation is largely ambiguous, possibly because of difficulty demonstrating Fe oxidation in heterotrophic/mixotrophic organisms. Therefore, we also conducted a genomic analysis to identify genetic mechanisms of Fe oxidation, other metal transformations, and additional adaptations, comparing the two FB strain genomes with 12 other Thiomonas genomes. 
The FB strains fall within a relatively novel group of Thiomonas strains that includes another strain (b6) with solid evidence of Fe oxidation. Most Thiomonas isolates, including the FB strains, have the putative iron oxidation gene cyc2, but only the two FB strains possess the putative Fe oxidase genes mtoAB. The two FB strain genomes contain the highest numbers of strain-specific gene clusters, greatly increasing the known Thiomonas genetic potential. Our results revealed that the FB strains are two distinct novel species of Thiomonas with the genetic potential for bioremediation of AMD via iron oxidation. IMPORTANCE: As AMD moves through the environment, it impacts aquatic ecosystems, but at the same time, these ecosystems can naturally attenuate contaminated waters via acid neutralization and catalyzing metal precipitation. This is the case in the former Ronneburg uranium-mining district, where AMD impacts creek sediments. We isolated and characterized two iron-oxidizing Thiomonas species that are mildly acidophilic to neutrophilic and that have two genetic pathways for iron oxidation. These Thiomonas species are well positioned to naturally attenuate AMD as it discharges across the landscape.}, } @article {pmid33007861, year = {2020}, author = {Harris, LG and Bodger, O and Post, V and Mack, D and Morgenstern, M and Rohde, H and Moriarty, TF and Wilkinson, TS}, title = {Temporal Changes in Patient-Matched Staphylococcus epidermidis Isolates from Infections: towards Defining a 'True' Persistent Infection.}, journal = {Microorganisms}, volume = {8}, number = {10}, pages = {}, pmid = {33007861}, issn = {2076-2607}, abstract = {Staphylococcus epidermidis is found naturally on the skin but is a common cause of persistent orthopaedic device-related infections (ODRIs). This study used a pan-genome and gene-by-gene approach to analyse the clonality of whole genome sequences (WGS) of 115 S. epidermidis isolates from 55 patients with persistent ODRIs. 
Analysis of the 522 gene core genome revealed that the isolates clustered into three clades, and MLST analysis showed that 83% of the isolates belonged to clonal complex 2 (CC2). Analysis also found 13 isolate pairs had different MLST types and less than 70% similarity within the genes; hence, these were defined as re-infection by a different S. epidermidis strain. Comparison of allelic diversity in the remaining 102 isolates (49 patients) revealed that 6 patients had microevolved infections (>7 allele differences), and only 37 patients (77 isolates) had a 'true' persistent infection. Analysis of the core genomes of isolate pairs from 37 patients found 110/841 genes had variations; mainly in metabolism associated genes. The accessory genome consisted of 2936 genes; with an average size of 1515 genes. To conclude, this study demonstrates the advantage of using WGS for identifying the accuracy of a persistent infection diagnosis. Hence, persistent infections can be defined as 'true' persistent infections if the core genome of paired isolates has ≤7 allele differences; microevolved persistent infection if the paired isolates have >7 allele differences but same MLST type; and polyclonal if they are the same species but a different MLST type.}, } @article {pmid32996243, year = {2020}, author = {Srivastava, AK and Srivastava, R and Sharma, A and Bharati, AP and Tiwari, PK and Singh, AK and Srivastava, AK and Chakdar, H and Kashyap, PL and Saxena, AK}, title = {Pan-genome analysis of Exiguobacterium reveals species delineation and genomic similarity with Exiguobacterium profundum PHM 11.}, journal = {Environmental microbiology reports}, volume = {12}, number = {6}, pages = {639-650}, doi = {10.1111/1758-2229.12890}, pmid = {32996243}, issn = {1758-2229}, support = {//ICAR-CRP Genomics/International ; //ICAR-AMAAS/International ; }, mesh = {Bacterial Proteins/genetics ; Base Composition ; DNA, Bacterial/genetics ; Exiguobacterium/classification/*genetics ; *Genome, 
Bacterial ; Phylogeny ; Sequence Analysis, DNA ; }, abstract = {The stint of the bacterial species is convoluting, but the new algorithms to calculate genome-to-genome distance (GGD) and DNA-DNA hybridization (DDH) for comparative genome analysis have rejuvenated the exploration of species and sub-species characterization. The present study reports the first whole genome sequence of Exiguobacterium profundum PHM11. PHM11 genome consists of ~ 2.92 Mb comprising 48 contigs, 47.93% G + C content. Functional annotations revealed a total of 3033 protein coding genes and 33 non-protein coding genes. Out of these, only 2316 could be characterized and others reported as hypothetical proteins. The comparative analysis of predicted proteome of PHM11 with five other Exiguobacterium sp. identified 3806 clusters, out of which the PHM11 shared a total of 2723 clusters having 1664 common clusters, 131 singletons and 928 distributed between five species. The pan-genome analysis of 70 different genomic sequences of Exiguobacterium strains devoid of a species taxon was done on the basis of GGD and the DDH which identified eight genomes analogous to the PHM11 at species level and may be characterized as E. profundum. The ANI value and phylogenetic tree analysis also support the same. The results regarding pan-genome analysis provide a convincing insight for delineation of these eight strains to species.}, } @article {pmid32983925, year = {2020}, author = {Patel, M and Patel, HM and Vohra, N and Dave, S}, title = {Complete genome sequencing and comparative genome characterization of the lignocellulosic biomass degrading bacterium Pseudomonas stutzeri MP4687 from cattle rumen.}, journal = {Biotechnology reports (Amsterdam, Netherlands)}, volume = {28}, number = {}, pages = {e00530}, pmid = {32983925}, issn = {2215-017X}, abstract = {We report the complete genome sequencing of novel Pseudomonas stutzeri strain MP4687 isolated from cattle rumen. Various strains of P. 
stutzeri have been reported from different environmental samples including oil-contaminated sites, crop roots, air, and human clinical samples, but not from rumen samples, which is being reported here for the first time. The genome of P. stutzeri MP4687 has a single replicon, 4.75 Mb chromosome and a G + C content of 63.45%. The genome encodes for 4,790 protein coding genes including 164 CAZymes and 345 carbohydrate processing genes. The isolate MP4687 harbors LCB hydrolyzing potential through endoglucanase (4.5 U/mL), xylanase (3.1 U/mL), β-glucosidase (3.3 U/mL) and β-xylosidase (1.9 U/mL) activities. The pangenome analysis further revealed that MP4687 has a very high number of unique genes (>2100) compared to other P. stutzeri genomes, which might have an important role in rumen functioning.}, } @article {pmid32983058, year = {2020}, author = {Verma, DK and Vasudeva, G and Sidhu, C and Pinnaka, AK and Prasad, SE and Thakur, KG}, title = {Biochemical and Taxonomic Characterization of Novel Haloarchaeal Strains and Purification of the Recombinant Halotolerant α-Amylase Discovered in the Isolate.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {2082}, pmid = {32983058}, issn = {1664-302X}, abstract = {Haloarchaea are salt-loving archaea and potential source of industrially relevant halotolerant enzymes. In the present study, three reddish-pink, extremely halophilic archaeal strains, namely wsp1 (wsp-water sample Pondicherry), wsp3, and wsp4, were isolated from the Indian Solar saltern. The phylogenetic analysis based on 16S rRNA gene sequences suggests that both wsp3 and wsp4 strains belong to Halogeometricum borinquense while wsp1 is closely related to Haloferax volcanii species. The comparative genomics revealed an open pangenome for both genera investigated here. Whole-genome sequence analysis revealed that these isolates have multiple copies of industrially/biotechnologically important unique genes and enzymes. 
Among these unique enzymes, for recombinant expression and purification, we selected four putative α-amylases identified in these three isolates. We successfully purified functional halotolerant recombinant Amy2, from wsp1 using pelB signal sequence-based secretion strategy using Escherichia coli as an expression host. This method may prove useful to produce functional haloarchaeal secretory recombinant proteins suitable for commercial or research applications. Biochemical analysis of Amy2 suggests the halotolerant nature of the enzyme having maximum enzymatic activity observed at 1 M NaCl. We also report the isolation and characterization of carotenoids purified from these isolates. This study highlights the presence of several industrially important enzymes in the haloarchaeal strains which may potentially have improved features like stability and salt tolerance suitable for industrial applications.}, } @article {pmid32983012, year = {2020}, author = {Hounmanou, YMG and Dalsgaard, A and Sopacua, TF and Uddin, GMN and Leekitcharoenphon, P and Hendriksen, RS and Olsen, JE and Larsen, MH}, title = {Molecular Characteristics and Zoonotic Potential of Salmonella Weltevreden From Cultured Shrimp and Tilapia in Vietnam and China.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {1985}, pmid = {32983012}, issn = {1664-302X}, abstract = {Salmonella Weltevreden is increasingly reported from aquatic environments, seafood, and patients in several Southeast Asian countries. Using genome-wide analysis, we characterized S. Weltevreden isolated from cultured shrimp and tilapia from Vietnam and China to study their genetic characteristics and relatedness to clinical isolates of S. Weltevreden ST-365. The phylogenetic analysis revealed up to 312 single-nucleotide polymorphism (SNP) difference between tilapia isolates, whereas isolates from shrimp were genetically more closely related. 
Epidemiologically unrelated isolates from Vietnam were closely related to isolates from China, e.g., 20 SNPs differences between strains 28V and 75C. In comparison with strains from other parts of the world, our environmental isolates predominantly clustered within the continental South Asia lineage, constituted mostly of strains from human stool with as low as seven SNPs difference, e.g., 30V versus Cont_ERR495254. All sequenced isolates were MLST type ST-365 and contained the major virulence-related genes encoded by the Salmonella Pathogenicity Islands 1-5. Ten of the isolates harbored the IncFII(S) plasmid similar to the virulence genes-mediated plasmid pSPCV of S. Paratyphi C, and one isolate had the IncQ1 plasmid on the same contig with strA/B, sul2, and tetA resistance genes similar to the IncQ1 type, pNUC of S. Typhimurium. A pangenomic analysis yielded 7891 genes including a core genome of 4892 genes, with a closely related accessory genome content between clinical and environmental isolates (Benjamini p > 0.05). In a search for differences that could explain the higher prevalence of S. Weltevreden in aquatic samples, genomes were compared with those of other Salmonella enterica serovars. S. Weltevreden revealed specific regions harboring glpX (Fructose-1;6-bisphosphatase; class II), rfbC (dTDP-4-dehydrorhamnose 3;5-epimerase), and cmtB (PTS Mannitol-specific cryptic phosphotransferase enzyme IIA component) involved in carbohydrate biosynthesis pathways. Our study builds grounds for future experiments to determine genes or pathways that are essential when S. Weltevreden are in aquatic environments and microbial interactions providing survival advantages to S. 
Weltevreden in such environments.}, } @article {pmid32979565, year = {2020}, author = {Chen, Y and Song, W and Xie, X and Wang, Z and Guan, P and Peng, H and Jiao, Y and Ni, Z and Sun, Q and Guo, W}, title = {A Collinearity-Incorporating Homology Inference Strategy for Connecting Emerging Assemblies in the Triticeae Tribe as a Pilot Practice in the Plant Pangenomic Era.}, journal = {Molecular plant}, volume = {13}, number = {12}, pages = {1694-1708}, doi = {10.1016/j.molp.2020.09.019}, pmid = {32979565}, issn = {1752-9867}, mesh = {Algorithms ; Chromosomes, Plant/genetics ; Databases, Genetic ; Diploidy ; Evolution, Molecular ; Genome, Plant ; *Genomics ; Poaceae/*genetics ; Software ; }, abstract = {Plant genome sequencing has dramatically increased, and some species even have multiple high-quality reference versions. Demands for clade-specific homology inference and analysis have increased in the pangenomic era. Here we present a novel method, GeneTribe (https://chenym1.github.io/genetribe/), for homology inference among genetically similar genomes that incorporates gene collinearity and shows better performance than traditional sequence-similarity-based methods in terms of accuracy and scalability. The Triticeae tribe is a typical allopolyploid-rich clade with complex species relationships that includes many important crops, such as wheat, barley, and rye. We built Triticeae-GeneTribe (http://wheat.cau.edu.cn/TGT/), a homology database, by integrating 12 Triticeae genomes and 3 outgroup model genomes and implemented versatile analysis and visualization functions. With macrocollinearity analysis, we were able to construct a refined model illustrating the structural rearrangements of the 4A-5A-7B chromosomes in wheat as two major translocation events. 
With collinearity analysis at both the macro- and microscale, we illustrated the complex evolutionary history of homologs of the wheat vernalization gene Vrn2, which evolved as a combined result of genome translocation, duplication, and polyploidization and gene loss events. Our work provides a useful practice for connecting emerging genome assemblies, with awareness of the extensive polyploidy in plants, and will help researchers efficiently exploit genome sequence resources.}, } @article {pmid32977700, year = {2020}, author = {McCubbin, T and Gonzalez-Garcia, RA and Palfreyman, RW and Stowers, C and Nielsen, LK and Marcellin, E}, title = {A Pan-Genome Guided Metabolic Network Reconstruction of Five Propionibacterium Species Reveals Extensive Metabolic Diversity.}, journal = {Genes}, volume = {11}, number = {10}, pages = {}, pmid = {32977700}, issn = {2073-4425}, mesh = {Bacterial Proteins/genetics/*metabolism ; Base Composition ; Chromosome Mapping ; Chromosomes, Bacterial/*genetics ; DNA, Bacterial/analysis/*genetics ; *Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; Humans ; *Metabolic Networks and Pathways ; Phylogeny ; Propionibacterium/classification/genetics/growth & development/*metabolism ; }, abstract = {Propionibacteria have been studied extensively since the early 1930s due to their relevance to industry and importance as human pathogens. Still, their unique metabolism is far from fully understood. This is partly due to their signature high GC content, which has previously hampered the acquisition of quality sequence data, the accurate annotation of the available genomes, and the functional characterization of genes. The recent completion of the genome sequences for several species has led researchers to reassess the taxonomical classification of the genus Propionibacterium, which has been divided into several new genera. 
Such data also enable a comparative genomic approach to annotation and provide a new opportunity to revisit our understanding of their metabolism. Using pan-genome analysis combined with the reconstruction of the first high-quality Propionibacterium genome-scale metabolic model and a pan-metabolic model of current and former members of the genus Propionibacterium, we demonstrate that despite sharing unique metabolic traits, these organisms have an unexpected diversity in central carbon metabolism and a hidden layer of metabolic complexity. This combined approach gave us new insights into the evolution of Propionibacterium metabolism and led us to propose a novel, putative ferredoxin-linked energy conservation strategy. The pan-genomic approach highlighted key differences in Propionibacterium metabolism that reflect adaptation to their environment. Results were mathematically captured in genome-scale metabolic reconstructions that can be used to further explore metabolism using metabolic modeling techniques. 
Overall, the data provide a platform to explore Propionibacterium metabolism and a tool for the rational design of strains.}, } @article {pmid32975504, year = {2020}, author = {Feng, Y and Fan, X and Zhu, L and Yang, X and Liu, Y and Gao, S and Jin, X and Liu, D and Ding, J and Guo, Y and Hu, Y}, title = {Phylogenetic and genomic analysis reveals high genomic openness and genetic diversity of Clostridium perfringens.}, journal = {Microbial genomics}, volume = {6}, number = {10}, pages = {}, pmid = {32975504}, issn = {2057-5858}, mesh = {Animals ; Base Sequence ; Clostridium Infections/microbiology ; Clostridium perfringens/*genetics/isolation & purification/*pathogenicity ; Drug Resistance, Multiple, Bacterial/*genetics ; Genetic Variation/genetics ; Genome, Bacterial/*genetics ; Genomics ; Humans ; Phylogeny ; Plasmids/*genetics ; Virulence/genetics ; Whole Genome Sequencing ; }, abstract = {Clostridium perfringens is associated with a variety of diseases in both humans and animals. Recent advances in genomic sequencing make it timely to re-visit this important pathogen. Although the genome sequence of C. perfringens was first determined in 2002, large-scale comparative genomics with isolates of different origins is still lacking. In this study, we used whole-genome sequencing of 45 C. perfringens isolates with isolation time spanning an 80-year period and performed comparative analysis of 173 genomes from worldwide strains. We also conducted phylogenetic lineage analysis and introduced an openness index (OI) to evaluate the openness of bacterial genomes. We classified all these genomes into five lineages and hypothesized that the origin of C. perfringens dates back to ~80 000 years ago. We showed that the pangenome of the 173 C. perfringens strains contained a total of 26 954 genes, while the core genome comprised 1020 genes, accounting for about a third of the genome of each isolate. We demonstrated that C. 
perfringens had the highest OI compared with 51 other bacterial species. Intact prophage sequences were found in nearly 70.0 % of C. perfringens genomes, while CRISPR sequences were found only in ~40.0 %. Plasmids were prevalent in C. perfringens isolates, and half of the virulence genes and antibiotic resistance genes (ARGs) identified in all the isolates could be found in plasmids. ARG-sharing network analysis showed that C. perfringens shared its 11 ARGs with 55 different bacterial species, and a high frequency of ARG transfer may have occurred between C. perfringens and species in the genera Streptococcus and Staphylococcus. Correlation analysis showed that the ARG number in C. perfringens strains increased with time, while the virulence gene number was relatively stable. Our results, taken together with previous studies, revealed the high genome openness and genetic diversity of C. perfringens and provide a comprehensive view of the phylogeny, genomic features, virulence gene and ARG profiles of worldwide strains.}, } @article {pmid32972461, year = {2020}, author = {Rautiainen, M and Marschall, T}, title = {GraphAligner: rapid and versatile sequence-to-graph alignment.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {253}, pmid = {32972461}, issn = {1474-760X}, mesh = {Algorithms ; Animals ; Genomics/*methods ; Humans ; Sequence Alignment/*methods ; *Software ; }, abstract = {Genome graphs can represent genetic variation and sequence uncertainty. Aligning sequences to genome graphs is key to many applications, including error correction, genome assembly, and genotyping of variants in a pangenome graph. Yet, so far, this step is often prohibitively slow. We present GraphAligner, a tool for aligning long reads to genome graphs. Compared to the state-of-the-art tools, GraphAligner is 13x faster and uses 3x less memory. 
When employing GraphAligner for error correction, we find it to be more than twice as accurate and over 12x faster than extant tools. Availability: Package manager: https://anaconda.org/bioconda/graphaligner and source code: https://github.com/maickrau/GraphAligner.}, } @article {pmid32969787, year = {2020}, author = {Sánchez-Osuna, M and Cortés, P and Llagostera, M and Barbé, J and Erill, I}, title = {Exploration into the origins and mobilization of di-hydrofolate reductase genes and the emergence of clinical resistance to trimethoprim.}, journal = {Microbial genomics}, volume = {6}, number = {11}, pages = {}, pmid = {32969787}, issn = {2057-5858}, mesh = {Acinetobacter baumannii/drug effects/*genetics/isolation & purification ; Anti-Bacterial Agents/*pharmacology ; Biological Evolution ; Escherichia coli/drug effects/genetics ; Folic Acid/biosynthesis ; Humans ; Microbial Sensitivity Tests ; Sulfonamides/pharmacology ; Tetrahydrofolate Dehydrogenase/*genetics ; Trimethoprim/*pharmacology ; Trimethoprim Resistance/*genetics ; }, abstract = {Trimethoprim is a synthetic antibacterial agent that targets folate biosynthesis by competitively binding to the di-hydrofolate reductase enzyme (DHFR). Trimethoprim is often administered synergistically with sulfonamide, another chemotherapeutic agent targeting the di-hydropteroate synthase (DHPS) enzyme in the same pathway. Clinical resistance to both drugs is widespread and mediated by enzyme variants capable of performing their biological function without binding to these drugs. These mutant enzymes were assumed to have arisen after the discovery of these synthetic drugs, but recent work has shown that genes conferring resistance to sulfonamide were present in the bacterial pangenome millions of years ago. Here, we apply phylogenetics and comparative genomics methods to study the largest family of mobile trimethoprim-resistance genes (dfrA). 
We show that most of the dfrA genes identified to date map to two large clades that likely arose from independent mobilization events. In contrast to sulfonamide resistance (sul) genes, we find evidence of recurrent mobilization in dfrA genes. Phylogenetic evidence allows us to identify novel dfrA genes in the emerging pathogen Acinetobacter baumannii, and we confirm their resistance phenotype in vitro. We also identify a cluster of dfrA homologues in cryptic plasmid and phage genomes, but we show that these enzymes do not confer resistance to trimethoprim. Our methods also allow us to pinpoint the chromosomal origin of previously reported dfrA genes, and we show that many of these ancient chromosomal genes also confer resistance to trimethoprim. Our work reveals that trimethoprim resistance predated the clinical use of this chemotherapeutic agent, but that novel mutations have likely also arisen and become mobilized following its widespread use within and outside the clinic. Hence, this work confirms that resistance to novel drugs may already be present in the bacterial pangenome, and stresses the importance of rapid mobilization as a fundamental element in the emergence and global spread of resistance determinants.}, } @article {pmid32968153, year = {2020}, author = {Jin, L and Chen, Y and Yang, W and Qiao, Z and Zhang, X}, title = {Complete genome sequence of fish-pathogenic Aeromonas hydrophila HX-3 and a comparative analysis: insights into virulence factors and quorum sensing.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {15479}, pmid = {32968153}, issn = {2045-2322}, mesh = {Aeromonas hydrophila/*genetics/pathogenicity/ultrastructure ; Animals ; Chromosomes, Bacterial/genetics ; Cloning, Molecular ; Fish Diseases/microbiology ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Genomics ; Gram-Negative Bacterial Infections/microbiology/veterinary ; Microscopy, Electron, Scanning ; Microscopy, Electron, Transmission ; Phylogeny 
; Quorum Sensing/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing/methods ; }, abstract = {The gram-negative, aerobic, rod-shaped bacterium Aeromonas hydrophila, the causative agent of motile aeromonad septicaemia, has attracted increasing attention due to its high pathogenicity. Here, we constructed the complete genome sequence of a virulent strain, A. hydrophila HX-3 isolated from Pseudosciaena crocea and performed comparative genomics to investigate its virulence factors and quorum sensing features in comparison with those of other Aeromonas isolates. HX-3 has a circular chromosome of 4,941,513 bp with a 61.0% G + C content encoding 4483 genes, including 4318 protein-coding genes, and 31 rRNA, 127 tRNA and 7 ncRNA operons. Seventy interspersed repeat and 153 tandem repeat sequences, 7 transposons, 8 clustered regularly interspaced short palindromic repeats, and 39 genomic islands were predicted in the A. hydrophila HX-3 genome. Phylogeny and pan-genome were also analyzed herein to confirm the evolutionary relationships on the basis of comparisons with other fully sequenced Aeromonas genomes. In addition, the assembled HX-3 genome was successfully annotated against the Cluster of Orthologous Groups of proteins database (76.03%), Gene Ontology database (18.13%), and Kyoto Encyclopedia of Genes and Genome pathway database (59.68%). Two-component regulatory systems in the HX-3 genome and virulence factors profiles through comparative analysis were predicted, providing insights into pathogenicity. A large number of genes related to the AHL-type 1 (ahyI, ahyR), LuxS-type 2 (luxS, pfs, metEHK, litR, luxOQU) and QseBC-type 3 (qseB, qseC) autoinducer systems were also identified. As a result of the expression of the ahyI gene in Escherichia coli BL21 (DE3), combined UPLC-MS/MS profiling led to the identification of several new N-acyl-homoserine lactone compounds synthesized by AhyI. This genomic analysis determined the comprehensive QS systems of A. 
hydrophila, which might provide novel information regarding the mechanisms of virulence signatures correlated with QS.}, } @article {pmid32959240, year = {2021}, author = {Yero, D and Conchillo-Solé, O and Daura, X}, title = {Antigen Discovery in Bacterial Panproteomes.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2183}, number = {}, pages = {43-62}, doi = {10.1007/978-1-0716-0795-4_5}, pmid = {32959240}, issn = {1940-6029}, mesh = {Antigens, Bacterial/genetics/*immunology/metabolism ; Bacteria/genetics/*immunology/metabolism ; Bacterial Proteins/genetics/*immunology/metabolism ; Bacterial Vaccines/immunology ; *Computational Biology/methods ; Databases, Factual ; Genome, Bacterial ; Genome-Wide Association Study ; Genomics/methods ; Humans ; Molecular Sequence Annotation ; *Proteome ; *Proteomics/methods ; Vaccinology ; Web Browser ; Workflow ; }, abstract = {There is still a lack of vaccines for many bacterial infections for which the best treatment option would be a prophylactic one. On the other hand, effectiveness has been questioned for some existing vaccines, prompting new developments. Therapeutic vaccines are also becoming a treatment option in specific cases where antibiotics tend to fail. In this scenario, refinement and extension of the classical reverse vaccinology approach is allowing scientists to find new and more effective antigens. In this chapter, we describe an in silico methodology that integrates pangenomic, immunoinformatic, structural, and evolutionary approaches for the screening of potential antigens in a given bacterial species. The strategy focuses on targeting relatively conserved epitopes in core proteins to design broadly cross-protective vaccines and avoid allele-specific immunity. 
The proposed methodological steps and computational tools can be easily implemented in a reverse vaccinology approach not only to identify new leads with strong immune response but also to develop diagnostic assays.}, } @article {pmid32959087, year = {2020}, author = {González-Gómez, JP and Soto-Rodriguez, S and López-Cuevas, O and Castro-Del Campo, N and Chaidez, C and Gomez-Gil, B}, title = {Phylogenomic Analysis Supports Two Possible Origins for Latin American Strains of Vibrio parahaemolyticus Associated with Acute Hepatopancreatic Necrosis Disease (AHPND).}, journal = {Current microbiology}, volume = {77}, number = {12}, pages = {3851-3860}, doi = {10.1007/s00284-020-02214-w}, pmid = {32959087}, issn = {1432-0991}, mesh = {Animals ; China ; Latin America ; Mexico ; Necrosis ; *Penaeidae ; Phylogeny ; *Vibrio parahaemolyticus/genetics ; }, abstract = {Acute hepatopancreatic necrosis disease (AHPND) is a severe disease affecting recently stocked cultured shrimps. The disease is mainly caused by V. parahaemolyticus that harbors the pVA1 plasmid; this plasmid contains the pirA and pirB genes, which encode a delta-endotoxin. AHPND originated in China in 2009 and has since spread to several other Asian countries and recently to Latin America (2013). Many Asian strains have been sequenced, and their sequences are publicly accessible in scientific databases, but only four strains from Latin America have been reported. In this study, we analyzed nine pVA1-harboring V. parahaemolyticus sequences from strains isolated in Mexico along with the 38 previously available pVA1-harboring V. parahaemolyticus sequences and the reference strain RIMD 2210633. The studied sequences were clustered into three phylogenetic clades (Latin American, Malaysian, and Cosmopolitan) through pangenomic and phylogenomic analysis. 
The nucleotide sequence alignment of the pVA1 plasmids harbored by the Asian and Latin American strains confirmed that the main structural difference in the plasmid between the Asian and Latin American strains is the absence of the Tn3 transposon in the Asian strains; in addition, some deletions in the pirAB region were found in two of the Latin American strains. Our study represents the most robust and inclusive phylogenomic analysis of pVA1-harboring V. parahaemolyticus conducted to date and provides insight into the epidemiology of AHPND. In addition, this study highlights that disease diagnosis through the detection of the pirA and pirB genes is an inadequate approach due to the instability of these genes.}, } @article {pmid32958892, year = {2020}, author = {Fang, X and Lloyd, CJ and Palsson, BO}, title = {Reconstructing organisms in silico: genome-scale models and their emerging applications.}, journal = {Nature reviews. Microbiology}, volume = {18}, number = {12}, pages = {731-743}, pmid = {32958892}, issn = {1740-1534}, support = {R01 GM057089/GM/NIGMS NIH HHS/United States ; }, mesh = {Actinobacteria/classification/genetics/growth & development/metabolism ; Computer Simulation ; Cyanobacteria/classification/genetics/growth & development/metabolism ; Escherichia coli/*genetics/growth & development/metabolism ; Firmicutes/classification/genetics/growth & development/metabolism ; *Gene Regulatory Networks ; *Genome, Bacterial ; Genomics/instrumentation/*methods ; Metabolic Networks and Pathways/*genetics ; *Models, Genetic ; Phenotype ; Proteobacteria/classification/genetics/growth & development/metabolism ; Stress, Physiological/genetics ; Thermotoga/classification/genetics/growth & development/metabolism ; Whole Genome Sequencing ; }, abstract = {Escherichia coli is considered to be the best-known microorganism given the large number of published studies detailing its genes, its genome and the biochemical functions of its molecular components. 
This vast literature has been systematically assembled into a reconstruction of the biochemical reaction networks that underlie E. coli's functions, a process which is now being applied to an increasing number of microorganisms. Genome-scale reconstructed networks are organized and systematized knowledge bases that have multiple uses, including conversion into computational models that interpret and predict phenotypic states and the consequences of environmental and genetic perturbations. These genome-scale models (GEMs) now enable us to develop pan-genome analyses that provide mechanistic insights, detail the selection pressures on proteome allocation and address stress phenotypes. In this Review, we first discuss the overall development of GEMs and their applications. Next, we review the evolution of the most complete GEM that has been developed to date: the E. coli GEM. Finally, we explore three emerging areas in genome-scale modelling of microbial phenotypes: collections of strain-specific models, metabolic and macromolecular expression models, and simulation of stress responses.}, } @article {pmid32957508, year = {2020}, author = {Phanse, Y and Wu, CW and Venturino, AJ and Hansen, C and Nelson, K and Broderick, SR and Steinberg, H and Talaat, AM}, title = {A Protective Vaccine against Johne's Disease in Cattle.}, journal = {Microorganisms}, volume = {8}, number = {9}, pages = {}, pmid = {32957508}, issn = {2076-2607}, support = {2013-33610-21044//USDA NIFA SBIR/ ; 2013-01151//NIFA Foundational Program on Animal Health/ ; }, abstract = {Johne's disease (JD) caused by Mycobacterium avium subsp. paratuberculosis (M. paratuberculosis) is a chronic infection characterized by the development of granulomatous enteritis in wild and domesticated ruminants. 
It is one of the most significant livestock diseases not only in the USA but also globally, accounting for USD 200-500 million losses annually for the USA alone with potential link to cases of Crohn's disease in humans. Developing safe and protective vaccines is of a paramount importance for JD control in dairy cows. The current study evaluated the safety, immunity and protective efficacy of a novel live attenuated vaccine (LAV) candidate with and without an adjuvant in comparison to an inactivated vaccine. Results indicated that the LAV, irrespective of the adjuvant presence, induced robust T cell immune responses indicated by proinflammatory cytokine production such as IFN-γ, IFN-α, TNF-α and IL-17 as well as strong response to intradermal skin test against M. paratuberculosis antigens. Furthermore, the LAV was safe with minimal tissue pathology. Finally, calves vaccinated with adjuvanted LAV did not shed M. paratuberculosis post-challenge, a much-desired characteristic of an effective vaccine against JD. Together, this data suggests a strong potential of testing LAV in field trials to curb JD in dairy herds.}, } @article {pmid32939951, year = {2021}, author = {Zhong, C and Wang, L and Ning, K}, title = {Pan-genome study of Thermococcales reveals extensive genetic diversity and genetic evidence of thermophilic adaption.}, journal = {Environmental microbiology}, volume = {23}, number = {7}, pages = {3599-3613}, doi = {10.1111/1462-2920.15234}, pmid = {32939951}, issn = {1462-2920}, mesh = {Adaptation, Physiological/genetics ; Evolution, Molecular ; Gene Duplication ; Genome ; Humans ; Phylogeny ; *Thermococcales/genetics ; }, abstract = {Thermococcales has a strong adaptability to extreme environments, which is of profound interest in explaining how complex life forms emerge on earth. However, their gene composition, thermal stability and evolution in hyperthermal environments are still little known. 
Here, we characterized the pan-genome architecture of 30 Thermococcales species to gain insight into their genetic properties, evolutionary patterns and specific metabolisms adapted to niches. We revealed an open pan-genome of Thermococcales comprising 6070 gene families that tend to increase with the availability of additional genomes. The genome contents of Thermococcales were flexible, with a series of genes experienced gene duplication, progressive divergence, or gene gain and loss events exhibiting distinct functional features. These archaea had concise types of heat shock proteins, such as HSP20, HSP60 and prefoldin, which were constrained by strong purifying selection that governed their conservative evolution. Furthermore, purifying selection forced genes involved in enzyme, motility, secretion system, defence system and chaperones to differ in functional constraints and their disparity in the rate of evolution may be related to adaptation to specific niche. These results deepened our understanding of genetic diversity and adaptation patterns of Thermococcales, and provided valuable research models for studying the metabolic traits of early life forms.}, } @article {pmid32937932, year = {2020}, author = {Khan, M and Stapleton, F and Summers, S and Rice, SA and Willcox, MDP}, title = {Antibiotic Resistance Characteristics of Pseudomonas aeruginosa Isolated from Keratitis in Australia and India.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {9}, number = {9}, pages = {}, pmid = {32937932}, issn = {2079-6382}, abstract = {This study investigated genomic differences in Australian and Indian Pseudomonas aeruginosa isolates from keratitis (infection of the cornea). Overall, the Indian isolates were resistant to more antibiotics, with some of those isolates being multi-drug resistant. 
Acquired genes were related to resistance to fluoroquinolones, aminoglycosides, beta-lactams, macrolides, sulphonamides, and tetracycline and were more frequent in Indian (96%) than in Australian (35%) isolates (p = 0.02). Indian isolates had large numbers of gene variations (median 50,006, IQR = 26,967-50,600) compared to Australian isolates (median 26,317, IQR = 25,681-33,780). There were a larger number of mutations in the mutL and uvrD genes associated with the mismatch repair (MMR) system in Indian isolates, which may result in strains losing their efficacy for DNA repair. The number of gene variations were greater in isolates carrying MMR system genes or exoU. In the phylogenetic division, the number of core genes were similar in both groups, but Indian isolates had larger numbers of pan genes (median 6518, IQR = 6040-6935). Clones related to three different sequence types-ST308, ST316, and ST491-were found among Indian isolates. Only one clone, ST233, containing two strains was present in Australian isolates. 
The most striking differences between Australian and Indian isolates were carriage of exoU (that encodes a cytolytic phospholipase) in Indian isolates and exoS (that encodes for GTPase activator activity) in Australian isolates, large number of acquired resistance genes, greater changes to MMR genes, and a larger pan genome as well as increased overall genetic variation in the Indian isolates.}, } @article {pmid32934114, year = {2020}, author = {Yin, Z and Zhang, S and Wei, Y and Wang, M and Ma, S and Yang, S and Wang, J and Yuan, C and Jiang, L and Du, Y}, title = {Horizontal Gene Transfer Clarifies Taxonomic Confusion and Promotes the Genetic Diversity and Pathogenicity of Plesiomonas shigelloides.}, journal = {mSystems}, volume = {5}, number = {5}, pages = {}, pmid = {32934114}, issn = {2379-5077}, abstract = {Plesiomonas shigelloides is an emerging pathogen that has been shown to be involved in gastrointestinal diseases and extraintestinal infections in humans. However, the taxonomic position, evolutionary dynamics, and pathogenesis of P. shigelloides remain unclear. We reported the draft genome sequences of 12 P. shigelloides strains representing different serogroups. We were able to determine a clear distinction between P. shigelloides and other members of Enterobacterales via core genome phylogeny, Neighbor-Net network, and average genome identity analysis. The pan-genome analysis of P. shigelloides revealed extensive genetic diversity and presented large flexible gene repertoires, while the core genome phylogeny exhibited a low level of clonality. The discordance between the core genome phylogeny and the pan-genome phylogeny indicated that flexible accessory genomes account for an important proportion of the evolution of P. shigelloides, which was subsequently characterized by determinations of hundreds of horizontally transferred genes (horizontal genes), massive gene expansions and contractions, and diverse mobile genetic elements (MGEs). 
The apparently high levels of horizontal gene transfer (HGT) in P. shigelloides were conferred from bacteria with novel properties from other taxa (mainly Vibrionaceae and Aeromonadaceae), which caused the historical taxonomic confusion and shaped the virulence gene pools. Furthermore, P. shigelloides genomes contain many macromolecular secretion system genes, virulence factor genes, and resistance genes, indicating its potential to cause intestinal and invasive infections. Collectively, our work provides insights into the phylogenetic position, evolutionary dynamic, and pathogenesis of P. shigelloides at the genomic level, which could facilitate the observation and research of this important pathogen. IMPORTANCE: The taxonomic position of P. shigelloides has been the subject of debate for a long time, and until now, the evolutionary dynamics and pathogenesis of P. shigelloides were unclear. In this study, pan-genome analysis indicated extensive genetic diversity and the presence of large and variable gene repertoires. Our results revealed that horizontal gene transfer was the focal driving force for the genetic diversity of the P. shigelloides pan-genome and might have contributed to the emergence of novel properties. Vibrionaceae and Aeromonadaceae were found to be the predominant donor taxa for horizontal genes, which might have caused the taxonomic confusion historically. Comparative genomic analysis revealed the potential of P. shigelloides to cause intestinal and invasive diseases. Our results could advance the understanding of the evolution and pathogenesis of P. 
shigelloides, particularly in elucidating the role of horizontal gene transfer and investigating virulence-related elements.}, } @article {pmid32934112, year = {2020}, author = {Ross, DE and Marshall, CW and Gulliver, D and May, HD and Norman, RS}, title = {Defining Genomic and Predicted Metabolic Features of the Acetobacterium Genus.}, journal = {mSystems}, volume = {5}, number = {5}, pages = {}, pmid = {32934112}, issn = {2379-5077}, abstract = {Acetogens are anaerobic bacteria capable of fixing CO2 or CO to produce acetyl coenzyme A (acetyl-CoA) and ultimately acetate using the Wood-Ljungdahl pathway (WLP). Acetobacterium woodii is the type strain of the Acetobacterium genus and has been critical for understanding the biochemistry and energy conservation in acetogens. Members of the Acetobacterium genus have been isolated from a variety of environments or have had genomes recovered from metagenome data, but no systematic investigation has been done on the unique and various metabolisms of the genus. To gain a better appreciation for the metabolic breadth of the genus, we sequenced the genomes of 4 isolates (A. fimetarium, A. malicum, A. paludosum, and A. tundrae) and conducted a comparative genome analysis (pan-genome) of 11 different Acetobacterium genomes. A unifying feature of the Acetobacterium genus is the carbon-fixing WLP. The methyl (cluster II) and carbonyl (cluster III) branches of the Wood-Ljungdahl pathway are highly conserved across all sequenced Acetobacterium genomes, but cluster I encoding the formate dehydrogenase is not. In contrast to A. woodii, all but four strains encode two distinct Rnf clusters, Rnf being the primary respiratory enzyme complex. Metabolism of fructose, lactate, and H2:CO2 was conserved across the genus, but metabolism of ethanol, methanol, caffeate, and 2,3-butanediol varied. 
Additionally, clade-specific metabolic potential was observed, such as amino acid transport and metabolism in the psychrophilic species, and biofilm formation in the A. wieringae clade, which may afford these groups an advantage in low-temperature growth or attachment to solid surfaces, respectively. IMPORTANCE: Acetogens are anaerobic bacteria capable of fixing CO2 or CO to produce acetyl-CoA and ultimately acetate using the Wood-Ljungdahl pathway (WLP). This autotrophic metabolism plays a major role in the global carbon cycle and, if harnessed, can help reduce greenhouse gas emissions. Overall, the data presented here provide a framework for examining the ecology and evolution of the Acetobacterium genus and highlight the potential of these species as a source for production of fuels and chemicals from CO2 feedstocks.}, } @article {pmid32928108, year = {2020}, author = {Chen, Z and Erickson, DL and Meng, J}, title = {Benchmarking hybrid assembly approaches for genomic analyses of bacterial pathogens using Illumina and Oxford Nanopore sequencing.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {631}, pmid = {32928108}, issn = {1471-2164}, mesh = {Benchmarking ; Campylobacter jejuni ; Contig Mapping/methods/standards ; Cronobacter sakazakii ; Drug Resistance, Bacterial ; *Genome, Bacterial ; Genomics/*methods/standards ; Listeria monocytogenes ; Nanopore Sequencing/*methods/standards ; Pseudomonas aeruginosa ; Salmonella typhimurium ; Virulence ; }, abstract = {BACKGROUND: We benchmarked the hybrid assembly approaches of MaSuRCA, SPAdes, and Unicycler for bacterial pathogens using Illumina and Oxford Nanopore sequencing by determining genome completeness and accuracy, antimicrobial resistance (AMR), virulence potential, multilocus sequence typing (MLST), phylogeny, and pan genome. Ten bacterial species (10 strains) were tested for simulated reads of both mediocre- and low-quality, whereas 11 bacterial species (12 strains) were tested for real reads.

RESULTS: Unicycler performed the best for achieving contiguous genomes, closely followed by MaSuRCA, while all SPAdes assemblies were incomplete. MaSuRCA was less tolerant of low-quality long reads than SPAdes and Unicycler. The hybrid assemblies of five antimicrobial-resistant strains with simulated reads provided consistent AMR genotypes with the reference genomes. The MaSuRCA assembly of Staphylococcus aureus with real reads contained msr(A) and tet(K), while the reference genome and SPAdes and Unicycler assemblies harbored blaZ. The AMR genotypes of the reference genomes and hybrid assemblies were consistent for the other five antimicrobial-resistant strains with real reads. The numbers of virulence genes in all hybrid assemblies were similar to those of the reference genomes, irrespective of simulated or real reads. Only one exception existed that the reference genome and hybrid assemblies of Pseudomonas aeruginosa with mediocre-quality long reads carried 241 virulence genes, whereas 184 virulence genes were identified in the hybrid assemblies of low-quality long reads. The MaSuRCA assemblies of Escherichia coli O157:H7 and Salmonella Typhimurium with mediocre-quality long reads contained 126 and 118 virulence genes, respectively, while 110 and 107 virulence genes were detected in their MaSuRCA assemblies of low-quality long reads, respectively. All approaches performed well in our MLST and phylogenetic analyses. The pan genomes of the hybrid assemblies of S. Typhimurium with mediocre-quality long reads were similar to that of the reference genome, while SPAdes and Unicycler were more tolerant of low-quality long reads than MaSuRCA for the pan-genome analysis. All approaches functioned well in the pan-genome analysis of Campylobacter jejuni with real reads.

CONCLUSIONS: Our research demonstrates the hybrid assembly pipeline of Unicycler as a superior approach for genomic analyses of bacterial pathogens using Illumina and Oxford Nanopore sequencing.}, } @article {pmid32924924, year = {2020}, author = {Psomopoulos, FE and van Helden, J and Médigue, C and Chasapi, A and Ouzounis, CA}, title = {Ancestral state reconstruction of metabolic pathways across pangenome ensembles.}, journal = {Microbial genomics}, volume = {6}, number = {11}, pages = {}, pmid = {32924924}, issn = {2057-5858}, mesh = {*Algorithms ; Amino Acid Sequence ; Bacteria/*genetics/*metabolism ; Base Sequence ; *Evolution, Molecular ; Genome/*genetics ; Metabolic Networks and Pathways/*genetics ; Phylogeny ; Software ; }, abstract = {As genome sequencing efforts are unveiling the genetic diversity of the biosphere with an unprecedented speed, there is a need to accurately describe the structural and functional properties of groups of extant species whose genomes have been sequenced, as well as their inferred ancestors, at any given taxonomic level of their phylogeny. Elaborate approaches for the reconstruction of ancestral states at the sequence level have been developed, subsequently augmented by methods based on gene content. While these approaches of sequence or gene-content reconstruction have been successfully deployed, there has been less progress on the explicit inference of functional properties of ancestral genomes, in terms of metabolic pathways and other cellular processes. Herein, we describe PathTrace, an efficient algorithm for parsimony-based reconstructions of the evolutionary history of individual metabolic pathways, pivotal representations of key functional modules of cellular function. The algorithm is implemented as a five-step process through which pathways are represented as fuzzy vectors, where each enzyme is associated with a taxonomic conservation value derived from the phylogenetic profile of its protein sequence. 
The method is evaluated with a selected benchmark set of pathways against collections of genome sequences from key data resources. By deploying a pangenome-driven approach for pathway sets, we demonstrate that the inferred patterns are largely insensitive to noise, as opposed to gene-content reconstruction methods. In addition, the resulting reconstructions are closely correlated with the evolutionary distance of the taxa under study, suggesting that a diligent selection of target pangenomes is essential for maintaining cohesiveness of the method and consistency of the inference, serving as an internal control for an arbitrary selection of queries. The PathTrace method is a first step towards the large-scale analysis of metabolic pathway evolution and our deeper understanding of functional relationships reflected in emerging pangenome collections.}, } @article {pmid32920913, year = {2020}, author = {Gardon, H and Biderre-Petit, C and Jouan-Dufournel, I and Bronner, G}, title = {A drift-barrier model drives the genomic landscape of a structured bacterial population.}, journal = {Molecular ecology}, volume = {29}, number = {21}, pages = {4143-4156}, doi = {10.1111/mec.15628}, pmid = {32920913}, issn = {1365-294X}, mesh = {Bacteria/genetics ; Evolution, Molecular ; *Genome, Bacterial ; Genomics ; *Prochlorococcus/genetics ; }, abstract = {Bacterial populations differentiate over time and space to form distinct genetic units. The mechanisms governing this diversification are presumed to result from the ecological context of living units to adapt to specific niches. Recently, a model assuming the acquisition of advantageous genes among populations rather than whole genome sweeps has emerged to explain population differentiation. However, the characteristics of these exchanged, or flexible, genes and whether their evolution is driven by adaptive or neutral processes remain controversial. 
By analysing the flexible genome of single-amplified genomes of co-occurring populations of the marine Prochlorococcus HLII ecotype, we highlight that genomic compartments - rather than population units - are characterized by different evolutionary trajectories. The dynamics of gene fluxes vary across genomic compartments and therefore the effectiveness of selection depends on the fluctuation of the effective population size along the genome. Taken together, these results support the drift-barrier model of bacterial evolution.}, } @article {pmid32913678, year = {2020}, author = {Christian, RW and Hewitt, SL and Nelson, G and Roalson, EH and Dhingra, A}, title = {Plastid transit peptides-where do they come from and where do they all belong? Multi-genome and pan-genomic assessment of chloroplast transit peptide evolution.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e9772}, pmid = {32913678}, issn = {2167-8359}, abstract = {Subcellular relocalization of proteins determines an organism's metabolic repertoire and thereby its survival in unique evolutionary niches. In plants, the plastid and its various morphotypes import a large and varied number of nuclear-encoded proteins to orchestrate vital biochemical reactions in a spatiotemporal context. Recent comparative genomics analysis and high-throughput shotgun proteomics data indicate that there are a large number of plastid-targeted proteins that are either semi-conserved or non-conserved across different lineages. This implies that homologs are differentially targeted across different species, which is feasible only if proteins have gained or lost plastid targeting peptides during evolution. In this study, a broad, multi-genome analysis of 15 phylogenetically diverse genera and in-depth analyses of pangenomes from Arabidopsis and Brachypodium were performed to address the question of how proteins acquire or lose plastid targeting peptides. 
The analysis revealed that random insertions or deletions were the dominant mechanism by which novel transit peptides are gained by proteins. While gene duplication was not a strict requirement for the acquisition of novel subcellular targeting, 40% of novel plastid-targeted genes were found to be most closely related to a sequence within the same genome, and of these, 30.5% resulted from alternative transcription or translation initiation sites. Interestingly, analysis of the distribution of amino acids in the transit peptides of known and predicted chloroplast-targeted proteins revealed monocot and eudicot-specific preferences in residue distribution.}, } @article {pmid32913672, year = {2020}, author = {Araújo, CL and Blanco, I and Souza, L and Tiwari, S and Pereira, LC and Ghosh, P and Azevedo, V and Silva, A and Folador, A}, title = {In silico functional prediction of hypothetical proteins from the core genome of Corynebacterium pseudotuberculosis biovar ovis.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e9643}, pmid = {32913672}, issn = {2167-8359}, abstract = {Corynebacterium pseudotuberculosis is a pathogen of veterinary relevance diseases, being divided into two biovars: equi and ovis; causing ulcerative lymphangitis and caseous lymphadenitis, respectively. The isolation and sequencing of C. pseudotuberculosis biovar ovis strains in the Northern and Northeastern regions of Brazil exhibited the emergence of this pathogen, which causes economic losses to small ruminant producers, and condemnation of carcasses and skins of animals. Through the pan-genomic approach, it is possible to determine and analyze genes that are shared by all strains of a species-the core genome. However, many of these genes do not have any predicted function, being characterized as hypothetical proteins (HP). In this study, we considered 32 C. 
pseudotuberculosis biovar ovis genomes for the pan-genomic analysis, where were identified 172 HP present in a core genome composed by 1255 genes. We are able to functionally annotate 80 sequences previously characterized as HP through the identification of structural features as conserved domains and families. Furthermore, we analyzed the physicochemical properties, subcellular localization and molecular function. Additionally, through RNA-seq data, we investigated the differential gene expression of the annotated HP. Genes inserted in pathogenicity islands had their virulence potential evaluated. Also, we have analyzed the existence of functional associations for their products based on protein-protein interaction networks, and perform the structural prediction of three targets. Due to the integration of different strategies, this study can underlie deeper in vitro researches in the characterization of these HP and the search for new solutions for combat this pathogen.}, } @article {pmid32903853, year = {2020}, author = {Zhang, X and Li, F and Cui, S and Mao, L and Li, X and Awan, F and Lv, W and Zeng, Z}, title = {Prevalence and Distribution Characteristics of blaKPC-2 and blaNDM-1 Genes in Klebsiella pneumoniae.}, journal = {Infection and drug resistance}, volume = {13}, number = {}, pages = {2901-2910}, pmid = {32903853}, issn = {1178-6973}, abstract = {BACKGROUND: Carbapenem-resistant Klebsiella pneumoniae infections have caused major concern and posed a global threat to public health. As blaKPC-2 and blaNDM-1 genes are the most widely reported carbapenem resistant genes in K. pneumoniae, it is crucial to study the prevalence and geographical distribution of these two genes for further understanding of their transmission mode and mechanism.

PURPOSE: Here, we investigated the prevalence and distribution of blaKPC-2 and blaNDM-1 genes in carbapenem-resistant K. pneumoniae strains from a tertiary hospital and from 1579 genomes available in the NCBI database, and further analyzed the possible core structure of blaKPC-2 or blaNDM-1 genes among global genome data.

MATERIALS AND METHODS: K. pneumoniae strains from a tertiary hospital in China during 2013-2018 were collected and their antimicrobial susceptibility testing for 28 antibiotics was determined. Whole-genome sequencing of carbapenem-resistant K. pneumoniae strains was used to investigate the genetic characterization. The phylogenetic relationships of these strains were investigated through pan-genome analysis. The epidemiology and distribution of blaKPC-2 and blaNDM-1 genes in K. pneumoniae based on 1579 global genomes and carbapenem-resistant K. pneumoniae strains from hospital were analyzed using bioinformatics. The possible core structure carrying blaKPC-2 or blaNDM-1 genes was investigated among global data.

RESULTS: A total of 19 carbapenem-resistant K. pneumoniae were isolated in a tertiary hospital. All isolates had a multi-resistant pattern and eight kinds of resistance genes. The phylogenetic analysis showed all isolates in the hospital were dominated by two lineages composed of ST11 and ST25, respectively. ST11 and ST25 were the major ST type carrying blaKPC-2 and blaNDM-1 genes, respectively. Among 1579 global genomes data, 147 known ST types (1195 genomes) have been identified, while ST258 (23.6%) and ST11 (22.1%) were the globally prevalent clones among the known ST types. Genetic environment analysis showed that the ISKpn7-dnaA/ISKpn27-blaKPC-2-ISkpn6 and blaNDM-1-ble-trpf-nagA may be the core structure in the horizontal transfer of blaKPC-2 and blaNDM-1, respectively. In addition, DNA transferase (hin) may be involved in the horizontal transfer or the expression of blaNDM-1.

CONCLUSION: There was clonal transmission of carbapenem-resistant K. pneumoniae in the tertiary hospital in China. The prevalence and distribution of blaKPC-2 and blaNDM-1 varied by countries and were driven by different transposons carrying the core structure. This study shed light on the genetic environment of blaKPC-2 and blaNDM-1 and offered basic information about the mechanism of carbapenem-resistant K. pneumoniae dissemination.}, } @article {pmid32903140, year = {2020}, author = {Ambros, IM and Tonini, GP and Pötschger, U and Gross, N and Mosseri, V and Beiske, K and Berbegall, AP and Bénard, J and Bown, N and Caron, H and Combaret, V and Couturier, J and Defferrari, R and Delattre, O and Jeison, M and Kogner, P and Lunec, J and Marques, B and Martinsson, T and Mazzocco, K and Noguera, R and Schleiermacher, G and Valent, A and Van Roy, N and Villamon, E and Janousek, D and Pribill, I and Glogova, E and Attiyeh, EF and Hogarty, MD and Monclair, TF and Holmes, K and Valteau-Couanet, D and Castel, V and Tweddle, DA and Park, JR and Cohn, S and Ladenstein, R and Beck-Popovic, M and De Bernardi, B and Michon, J and Pearson, ADJ and Ambros, PF}, title = {Age Dependency of the Prognostic Impact of Tumor Genomics in Localized Resectable MYCN-Nonamplified Neuroblastomas. 
Report From the SIOPEN Biology Group on the LNESG Trials and a COG Validation Group.}, journal = {Journal of clinical oncology : official journal of the American Society of Clinical Oncology}, volume = {38}, number = {31}, pages = {3685-3697}, pmid = {32903140}, issn = {1527-7755}, support = {U10 CA180886/CA/NCI NIH HHS/United States ; U10 CA180899/CA/NCI NIH HHS/United States ; /CRUK_/Cancer Research UK/United Kingdom ; }, mesh = {Age Factors ; *Chromosome Aberrations ; *Chromosomes, Human, Pair 1 ; *Chromosomes, Human, Pair 11 ; Clinical Trials as Topic ; Diploidy ; Gene Amplification ; Genomics ; Humans ; Infant ; N-Myc Proto-Oncogene Protein/*genetics ; Neoplasm Staging ; Neuroblastoma/*genetics/pathology/surgery ; Prognosis ; Progression-Free Survival ; Survival Rate ; }, abstract = {PURPOSE: For localized, resectable neuroblastoma without MYCN amplification, surgery only is recommended even if incomplete. However, it is not known whether the genomic background of these tumors may influence outcome.

PATIENTS AND METHODS: Diagnostic samples were obtained from 317 tumors, International Neuroblastoma Staging System stages 1/2A/2B, from 3 cohorts: Localized Neuroblastoma European Study Group I/II and Children's Oncology Group. Genomic data were analyzed using multi- and pangenomic techniques and fluorescence in-situ hybridization in 2 age groups (cutoff age, 18 months) and were quality controlled by the International Society of Pediatric Oncology European Neuroblastoma (SIOPEN) Biology Group.

RESULTS: Patients with stage 1 tumors had an excellent outcome (5-year event-free survival [EFS] ± standard deviation [SD], 95% ± 2%; 5-year overall survival [OS], 99% ± 1%). In contrast, patients with stage 2 tumors had a reduced EFS in both age groups (5-year EFS ± SD, 84% ± 3% in patients < 18 months of age and 75% ± 7% in patients ≥ 18 months of age). However, OS was significantly decreased only in the latter group (5-year OS ± SD in < 18months and ≥ 18months, 96% ± 2% and 81% ± 7%, respectively; P = .001). In < 18months, relapses occurred independent of segmental chromosome aberrations (SCAs); only 1p loss decreased EFS (5-year EFS ± SD in patients 1p loss and no 1p loss, 62% ± 13% and 87% ± 3%, respectively; P = .019) but not OS (5-year OS ± SD, 92% ± 8% and 97% ± 2%, respectively). In patients ≥ 18 months, only SCAs led to relapse and death, with 11q loss as the strongest marker (11q loss and no 11q loss: 5-year EFS ± SD, 48% ± 16% and 85% ± 7%, P = .033; 5-year OS ± SD, 46% ± 22% and 92% ± 6%, P = .038).

CONCLUSION: Genomic aberrations of resectable non-MYCN-amplified stage 2 neuroblastomas have a distinct age-dependent prognostic impact. Chromosome 1p loss is a risk factor for relapse but not for diminished OS in patients < 18 months, SCAs (especially 11q loss) are risk factors for reduced EFS and OS in those > 18months. In older patients with SCA, a randomized trial of postoperative chemotherapy compared with observation alone may be indicated.}, } @article {pmid32902773, year = {2020}, author = {Liu, Y and Tian, Z}, title = {From one linear genome to a graph-based pan-genome: a new era for genomics.}, journal = {Science China. Life sciences}, volume = {63}, number = {12}, pages = {1938--1941}, pmid = {32902773}, issn = {1869-1889}, mesh = {DNA/genetics ; Genetic Variation ; Genetics, Population ; Genome/*genetics ; Genomics/*methods/standards/trends ; Genotype ; Reference Standards ; Sequence Analysis, DNA ; }, } @article {pmid32901388, year = {2021}, author = {González-Dominici, LI and Saati-Santamaría, Z and García-Fraile, P}, title = {Genome Analysis and Genomic Comparison of the Novel Species Arthrobacter ipsi Reveal Its Potential Protective Role in Its Bark Beetle Host.}, journal = {Microbial ecology}, volume = {81}, number = {2}, pages = {471--482}, pmid = {32901388}, issn = {1432-184X}, support = {19-09072S//Grantová Agentura České Republiky/ ; CLU-2018-04//Junta de Castilla y León (ES)/ ; }, mesh = {Animals ; Antibiosis ; Arthrobacter/classification/genetics/*physiology ; Coleoptera/*microbiology ; DNA, Bacterial/genetics ; Fungi/growth & development ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Host Microbial Interactions ; Phenotype ; Phylogeny ; Pinus/parasitology ; Plant Bark/*parasitology ; Plant Diseases/parasitology ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {The pine engraver beetle, Ips acuminatus Gyll, is a bark beetle that causes important damages in Scots pine (Pinus sylvestris) forests and 
plantations. As almost all higher organisms, Ips acuminatus harbours a microbiome, although the role of most members of its microbiome is not well understood. As part of a work in which we analysed the bacterial diversity associated to Ips acuminatus, we isolated the strain Arthrobacter sp. IA7. In order to study its potential role within the bark beetle holobiont, we sequenced and explored its genome and performed a pan-genome analysis of the genus Arthrobacter, showing specific genes of strain IA7 that might be related with its particular role in its niche. Based on these investigations, we suggest several potential roles of the bacterium within the beetle. Analysis of genes related to secondary metabolism indicated potential antifungal capability, confirmed by the inhibition of several entomopathogenic fungal strains (Metarhizium anisopliae CCF0966, Lecanicillium muscarium CCF6041, L. muscarium CCF3297, Isaria fumosorosea CCF4401, I. farinosa CCF4808, Beauveria bassiana CCF4422 and B. brongniartii CCF1547). Phylogenetic analyses of the 16S rRNA gene, six concatenated housekeeping genes (tuf-secY-rpoB-recA-fusA-atpD) and genome sequences indicated that strain IA7 is closely related to A. globiformis NBRC 12137[T] but forms a new species within the genus Arthrobacter; this was confirmed by digital DNA-DNA hybridization (37.10%) and average nucleotide identity (ANIb) (88.9%). Based on phenotypic and genotypic features, we propose strain IA7[T] as the novel species Arthrobacter ipsi sp. nov. 
(type strain IA7[T] = CECT 30100[T] = LMG 31782[T]) and suggest its protective role for its host.}, } @article {pmid32898134, year = {2020}, author = {Boisen, N and Østerlund, MT and Joensen, KG and Santiago, AE and Mandomando, I and Cravioto, A and Chattaway, MA and Gonyar, LA and Overballe-Petersen, S and Stine, OC and Rasko, DA and Scheutz, F and Nataro, JP}, title = {Redefining enteroaggregative Escherichia coli (EAEC): Genomic characterization of epidemiological EAEC strains.}, journal = {PLoS neglected tropical diseases}, volume = {14}, number = {9}, pages = {e0008613}, pmid = {32898134}, issn = {1935-2735}, mesh = {Adhesins, Bacterial/genetics ; Bacterial Adhesion/*genetics/physiology ; Case-Control Studies ; Cell Line ; Child, Preschool ; Diarrhea/microbiology ; Escherichia coli/classification/*genetics/isolation & purification/*pathogenicity ; Escherichia coli Infections/*epidemiology ; Escherichia coli Proteins/*genetics ; Fimbriae Proteins/*genetics ; Fimbriae, Bacterial/*genetics ; Genome, Bacterial/genetics ; Genomics ; Humans ; Infant ; Infant, Newborn ; Trans-Activators/genetics ; Virulence/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Although enteroaggregative E. coli (EAEC) has been implicated as a common cause of diarrhea in multiple settings, neither its essential genomic nature nor its role as an enteric pathogen are fully understood. The current definition of this pathotype requires demonstration of cellular adherence; a working molecular definition encompasses E. coli which do not harbor the heat-stable or heat-labile toxins of enterotoxigenic E. coli (ETEC) and harbor the genes aaiC, aggR, and/or aatA. 
In an effort to improve the definition of this pathotype, we report the most definitive characterization of the pan-genome of EAEC to date, applying comparative genomics and functional characterization on a collection of 97 EAEC strains isolated in the course of a multicenter case-control diarrhea study (Global Enteric Multi-Center Study, GEMS). Genomic analysis revealed that the EAEC strains mapped to all phylogenomic groups of E. coli. Circa 70% of strains harbored one of the five described AAF variants; there were no additional AAF variants identified, and strains that lacked an identifiable AAF generally did not have an otherwise complete AggR regulon. An exception was strains that harbored an ETEC colonization factor (CF) CS22, like AAF a member of the chaperone-usher family of adhesins, but not phylogenetically related to the AAF family. Of all genes scored, sepA yielded the strongest association with diarrhea (P = 0.002) followed by the increased serum survival gene, iss (p = 0.026), and the outer membrane protease gene ompT (p = 0.046). Notably, the EAEC genomes harbored several genes characteristically associated with other E. coli pathotypes. Our data suggest that a molecular definition of EAEC could comprise E. coli strains harboring AggR and a complete AAF(I-V) or CS22 gene cluster. 
Further, it is possible that strains meeting this definition could be both enteric bacteria and urinary/systemic pathogens.}, } @article {pmid32893299, year = {2021}, author = {Bonnici, V and Maresi, E and Giugno, R}, title = {Challenges in gene-oriented approaches for pangenome content discovery.}, journal = {Briefings in bioinformatics}, volume = {22}, number = {3}, pages = {}, doi = {10.1093/bib/bbaa198}, pmid = {32893299}, issn = {1477-4054}, mesh = {*Algorithms ; Bacteria/classification/genetics ; Biological Evolution ; Computational Biology/*methods ; Genome/*genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; Mycoplasma/classification/genetics ; Phylogeny ; Software ; }, abstract = {Given a group of genomes, represented as the sets of genes that belong to them, the discovery of the pangenomic content is based on the search of genetic homology among the genes for clustering them into families. Thus, pangenomic analyses investigate the membership of the families to the given genomes. This approach is referred to as the gene-oriented approach in contrast to other definitions of the problem that takes into account different genomic features. In the past years, several tools have been developed to discover and analyse pangenomic contents. Because of the hardness of the problem, each tool applies a different strategy for discovering the pangenomic content. This results in a differentiation of the performance of each tool that depends on the composition of the input genomes. This review reports the main analysis instruments provided by the current state of the art tools for the discovery of pangenomic contents. Moreover, unlike previous works, the presented study compares pangenomic tools from a methodological perspective, analysing the causes that lead a given methodology to outperform other tools. The analysis is performed by taking into account different bacterial populations, which are synthetically generated by changing evolutionary parameters. 
The benchmarks used to compare the pangenomic tools, in addition to the computational pipeline developed for this purpose, are available at https://github.com/InfOmics/pangenes-review. Contact: V. Bonnici, R. Giugno Supplementary information: Supplementary data are available at Briefings in Bioinformatics online.}, } @article {pmid32880768, year = {2021}, author = {Zhu, Z and Wang, L and Qian, H and Gu, F and Li, Y and Zhang, H and Chen, Y and Shi, J and Ma, P and Bao, C and Gu, B}, title = {Comparative genome analysis of 12 Shigella sonnei strains: virulence, resistance, and their interactions.}, journal = {International microbiology : the official journal of the Spanish Society for Microbiology}, volume = {24}, number = {1}, pages = {83--91}, pmid = {32880768}, issn = {1618-1905}, support = {81902040//National Natural Science Foundation of China/ ; 81871734//National Natural Science Foundation of China/ ; 81701390//National Natural Science Foundation of China/ ; BK20170250//Natural Science Foundation of Jiangsu Province/ ; BK20180997//Natural Science Foundation of Jiangsu Province/ ; }, mesh = {Anti-Bacterial Agents/pharmacology ; China ; *Drug Resistance, Bacterial ; Dysentery, Bacillary/microbiology ; Genome, Bacterial ; Humans ; Microbial Sensitivity Tests ; Shigella sonnei/classification/drug effects/*genetics/*pathogenicity ; Virulence ; }, abstract = {Shigellosis is a highly infectious disease that is mainly transmitted via fecal-oral contact of the bacteria Shigella. Four species have been identified in Shigella genus, among which Shigella flexneri is used to be the most prevalent species globally and commonly isolated from developing countries. However, it is being replaced by Shigella sonnei that is currently the main causative agent for dysentery pandemic in many emerging industrialized countries such as Asia and the Middle East. For a better understanding of S. sonnei virulence and antibiotic resistance, we sequenced 12 clinical S. 
sonnei strains with varied antibiotic-resistance profiles collected from four cities in Jiangsu Province, China. Phylogenomic analysis clustered antibiotic-sensitive and resistant S. sonnei into two distinct groups while pan-genome analysis reveals the presence and absence of unique genes in each group. Screening of 31 classes of virulence factors found out that type 2 secretion system is doubled in resistant strains. Further principle component analysis based on the interactions between virulence and resistance indicated that abundant virulence factors are associated with higher levels of antibiotic resistance. The result present here is based on statistical analysis of a small sample size and serves basically as a guidance for further experimental and theoretical studies.}, } @article {pmid32879462, year = {2021}, author = {Muñoz-Ramirez, ZY and Pascoe, B and Mendez-Tenorio, A and Mourkas, E and Sandoval-Motta, S and Perez-Perez, G and Morgan, DR and Dominguez, RL and Ortiz-Princz, D and Cavazza, ME and Rocha, G and Queiroz, DMM and Catalano, M and Palma, GZ and Goldman, CG and Venegas, A and Alarcon, T and Oleastro, M and Vale, FF and Goodman, KJ and Torres, RC and Berthenet, E and Hitchings, MD and Blaser, MJ and Sheppard, SK and Thorell, K and Torres, J}, title = {A 500-year tale of co-evolution, adaptation, and virulence: Helicobacter pylori in the Americas.}, journal = {The ISME journal}, volume = {15}, number = {1}, pages = {78--92}, pmid = {32879462}, issn = {1751-7370}, support = {R01 CA190612/CA/NCI NIH HHS/United States ; P30 CA068485/CA/NCI NIH HHS/United States ; K07 CA125588/CA/NCI NIH HHS/United States ; P01 CA028842/CA/NCI NIH HHS/United States ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Americas ; Europe ; Genetic Variation ; Genome, Bacterial ; *Helicobacter Infections ; *Helicobacter pylori/genetics ; Humans ; United States ; Virulence/genetics ; }, abstract = {Helicobacter pylori is a common component of the human 
stomach microbiota, possibly dating back to the speciation of Homo sapiens. A history of pathogen evolution in allopatry has led to the development of genetically distinct H. pylori subpopulations, associated with different human populations, and more recent admixture among H. pylori subpopulations can provide information about human migrations. However, little is known about the degree to which some H. pylori genes are conserved in the face of admixture, potentially indicating host adaptation, or how virulence genes spread among different populations. We analyzed H. pylori genomes from 14 countries in the Americas, strains from the Iberian Peninsula, and public genomes from Europe, Africa, and Asia, to investigate how admixture varies across different regions and gene families. Whole-genome analyses of 723 H. pylori strains from around the world showed evidence of frequent admixture in the American strains with a complex mosaic of contributions from H. pylori populations originating in the Americas as well as other continents. Despite the complex admixture, distinctive genomic fingerprints were identified for each region, revealing novel American H. pylori subpopulations. A pan-genome Fst analysis showed that variation in virulence genes had the strongest fixation in America, compared with non-American populations, and that much of the variation constituted non-synonymous substitutions in functional domains. 
Network analyses suggest that these virulence genes have followed unique evolutionary paths in the American populations, spreading into different genetic backgrounds, potentially contributing to the high risk of gastric cancer in the region.}, } @article {pmid32879348, year = {2020}, author = {Carroll, LM and Huisman, JS and Wiedmann, M}, title = {Twentieth-century emergence of antimicrobial resistant human- and bovine-associated Salmonella enterica serotype Typhimurium lineages in New York State.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {14428}, pmid = {32879348}, issn = {2045-2322}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Cattle/microbiology ; Cephalosporins/pharmacology ; *Drug Resistance, Multiple, Bacterial ; *Evolution, Molecular ; Humans ; New York ; Phylogeny ; Salmonella typhimurium/classification/drug effects/*genetics/pathogenicity ; Serogroup ; }, abstract = {Salmonella enterica serotype Typhimurium (S. Typhimurium) boasts a broad host range and can be transmitted between livestock and humans. While members of this serotype can acquire resistance to antimicrobials, the temporal dynamics of this acquisition is not well understood. Using New York State (NYS) and its dairy cattle farms as a model system, 87 S. Typhimurium strains isolated from 1999 to 2016 from either human clinical or bovine-associated sources in NYS were characterized using whole-genome sequencing. More than 91% of isolates were classified into one of four major lineages, two of which were largely susceptible to antimicrobials but showed sporadic antimicrobial resistance (AMR) gene acquisition, and two that were largely multidrug-resistant (MDR). All four lineages clustered by presence and absence of elements in the pan-genome. The two MDR lineages, one of which resembled S. Typhimurium DT104, were predicted to have emerged circa 1960 and 1972. 
The two largely susceptible lineages emerged earlier, but showcased sporadic AMR determinant acquisition largely after 1960, including acquisition of cephalosporin resistance-conferring genes after 1985. These results confine the majority of AMR acquisition events in NYS S. Typhimurium to the twentieth century, largely within the era of antibiotic usage.}, } @article {pmid32879312, year = {2020}, author = {Bellas, CM and Schroeder, DC and Edwards, A and Barker, G and Anesio, AM}, title = {Flexible genes establish widespread bacteriophage pan-genomes in cryoconite hole ecosystems.}, journal = {Nature communications}, volume = {11}, number = {1}, pages = {4403}, pmid = {32879312}, issn = {2041-1723}, support = {M 2299/FWF_/Austrian Science Fund FWF/Austria ; }, mesh = {Bacteriophages/*genetics ; Cyanobacteria/virology ; Ecosystem ; Gene Transfer, Horizontal ; Genes, Viral ; Genome, Viral ; Host Microbial Interactions/genetics ; Ice Cover/microbiology/*virology ; *Metagenome ; Metagenomics ; Phylogeny ; }, abstract = {Bacteriophage genomes rapidly evolve via mutation and horizontal gene transfer to counter evolving bacterial host defenses; such arms race dynamics should lead to divergence between phages from similar, geographically isolated ecosystems. However, near-identical phage genomes can reoccur over large geographical distances and several years apart, conversely suggesting many are stably maintained. Here, we show that phages with near-identical core genomes in distant, discrete aquatic ecosystems maintain diversity by possession of numerous flexible gene modules, where homologous genes present in the pan-genome interchange to create new phage variants. By repeatedly reconstructing the core and flexible regions of phage genomes from different metagenomes, we show a pool of homologous gene variants co-exist for each module in each location, however, the dominant variant shuffles independently in each module. 
These results suggest that in a natural community, recombination is the largest contributor to phage diversity, allowing a variety of host recognition receptors and genes to counter bacterial defenses to co-exist for each phage.}, } @article {pmid32872551, year = {2020}, author = {Singh, K and Jamshidi, N and Zomer, R and Piva, TJ and Mantri, N}, title = {Cannabinoids and Prostate Cancer: A Systematic Review of Animal Studies.}, journal = {International journal of molecular sciences}, volume = {21}, number = {17}, pages = {}, pmid = {32872551}, issn = {1422-0067}, support = {Not Applicable//MGC Pharmaceuticals Limited/ ; }, mesh = {Animals ; Benzoxazines/pharmacology/*therapeutic use ; Cannabinoids/pharmacology/*therapeutic use ; Cell Line, Tumor ; Cell Proliferation/drug effects ; Cell Survival/drug effects ; Dose-Response Relationship, Drug ; Humans ; Male ; Morpholines/pharmacology/*therapeutic use ; Naphthalenes/pharmacology/*therapeutic use ; PC-3 Cells ; Prostatic Neoplasms/*drug therapy/metabolism/pathology ; Tumor Burden/drug effects ; Xenograft Model Antitumor Assays ; }, abstract = {Prostate cancer is a major cause of death among men worldwide. Recent preclinical evidence implicates cannabinoids as powerful regulators of cell growth and differentiation, as well as potential anti-cancer agents. The aim of this review was to evaluate the effect of cannabinoids on in vivo prostate cancer models. The databases searched included PubMed, Embase, Scopus, and Web of Science from inception to August 2020. Articles reporting on the effect of cannabinoids on prostate cancer were deemed eligible. We identified six studies that were all found to be based on in vivo/xenograft animal models. Results: In PC3 and DU145 xenografts, WIN55,212-2 reduced cell proliferation in a dose-dependent manner. Furthermore, in LNCaP xenografts, WIN55,212-2 reduced cell proliferation by 66-69%. 
PM49, which is a synthetic cannabinoid quinone, was also found to result in a significant inhibition of tumor growth of up to 90% in xenograft models of LNCaP and 40% in xenograft models of PC3 cells, respectively. All studies have reported that the treatment of prostate cancers in in vivo/xenograft models with various cannabinoids decreased the size of the tumor, the outcomes of which depended on the dose and length of treatment. Within the limitation of these identified studies, cannabinoids were shown to reduce the size of prostate cancer tumors in animal models. However, further well-designed and controlled animal studies are warranted to confirm these findings.}, } @article {pmid32850499, year = {2020}, author = {Alam, I and Kamau, AA and Kulmanov, M and Jaremko, Ł and Arold, ST and Pain, A and Gojobori, T and Duarte, CM}, title = {Functional Pangenome Analysis Shows Key Features of E Protein Are Preserved in SARS and SARS-CoV-2.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {405}, pmid = {32850499}, issn = {2235-2988}, mesh = {Betacoronavirus/*chemistry ; COVID-19 ; Coronavirus Envelope Proteins ; Coronavirus Infections/virology ; Genes, Essential ; Genes, Viral ; Genome, Viral ; Humans ; Middle East Respiratory Syndrome Coronavirus/chemistry/genetics ; Mutation ; Open Reading Frames ; PDZ Domains ; Pandemics ; Pneumonia, Viral/virology ; Protein Domains ; Severe acute respiratory syndrome-related coronavirus/chemistry ; SARS-CoV-2 ; Viral Envelope Proteins/*chemistry/*genetics ; Viroporin Proteins ; }, abstract = {The spread of the novel coronavirus (SARS-CoV-2) has triggered a global emergency, that demands urgent solutions for detection and therapy to prevent escalating health, social, and economic impacts. The spike protein (S) of this virus enables binding to the human receptor ACE2, and hence presents a prime target for vaccines preventing viral entry into host cells. 
The S proteins from SARS and SARS-CoV-2 are similar, but structural differences in the receptor binding domain (RBD) preclude the use of SARS-specific neutralizing antibodies to inhibit SARS-CoV-2. Here we used comparative pangenomic analysis of all sequenced reference Betacoronaviruses, complemented with functional and structural analyses. This analysis reveals that, among all core gene clusters present in these viruses, the envelope protein E shows a variant cluster shared by SARS and SARS-CoV-2 with two completely-conserved key functional features, namely an ion-channel, and a PDZ-binding motif (PBM). These features play a key role in the activation of the inflammasome causing the acute respiratory distress syndrome, the leading cause of death in SARS and SARS-CoV-2 infections. Together with functional pangenomic analysis, mutation tracking, and previous evidence, on E protein as a determinant of pathogenicity in SARS, we suggest E protein as an alternative therapeutic target to be considered for further studies to reduce complications of SARS-CoV-2 infections in COVID-19.}, } @article {pmid32849479, year = {2020}, author = {Kumar, R and Bröms, JE and Sjöstedt, A}, title = {Exploring the Diversity Within the Genus Francisella - An Integrated Pan-Genome and Genome-Mining Approach.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {1928}, pmid = {32849479}, issn = {1664-302X}, abstract = {Pan-genome analysis is a powerful method to explore genomic heterogeneity and diversity of bacterial species. Here we present a pan-genome analysis of the genus Francisella, comprising a dataset of 63 genomes and encompassing clinical as well as environmental isolates from distinct geographic locations. To determine the evolutionary relationship within the genus, we performed phylogenetic whole-genome studies utilizing the average nucleotide identity, average amino acid identity, core genes and non-recombinant loci markers. 
Based on the analyses, the phylogenetic trees obtained identified two distinct clades, A and B and a diverse cluster designated C. The sizes of the pan-, core-, cloud-, and shell-genomes of Francisella were estimated and compared to those of two other facultative intracellular pathogens, Legionella and Piscirickettsia. Francisella had the smallest core-genome, 692 genes, compared to 886 and 1,732 genes for Legionella and Piscirickettsia respectively, while the pan-genome of Legionella was more than twice the size of that of the other two genera. Also, the composition of the Francisella Type VI secretion system (T6SS) was analyzed. Distinct differences in the gene content of the T6SS were identified. In silico approaches performed to identify putative substrates of these systems revealed potential effectors targeting the cell wall, inner membrane, cellular nucleic acids as well as proteins, thus constituting attractive targets for site-directed mutagenesis. The comparative analysis performed here provides a comprehensive basis for the assessment of the phylogenomic relationship of members of the genus Francisella and for the identification of putative T6SS virulence traits.}, } @article {pmid32849358, year = {2020}, author = {Bannantine, JP and Conde, C and Bayles, DO and Branger, M and Biet, F}, title = {Genetic Diversity Among Mycobacterium avium Subspecies Revealed by Analysis of Complete Genome Sequences.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {1701}, pmid = {32849358}, issn = {1664-302X}, abstract = {Mycobacterium avium comprises four subspecies that contain both human and veterinary pathogens. At the inception of this study, twenty-eight M. avium genomes had been annotated as RefSeq genomes, facilitating direct comparisons. These genomes represent strains from around the world and provided a unique opportunity to examine genome dynamics in this species. 
Each genome was confirmed to be classified correctly based on SNP genotyping, nucleotide identity and presence/absence of repetitive elements or other typing methods. The Mycobacterium avium subspecies paratuberculosis (Map) genome size and organization was remarkably consistent, averaging 4.8 Mb with a variance of only 29.6 kb among the 13 strains. Comparing recombination events along with the larger genome size and variance observed among Mycobacterium avium subspecies avium (Maa) and Mycobacterium avium subspecies hominissuis (Mah) strains (collectively termed non-Map) suggests horizontal gene transfer occurs in non-Map, but not in Map strains. Overall, M. avium subspecies could be divided into two major sub-divisions, with the Map type II (bovine strains) clustering tightly on one end of a phylogenetic spectrum and Mah strains clustering more loosely together on the other end. The most evolutionarily distinct Map strain was an ovine strain, designated Telford, which had >1,000 SNPs and showed large rearrangements compared to the bovine type II strains. The Telford strain clustered with Maa strains as an intermediate between Map type II and Mah. SNP analysis and genome organization analyses repeatedly demonstrated the conserved nature of Map versus the mosaic nature of non-Map M. avium strains. Finally, core and pangenomes were developed for Map and non-Map strains. A total of 80% Map genes belonged to the Map core genome, while only 40% of non-Map genes belonged to the non-Map core genome. 
These genomes provide a more complete and detailed comparison of these subspecies strains as well as a blueprint for how genetic diversity originated.}, } @article {pmid32845829, year = {2020}, author = {Perry, BJ and Sullivan, JT and Colombi, E and Murphy, RJT and Ramsay, JP and Ronson, CW}, title = {Symbiosis islands of Loteae-nodulating Mesorhizobium comprise three radiating lineages with concordant nod gene complements and nodulation host-range groupings.}, journal = {Microbial genomics}, volume = {6}, number = {9}, pages = {}, pmid = {32845829}, issn = {2057-5858}, mesh = {Bacterial Proteins/genetics ; Fucosyltransferases/genetics ; Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Lotus/*microbiology ; Mesorhizobium/classification/*physiology ; Plant Proteins/*genetics ; Symbiosis ; Whole Genome Sequencing/*methods ; }, abstract = {Mesorhizobium is a genus of soil bacteria, some isolates of which form an endosymbiotic relationship with diverse legumes of the Loteae tribe. The symbiotic genes of these mesorhizobia are generally carried on integrative and conjugative elements termed symbiosis islands (ICESyms). Mesorhizobium strains that nodulate Lotus spp. have been divided into host-range groupings. Group I (GI) strains nodulate L. corniculatus and L. japonicus ecotype Gifu, while group II (GII) strains have a broader host range, which includes L. pedunculatus. To identify the basis of this extended host range, and better understand Mesorhizobium and ICESym genomics, the genomes of eight Mesorhizobium strains were completed using hybrid long- and short-read assembly. Bioinformatic comparison with previously sequenced mesorhizobia genomes indicated host range was not predicted by Mesorhizobium genospecies but rather by the evolutionary relationship between ICESym symbiotic regions. Three radiating lineages of Loteae ICESyms were identified on this basis, which correlate with Lotus spp. 
host-range grouping and have lineage-specific nod gene complements. Pangenomic analysis of the completed GI and GII ICESyms identified 155 core genes (on average 30.1 % of a given ICESym). Individual GI or GII ICESyms carried diverse accessory genes with an average of 34.6 % of genes unique to a given ICESym. Identification and comparative analysis of NodD symbiotic regulatory motifs - nod boxes - identified 21 branches across the NodD regulons. Four of these branches were associated with seven genes unique to the five GII ICESyms. The nod boxes preceding the host-range gene nodZ in GI and GII ICESyms were disparate, suggesting regulation of nodZ may differ between GI and GII ICESyms. The broad host-range determinant(s) of GII ICESyms that confer nodulation of L. pedunculatus are likely present amongst the 53 GII-unique genes identified.}, } @article {pmid32843837, year = {2020}, author = {Costa, SS and Guimarães, LC and Silva, A and Soares, SC and Baraúna, RA}, title = {First Steps in the Analysis of Prokaryotic Pan-Genomes.}, journal = {Bioinformatics and biology insights}, volume = {14}, number = {}, pages = {1177932220938064}, pmid = {32843837}, issn = {1177-9322}, abstract = {Pan-genome is defined as the set of orthologous and unique genes of a specific group of organisms. The pan-genome is composed by the core genome, accessory genome, and species- or strain-specific genes. The pan-genome is considered open or closed based on the alpha value of the Heap law. In an open pan-genome, the number of gene families will continuously increase with the addition of new genomes to the analysis, while in a closed pan-genome, the number of gene families will not increase considerably. The first step of a pan-genome analysis is the homogenization of genome annotation. The same software should be used to annotate genomes, such as GeneMark or RAST. Subsequently, several software are used to calculate the pan-genome such as BPGA, GET_HOMOLOGUES, PGAP, among others. 
This review presents all these initial steps for those who want to perform a pan-genome analysis, explaining key concepts of the area. Furthermore, we present the pan-genomic analysis of 9 bacterial species. These are the species with the highest number of genomes deposited in GenBank. We also show the influence of the identity and coverage parameters on the prediction of orthologous and paralogous genes. Finally, we cite the perspectives of several research areas where pan-genome analysis can be used to answer important issues.}, } @article {pmid32833967, year = {2020}, author = {Li, Z and Simianer, H}, title = {Pan-genomic open reading frames: A potential supplement of single nucleotide polymorphisms in estimation of heritability and genomic prediction.}, journal = {PLoS genetics}, volume = {16}, number = {8}, pages = {e1008995}, pmid = {32833967}, issn = {1553-7404}, mesh = {Animals ; Breeding ; Genome/*genetics ; Genome-Wide Association Study ; *Genomics ; Genotype ; Open Reading Frames/*genetics ; Phenotype ; Polymorphism, Single Nucleotide/genetics ; Quantitative Trait Loci/*genetics ; }, abstract = {Pan-genomic open reading frames (ORFs) potentially carry protein-coding gene or coding variant information in a population. In this study, we suggest that pan-genomic ORFs are promising to be utilized in estimation of heritability and genomic prediction. A Saccharomyces cerevisiae dataset with whole-genome SNPs, pan-genomic ORFs, and the copy numbers of those ORFs is used to test the effectiveness of ORF data as a predictor in three prediction models for 35 traits. Our results show that the ORF-based heritability can capture more genetic effects than SNP-based heritability for all traits. Compared to SNP-based genomic prediction (GBLUP), pan-genomic ORF-based genomic prediction (OBLUP) is distinctly more accurate for all traits, and the predictive abilities on average are more than doubled across all traits. 
For four traits, the copy number of ORF-based prediction (CBLUP) is more accurate than OBLUP. When using different numbers of isolates in training sets in ORF-based prediction, the predictive abilities for all traits increased as more isolates are added in the training sets, suggesting that with very large training sets the prediction accuracy will be in the range of the square root of the heritability. We conclude that pan-genomic ORFs have the potential to be a supplement of single nucleotide polymorphisms in estimation of heritability and genomic prediction.}, } @article {pmid32828660, year = {2020}, author = {Sibbald, SJ and Eme, L and Archibald, JM and Roger, AJ}, title = {Lateral Gene Transfer Mechanisms and Pan-genomes in Eukaryotes.}, journal = {Trends in parasitology}, volume = {36}, number = {11}, pages = {927-941}, doi = {10.1016/j.pt.2020.07.014}, pmid = {32828660}, issn = {1471-5007}, mesh = {Eukaryota/*genetics ; Evolution, Molecular ; Gene Transfer, Horizontal/*genetics ; Genome/*genetics ; Genome, Protozoan/genetics ; Host-Parasite Interactions/genetics ; }, abstract = {Lateral gene transfer (LGT) is well known as an important driver of genome evolution in bacteria and archaea, but its importance in eukaryote evolution has yet to be fully elucidated. There is now abundant evidence indicating that LGT has played a role in the adaptation of eukaryotes to new environments and conditions, including host-parasite interactions. However, the mechanisms and frequency of LGT across the tree of eukaryotes remain poorly understood. Here we review evidence for known and potential mechanisms of LGT into diverse eukaryote lineages with a particular focus on protists, and we discuss trends emerging from recently reported examples. 
We also explore the potential role of LGT in generating 'pan-genomes' in diverse eukaryotic species.}, } @article {pmid32816227, year = {2020}, author = {Zhou, L and Zhang, T and Tang, S and Fu, X and Yu, S}, title = {Pan-genome analysis of Paenibacillus polymyxa strains reveals the mechanism of plant growth promotion and biocontrol.}, journal = {Antonie van Leeuwenhoek}, volume = {113}, number = {11}, pages = {1539-1558}, pmid = {32816227}, issn = {1572-9699}, support = {No. 20181BBF6003//Jiangxi Provincial Department of Science and Technology (CN)/ ; No. 31760547//National Natural Science Foundation of China/ ; }, mesh = {Genome, Plant/*genetics ; Nitrogen Fixation/genetics ; Paenibacillus polymyxa/classification/*genetics/*physiology ; Phylogeny ; *Plant Development ; Plant Growth Regulators/biosynthesis ; Rhizosphere ; }, abstract = {Rapid development of gene sequencing technologies has led to an exponential increase in microbial sequencing data. Genome research of a single organism does not capture the changes in the characteristics of genetic information within a species. Pan-genome analysis gives us a broader perspective to study the complete genetic information of a species. Paenibacillus polymyxa is a Gram-positive bacterium and an important plant growth-promoting rhizobacterium with the ability to produce multiple antibiotics, such as fusaricidin, lantibiotic, paenilan, and polymyxin. Our study explores the pan-genome of 14 representative P. polymyxa strains isolated from around the world. Heap's law model and curve fitting confirmed an open pan-genome of P. polymyxa. The phylogenetic and collinearity analyses reflected that the evolutionary classification of P. polymyxa strains are not associated with geographical area and ecological niches. Few genes related to phytohormone synthesis and phosphate solubilization were conserved; however, the nif cluster gene associated with nitrogen fixation exists only in some strains. 
This finding is indicative of nitrogen fixing ability is not stable in P. polymyxa. Analysis of antibiotic gene clusters in P. polymyxa revealed the presence of these genes in both core and accessory genomes. This observation indicates that the difference in living environment led to loss of ability to synthesize antibiotics in some strains. The current pan-genomic analysis of P. polymyxa will help us understand the mechanisms of biological control and plant growth promotion. It will also promote the use of P. polymyxa in agriculture.}, } @article {pmid32804605, year = {2020}, author = {Ouyabe, M and Tanaka, N and Shiwa, Y and Fujita, N and Kikuno, H and Babil, P and Shiwachi, H}, title = {Rhizobium dioscoreae sp. nov., a plant growth-promoting bacterium isolated from yam (Dioscorea species).}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {9}, pages = {5054-5062}, pmid = {32804605}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Dioscorea/*microbiology ; Endophytes ; Fatty Acids/chemistry ; Japan ; Nitrogen Fixation ; Nucleic Acid Hybridization ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Rhizobium/*classification/isolation & purification ; Sequence Analysis, DNA ; }, abstract = {This study investigated endophytic nitrogen-fixing bacteria isolated from two species of yam (water yam, Dioscorea alata L.; lesser yam, Dioscorea esculenta L.) grown in nutrient-poor alkaline soil conditions on Miyako Island, Okinawa, Japan. Two bacterial strains of the genus Rhizobium, S-93[T] and S-62, were isolated. The phylogenetic tree, based on the almost-complete 16S rRNA gene sequences (1476 bp for each strain), placed them in a distinct clade, with Rhizobium miluonense CCBAU 41251[T], Rhizobium hainanense I66[T], Rhizobium multihospitium HAMBI 2975[T], Rhizobium freirei PRF 81[T] and Rhizobium tropici CIAT 899[T] being their closest species. 
Their bacterial fatty acid profile, with major components of C19 : 0 cyclo ω8c and summed feature 8, as well as other phenotypic characteristics and DNA G+C content (59.65 mol%) indicated that the novel strains belong to the genus Rhizobium. Pairwise average nucleotide identity analyses separated the novel strains from their most closely related species with similarity values of 90.5, 88.9, 88.5, 84.5 and 84.4 % for R. multihospitium HAMBI 2975[T], R. tropici CIAT 899[T], R. hainanense CCBAU 57015[T], R. miluonense HAMBI 2971[T] and R. freirei PRF 81[T], respectively; digital DNA-DNA hybridization values were in the range of 26-42 %. Considering the phenotypic characteristics as well as the genomic data, it is suggested that strains S-93[T] and S-62 represent a new species, for which the name Rhizobium dioscoreae is proposed. The type strain is S-93[T] (=NRIC 0988[T]=NBRC 114257[T]=DSM 110498[T]).}, } @article {pmid32787780, year = {2020}, author = {Clawson, ML and Schuller, G and Dickey, AM and Bono, JL and Murray, RW and Sweeney, MT and Apley, MD and DeDonder, KD and Capik, SF and Larson, RL and Lubbers, BV and White, BJ and Blom, J and Chitko-McKown, CG and Brichta-Harhay, DM and Smith, TPL}, title = {Differences between predicted outer membrane proteins of genotype 1 and 2 Mannheimia haemolytica.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {250}, pmid = {32787780}, issn = {1471-2180}, support = {CRIS# 3040-32000-034-00D//Agricultural Research Service/International ; }, mesh = {Animals ; Bacterial Outer Membrane Proteins/*genetics ; Cattle ; Cattle Diseases/*microbiology ; Chromosomes, Bacterial/genetics ; Genotype ; Mannheimia haemolytica/classification/*genetics/isolation & purification ; Mutation ; Phylogeny ; Respiratory Tract Infections/*veterinary ; Whole Genome Sequencing/*methods ; }, abstract = {BACKGROUND: Mannheimia haemolytica strains isolated from North American cattle have been classified into two genotypes (1 and 2). 
Although members of both genotypes have been isolated from the upper and lower respiratory tracts of cattle with or without bovine respiratory disease (BRD), genotype 2 strains are much more frequently isolated from diseased lungs than genotype 1 strains. The mechanisms behind the increased association of genotype 2 M. haemolytica with BRD are not fully understood. To address that, and to search for interventions against genotype 2 M. haemolytica, complete, closed chromosome assemblies for 35 genotype 1 and 34 genotype 2 strains were generated and compared. Searches were conducted for the pan genome, core genes shared between the genotypes, and for genes specific to either genotype. Additionally, genes encoding outer membrane proteins (OMPs) specific to genotype 2 M. haemolytica were identified, and the diversity of their protein isoforms was characterized with predominantly unassembled, short-read genomic sequences for up to 1075 additional strains.

RESULTS: The pan genome of the 69 sequenced M. haemolytica strains consisted of 3111 genes, of which 1880 comprised a shared core between the genotypes. A core of 112 and 179 genes or gene variants were specific to genotype 1 and 2, respectively. Seven genes encoding predicted OMPs; a peptidase S6, a ligand-gated channel, an autotransporter outer membrane beta-barrel domain-containing protein (AOMB-BD-CP), a porin, and three different trimeric autotransporter adhesins were specific to genotype 2 as their genotype 1 homologs were either pseudogenes, or not detected. The AOMB-BD-CP gene, however, appeared to be truncated across all examined genotype 2 strains and to likely encode dysfunctional protein. Homologous gene sequences from additional M. haemolytica strains confirmed the specificity of the remaining six genotype 2 OMP genes and revealed they encoded low isoform diversity at the population level.

CONCLUSION: Genotype 2 M. haemolytica possess genes encoding conserved OMPs not found intact in more commensally prone genotype 1 strains. Some of the genotype 2 specific genes identified in this study are likely to have important biological roles in the pathogenicity of genotype 2 M. haemolytica, which is the primary bacterial cause of BRD.}, } @article {pmid32782425, year = {2020}, author = {Xu, S and Cheng, J and Meng, X and Xu, Y and Mu, Y}, title = {Complete Genome and Comparative Genome Analysis of Lactobacillus reuteri YSJL-12, a Potential Probiotics Strain Isolated From Healthy Sow Fresh Feces.}, journal = {Evolutionary bioinformatics online}, volume = {16}, number = {}, pages = {1176934320942192}, pmid = {32782425}, issn = {1176-9343}, abstract = {Lactobacillus reuteri YSJL-12 was isolated from healthy sow fresh feces and used as probiotics additives previously. To investigate the genetic basis on probiotic potential and identify the genes in the strain, the complete genome of YSJL-12 was sequenced. Then comparative genome analysis on 9 strains of Lactobacillus reuteri was performed. The genome of YSJL-12 consisted of a circular 2,084,748 bp chromosome and 2 circular plasmids (51,906 and 15,134 bp). From among the 2065 protein-coding sequences (CDSs), the genes resistant to the environmental stress were identified. The function of COG (Clusters of Orthologous Group) protein genes was predicted, and the KEGG (Kyoto Encyclopedia of Genes and Genomes) pathways were analyzed. The comparative genome analysis indicated that the pan-genome contained a core genome of 1257 orthologous gene clusters, an accessory genome of 1064 orthologous gene clusters, and 1148 strain-specific genes, and the antibacterial mechanism among Lactobacillus reuteri strains might be different. The phylogenetic analysis and genomic collinearity revealed that the phylogenetic relationship among 9 strains of Lactobacillus reuteri was connected with host species and showed host specificity. 
The research could help us to better predict genes function and understand genetic basis on adapting to host gut in Lactobacillus reuteri YSJL-12.}, } @article {pmid32779519, year = {2021}, author = {Bernardes, JS and Eberle, RJ and Vieira, FRJ and Coronado, MA}, title = {A comparative pan-genomic analysis of 53 C. pseudotuberculosis strains based on functional domains.}, journal = {Journal of biomolecular structure & dynamics}, volume = {39}, number = {18}, pages = {6974-6986}, doi = {10.1080/07391102.2020.1805017}, pmid = {32779519}, issn = {1538-0254}, mesh = {Animals ; *Corynebacterium Infections ; *Corynebacterium pseudotuberculosis/genetics ; Genome, Bacterial/genetics ; Genomics ; Horses ; Sheep ; Virulence/genetics ; }, abstract = {Corynebacterium pseudotuberculosis is a pathogenic bacterium with great veterinary and economic importance. It is classified into two biovars: ovis, nitrate-negative, that causes lymphadenitis in small ruminants and equi, nitrate-positive, causing ulcerative lymphangitis in equines. With the explosive growth of available genomes of several strains, pan-genome analysis has opened new opportunities for understanding the dynamics and evolution of C. pseudotuberculosis. However, few pan-genomic studies have compared biovars equi and ovis. Such studies have considered a reduced number of strains and compared entire genomes. Here we conducted an original pan-genome analysis based on protein sequences and their functional domains. We considered 53 C. pseudotuberculosis strains from both biovars isolated from different hosts and countries. We have analysed conserved domains, common domains more frequently found in each biovar and biovar-specific (unique) domains. Our results demonstrated that biovar equi is more variable; there is a significant difference in the number of proteins per strains, probably indicating the occurrence of more gene loss/gain events. 
Moreover, strains of biovar equi presented a higher number of biovar-specific domains, 77 against only eight in biovar ovis, most of them are associated with virulence mechanisms. With this domain analysis, we have identified functional differences among strains of biovars ovis and equi that could be related to niche-adaptation and probably help to better understanding mechanisms of virulence and pathogenesis. The distribution patterns of functional domains identified in this work might have impacts on bacterial physiology and lifestyle, encouraging the development of new diagnoses, vaccines, and treatments for C. pseudotuberculosis diseases. Communicated by Ramaswamy H. Sarma.}, } @article {pmid32770231, year = {2020}, author = {Palevich, N and Maclean, PH and Kelly, WJ and Leahy, SC and Rakonjac, J and Attwood, GT}, title = {Complete Genome Sequence of the Polysaccharide-Degrading Rumen Bacterium Pseudobutyrivibrio xylanivorans MA3014 Reveals an Incomplete Glycolytic Pathway.}, journal = {Genome biology and evolution}, volume = {12}, number = {9}, pages = {1566-1572}, pmid = {32770231}, issn = {1759-6653}, mesh = {Clostridiales/*genetics/metabolism ; *Genome, Bacterial ; Glycolysis/*genetics ; Polysaccharides, Bacterial/metabolism ; Whole Genome Sequencing ; }, abstract = {Bacterial species belonging to the genus Pseudobutyrivibrio are important members of the rumen microbiome contributing to the degradation of complex plant polysaccharides. Pseudobutyrivibrio xylanivorans MA3014 was selected for genome sequencing to examine its ability to breakdown and utilize plant polysaccharides. The complete genome sequence of MA3014 is 3.58 Mb, consists of three replicons (a chromosome, chromid, and plasmid), has an overall G + C content of 39.6%, and encodes 3,265 putative protein-coding genes (CDS). Comparative pan-genomic analysis of all cultivated and currently available P. 
xylanivorans genomes has revealed a strong correlation of orthologous genes within this rumen bacterial species. MA3014 is metabolically versatile and capable of growing on a range of simple mono- or oligosaccharides derived from complex plant polysaccharides such as pectins, mannans, starch, and hemicelluloses, with lactate, butyrate, and formate as the principal fermentation end products. The genes encoding these metabolic pathways have been identified and MA3014 is predicted to encode an extensive range of Carbohydrate-Active enZYmes with 78 glycoside hydrolases, 13 carbohydrate esterases, and 54 glycosyl transferases, suggesting an important role in solubilization of plant matter in the rumen.}, } @article {pmid32766786, year = {2020}, author = {Pan, Y and Awan, F and Zhenbao, M and Zhang, X and Zeng, J and Zeng, Z and Xiong, W}, title = {Preliminary view of the global distribution and spread of the tet(X) family of tigecycline resistance genes.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {75}, number = {10}, pages = {2797-2803}, doi = {10.1093/jac/dkaa284}, pmid = {32766786}, issn = {1460-2091}, mesh = {*Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/genetics ; *Genes, Bacterial/drug effects ; Phylogeny ; Plasmids/drug effects/genetics ; Tetracycline Resistance ; *Tigecycline/pharmacology ; }, abstract = {BACKGROUND: The emergence of plasmid-mediated tet(X3)/tet(X4) genes is threatening the role of tigecycline as a last-resort antibiotic to treat clinical infections caused by XDR bacteria. Considering the possible public health threat posed by tet(X) and its variants [which we collectively call 'tet(X) genes' in this study], global monitoring and surveillance are urgently required.

OBJECTIVES: Here we conducted a worldwide survey of the global distribution and spread of tet(X) genes.

METHODS: We analysed a comprehensive dataset of bacterial genomes in conjunction with surveillance data from our laboratory and the NCBI database, as well as sufficient metadata to characterize the results.

RESULTS: The global distribution features of tet(X) genes were revealed. We clustered three types of genetic backbones of tet(X) genes embedded or transferred in bacterial genomes. Our pan-genome analyses revealed a large genetic pool composed of tet(X)-carrying sequences. Moreover, phylogenetic trees of tet(X) genes and tet(X)-like proteins were built.

CONCLUSIONS: To the best of our knowledge, our results provide the first view of the global distribution of tet(X) genes, demonstrate the features of tet(X)-carrying fragments and highlight the possible evolution of tigecycline-inactivation enzymes in diverse bacterial species and habitats.}, } @article {pmid32762606, year = {2020}, author = {Santos, DDS and Calaça, PRA and Porto, ALF and de Souza, PRE and de Freitas, NSA and Cavalcanti Vieira Soares, MT}, title = {What Differentiates Probiotic from Pathogenic Bacteria? The Genetic Mobility of Enterococcus faecium Offers New Molecular Insights.}, journal = {Omics : a journal of integrative biology}, volume = {24}, number = {12}, pages = {706-713}, doi = {10.1089/omi.2020.0078}, pmid = {32762606}, issn = {1557-8100}, mesh = {*DNA Transposable Elements ; Drug Resistance, Microbial ; Enterococcus faecium/drug effects/*genetics/pathogenicity ; Food Microbiology ; Genes, Bacterial ; Genome, Bacterial ; *Genomics ; Nutrigenomics/methods ; *Probiotics ; }, abstract = {Enterococcus faecium is a lactic acid bacterium with applications in food engineering and nutrigenomics, including as starter cultures in fermented foods. To differentiate the E. faecium probiotic from pathogenic bacteria, physiological analyses are often used but they do not guarantee that a bacterial strain is not pathogenic. We report here new findings and an approach based on comparison of the genetic mobility of (1) probiotic, (2) pathogenic, and (3) nonpathogenic and non-probiotic strains, so as to differentiate probiotics, and inform their safe use. The region of the 16S ribosomal DNA (rDNA) genes of different E. faecium strains native to Pernambuco-Brazil was used with the GenBank query sequence. 
Complete genomes were selected and divided into three groups as noted above to identify the mobile genetic elements (MGEs) (transposase, integrase, conjugative transposon protein and phage) and antibiotic resistance genes (ARGs), and to undertake pan-genome analysis and multiple genome alignment. Differences in the number of MGEs were found in ARGs, in the presence and absence of the genes that differentiate E. faecium probiotics and pathogenic bacteria genetically. Our data suggest that genetic mobility appears to be informative in differentiating between probiotic and pathogenic strains. While the present findings are not necessarily applicable to all probiotics, they offer novel molecular insights to guide future research in nutrigenomics, clinical medicine, and food engineering on new ways to differentiate pathogenic from probiotic bacteria.}, } @article {pmid32761525, year = {2020}, author = {Son, S and Oh, JD and Lee, SH and Shin, D and Kim, Y}, title = {Comparative genomics of canine Lactobacillus reuteri reveals adaptation to a shared environment with humans.}, journal = {Genes & genomics}, volume = {42}, number = {9}, pages = {1107-1116}, doi = {10.1007/s13258-020-00978-w}, pmid = {32761525}, issn = {2092-9293}, mesh = {Adaptation, Biological/genetics ; Adaptation, Physiological/*genetics ; Animals ; Dogs/*microbiology ; Environment ; Evolution, Molecular ; Gastrointestinal Microbiome/genetics ; Gastrointestinal Tract/microbiology ; Gene-Environment Interaction ; Genome, Bacterial/genetics ; Genomics/methods ; Humans ; Limosilactobacillus reuteri/*genetics ; Phylogeny ; Republic of Korea ; }, abstract = {BACKGROUND: Lactobacillus reuteri is a gram-positive, non-motile bacterial species that has been used as a representative microorganism model to describe the ecology and evolution of vertebrate gut symbionts.

OBJECTIVE: Because the genetic features and evolutionary strategies of L. reuteri from the gastrointestinal tract of canines remain unknown, we tried to construct draft genome canine L. reuteri and investigate modified, acquired, or lost genetic features that have facilitated the evolution and adaptation of strains to specific environmental niches by this study.

METHODS: To examine canine L. reuteri, we sequenced an L. reuteri strain isolated from a dog in Korea. A comparative genomic approach was used to assess genetic diversity and gain insight into the distinguishing features related to different hosts based on 27 published genomic sequences.

RESULTS: The pan-genome of 28 L. reuteri strains contained 7,369 gene families, and the core genome contained 1070 gene families. The ANI tree based on the core genes in the canine L. reuteri strain (C1) was very close to those for three strains (IRT, DSM20016, JCM1112) from humans. Evolutionarily, these four strains formed one clade, which we regarded as C1-clade in this study. We could investigate a total of 32,050 amino acid substitutions among the 28 L. reuteri strain genomes. In this comparison, 283 amino acid substitutions were specific to strain C1 and four strains in C1-clade shared most of these 283 C1-strain specific amino acid substitutions, suggesting strongly similar selective pressure. In accessory genes, we could identify 127 C1-clade host-specific genes and found that several genes were closely related to replication, recombination, and repair.

CONCLUSION: This study provides new insights into the adaptation of L. reuteri to the canine intestinal habitat, and suggests that the genome of L. reuteri from canines is closely associated with their living and shared environment with humans.}, } @article {pmid32760427, year = {2020}, author = {Barrera-Redondo, J and Piñero, D and Eguiarte, LE}, title = {Genomic, Transcriptomic and Epigenomic Tools to Study the Domestication of Plants and Animals: A Field Guide for Beginners.}, journal = {Frontiers in genetics}, volume = {11}, number = {}, pages = {742}, pmid = {32760427}, issn = {1664-8021}, abstract = {In the last decade, genomics and the related fields of transcriptomics and epigenomics have revolutionized the study of the domestication process in plants and animals, leading to new discoveries and new unresolved questions. Given that some domesticated taxa have been more studied than others, the extent of genomic data can range from vast to nonexistent, depending on the domesticated taxon of interest. This review is meant as a rough guide for students and academics that want to start a domestication research project using modern genomic tools, as well as for researchers already conducting domestication studies that are interested in following a genomic approach and looking for alternate strategies (cheaper or more efficient) and future directions. We summarize the theoretical and technical background needed to carry out domestication genomics, starting from the acquisition of a reference genome and genome assembly, to the sampling design for population genomics, paleogenomics, transcriptomics, epigenomics and experimental validation of domestication-related genes. 
We also describe some examples of the aforementioned approaches and the relevant discoveries they made to understand the domestication of the studied taxa.}, } @article {pmid32759827, year = {2020}, author = {Botelho, J and Grosso, F and Peixe, L}, title = {ICEs Are the Main Reservoirs of the Ciprofloxacin-Modifying crpP Gene in Pseudomonas aeruginosa.}, journal = {Genes}, volume = {11}, number = {8}, pages = {}, pmid = {32759827}, issn = {2073-4425}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/genetics/*metabolism ; Ciprofloxacin/pharmacology ; *Drug Resistance, Bacterial ; *Interspersed Repetitive Sequences ; Pseudomonas aeruginosa/drug effects/*genetics ; RNA, Transfer/genetics ; }, abstract = {The ciprofloxacin-modifying crpP gene was recently identified in a plasmid isolated from a Pseudomonas aeruginosa clinical isolate. Homologues of this gene were also identified in Escherichia coli, Klebsiella pneumoniae and Acinetobacter baumannii. We set out to explore the mobile elements involved in the acquisition and spread of this gene in publicly available and complete genomes of Pseudomonas spp. All Pseudomonas complete genomes were downloaded from NCBI's Refseq library and were inspected for the presence of the crpP gene. The mobile elements carrying this gene were further characterized. The crpP gene was identified only in P. aeruginosa, in more than half of the complete chromosomes (61.9%, n = 133/215) belonging to 52 sequence types, of which the high-risk clone ST111 was the most frequent. We identified 136 crpP-harboring integrative and conjugative elements (ICEs), with 93.4% belonging to the mating-pair formation G (MPFG) family. The ICEs were integrated at the end of a tRNA[Lys] gene and were all flanked by highly conserved 45-bp direct repeats. The crpP-carrying ICEs contain 26 core genes (2.2% of all 1193 genes found in all the ICEs together), which are present in 99% or more of the crpP-harboring ICEs. 
The most frequently encoded traits on these ICEs include replication, transcription, intracellular trafficking and cell motility. Our work suggests that ICEs are the main vectors promoting the dissemination of the ciprofloxacin-modifying crpP gene in P. aeruginosa.}, } @article {pmid32753501, year = {2020}, author = {Petit, RA and Read, TD}, title = {Bactopia: a Flexible Pipeline for Complete Analysis of Bacterial Genomes.}, journal = {mSystems}, volume = {5}, number = {4}, pages = {}, pmid = {32753501}, issn = {2379-5077}, support = {U54 CK000485/CK/NCEZID CDC HHS/United States ; U54CK000485/ACL/ACL HHS/United States ; }, abstract = {Sequencing of bacterial genomes using Illumina technology has become such a standard procedure that often data are generated faster than can be conveniently analyzed. We created a new series of pipelines called Bactopia, built using Nextflow workflow software, to provide efficient comparative genomic analyses for bacterial species or genera. Bactopia consists of a data set setup step (Bactopia Data Sets [BaDs]), which creates a series of customizable data sets for the species of interest, the Bactopia Analysis Pipeline (BaAP), which performs quality control, genome assembly, and several other functions based on the available data sets and outputs the processed data to a structured directory format, and a series of Bactopia Tools (BaTs) that perform specific postprocessing on some or all of the processed data. BaTs include pan-genome analysis, computing average nucleotide identity between samples, extracting and profiling the 16S genes, and taxonomic classification using highly conserved genes. It is expected that the number of BaTs will increase to fill specific applications in the future. As a demonstration, we performed an analysis of 1,664 public Lactobacillus genomes, focusing on Lactobacillus crispatus, a species that is a common part of the human vaginal microbiome. 
Bactopia is an open source system that can scale from projects as small as one bacterial genome to ones including thousands of genomes and that allows for great flexibility in choosing comparison data sets and options for downstream analysis. Bactopia code can be accessed at https://www.github.com/bactopia/bactopia. IMPORTANCE: It is now relatively easy to obtain a high-quality draft genome sequence of a bacterium, but bioinformatic analysis requires organization and optimization of multiple open source software tools. We present Bactopia, a pipeline for bacterial genome analysis, as an option for processing bacterial genome data. Bactopia also automates downloading of data from multiple public sources and species-specific customization. Because the pipeline is written in the Nextflow language, analyses can be scaled from individual genomes on a local computer to thousands of genomes using cloud resources. As a usage example, we processed 1,664 Lactobacillus genomes from public sources and used comparative analysis workflows (Bactopia Tools) to identify and analyze members of the L. 
crispatus species.}, } @article {pmid32745560, year = {2020}, author = {Tao, Y and Jordan, DR and Mace, ES}, title = {A Graph-Based Pan-Genome Guides Biological Discovery.}, journal = {Molecular plant}, volume = {13}, number = {9}, pages = {1247-1249}, doi = {10.1016/j.molp.2020.07.020}, pmid = {32745560}, issn = {1752-9867}, mesh = {*Genome ; Genomics ; *Soybeans ; }, } @article {pmid32744423, year = {2020}, author = {Correia, K and Mahadevan, R}, title = {Pan-Genome-Scale Network Reconstruction: Harnessing Phylogenomics Increases the Quantity and Quality of Metabolic Models.}, journal = {Biotechnology journal}, volume = {15}, number = {10}, pages = {e1900519}, doi = {10.1002/biot.201900519}, pmid = {32744423}, issn = {1860-7314}, support = {Research Excellence//Ontario Ministry of Research, Innovation and Science/ ; //Genome Canada/ ; }, mesh = {*Genome ; Genomics ; *Metabolic Networks and Pathways/genetics ; Phylogeny ; Saccharomyces cerevisiae/genetics ; }, abstract = {A genome-scale network reconstruction (GENRE) is a knowledgebase for an organism and has various applications. Available genome sequences have risen in recent years, but the number of curated GENREs has not kept pace. Existing yeast GENREs contain significant commission and omission errors. Current practices limit the quantity and quality of GENREs. An open and transparent phylogenomic-driven framework is outlined to address these issues. The method is demonstrated with 33 yeasts and fungi in Dikarya. A pan-fungal metabolic network called FYRMENT (Fungal and Yeast Metabolic Network) (https://github.com/LMSE/FYRMENT) is created, and annotated with ortholog groups from AYbRAH (https://github.com/LMSE/AYbRAH). Metabolic models for lower-level taxons are compiled. The fungal pan-GENRE contains 1553 orthologs, 2759 reactions, 2251 metabolites. The GENREs have higher genomic and metabolic coverage than existing yeast and fungal GENREs created with other methods. 
Metabolic simulations show the maximum amino acid yields from glucose differs between yeast lineages, indicating metabolic networks have evolved. Curating genomes and reactions at higher taxonomic-levels increases the quantity and quality of GENREs than conventional approaches. This approach can scale to other branches in the tree of life.}, } @article {pmid32742815, year = {2020}, author = {Parlikar, A and Kalia, K and Sinha, S and Patnaik, S and Sharma, N and Vemuri, SG and Sharma, G}, title = {Understanding genomic diversity, pan-genome, and evolution of SARS-CoV-2.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e9576}, pmid = {32742815}, issn = {2167-8359}, abstract = {Coronavirus disease 2019 (COVID-19) infection, which originated from Wuhan, China, has seized the whole world in its grasp and created a huge pandemic situation before humanity. Since December 2019, genomes of numerous isolates have been sequenced and analyzed for testing confirmation, epidemiology, and evolutionary studies. In the first half of this article, we provide a detailed review of the history and origin of COVID-19, followed by the taxonomy, nomenclature and genome organization of its causative agent Severe Acute Respiratory Syndrome-related Coronavirus-2 (SARS-CoV-2). In the latter half, we analyze subgenus Sarbecovirus (167 SARS-CoV-2, 312 SARS-CoV, and 5 Pangolin CoV) genomes to understand their diversity, origin, and evolution, along with pan-genome analysis of genus Betacoronavirus members. Whole-genome sequence-based phylogeny of subgenus Sarbecovirus genomes reasserted the fact that SARS-CoV-2 strains evolved from their common ancestors putatively residing in bat or pangolin hosts. We predicted a few country-specific patterns of relatedness and identified mutational hotspots with high, medium and low probability based on genome alignment of 167 SARS-CoV-2 strains. 
A total of 100-nucleotide segment-based homology studies revealed that the majority of the SARS-CoV-2 genome segments are close to Bat CoV, followed by some to Pangolin CoV, and some are unique ones. Open pan-genome of genus Betacoronavirus members indicates the diversity contributed by the novel viruses emerging in this group. Overall, the exploration of the diversity of these isolates, mutational hotspots and pan-genome will shed light on the evolution and pathogenicity of SARS-CoV-2 and help in developing putative methods of diagnosis and treatment.}, } @article {pmid32735209, year = {2020}, author = {Söderlund, R and Formenti, N and Caló, S and Chiari, M and Zoric, M and Alborali, GL and Sørensen Dalgaard, T and Wattrang, E and Eriksson, H}, title = {Comparative genome analysis of Erysipelothrix rhusiopathiae isolated from domestic pigs and wild boars suggests host adaptation and selective pressure from the use of antibiotics.}, journal = {Microbial genomics}, volume = {6}, number = {8}, pages = {}, pmid = {32735209}, issn = {2057-5858}, mesh = {Animals ; Animals, Wild/*microbiology ; Drug Resistance, Bacterial/*genetics ; Erysipelothrix/*genetics ; Erysipelothrix Infections/*microbiology ; Host Adaptation ; Phylogeny ; Serogroup ; Sus scrofa/*microbiology ; Swine ; }, abstract = {The disease erysipelas caused by Erysipelothrix rhusiopathiae (ER) is a major concern in pig production. In the present study the genomes of ER from pigs (n=87), wild boars (n=71) and other sources (n=85) were compared in terms of whole-genome SNP variation, accessory genome content and the presence of genetic antibiotic resistance determinants. The aim was to investigate if genetic features among ER were associated with isolate origin in order to better estimate the risk of transmission of porcine-adapted strains from wild boars to free-range pigs and to increase our understanding of the evolution of ER. 
Pigs and wild boars carried isolates representing all ER clades, but clade one only occurred in healthy wild boars and healthy pigs. Several accessory genes or gene variants were found to be significantly associated with the pig and wild boar hosts, with genes predicted to encode cell wall-associated or extracellular proteins overrepresented. Gene variants associated with serovar determination and capsule production in serovars known to be pathogenic for pigs were found to be significantly associated with pigs as hosts. In total, 30 % of investigated pig isolates but only 6 % of wild boar isolates carried resistance genes, most commonly tetM (tetracycline) and lsa(E) together with lnu(B) (lincosamides, pleuromutilin and streptogramin A). The incidence of variably present genes including resistance determinants was weakly linked to phylogeny, indicating that host adaptation in ER has evolved multiple times in diverse lineages mediated by recombination and the acquisition of mobile genetic elements. The presented results support the occurrence of host-adapted ER strains, but they do not indicate frequent transmission between wild boars and domestic pigs. 
This article contains data hosted by Microreact.}, } @article {pmid32728126, year = {2020}, author = {Gordon, SP and Contreras-Moreira, B and Levy, JJ and Djamei, A and Czedik-Eysenberg, A and Tartaglio, VS and Session, A and Martin, J and Cartwright, A and Katz, A and Singan, VR and Goltsman, E and Barry, K and Dinh-Thi, VH and Chalhoub, B and Diaz-Perez, A and Sancho, R and Lusinska, J and Wolny, E and Nibau, C and Doonan, JH and Mur, LAJ and Plott, C and Jenkins, J and Hazen, SP and Lee, SJ and Shu, S and Goodstein, D and Rokhsar, D and Schmutz, J and Hasterok, R and Catalan, P and Vogel, JP}, title = {Gradual polyploid genome evolution revealed by pan-genomic analysis of Brachypodium hybridum and its diploid progenitors.}, journal = {Nature communications}, volume = {11}, number = {1}, pages = {3670}, pmid = {32728126}, issn = {2041-1723}, support = {I 3033/FWF_/Austrian Science Fund FWF/Austria ; }, mesh = {Brachypodium/*genetics ; Chromosomes, Plant/genetics ; *Diploidy ; *Evolution, Molecular ; Genome, Chloroplast ; *Genome, Plant ; Genomics ; Hybridization, Genetic ; Phylogeny ; Polymorphism, Single Nucleotide ; *Polyploidy ; Retroelements/genetics ; Species Specificity ; }, abstract = {Our understanding of polyploid genome evolution is constrained because we cannot know the exact founders of a particular polyploid. To differentiate between founder effects and post polyploidization evolution, we use a pan-genomic approach to study the allotetraploid Brachypodium hybridum and its diploid progenitors. Comparative analysis suggests that most B. hybridum whole gene presence/absence variation is part of the standing variation in its diploid progenitors. Analysis of nuclear single nucleotide variants, plastomes and k-mers associated with retrotransposons reveals two independent origins for B. hybridum, ~1.4 and ~0.14 million years ago. Examination of gene expression in the younger B. hybridum lineage reveals no bias in overall subgenome expression. 
Our results are consistent with a gradual accumulation of genomic changes after polyploidization and a lack of subgenome expression dominance. Significantly, if we did not use a pan-genomic approach, we would grossly overestimate the number of genomic changes attributable to post polyploidization evolution.}, } @article {pmid32727443, year = {2020}, author = {Derakhshani, H and Bernier, SP and Marko, VA and Surette, MG}, title = {Completion of draft bacterial genomes by long-read sequencing of synthetic genomic pools.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {519}, pmid = {32727443}, issn = {1471-2164}, support = {OGI-146//Genome Canada/ ; }, mesh = {*Genome, Bacterial ; *Genomics ; High-Throughput Nucleotide Sequencing ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Illumina technology currently dominates bacterial genomics due to its high read accuracy and low sequencing cost. However, the incompleteness of draft genomes generated by Illumina reads limits their application in comprehensive genomics analyses. Alternatively, hybrid assembly using both Illumina short reads and long reads generated by single molecule sequencing technologies can enable assembly of complete bacterial genomes, yet the high per-genome cost of long-read sequencing limits the widespread use of this approach in bacterial genomics. Here we developed a protocol for hybrid assembly of complete bacterial genomes using miniaturized multiplexed Illumina sequencing and non-barcoded PacBio sequencing of a synthetic genomic pool (SGP), thus significantly decreasing the overall per-genome cost of sequencing.

RESULTS: We evaluated the performance of SGP hybrid assembly on the genomes of 20 bacterial isolates with different genome sizes, a wide range of GC contents, and varying levels of phylogenetic relatedness. By improving the contiguity of Illumina assemblies, SGP hybrid assembly generated 17 complete and 3 nearly complete bacterial genomes. Increased contiguity of SGP hybrid assemblies resulted in considerable improvement in gene prediction and annotation. In addition, SGP hybrid assembly was able to resolve repeat elements and identify intragenomic heterogeneities, e.g. different copies of 16S rRNA genes, that would otherwise go undetected by short-read-only assembly. Comprehensive comparison of SGP hybrid assemblies with those generated using multiplexed PacBio long reads (long-read-only assembly) also revealed the relative advantage of SGP hybrid assembly in terms of assembly quality. In particular, we observed that SGP hybrid assemblies were completely devoid of both small (i.e. single base substitutions) and large assembly errors. Finally, we show the ability of SGP hybrid assembly to differentiate genomes of closely related bacterial isolates, suggesting its potential application in comparative genomics and pangenome analysis.

CONCLUSION: Our results indicate the superiority of SGP hybrid assembly over both short-read and long-read assemblies with respect to completeness, contiguity, accuracy, and recovery of small replicons. By lowering the per-genome cost of sequencing, our parallel sequencing and hybrid assembly pipeline could serve as a cost effective and high throughput approach for completing high-quality bacterial genomes.}, } @article {pmid32719517, year = {2020}, author = {Haberer, G and Kamal, N and Bauer, E and Gundlach, H and Fischer, I and Seidel, MA and Spannagl, M and Marcon, C and Ruban, A and Urbany, C and Nemri, A and Hochholdinger, F and Ouzunova, M and Houben, A and Schön, CC and Mayer, KFX}, title = {European maize genomes highlight intraspecies variation in repeat and gene content.}, journal = {Nature genetics}, volume = {52}, number = {9}, pages = {950-957}, pmid = {32719517}, issn = {1546-1718}, mesh = {Breeding/methods ; Chromosome Mapping ; Genetic Variation/*genetics ; Genome, Plant/*genetics ; Genotype ; Hybrid Vigor/genetics ; Phenotype ; Zea mays/*genetics ; }, abstract = {The diversity of maize (Zea mays) is the backbone of modern heterotic patterns and hybrid breeding. Historically, US farmers exploited this variability to establish today's highly productive Corn Belt inbred lines from blends of dent and flint germplasm pools. Here, we report de novo genome sequences of four European flint lines assembled to pseudomolecules with scaffold N50 ranging from 6.1 to 10.4 Mb. Comparative analyses with two US Corn Belt lines explains the pronounced differences between both germplasms. While overall syntenic order and consolidated gene annotations reveal only moderate pangenomic differences, whole-genome alignments delineating the core and dispensable genome, and the analysis of heterochromatic knobs and orthologous long terminal repeat retrotransposons unveil the dynamics of the maize genome. 
The high-quality genome sequences of the flint pool complement the maize pangenome and provide an important tool to study maize improvement at a genome scale and to enhance modern hybrid breeding.}, } @article {pmid32719416, year = {2020}, author = {Muqaddasi, QH and Brassac, J and Ebmeyer, E and Kollers, S and Korzun, V and Argillier, O and Stiewe, G and Plieske, J and Ganal, MW and Röder, MS}, title = {Prospects of GWAS and predictive breeding for European winter wheat's grain protein content, grain starch content, and grain hardness.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {12541}, pmid = {32719416}, issn = {2045-2322}, mesh = {Alleles ; Genetic Markers ; Genetic Variation ; Genetics, Population ; *Genome-Wide Association Study ; Grain Proteins/*metabolism ; Haplotypes/genetics ; Hardness ; Linkage Disequilibrium/genetics ; Molecular Sequence Annotation ; Phenotype ; Physical Chromosome Mapping ; *Plant Breeding ; Principal Component Analysis ; Quantitative Trait Loci/genetics ; Starch/*metabolism ; Triticum/*genetics/*growth & development ; }, abstract = {Grain quality traits determine the classification of registered wheat (Triticum aestivum L.) varieties. Although environmental factors and crop management practices exert a considerable influence on wheat quality traits, a significant proportion of the variance is attributed to the genetic factors. To identify the underlying genetic factors of wheat quality parameters viz., grain protein content (GPC), grain starch content (GSC), and grain hardness (GH), we evaluated 372 diverse European wheat varieties in replicated field trials in up to eight environments. We observed that all of the investigated traits hold a wide and significant genetic variation, and a significant negative correlation exists between GPC and GSC plus grain yield. 
Our association analyses based on 26,694 high-quality single nucleotide polymorphic markers revealed a strong quantitative genetic nature of GPC and GSC with associations on groups 2, 3, and 6 chromosomes. The identification of known Puroindoline-b gene for GH provided a positive analytic proof for our studies. We report that a locus QGpc.ipk-6A controls both GPC and GSC with opposite allelic effects. Based on wheat's reference and pan-genome sequences, the physical characterization of two loci viz., QGpc.ipk-2B and QGpc.ipk-6A facilitated the identification of the candidate genes for GPC. Furthermore, by exploiting additive and epistatic interactions of loci, we evaluated the prospects of predictive breeding for the investigated traits that suggested its efficient use in the breeding programs.}, } @article {pmid32719405, year = {2020}, author = {Flament-Simon, SC and de Toro, M and Chuprikova, L and Blanco, M and Moreno-González, J and Salas, M and Blanco, J and Redrejo-Rodríguez, M}, title = {High diversity and variability of pipolins among a wide range of pathogenic Escherichia coli strains.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {12452}, pmid = {32719405}, issn = {2045-2322}, mesh = {Animals ; *DNA Transposable Elements ; Escherichia coli/classification/*genetics/isolation & purification/metabolism ; Escherichia coli Infections/*microbiology/*veterinary ; Genetic Variation ; Genome, Bacterial ; Humans ; Phylogeny ; }, abstract = {Self-synthesizing transposons are integrative mobile genetic elements (MGEs) that encode their own B-family DNA polymerase (PolB). Discovered a few years ago, they are proposed as key players in the evolution of several groups of DNA viruses and virus-host interaction machinery. Pipolins are the most recent addition to the group, are integrated in the genomes of bacteria from diverse phyla and also present as circular plasmids in mitochondria. 
Remarkably, pipolins-encoded PolBs are proficient DNA polymerases endowed with DNA priming capacity, hence the name, primer-independent PolB (piPolB). We have now surveyed the presence of pipolins in a collection of 2,238 human and animal pathogenic Escherichia coli strains and found that, although detected in only 25 positive isolates (1.1%), they are present in E. coli strains from a wide variety of pathotypes, serotypes, phylogenetic groups and sequence types. Overall, the pangenome of strains carrying pipolins is highly diverse, despite the fact that a considerable number of strains belong to only three clonal complexes (CC10, CC23 and CC32). Comparative analysis with a set of 67 additional pipolin-harboring genomes from GenBank database spanning strains from diverse origin, further confirmed these results. The genetic structure of pipolins shows great flexibility and variability, with the piPolB gene and the attachment sites being the only common features. Most pipolins contain one or more recombinases that would be involved in excision/integration of the element in the same conserved tRNA gene. This mobilization mechanism might explain the apparent incompatibility of pipolins with other integrative MGEs such as integrons. In addition, analysis of cophylogeny between pipolins and pipolin-harboring strains showed a lack of congruence between several pipolins and their host strains, in agreement with horizontal transfer between hosts. Overall, these results indicate that pipolins can serve as a vehicle for genetic transfer among circulating E. 
coli and possibly also among other pathogenic bacteria.}, } @article {pmid32718320, year = {2020}, author = {Crysnanto, D and Pausch, H}, title = {Bovine breed-specific augmented reference graphs facilitate accurate sequence read mapping and unbiased variant discovery.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {184}, pmid = {32718320}, issn = {1474-760X}, mesh = {Animals ; Cattle/*genetics ; Gene Frequency ; *Genome ; Genotyping Techniques ; Humans ; Reference Standards ; *Sequence Analysis, DNA ; Species Specificity ; }, abstract = {BACKGROUND: The current bovine genomic reference sequence was assembled from a Hereford cow. The resulting linear assembly lacks diversity because it does not contain allelic variation, a drawback of linear references that causes reference allele bias. High nucleotide diversity and the separation of individuals by hundreds of breeds make cattle ideally suited to investigate the optimal composition of variation-aware references.

RESULTS: We augment the bovine linear reference sequence (ARS-UCD1.2) with variants filtered for allele frequency in dairy (Brown Swiss, Holstein) and dual-purpose (Fleckvieh, Original Braunvieh) cattle breeds to construct either breed-specific or pan-genome reference graphs using the vg toolkit. We find that read mapping is more accurate to variation-aware than linear references if pre-selected variants are used to construct the genome graphs. Graphs that contain random variants do not improve read mapping over the linear reference sequence. Breed-specific augmented and pan-genome graphs enable almost similar mapping accuracy improvements over the linear reference. We construct a whole-genome graph that contains the Hereford-based reference sequence and 14 million alleles that have alternate allele frequency greater than 0.03 in the Brown Swiss cattle breed. Our novel variation-aware reference facilitates accurate read mapping and unbiased sequence variant genotyping for SNPs and Indels.

CONCLUSIONS: We develop the first variation-aware reference graph for an agricultural animal (https://doi.org/10.5281/zenodo.3759712). Our novel reference structure improves sequence read mapping and variant genotyping over the linear reference. Our work is a first step towards the transition from linear to variation-aware reference structures in species with high genetic diversity and many sub-populations.}, } @article {pmid32718035, year = {2020}, author = {Yin, Z and Liu, J and Du, B and Ruan, HH and Huo, YX and Du, Y and Qiao, J}, title = {Whole-Genome-Based Survey for Polyphyletic Serovars of Salmonella enterica subsp. enterica Provides New Insights into Public Health Surveillance.}, journal = {International journal of molecular sciences}, volume = {21}, number = {15}, pages = {}, pmid = {32718035}, issn = {1422-0067}, support = {2019M660475//China Postdoctoral Science Foundation/ ; 21621004, 31870122//National Natural Science Foundation of China/ ; 18JCYBJC96000//Natural Science Foundation of Tianjin City/ ; 18JCTPJC65000//Tianjin Enterprise Science and Technology Commissioner Project/ ; }, mesh = {*DNA, Bacterial ; *Genome, Bacterial ; Humans ; *Multilocus Sequence Typing ; *Phylogeny ; Public Health Surveillance ; Salmonella/*genetics/isolation & purification ; Salmonella Infections/*genetics ; Serogroup ; Whole Genome Sequencing ; }, abstract = {Serotyping has traditionally been considered the basis for surveillance of Salmonella, but it cannot distinguish distinct lineages sharing the same serovar that vary in host range, pathogenicity and epidemiology. However, polyphyletic serovars have not been extensively investigated. Public health microbiology is currently being transformed by whole-genome sequencing (WGS) data, which promote the lineage determination using a more powerful and accurate technique than serotyping. The focus in this study is to survey and analyze putative polyphyletic serovars. 
The multi-locus sequence typing (MLST) phylogenetic analysis identified four putative polyphyletic serovars, namely, Montevideo, Bareilly, Saintpaul, and Muenchen. Whole-genome-based phylogeny and population structure highlighted the polyphyletic nature of Bareilly and Saintpaul and the multi-lineage nature of Montevideo and Muenchen. The population of these serovars was defined by extensive genetic diversity, the open pan genome and the small core genome. Source niche metadata revealed putative existence of lineage-specific niche adaptation (host-preference and environmental-preference), exhibited by lineage-specific genomic contents associated with metabolism and transport. Meanwhile, differences in genetic profiles relating to virulence and antimicrobial resistance within each lineage may contribute to pathogenicity and epidemiology. The results also showed that recombination events occurring at the H1-antigen loci may be an important reason for polyphyly. The results presented here provide the genomic basis of simple, rapid, and accurate identification of phylogenetic lineages of these serovars, which could have important implications for public health.}, } @article {pmid32715552, year = {2021}, author = {Fang, H and Xu, JB and Nie, Y and Wu, XL}, title = {Pan-genomic analysis reveals that the evolution of Dietzia species depends on their living habitats.}, journal = {Environmental microbiology}, volume = {23}, number = {2}, pages = {861-877}, doi = {10.1111/1462-2920.15176}, pmid = {32715552}, issn = {1462-2920}, support = {2018YFA0902100//National Key R&D Program of China/ ; 2018YFA0902103//National Key R&D Program of China/ ; 31770118//National Natural Science Foundation of China/ ; 31770120//National Natural Science Foundation of China/ ; }, mesh = {Actinobacteria/classification/*genetics/isolation & purification/metabolism ; Bacterial Proteins/genetics/metabolism ; *Ecosystem ; Evolution, Molecular ; Gene Transfer, Horizontal ; *Genome, Bacterial ; 
Genomics ; Phylogeny ; }, abstract = {The bacterial genus Dietzia is widely distributed in various environments. The genomes of 26 diverse strains of Dietzia, including almost all the type strains, were analysed in this study. This analysis revealed a lipid metabolism gene richness, which could explain the ability of Dietzia to live in oil related environments. The pan-genome consists of 83,976 genes assigned into 10,327 gene families, 792 of which are shared by all the genomes of Dietzia. Mathematical extrapolation of the data suggests that the Dietzia pan-genome is open. Both gene duplication and gene loss contributed to the open pan-genome, while horizontal gene transfer was limited. Dietzia strains primarily gained their diverse metabolic capacity through more ancient gene duplications. Phylogenetic analysis of Dietzia isolated from aquatic and terrestrial environments showed two distinct clades from the same ancestor. The genome sizes of Dietzia strains from aquatic environments were significantly larger than those from terrestrial environments, which was mainly due to the occurrence of more gene loss events during the evolutionary progress of the strains from terrestrial environments. 
The evolutionary history of Dietzia was tightly coupled to environmental conditions, and iron concentrations should be one of the key factors shaping the genomes of the Dietzia lineages.}, } @article {pmid32714356, year = {2020}, author = {Moreno-Pérez, A and Pintado, A and Murillo, J and Caballo-Ponce, E and Tegli, S and Moretti, C and Rodríguez-Palenzuela, P and Ramos, C}, title = {Host Range Determinants of Pseudomonas savastanoi Pathovars of Woody Hosts Revealed by Comparative Genomics and Cross-Pathogenicity Tests.}, journal = {Frontiers in plant science}, volume = {11}, number = {}, pages = {973}, pmid = {32714356}, issn = {1664-462X}, abstract = {The study of host range determinants within the Pseudomonas syringae complex is gaining renewed attention due to its widespread distribution in non-agricultural environments, evidence of large variability in intra-pathovar host range, and the emergence of new epidemic diseases. This requires the establishment of appropriate model pathosystems facilitating integration of phenotypic, genomic and evolutionary data. Pseudomonas savastanoi pv. savastanoi is a model pathogen of the olive tree, and here we report a closed genome of strain NCPPB 3335, plus draft genome sequences of three strains isolated from oleander (pv. nerii), ash (pv. fraxini) and broom plants (pv. retacarpa). We then conducted a comparative genomic analysis of these four new genomes plus 16 publicly available genomes, representing 20 strains of these four P. savastanoi pathovars of woody hosts. Despite overlapping host ranges, cross-pathogenicity tests using four plant hosts clearly separated these pathovars and lead to pathovar reassignment of two strains. Critically, these functional assays were pivotal to reconcile phylogeny with host range and to define pathovar-specific genes repertoires. 
We report a pan-genome of 7,953 ortholog gene families and a total of 45 type III secretion system effector genes, including 24 core genes, four genes exclusive of pv. retacarpa and several genes encoding pathovar-specific truncations. Noticeably, the four pathovars corresponded with well-defined genetic lineages, with core genome phylogeny and hierarchical clustering of effector genes closely correlating with pathogenic specialization. Knot-inducing pathovars encode genes absent in the canker-inducing pv. fraxini, such as those related to indole acetic acid, cytokinins, rhizobitoxine, and a bacteriophytochrome. Other pathovar-exclusive genes encode type I, type II, type IV, and type VI secretion system proteins, the phytotoxine phevamine A, a siderophore, c-di-GMP-related proteins, methyl chemotaxis proteins, and a broad collection of transcriptional regulators and transporters of eight different superfamilies. Our combination of pathogenicity analyses and genomics tools allowed us to correctly assign strains to pathovars and to propose a repertoire of host range-related genes in the P. 
syringae complex.}, } @article {pmid32706329, year = {2020}, author = {Kc, R and Leong, KWC and Harkness, NM and Lachowicz, J and Gautam, SS and Cooley, LA and McEwan, B and Petrovski, S and Karupiah, G and O'Toole, RF}, title = {Whole-genome analyses reveal gene content differences between nontypeable Haemophilus influenzae isolates from chronic obstructive pulmonary disease compared to other clinical phenotypes.}, journal = {Microbial genomics}, volume = {6}, number = {8}, pages = {}, pmid = {32706329}, issn = {2057-5858}, mesh = {Genome, Bacterial ; Genome-Wide Association Study ; Haemophilus Infections/*microbiology ; *Haemophilus influenzae/genetics/pathogenicity ; Humans ; Meningitis/microbiology ; Otitis/microbiology ; Phenotype ; Pneumonia/microbiology ; Pulmonary Disease, Chronic Obstructive/*microbiology ; Virulence/*genetics ; }, abstract = {Nontypeable Haemophilus influenzae (NTHi) colonizes human upper respiratory airways and plays a key role in the course and pathogenesis of acute exacerbations of chronic obstructive pulmonary disease (COPD). Currently, it is not possible to distinguish COPD isolates of NTHi from other clinical isolates of NTHi using conventional genotyping methods. Here, we analysed the core and accessory genome of 568 NTHi isolates, including 40 newly sequenced isolates, to look for genetic distinctions between NTHi isolates from COPD with respect to other illnesses, including otitis media, meningitis and pneumonia. Phylogenies based on polymorphic sites in the core-genome did not show discrimination between NTHi strains collected from different clinical phenotypes. However, pan-genome-wide association studies identified 79 unique NTHi accessory genes that were significantly associated with COPD. Furthermore, many of the COPD-related NTHi genes have known or predicted roles in virulence, transmembrane transport of metal ions and nutrients, cellular respiration and maintenance of redox homeostasis. 
This indicates that specific genes may be required by NTHi for its survival or virulence in the COPD lung. These results advance our understanding of the pathogenesis of NTHi infection in COPD lungs.}, } @article {pmid32698896, year = {2020}, author = {Tonkin-Hill, G and MacAlasdair, N and Ruis, C and Weimann, A and Horesh, G and Lees, JA and Gladstone, RA and Lo, S and Beaudoin, C and Floto, RA and Frost, SDW and Corander, J and Bentley, SD and Parkhill, J}, title = {Producing polished prokaryotic pangenomes with the Panaroo pipeline.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {180}, pmid = {32698896}, issn = {1474-760X}, support = {206194/WT_/Wellcome Trust/United Kingdom ; 107032/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; 204016/WT_/Wellcome Trust/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {*Algorithms ; Biological Evolution ; Drug Resistance, Bacterial/genetics ; *Genome, Bacterial ; Genomics/*methods ; Klebsiella pneumoniae/genetics ; Mycobacterium tuberculosis/genetics ; *Software ; }, abstract = {Population-level comparisons of prokaryotic genomes must take into account the substantial differences in gene content resulting from horizontal gene transfer, gene duplication and gene loss. However, the automated annotation of prokaryotic genomes is imperfect, and errors due to fragmented assemblies, contamination, diverse gene families and mis-assemblies accumulate over the population, leading to profound consequences when analysing the set of all genes found in a species. Here, we introduce Panaroo, a graph-based pangenome clustering tool that is able to account for many of the sources of error introduced during the annotation of prokaryotic genome assemblies. 
Panaroo is available at https://github.com/gtonkinhill/panaroo .}, } @article {pmid32697186, year = {2020}, author = {Liou, JS and Huang, CH and Ikeyama, N and Lee, AY and Chen, IC and Blom, J and Chen, CC and Chen, CH and Lin, YC and Hsieh, SY and Huang, L and Ohkuma, M and Watanabe, K and Sakamoto, M}, title = {Prevotella hominis sp. nov., isolated from human faeces.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {8}, pages = {4767-4773}, doi = {10.1099/ijsem.0.004342}, pmid = {32697186}, issn = {1466-5034}, mesh = {Adult ; Bacteria, Anaerobic/classification ; Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Feces/*microbiology ; Humans ; Nucleic Acid Hybridization ; *Phylogeny ; Prevotella/*classification/isolation & purification ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Species Specificity ; Taiwan ; Vitamin K 2/chemistry ; }, abstract = {A strictly anaerobic predominant bacterium, designated as strain gm001[T], was isolated from a freshly voided faecal sample collected from a healthy Taiwanese adult. Cells were Gram-stain-negative rods, non-motile and non-spore-forming. Strain gm001[T] was identified as a member of the genus Prevotella, and a comparison of 16S rRNA and hsp60 gene sequences revealed sequence similarities of 98.5 and 93.3 %, respectively, demonstrating that it was most closely related to the type strain of Prevotella copri. Phylogenomic tree analysis indicated that the gm001[T] cluster is an independent lineage of P. copri DSM 18205[T]. The average nucleotide identity, digital DNA‒DNA hybridization and average amino acid identity values between strain gm001[T] and P. copri DSM 18205[T] were 80.9, 28.6 and 83.8 %, respectively, which were clearly lower than the species delineation thresholds. The species-specific genes of this novel species were also identified on the basis of pan-genomic analysis. 
The predominant menaquinones were MK-11 and MK-12, and the predominant fatty acids were anteiso-C15 : 0, C15 : 0 and iso-C15 : 0. Acetate and succinate were produced from glucose as metabolic end products. Taken together, the results indicate that strain gm001[T] represents a novel species of the genus Prevotella, for which the name Prevotella hominis sp. nov. is proposed. The type strain is gm001[T] (=BCRC 81118[T]=JCM 33280[T]).}, } @article {pmid32690893, year = {2020}, author = {Bayer, PE and Golicz, AA and Scheben, A and Batley, J and Edwards, D}, title = {Plant pan-genomes are the new reference.}, journal = {Nature plants}, volume = {6}, number = {8}, pages = {914-920}, doi = {10.1038/s41477-020-0733-0}, pmid = {32690893}, issn = {2055-0278}, mesh = {Genes, Plant/genetics ; Genetic Variation/genetics ; *Genome, Plant/genetics ; Plants/*genetics ; Reference Values ; }, abstract = {Recent years have seen a surge in plant genome sequencing projects and the comparison of multiple related individuals. The high degree of genomic variation observed led to the realization that single reference genomes do not represent the diversity within a species, and led to the expansion of the pan-genome concept. Pan-genomes represent the genomic diversity of a species and includes core genes, found in all individuals, as well as variable genes, which are absent in some individuals. Variable gene annotations often show similarities across plant species, with genes for biotic and abiotic stress commonly enriched within variable gene groups. 
Here we review the growth of pan-genomics in plants, explore the origins of gene presence and absence variation, and show how pan-genomes can support plant breeding and evolution studies.}, } @article {pmid32690023, year = {2020}, author = {Gabrielaite, M and Marvig, RL}, title = {GenAPI: a tool for gene absence-presence identification in fragmented bacterial genome sequences.}, journal = {BMC bioinformatics}, volume = {21}, number = {1}, pages = {320}, pmid = {32690023}, issn = {1471-2105}, support = {0126//Danmarks Grundforskningsfond/ ; }, mesh = {Bacteria/*genetics ; *Genome, Bacterial ; Genomics/*methods ; High-Throughput Nucleotide Sequencing/*methods ; Molecular Sequence Annotation ; Sequence Analysis, DNA/*methods ; *Software ; }, abstract = {BACKGROUND: Bacterial gene loss and acquisition is a well-known phenomenon which contributes to bacterial adaptation through changes in important phenotypes such as virulence, antibiotic resistance and metabolic capability. While advances in DNA sequencing have accelerated our ability to generate short genome sequence reads to disentangle phenotypic changes caused by gene loss and acquisition, the short-read genome sequencing often results in fragmented genome assemblies as a basis for identification of gene loss and acquisition events. However, sensitive and precise determination of gene content change for fragmented genome assemblies remains challenging as analysis needs to account for cases when only a fragment of the gene is assembled or when the gene assembly is split in more than one contig.

RESULTS: We developed GenAPI, a command-line tool that is designed to compare the gene content of bacterial genomes for which only fragmented genome assemblies are available. GenAPI, unlike other available tools of similar purpose, accounts for imperfections in sequencing and assembly, and aims to compensate for them. We tested the performance of GenAPI on three different datasets to show that GenAPI has a high sensitivity while it maintains precision when dealing with partly assembled genes in both simulated and real datasets. Furthermore, we benchmarked the performance of GenAPI with six popular tools for gene presence-absence identification.

CONCLUSIONS: Our developed bioinformatics tool, called GenAPI, has the same precision and recall rates when analyzing complete genome sequences as the other tools of the same purpose; however, GenAPI's performance is markedly better on fragmented genome assemblies.}, } @article {pmid32687170, year = {2020}, author = {Yang, LL and Jiang, Z and Li, Y and Wang, ET and Zhi, XY}, title = {Plasmids Related to the Symbiotic Nitrogen Fixation Are Not Only Cooperated Functionally but Also May Have Evolved over a Time Span in Family Rhizobiaceae.}, journal = {Genome biology and evolution}, volume = {12}, number = {11}, pages = {2002-2014}, pmid = {32687170}, issn = {1759-6653}, mesh = {*Evolution, Molecular ; Genome, Bacterial ; Nitrogen Fixation/*genetics ; Phylogeny ; *Plasmids ; Rhizobiaceae/*genetics ; Symbiosis/*genetics ; }, abstract = {Rhizobia are soil bacteria capable of forming symbiotic nitrogen-fixing nodules associated with leguminous plants. In fast-growing legume-nodulating rhizobia, such as the species in the family Rhizobiaceae, the symbiotic plasmid is the main genetic basis for nitrogen-fixing symbiosis, and is susceptible to horizontal gene transfer. To further understand the symbioses evolution in Rhizobiaceae, we analyzed the pan-genome of this family based on 92 genomes of type/reference strains and reconstructed its phylogeny using a phylogenomics approach. Intriguingly, although the genetic expansion that occurred in chromosomal regions was the main reason for the high proportion of low-frequency flexible gene families in the pan-genome, gene gain events associated with accessory plasmids introduced more genes into the genomes of nitrogen-fixing species. For symbiotic plasmids, although horizontal gene transfer frequently occurred, transfer may be impeded by, such as, the host's physical isolation and soil conditions, even among phylogenetically close species. 
During coevolution with leguminous hosts, the plasmid system, including accessory and symbiotic plasmids, may have evolved over a time span, and provided rhizobial species with the ability to adapt to various environmental conditions and helped them achieve nitrogen fixation. These findings provide new insights into the phylogeny of Rhizobiaceae and advance our understanding of the evolution of symbiotic nitrogen fixation.}, } @article {pmid32677889, year = {2020}, author = {Coulton, A and Edwards, KJ}, title = {AutoCloner: automatic homologue-specific primer design for full-gene cloning in polyploids.}, journal = {BMC bioinformatics}, volume = {21}, number = {1}, pages = {311}, pmid = {32677889}, issn = {1471-2105}, support = {BB/M009122/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/T/000PR9814/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Amino Acid Substitution/genetics ; Base Sequence ; *Cloning, Molecular ; Computational Biology/*methods ; DNA Primers/genetics/*metabolism ; Polymerase Chain Reaction/methods ; Polymorphism, Single Nucleotide/genetics ; *Polyploidy ; *Sequence Homology ; *Software ; Triticum/*genetics ; }, abstract = {BACKGROUND: Polyploid organisms such as wheat complicate even the simplest of procedures in molecular biology. Whilst knowledge of genomic sequences in crops is increasing rapidly, the scientific community is still a long way from producing a full pan-genome for every species. Polymerase chain reaction and Sanger sequencing therefore remain widely used as methods for characterizing gene sequences in many varieties of crops. High sequence similarity between genomes in polyploids means that if primers are not homeologue-specific via the incorporation of a SNP at the 3' tail, sequences other than the target sequence will also be amplified. Current consensus for gene cloning in wheat is to manually perform many steps in a long bioinformatics pipeline.

RESULTS: Here we present AutoCloner (www.autocloner.com), a fully automated pipeline for crop gene cloning that includes a free-to-use web interface for users. AutoCloner takes a sequence of interest from the user and performs a basic local alignment search tool (BLAST) search against the genome assembly for their particular polyploid crop. Homologous sequences are then compiled with the input sequence into a multiple sequence alignment which is mined for single-nucleotide polymorphisms (SNPs). Various combinations of potential primers that cover the entire gene of interest are then created and evaluated by Primer3; the set of primers with the highest score, as well as all possible primers at every SNP location, are then returned to the user for polymerase chain reaction (PCR). We have successfully used AutoCloner to clone various genes of interest in the Apogee wheat variety, which has no current genome sequence. In addition, we have successfully run the pipeline on ~ 80,000 high-confidence gene models from a wheat genome assembly.

CONCLUSION: AutoCloner is the first tool to fully-automate primer design for gene cloning in polyploids, where previously the consensus within the wheat community was to perform this process manually. The web interface for AutoCloner provides a simple and effective polyploid primer-design method for gene cloning, with no need for researchers to download software or input any other details other than their sequence of interest.}, } @article {pmid32670797, year = {2020}, author = {Cospain, A and Dubourg, C and Gastineau, S and Pichard, S and Gandemer, V and Bonneau, J and de Tayrac, M and Moreau, C and Odent, S and Pasquier, L and Damaj, L and Lavillaureix, A}, title = {Incidental diagnosis of mucopolysaccharidosis type I in an infant with chronic intestinal pseudoobstruction by exome sequencing.}, journal = {Molecular genetics and metabolism reports}, volume = {24}, number = {}, pages = {100621}, pmid = {32670797}, issn = {2214-4269}, abstract = {Chronic intestinal pseudoobstruction (CIPO) is a severe form of intestinal dysmotility, and patients often undergo iterative abdominal surgeries and require parenteral nutrition. Several genes are known to be responsible for this pathology, including ACTG2 (autosomal dominant) and MYH11 (autosomal recessive). We report the first case of unexpected trio medical exome sequencing diagnosis of mucopolysaccharidosis type I (MPS-I) in a patient with an early CIPO. There was no clinical suspicion of MPS-I at the time of the prescription. It allowed biochemical confirmation of MPS-I, expert clinical evaluation and early treatment. Enzyme replacement therapy (ERT) with laronidase was started at 9 months old, and hematopoietic stem cell transplantation was carried out at 10 months and a half. The patient also had a 1.7 mb heterozygous deletion in chromosomal region 16p13.11p12.3, comprising several genes, including MYH11, paternally inherited. Her father has no symptoms of CIPO or other digestive symptoms. 
One previous association of CIPO and MPS-I was reported in 1986. Moreover, the number of incidental findings of inherited metabolic disorders with therapeutic impact will inevitably increase as pangenomic analyses become cheaper and easily available.}, } @article {pmid32670207, year = {2020}, author = {Castro-Jaimes, S and Bello-López, E and Velázquez-Acosta, C and Volkow-Fernández, P and Lozano-Zarain, P and Castillo-Ramírez, S and Cevallos, MA}, title = {Chromosome Architecture and Gene Content of the Emergent Pathogen Acinetobacter haemolyticus.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {926}, pmid = {32670207}, issn = {1664-302X}, abstract = {Acinetobacter haemolyticus is a Gammaproteobacterium that has been involved in serious diseases frequently linked to the nosocomial environment. Most of the strains causing such infections are sensitive to a wide variety of antibiotics, but recent reports indicate that this pathogen is acquiring very efficiently carbapenem-resistance determinants like the blaNDM-1 gene, all over the world. With this work we contribute with a collection set of 31 newly sequenced nosocomial A. haemolyticus isolates. Genome analysis of these sequences and others collected from RefSeq indicates that their chromosomes are organized in 12 syntenic blocks that contain most of the core genome genes. These blocks are separated by hypervariable regions that are rich in unique gene families, but also have signals of horizontal gene transfer. Genes involved in virulence or encoding different secretion systems are located inside syntenic regions and have recombination signals. The relative order of the syntenic blocks along the A. haemolyticus chromosome can change, indicating that they have been subject to several kinds of inversions. Genomes of this microorganism show large differences in gene content even if they are in the same clade. Here we also show that A. 
haemolyticus has an open pan-genome.}, } @article {pmid32669327, year = {2020}, author = {Chandrasekar, SS and Kingstad-Bakke, B and Wu, CW and Suresh, M and Talaat, AM}, title = {A Novel Mucosal Adjuvant System for Immunization against Avian Coronavirus Causing Infectious Bronchitis.}, journal = {Journal of virology}, volume = {94}, number = {19}, pages = {}, pmid = {32669327}, issn = {1098-5514}, support = {R21 AI149793/AI/NIAID NIH HHS/United States ; U01 AI124299/AI/NIAID NIH HHS/United States ; }, mesh = {Adjuvants, Immunologic/pharmacology ; Animals ; Bronchitis/*prevention & control/virology ; Chickens ; Coronavirus Infections/immunology/prevention & control/*veterinary ; Disease Models, Animal ; Gammacoronavirus/*immunology ; Immunity, Cellular ; Immunization ; Infectious bronchitis virus/immunology ; Mucous Membrane/*immunology ; Nucleocapsid/immunology ; Poultry Diseases/immunology/*prevention & control/virology ; Recombinant Proteins/immunology ; Vaccines, DNA/immunology ; Viral Load ; Viral Vaccines/*immunology ; }, abstract = {Infectious bronchitis (IB) caused by infectious bronchitis virus (IBV) is currently a major threat to chicken health, with multiple outbreaks being reported in the United States over the past decade. Modified live virus (MLV) vaccines used in the field can persist and provide the genetic material needed for recombination and emergence of novel IBV serotypes. Inactivated and subunit vaccines overcome some of the limitations of MLV with no risk of virulence reversion and emergence of new virulent serotypes. However, these vaccines are weakly immunogenic and poorly protective. There is an urgent need to develop more effective vaccines that can elicit a robust, long-lasting immune response. In this study, we evaluate a novel adjuvant system developed from Quil-A and chitosan (QAC) for the intranasal delivery of nucleic acid immunogens to improve protective efficacy. 
The QAC adjuvant system forms nanocarriers (<100 nm) that efficiently encapsulate nucleic acid cargo, exhibit sustained release of payload, and can stably transfect cells. Encapsulation of plasmid DNA vaccine expressing IBV nucleocapsid (N) protein by the QAC adjuvant system (pQAC-N) enhanced immunogenicity, as evidenced by robust induction of adaptive humoral and cellular immune responses postvaccination and postchallenge. Birds immunized with pQAC-N showed reduced clinical severity and viral shedding postchallenge on par with protection observed with current commercial vaccines without the associated safety concerns. Presented results indicate that the QAC adjuvant system can offer a safer alternative to the use of live vaccines against avian and other emerging coronaviruses.

IMPORTANCE: According to 2017 U.S. agriculture statistics, the combined value of production and sales from broilers, eggs, turkeys, and chicks was $42.8 billion. Of this number, broiler sales comprised 67% of the industry value, with the production of >50 billion pounds of chicken meat. The economic success of the poultry industry in the United States hinges on the extensive use of vaccines to control infectious bronchitis virus (IBV) and other poultry pathogens. The majority of vaccines currently licensed for poultry health include both modified live vaccine and inactivated pathogens. Despite their proven efficacy, modified live vaccine constructs take time to produce and could revert to virulence, which limits their safety. The significance of our research stems from the development of a safer and potent alternative mucosal vaccine to replace live vaccines against IBV and other emerging coronaviruses.}, } @article {pmid32665595, year = {2020}, author = {Krupovic, M and Dolja, VV and Koonin, EV}, title = {The LUCA and its complex virome.}, journal = {Nature reviews. 
Microbiology}, volume = {18}, number = {11}, pages = {661-670}, pmid = {32665595}, issn = {1740-1534}, mesh = {Archaea/virology ; Bacteria/virology ; Capsid Proteins ; *Evolution, Molecular ; Genome, Viral/*genetics ; Viral Proteins/chemistry/genetics ; Virome/*genetics ; *Viruses/chemistry/genetics/ultrastructure ; }, abstract = {The last universal cellular ancestor (LUCA) is the most recent population of organisms from which all cellular life on Earth descends. The reconstruction of the genome and phenotype of the LUCA is a major challenge in evolutionary biology. Given that all life forms are associated with viruses and/or other mobile genetic elements, there is no doubt that the LUCA was a host to viruses. Here, by projecting back in time using the extant distribution of viruses across the two primary domains of life, bacteria and archaea, and tracing the evolutionary histories of some key virus genes, we attempt a reconstruction of the LUCA virome. Even a conservative version of this reconstruction suggests a remarkably complex virome that already included the main groups of extant viruses of bacteria and archaea. We further present evidence of extensive virus evolution antedating the LUCA. The presence of a highly complex virome implies the substantial genomic and pan-genomic complexity of the LUCA itself.}, } @article {pmid32656099, year = {2020}, author = {Bohr, LL and Mortimer, TD and Pepperell, CS}, title = {Lateral Gene Transfer Shapes Diversity of Gardnerella spp.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {293}, pmid = {32656099}, issn = {2235-2988}, support = {R01 AI113287/AI/NIAID NIH HHS/United States ; T32 GM007215/GM/NIGMS NIH HHS/United States ; }, mesh = {Female ; Gardnerella ; Gene Transfer, Horizontal ; Humans ; Infant, Newborn ; *Microbiota/genetics ; Pregnancy ; *Premature Birth ; *Vaginosis, Bacterial ; }, abstract = {Gardnerella spp. 
are pathognomonic for bacterial vaginosis, which increases the risk of preterm birth and the transmission of sexually transmitted infections. Gardnerella spp. are genetically diverse, comprising what have recently been defined as distinct species with differing functional capacities. Disease associations with Gardnerella spp. are not straightforward: patients with BV are usually infected with multiple species, and Gardnerella spp. are also found in the vaginal microbiome of healthy women. Genome comparisons of Gardnerella spp. show evidence of lateral gene transfer (LGT), but patterns of LGT have not been characterized in detail. Here we sought to define the role of LGT in shaping the genetic structure of Gardnerella spp. We analyzed whole genome sequencing data for 106 Gardnerella strains and used these data for pan genome analysis and to characterize LGT in the core and accessory genomes, over recent and remote timescales. In our diverse sample of Gardnerella strains, we found that both the core and accessory genomes are clearly differentiated in accordance with newly defined species designations. We identified putative competence and pilus assembly genes across most species; we also found them to be differentiated between species. Competence machinery has diverged in parallel with the core genome, with selection against deleterious mutations as a predominant influence on their evolution. By contrast, the virulence factor vaginolysin, which encodes a toxin, appears to be readily exchanged among species. We identified five distinct prophage clusters in Gardnerella genomes, two of which appear to be exchanged between Gardnerella species. Differences among species are apparent in their patterns of LGT, including their exchange with diverse gene pools. Despite frequent LGT and co-localization in the same niche, our results show that Gardnerella spp. are clearly genetically differentiated and yet capable of exchanging specific genetic material. 
This likely reflects complex interactions within bacterial communities associated with the vaginal microbiome. Our results provide insight into how such interactions evolve and are maintained, allowing these multi-species communities to colonize and invade human tissues and adapt to antibiotics and other stressors.}, } @article {pmid32655519, year = {2020}, author = {Han, M and Liu, G and Chen, Y and Wang, D and Zhang, Y}, title = {Comparative Genomics Uncovers the Genetic Diversity and Characters of Veillonella atypica and Provides Insights Into Its Potential Applications.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {1219}, pmid = {32655519}, issn = {1664-302X}, abstract = {Veillonella atypica is a bacterium that is present in the gut and the oral cavity of mammals and plays diverse roles in different niches. A recent study demonstrated that Veillonella is highly associated with marathon running and approved that V. atypica gavage improves treadmill run time in mice, revealing that V. atypica has a high biotechnological potential in improving athlete performance. However, a comprehensive analysis of the genetic diversity, function traits, and genome editing method of V. atypica remains elusive. In the present study, we conducted a systemically comparative analysis of the genetic datasets of nine V. atypica strains. The pan-genome of V. atypica consisted of 2,065 homologous clusters and exhibited an open pan-genome structure. A phylogenetic analysis of V. atypica with two different categories revealed that V. atypica OK5 was the most distant from the other eight V. atypica strains. A total of 43 orthologous genes were identified as CAZyme genes and grouped into 23 CAZyme families. The CAZyme components derived from accessory clusters contributed to the differences in the ability of the nine V. atypica strains to utilize carbohydrates. An integrated analysis of the metabolic pathways of V. atypica suggested that V. 
atypica strains harbored vancomycin resistance and were involved in several biosynthesis pathways of secondary metabolites. The V. atypica strains harbored four main Cas proteins, namely, CAS-Type IIIA, CAS-Type IIA, CAS-Type IIC, and CAS-Type IIID. This pilot study provides an in-depth understanding of and a fundamental knowledge about the biology of V. atypica that allow the possibility to increase the biotechnological potential of this bacterium.}, } @article {pmid32655288, year = {2019}, author = {Banerjee, A and Somani, VK and Chakraborty, P and Bhatnagar, R and Varshney, RK and Echeverría-Vega, A and Cuadros-Orellana, S and Bandopadhyay, R}, title = {Molecular and Genomic Characterization of PFAB2: A Non-virulent Bacillus anthracis Strain Isolated from an Indian Hot Spring.}, journal = {Current genomics}, volume = {20}, number = {7}, pages = {491-507}, pmid = {32655288}, issn = {1389-2029}, abstract = {BACKGROUND: Thermophilic bacilli in both aerobic or facultative anaerobic forms have been isolated for over a hundred years from different mesophilic or thermophilic environments as they are potential source of bioactive secondary metabolites. But the taxonomic resolution in the Bacillus genus at species or at strain level is very challenging for the insufficient divergence of the 16S rRNA genes. One such recurring problem is among Bacillus anthracis, B. cereus and B. thuringiensis. The disease-causing B. anthracis strains have their characteristic virulence factors coded in two well-known plasmids, namely pXO1 (toxin genes) and pXO2 (capsule genes).

OBJECTIVE: The present study aimed at the molecular and genomic characterization of a recently reported thermophilic and environmental isolate of B. anthracis, strain PFAB2.

METHODS: We performed comparative genomics between the PFAB2 genome and different strains of B. anthracis, along with closely related B. cereus strains.

RESULTS: The pangenomic analysis suggests that the PFAB2 genome harbors no complete prophage genes. Cluster analysis of Bray-Curtis similarity resemblance matrix revealed that gene content of PFAB2 is more closely related to other environmental strains of B. anthracis. The secretome analysis and the in vitro and in vivo pathogenesis experiments corroborate the avirulent phenotype of this strain. The most probable explanation for this phenotype is the apparent absence of plasmids harboring genes for capsule biosynthesis and toxins secretion in the draft genome. Additional features of PFAB2 are good spore-forming and germinating capabilities and rapid replication ability.

CONCLUSION: The high replication rate in a wide range of temperatures and culture media, the non-pathogenicity, the good spore forming capability and its genomic similarity to the Ames strain together make PFAB2 an interesting model strain for the study of the pathogenic evolution of B. anthracis.}, } @article {pmid32654396, year = {2021}, author = {Guo, G and Du, D and Yu, Y and Zhang, Y and Qian, Y and Zhang, W}, title = {Pan-genome analysis of Streptococcus suis serotype 2 revealed genomic diversity among strains of different virulence.}, journal = {Transboundary and emerging diseases}, volume = {68}, number = {2}, pages = {637-647}, doi = {10.1111/tbed.13725}, pmid = {32654396}, issn = {1865-1682}, support = {2018YFC1602500//National Key R&D Program of China/ ; 2017E10010//Opening Fund of Key Laboratory of microorganism technology and bioinformatics research of Zhejiang Province/ ; 31772751//National Natural Science Foundation of China/ ; }, mesh = {*Genetic Variation ; Genome, Bacterial/*genetics ; Serogroup ; Streptococcus suis/*genetics/*pathogenicity ; Virulence/genetics ; }, abstract = {Streptococcus suis (SS) is an emerging zoonotic pathogen that causes severe infections in swine and humans. Among the 33 known serotypes, serotype 2 is most frequently associated with infections in pigs and humans. To better understand the virulence characterization of S. suis serotype 2 (SS2) and discriminate the difference between virulent and avirulent strains in SS2, characterization of the genomic features of strains with different virulence is required. The result showed that Streptococcus suis have an open pan-genome. The pan-genome shared by the 19 S. suis serotype 2 strains was composed of 1,239 core genes and 2,436 accessory genes. COG analysis indicated that core genes are involved in the basic physiological function, but accessory genes related to tachytely evolution. 
Comparative analysis between core genomes of virulent strains and 9 avirulent strains suggested that srtBCD pilus cluster was a significant discrepancy between virulent and avirulent strains. Analysis between high virulent and group B low virulent strains showed 53 and 58 genes specific to each other. Moreover, genomes of avirulent strains tend to be larger than virulent strains; avirulent strains tend to possess more prophages sequences than virulent strains. Our findings could be contributed to a better understanding of the genomics of S. suis serotype 2.}, } @article {pmid32641450, year = {2020}, author = {Kirubakaran, TG and Andersen, Ø and Moser, M and Árnyasi, M and McGinnity, P and Lien, S and Kent, M}, title = {A Nanopore Based Chromosome-Level Assembly Representing Atlantic Cod from the Celtic Sea.}, journal = {G3 (Bethesda, Md.)}, volume = {10}, number = {9}, pages = {2903-2910}, pmid = {32641450}, issn = {2160-1836}, mesh = {Animals ; Chromosomes/genetics ; *Gadus morhua/genetics ; Genome ; Humans ; Male ; *Nanopores ; Polymorphism, Single Nucleotide ; }, abstract = {Currently available genome assemblies for Atlantic cod (Gadus morhua) have been constructed from fish belonging to the Northeast Arctic Cod (NEAC) population; a migratory population feeding in the Barents Sea. These assemblies have been crucial for the development of genetic markers which have been used to study population differentiation and adaptive evolution in Atlantic cod, pinpointing four discrete islands of genomic divergence located on linkage groups 1, 2, 7 and 12. In this paper, we present a high-quality reference genome from a male Atlantic cod representing a southern population inhabiting the Celtic sea. The genome assembly (gadMor_Celtic) was produced from long-read nanopore data and has a combined contig length of 686 Mb with an N50 of 10 Mb. 
Integrating contigs with genetic linkage mapping information enabled us to construct 23 chromosome sequences which mapped with high confidence to the latest NEAC population assembly (gadMor3) and allowed us to characterize, to an extent not previously reported large chromosomal inversions on linkage groups 1, 2, 7 and 12. In most cases, inversion breakpoints could be located within single nanopore contigs. Our results suggest the presence of inversions in Celtic cod on linkage groups 6, 11 and 21, although these remain to be confirmed. Further, we identified a specific repetitive element that is relatively enriched at predicted centromeric regions. Our gadMor_Celtic assembly provides a resource representing a 'southern' cod population which is complementary to the existing 'northern' population based genome assemblies and represents the first step toward developing pan-genomic resources for Atlantic cod.}, } @article {pmid32636251, year = {2020}, author = {Lees, JA and Mai, TT and Galardini, M and Wheeler, NE and Horsfield, ST and Parkhill, J and Corander, J}, title = {Improved Prediction of Bacterial Genotype-Phenotype Associations Using Interpretable Pangenome-Spanning Regressions.}, journal = {mBio}, volume = {11}, number = {4}, pages = {}, pmid = {32636251}, issn = {2150-7511}, support = {MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Bacteria/*genetics ; Computer Simulation ; Genetic Association Studies/*methods ; Genetic Variation ; Genomics/*methods ; Genotype ; *Metagenome ; Models, Theoretical ; Phenotype ; Regression Analysis ; }, abstract = {Discovery of genetic variants underlying bacterial phenotypes and the prediction of phenotypes such as antibiotic resistance are fundamental tasks in bacterial genomics. Genome-wide association study (GWAS) methods have been applied to study these relations, but the plastic nature of bacterial genomes and the clonal structure of bacterial populations creates challenges. 
We introduce an alignment-free method which finds sets of loci associated with bacterial phenotypes, quantifies the total effect of genetics on the phenotype, and allows accurate phenotype prediction, all within a single computationally scalable joint modeling framework. Genetic variants covering the entire pangenome are compactly represented by extended DNA sequence words known as unitigs, and model fitting is achieved using elastic net penalization, an extension of standard multiple regression. Using an extensive set of state-of-the-art bacterial population genomic data sets, we demonstrate that our approach performs accurate phenotype prediction, comparable to popular machine learning methods, while retaining both interpretability and computational efficiency. Compared to those of previous approaches, which test each genotype-phenotype association separately for each variant and apply a significance threshold, the variants selected by our joint modeling approach overlap substantially.

IMPORTANCE: Being able to identify the genetic variants responsible for specific bacterial phenotypes has been the goal of bacterial genetics since its inception and is fundamental to our current level of understanding of bacteria. This identification has been based primarily on painstaking experimentation, but the availability of large data sets of whole genomes with associated phenotype metadata promises to revolutionize this approach, not least for important clinical phenotypes that are not amenable to laboratory analysis. These models of phenotype-genotype association can in the future be used for rapid prediction of clinically important phenotypes such as antibiotic resistance and virulence by rapid-turnaround or point-of-care tests. However, despite much effort being put into adapting genome-wide association study (GWAS) approaches to cope with bacterium-specific problems, such as strong population structure and horizontal gene exchange, current approaches are not yet optimal. 
We describe a method that advances methodology for both association and generation of portable prediction models.}, } @article {pmid32634612, year = {2020}, author = {Shahi, N and Mallik, SK}, title = {Emerging bacterial fish pathogen Lactococcus garvieae RTCLI04, isolated from rainbow trout (Oncorhynchus mykiss): Genomic features and comparative genomics.}, journal = {Microbial pathogenesis}, volume = {147}, number = {}, pages = {104368}, doi = {10.1016/j.micpath.2020.104368}, pmid = {32634612}, issn = {1096-1208}, mesh = {Animals ; *Fish Diseases ; Genomics ; Humans ; India ; Lactococcus ; *Oncorhynchus mykiss ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Lactococcus garvieae is one of the emerging zoonotic bacterial pathogen, causes fatal hemorrhagic septicemia in cultured fish species, animals and humans, worldwide. Here, we report the genomic features of whole-genome sequence (WGS) of L. garvieae strain RTCLI04, recovered from lower intestine of farmed rainbow trout, Oncorhynchus mykiss in the northwest Himalayan region India. The genome of L. garvieae RTCLI04 is a single circular chromosome of 2,054,885 base pairs (bp), which encodes 1993 proteins and has G + C content of 39%. The bioinformatics analysis of WGS of RTCLI04, confirmed the presence of 51 tRNAs genes (including two pseudogenes), six rRNAs genes (four genes for 5S rRNA; one gene for 16S rRNA and one gene for 23S rRNA), five virulent domains, and twenty eight different genetic pathways. A Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) finder tool indicates that three different CRISPR and one cas system with common spacer was present in the genome of L. garvieae RTCLI04. Pan-genome analysis of RTCLI04 and all the other reference L. garvieae strains shows that pan-genome of this bacterium consisted of 2239 putative protein-coding genes in which 1850 genes are core gene, 389 genes are dispensable gene, and 221 genes are unique to RTCLI04. L. 
garvieae RTCLI04 lacks genomic island of 16.5 Kb capsule gene cluster. In addition, 39 virulence-associated genes (VAGs) including hly1,-2,-3; PavA, PsaA; eno; LPxTG containing surface proteins 1, 2, 3 and 4; pgm, sod and 29 antimicrobial resistant genes (ARGs) including mefE (clindamycin), srmB (lincomycin), dfrA26 (trimethoprim), gyrB (nalidixic acid), arr-3 (rifampin), otrB (tetracycline), aac(6)-Ic (tobramycin), IrgB (penicillin), mecA (oxacillin), vanRB (vancomycin) and mfpA (fluoroquinolone) were also predicted in the genome of L. garvieae RTCLI04. Our study provides new insight into understanding the virulence mechanism, antimicrobial resistance, and development of effective therapeutic measures against L. garvieae during a disease outbreak in aquaculture.}, } @article {pmid32632274, year = {2020}, author = {Lyu, J}, title = {Pan-genome upgrade.}, journal = {Nature plants}, volume = {6}, number = {7}, pages = {732}, doi = {10.1038/s41477-020-0731-2}, pmid = {32632274}, issn = {2055-0278}, } @article {pmid32631215, year = {2020}, author = {Hurel, J and Schbath, S and Bougeard, S and Rolland, M and Petrillo, M and Touzain, F}, title = {DUGMO: tool for the detection of unknown genetically modified organisms with high-throughput sequencing data for pure bacterial samples.}, journal = {BMC bioinformatics}, volume = {21}, number = {1}, pages = {284}, pmid = {32631215}, issn = {1471-2105}, mesh = {Bacteria/*chemistry ; Computational Biology/*methods ; High-Throughput Nucleotide Sequencing/*methods ; Humans ; Organisms, Genetically Modified/*genetics ; Polymerase Chain Reaction/*methods ; }, abstract = {BACKGROUND: The European Community has adopted very restrictive policies regarding the dissemination and use of genetically modified organisms (GMOs). In fact, a maximum threshold of 0.9% of contaminating GMOs is tolerated for a "GMO-free" label. In recent years, imports of undescribed GMOs have been detected. 
Their sequences are not described and therefore not detectable by conventional approaches, such as PCR.

RESULTS: We developed DUGMO, a bioinformatics pipeline for the detection of genetically modified (GM) bacteria, including unknown GM bacteria, based on Illumina paired-end sequencing data. The method is currently focused on the detection of GM bacteria with - possibly partial - transgenes in pure bacterial samples. In the preliminary steps, coding sequences (CDSs) are aligned through two successive BLASTN against the host pangenome with relevant tuned parameters to discriminate CDSs belonging to the wild type genome (wgCDS) from potential GM coding sequences (pgmCDSs). Then, Bray-Curtis distances are calculated between the wgCDS and each pgmCDS, based on the difference of genomic vocabulary. Finally, two machine learning methods, namely the Random Forest and Generalized Linear Model, are carried out to target true GM CDS(s), based on six variables including Bray-Curtis distances and GC content. Tests carried out on a GM Bacillus subtilis showed 25 positive CDSs corresponding to the chloramphenicol resistance gene and CDSs of the inserted plasmids. On a wild type B. subtilis, no false positive sequences were detected.

CONCLUSION: DUGMO detects exogenous CDS, truncated, fused or highly mutated wild CDSs in high-throughput sequencing data, and was shown to be efficient at detecting GM sequences, but it might also be employed for the identification of recent horizontal gene transfers.}, } @article {pmid32619577, year = {2020}, author = {Kaushal, G and Singh, SP}, title = {Comparative genome analysis provides shreds of molecular evidence for reclassification of Leuconostoc mesenteroides MTCC 10508 as a strain of Leu. suionicum.}, journal = {Genomics}, volume = {112}, number = {6}, pages = {4023-4031}, doi = {10.1016/j.ygeno.2020.06.040}, pmid = {32619577}, issn = {1089-8646}, mesh = {DNA, Bacterial/genetics ; *Genome, Bacterial ; Leuconostoc mesenteroides/*genetics ; Phylogeny ; }, abstract = {This study presents the whole-genome comparative analysis of a Leuconostoc sp. strain, previously documented as Leu. mesenteroides MTCC 10508. The ANI, dDDH, dot plot, and MAUVE analyses suggested its reclassification as a strain of Leu. suionicum. Functional annotation identified a total of 1971 genes, out of which, 265 genes were mapped to CAZymes, evincing its carbohydrate transforming capability. The genome comparison with 59 Leu. mesenteroides and Leu. suionicum strains generated the core and pan-genome profiles, divulging the unique genes in Leuconostoc sp. MTCC 10508. For the first time, this study reports the genes encoding alpha-xylosidase and copper oxidase in a strain of Leu. suionicum. The genetic information for any possible allergenic molecule could not be detected in the genome, advocating the safety of the strain. The present investigation provides the genomic evidence for reclassification of the Leuconostoc sp. 
strain and also promulgates the molecular insights into its metabolic potential.}, } @article {pmid32615922, year = {2020}, author = {Duru, IC and Andreevskaya, M and Laine, P and Rode, TM and Ylinen, A and Løvdal, T and Bar, N and Crauwels, P and Riedel, CU and Bucur, FI and Nicolau, AI and Auvinen, P}, title = {Genomic characterization of the most barotolerant Listeria monocytogenes RO15 strain compared to reference strains used to evaluate food high pressure processing.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {455}, pmid = {32615922}, issn = {1471-2164}, support = {307856//Academy of Finland/ ; 311717//Academy of Finland/ ; ERA-IB-16-247 014//ERA-IB2/ ; 250 Subprogramme 3.2 - Horizon 2020 - Contract No. 15/2017//International and European Cooperation/ ; 031B0268//German Ministry for Education and Research/ ; }, mesh = {CRISPR-Cas Systems ; DNA Methylation ; *Food Preservation ; *Genome, Bacterial ; Genomics ; Listeria monocytogenes/*genetics ; Microbial Viability ; Pressure ; RNA-Seq ; Reference Standards ; }, abstract = {BACKGROUND: High pressure processing (HPP; i.e. 100-600 MPa pressure depending on product) is a non-thermal preservation technique adopted by the food industry to decrease significantly foodborne pathogens, including Listeria monocytogenes, from food. However, susceptibility towards pressure differs among diverse strains of L. monocytogenes and it is unclear if this is due to their intrinsic characteristics related to genomic content. Here, we tested the barotolerance of 10 different L. monocytogenes strains, from food and food processing environments and widely used reference strains including clinical isolate, to pressure treatments with 400 and 600 MPa. Genome sequencing and genome comparison of the tested L. monocytogenes strains were performed to investigate the relation between genomic profile and pressure tolerance.

RESULTS: None of the tested strains were tolerant to 600 MPa. A reduction of more than 5 log10 was observed for all strains after 1 min 600 MPa pressure treatment. L. monocytogenes strain RO15 showed no significant reduction in viable cell counts after 400 MPa for 1 min and was therefore defined as barotolerant. Genome analysis of so far unsequenced L. monocytogenes strain RO15, 2HF33, MB5, AB199, AB120, C7, and RO4 allowed us to compare the gene content of all strains tested. This revealed that the three most pressure tolerant strains had more than one CRISPR system with self-targeting spacers. Furthermore, several anti-CRISPR genes were detected in these strains. Pan-genome analysis showed that 10 prophage genes were significantly associated with the three most barotolerant strains.

CONCLUSIONS: L. monocytogenes strain RO15 was the most pressure tolerant among the selected strains. Genome comparison suggests that there might be a relationship between prophages and pressure tolerance in L. monocytogenes.}, } @article {pmid32615401, year = {2020}, author = {Steinbrenner, AD}, title = {The evolving landscape of cell surface pattern recognition across plant immune networks.}, journal = {Current opinion in plant biology}, volume = {56}, number = {}, pages = {135-146}, doi = {10.1016/j.pbi.2020.05.001}, pmid = {32615401}, issn = {1879-0356}, mesh = {Cell Membrane ; *Oomycetes ; Plant Immunity/genetics ; *Plants/genetics ; Receptors, Pattern Recognition/genetics ; Signal Transduction ; }, abstract = {To recognize diverse threats, plants monitor extracellular molecular patterns and transduce intracellular immune signaling through receptor complexes at the plasma membrane. Pattern recognition occurs through a prototypical network of interacting proteins, comprising A) receptors that recognize inputs associated with a growing number of pest and pathogen classes (bacteria, fungi, oomycetes, caterpillars), B) co-receptor kinases that participate in binding and signaling, and C) cytoplasmic kinases that mediate first stages of immune output. While this framework has been elucidated in reference accessions of model organisms, network components are part of gene families with widespread variation, potentially tuning immunocompetence for specific contexts. Most dramatically, variation in receptor repertoires determines the range of ligands acting as immunogenic inputs for a given plant. Diversification of receptor kinase (RK) and related receptor-like protein (RLP) repertoires may tune responses even within a species. 
Comparative genomics at pangenome scale will reveal patterns and features of immune network variation.}, } @article {pmid32614888, year = {2020}, author = {Chen, Z and Kuang, D and Xu, X and González-Escalona, N and Erickson, DL and Brown, E and Meng, J}, title = {Genomic analyses of multidrug-resistant Salmonella Indiana, Typhimurium, and Enteritidis isolates using MinION and MiSeq sequencing technologies.}, journal = {PloS one}, volume = {15}, number = {7}, pages = {e0235641}, pmid = {32614888}, issn = {1932-6203}, mesh = {Anti-Bacterial Agents/pharmacology ; Drug Resistance, Multiple, Bacterial/*genetics ; *Genome, Bacterial ; Genotype ; Microbial Sensitivity Tests ; Phenotype ; Phylogeny ; Plasmids/genetics/metabolism ; Point Mutation ; Salmonella enterica/classification/drug effects/*genetics/pathogenicity ; Salmonella enteritidis/classification/drug effects/genetics/pathogenicity ; Salmonella typhimurium/classification/drug effects/genetics/pathogenicity ; Virulence ; Whole Genome Sequencing/*methods ; }, abstract = {We sequenced 25 isolates of phenotypically multidrug-resistant Salmonella Indiana (n = 11), Typhimurium (n = 8), and Enteritidis (n = 6) using both MinION long-read [SQK-LSK109 and flow cell (R9.4.1)] and MiSeq short-read (Nextera XT and MiSeq Reagent Kit v2) sequencing technologies to determine the advantages of each approach in terms of the characteristics of genome structure, antimicrobial resistance (AMR), virulence potential, whole-genome phylogeny, and pan-genome. The MinION reads were base-called in real-time using MinKnow 3.4.8 integrated with Guppy 3.0.7. The long-read-only assembly, Illumina-only assembly, and hybrid assembly pipelines of Unicycler 0.4.8 were used to generate the MinION, MiSeq, and hybrid assemblies, respectively. 
The MinION assemblies were highly contiguous compared to the MiSeq assemblies but lacked accuracy, a deficiency that was mitigated by adding the MiSeq short reads through the Unicycler hybrid assembly which corrected erroneous single nucleotide polymorphisms (SNPs). The MinION assemblies provided similar predictions of AMR and virulence potential compared to the MiSeq and hybrid assemblies, although they produced more total false negatives of AMR genotypes, primarily due to failure in identifying tetracycline resistance genes in 11 of the 19 MinION assemblies of tetracycline-resistant isolates. The MinION assemblies displayed a large genetic distance from their corresponding MiSeq and hybrid assemblies on the whole-genome phylogenetic tree, indicating that the lower read accuracy of MinION sequencing caused incorrect clustering. The pan-genome of the MinION assemblies contained significantly more accessory genes and less core genes compared to the MiSeq and hybrid assemblies, suggesting that although these assemblies were more contiguous, their sequencing errors reduced accurate genome annotations. Our research demonstrates that MinION sequencing by itself provides an efficient assessment of the genome structure, antimicrobial resistance, and virulence potential of Salmonella; however, it is not sufficient for whole-genome phylogenetic and pan-genome analyses. 
MinION in combination with MiSeq facilitated the most accurate genomic analyses.}, } @article {pmid32613704, year = {2021}, author = {Hajem, N and Manzato, L and Branchet, MC and Herlin, A and Hassanaly, S and Huguet, E and Himbert, F and Bernard, P and Dussert, AS and Choulot, JC and Boisnic, S and Kéophiphath, M}, title = {Purple tulip extract improves signs of skin aging through dermal structural modulation as shown by genomic, protein expression and skin appearance of volunteers studied.}, journal = {Journal of cosmetic dermatology}, volume = {20}, number = {2}, pages = {691-702}, doi = {10.1111/jocd.13583}, pmid = {32613704}, issn = {1473-2165}, mesh = {Aged ; Cells, Cultured ; Female ; Fibroblasts ; Genomics ; Humans ; Plant Extracts/pharmacology ; Skin ; *Skin Aging ; *Tulipa ; Volunteers ; }, abstract = {BACKGROUND: Purple tulip extract is a rich source of flavonoids which are powerful antioxidants and can hence be considered as an ideal candidate for use in skin care products.

AIMS: We aimed to evaluate the effects of purple tulip extract on skin quality and to determine its molecular modes of interaction.

METHODS: A pangenomic study on human skin fibroblasts was carried out to analyze multiple changes in gene expression. Ex vivo studies of human skin explants exposed to ultraviolet (UV) irradiation or H2O2 were performed to assess modulations of protein expression. Finally, a clinical assay was carried out to evaluate the efficacy of purple tulip extract on skin appearance and condition of aged women.

RESULTS: Genetic modulation analyses led us to infer the induction of many biological functions including cell differentiation, proliferation, migration, inflammatory responses, and matrix remodeling. The ex vivo studies revealed an enhancement of the collagen network and increased expression of glycosaminoglycans (GAG), fibronectin, and collagen VI. Finally, the clinical study highlighted the potential anti-aging properties of the purple tulip extract which decreased the relaxation of the oval face and improved skin elasticity after 28 days of treatment. Significant reductions of the length and depth of the nasolabial wrinkles were also observed.

CONCLUSION: Our genomics data on the effect of purple tulip extract on the ex vivo UV-challenged skin showed that genes responsible for, among others, the upkeep of the skin, such as collagen induction, immune cell proliferation, and epidermal repair, were all up-regulated. More importantly, the clinical study corroborated these data by the visible and measurable effects of the topical purple tulip extract on the aged skin of 22 women, further demonstrating the beneficial impact of the extract on aged skin.}, } @article {pmid32611933, year = {2020}, author = {Wang, B and Cheng, H and Qian, W and Zhao, W and Liang, C and Liu, C and Cui, G and Liu, H and Zhang, L}, title = {Comparative genome analysis and mining of secondary metabolites of Paenibacillus polymyxa.}, journal = {Genes & genetic systems}, volume = {95}, number = {3}, pages = {141-150}, doi = {10.1266/ggs.19-00053}, pmid = {32611933}, issn = {1880-5779}, mesh = {*Biological Control Agents ; Data Mining/methods ; *Genome, Bacterial ; Genomics/methods ; *Metabolome ; Metabolomics/methods ; Paenibacillus polymyxa/*genetics/metabolism/pathogenicity ; Plants/microbiology ; }, abstract = {Paenibacillus polymyxa is a well-known Gram-positive biocontrol bacterium. It has been reported that many P. polymyxa strains can inhibit bacteria, fungi and other plant pathogens. Paenibacillus polymyxa employs a variety of mechanisms to promote plant growth, so it is necessary to understand the biocontrol ability of bacteria at the genome level. In the present study, thanks to the widespread availability of Paenibacillus genome data and the development of bioinformatics tools, we were able to analyze and mine the genomes of 43 P. polymyxa strains. The strain NCTC4744 was determined not to be P. polymyxa according to digital DNA-DNA hybridization and average nucleotide identity. By analysis of the pan-genome and the core genome, we found that the pan-genome of P. polymyxa was open and that there were 3,192 core genes. 
In a gene cluster analysis of secondary metabolites, 797 secondary metabolite gene clusters were found, of which 343 are not similar to known clusters and are expected to reveal a large number of new secondary metabolites. We also analyzed the plant growth-promoting genes that were mined and found, surprisingly, that these genes are highly conserved. The results of the present study not only reveal a large number of unknown potential secondary metabolite gene clusters in P. polymyxa, but also suggest that plant growth promotion characteristics are evolutionary adaptations of P. polymyxa to plant-related habitats.}, } @article {pmid32610480, year = {2020}, author = {Fodor, A and Abate, BA and Deák, P and Fodor, L and Gyenge, E and Klein, MG and Koncz, Z and Muvevi, J and Ötvös, L and Székely, G and Vozik, D and Makrai, L}, title = {Multidrug Resistance (MDR) and Collateral Sensitivity in Bacteria, with Special Attention to Genetic and Evolutionary Aspects and to the Perspectives of Antimicrobial Peptides-A Review.}, journal = {Pathogens (Basel, Switzerland)}, volume = {9}, number = {7}, pages = {}, pmid = {32610480}, issn = {2076-0817}, support = {1214102//Hungarian Ministry of Human Capacities/ ; }, abstract = {Antibiotic poly-resistance (multidrug-, extreme-, and pan-drug resistance) is controlled by adaptive evolution. Darwinian and Lamarckian interpretations of resistance evolution are discussed. Arguments for, and against, pessimistic forecasts on a fatal "post-antibiotic era" are evaluated. In commensal niches, the appearance of a new antibiotic resistance often reduces fitness, but compensatory mutations may counteract this tendency. The appearance of new antibiotic resistance is frequently accompanied by a collateral sensitivity to other resistances. 
Organisms with an expanding open pan-genome, such as Acinetobacter baumannii, Pseudomonas aeruginosa, and Klebsiella pneumoniae, can withstand an increased number of resistances by exploiting their evolutionary plasticity and disseminating clonally or poly-clonally. Multidrug-resistant pathogen clones can become predominant under antibiotic stress conditions but, under the influence of negative frequency-dependent selection, are prevented from rising to dominance in a population in a commensal niche. Antimicrobial peptides have a great potential to combat multidrug resistance, since antibiotic-resistant bacteria have shown a high frequency of collateral sensitivity to antimicrobial peptides. In addition, the mobility patterns of antibiotic resistance, and antimicrobial peptide resistance, genes are completely different. The integron trade in commensal niches is fortunately limited by the species-specificity of resistance genes. Hence, we theorize that the suggested post-antibiotic era has not yet come, and indeed might never come.}, } @article {pmid32605102, year = {2020}, author = {Roder, T and Wüthrich, D and Bär, C and Sattari, Z and Ah, UV and Ronchi, F and Macpherson, AJ and Ganal-Vonarburg, SC and Bruggmann, R and Vergères, G}, title = {In Silico Comparison Shows that the Pan-Genome of a Dairy-Related Bacterial Culture Collection Covers Most Reactions Annotated to Human Microbiomes.}, journal = {Microorganisms}, volume = {8}, number = {7}, pages = {}, pmid = {32605102}, issn = {2076-2607}, support = {GRS-070/17//Gebert Rüf Stiftung/ ; }, abstract = {The diversity of the human microbiome is positively associated with human health. However, this diversity is endangered by Westernized dietary patterns that are characterized by a decreased nutrient variety. Diversity might potentially be improved by promoting dietary patterns rich in microbial strains. 
Various collections of bacterial cultures resulting from a century of dairy research are readily available worldwide, and could be exploited to contribute towards this end. We have conducted a functional in silico analysis of the metagenome of 24 strains, each representing one of the species in a bacterial culture collection composed of 626 sequenced strains, and compared the pathways potentially covered by this metagenome to the intestinal metagenome of four healthy, although overweight, humans. Remarkably, the pan-genome of the 24 strains covers 89% of the human gut microbiome's annotated enzymatic reactions. Furthermore, the dairy microbial collection covers biological pathways, such as methylglyoxal degradation, sulfate reduction, γ-aminobutyric (GABA) acid degradation and salicylate degradation, which are differently covered among the four subjects and are involved in a range of cardiometabolic, intestinal, and neurological disorders. We conclude that microbial culture collections derived from dairy research have the genomic potential to complement and restore functional redundancy in human microbiomes.}, } @article {pmid32600255, year = {2020}, author = {Motyka-Pomagruk, A and Zoledowska, S and Misztak, AE and Sledz, W and Mengoni, A and Lojkowska, E}, title = {Comparative genomics and pangenome-oriented studies reveal high homogeneity of the agronomically relevant enterobacterial plant pathogen Dickeya solani.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {449}, pmid = {32600255}, issn = {1471-2164}, support = {UMO-2014/14/M/NZ8/00501//Narodowe Centrum Nauki/ ; UMO-2016/21/N/NZ1/02783//Narodowe Centrum Nauki/ ; Polish-Italian Collaborative Program Canaletto//Ministerstwo Nauki i Szkolnictwa Wyższego/ ; }, mesh = {Dickeya/classification/genetics/*pathogenicity ; Genome Size ; Genome, Bacterial ; Genomics/*methods ; High-Throughput Nucleotide Sequencing ; Phylogeny ; Solanum tuberosum/*microbiology ; Virulence Factors/*genetics ; Whole 
Genome Sequencing ; }, abstract = {BACKGROUND: Dickeya solani is an important plant pathogenic bacterium causing severe losses in European potato production. This species draws a lot of attention due to its remarkable virulence, great devastating potential and easier spread in contrast to other Dickeya spp. In view of a high need for extensive studies on economically important soft rot Pectobacteriaceae, we performed a comparative genomics analysis on D. solani strains to search for genetic foundations that would explain the differences in the observed virulence levels within the D. solani population.

RESULTS: High quality assemblies of 8 de novo sequenced D. solani genomes have been obtained. Whole-sequence comparison, ANIb, ANIm, Tetra and pangenome-oriented analyses performed on these genomes and the sequences of 14 additional strains revealed an exceptionally high level of homogeneity among the studied genetic material of D. solani strains. With the use of 22 genomes, the pangenome of D. solani, comprising 84.7% core, 7.2% accessory and 8.1% unique genes, has been almost completely determined, suggesting the presence of a nearly closed pangenome structure. Attribution of the genes included in the D. solani pangenome fractions to functional COG categories showed that higher percentages of accessory and unique pangenome parts in contrast to the core section are encountered in phage/mobile elements- and transcription- associated groups with the genome of RNS 05.1.2A strain having the most significant impact. Also, the first D. solani large-scale genome-wide phylogeny computed on concatenated core gene alignments is herein reported.

CONCLUSIONS: The almost closed status of D. solani pangenome achieved in this work points to the fact that the unique gene pool of this species should no longer expand. Such a feature is characteristic of taxa whose representatives either occupy isolated ecological niches or lack efficient mechanisms for gene exchange and recombination, which seems rational concerning a strictly pathogenic species with clonal population structure. Finally, no obvious correlations between the geographical origin of D. solani strains and their phylogeny were found, which might reflect the specificity of the international seed potato market.}, } @article {pmid32596333, year = {2020}, author = {Yang, F and Feng, H and Massey, IY and Huang, F and Guo, J and Zhang, X}, title = {Genome-Wide Analysis Reveals Genetic Potential for Aromatic Compounds Biodegradation of Sphingopyxis.}, journal = {BioMed research international}, volume = {2020}, number = {}, pages = {5849123}, pmid = {32596333}, issn = {2314-6141}, mesh = {Amino Acids, Aromatic/*metabolism ; *Biodegradation, Environmental ; Genome, Bacterial ; Genome-Wide Association Study ; Hydrocarbons, Aromatic/*metabolism ; Sphingomonadaceae/chemistry/*genetics/*metabolism ; }, abstract = {Members of genus Sphingopyxis are frequently found in diverse eco-environments worldwide and have been traditionally considered to play vital roles in the degradation of aromatic compounds. Over recent decades, many aromatic-degrading Sphingopyxis strains have been isolated and recorded, but little is known about their genetic nature related to aromatic compounds biodegradation. In this study, bacterial genomes of 19 Sphingopyxis strains were used for comparative analyses. Phylogeny showed an ambiguous relatedness between bacterial strains and their habitat specificity, while clustering based on Cluster of Orthologous Groups suggested the potential link of functional profile with substrate-specific traits. 
Pan-genome analysis revealed that 19 individuals were predicted to share 1,066 orthologous genes, indicating a high genetic homogeneity among Sphingopyxis strains. Notably, KEGG Automatic Annotation Server results suggested that most genes pertaining aromatic compounds biodegradation were predicted to be involved in benzoate, phenylalanine, and aminobenzoate metabolism. Among them, β-ketoadipate biodegradation might be the main pathway in Sphingopyxis strains. Further inspection showed that a number of mobile genetic elements varied in Sphingopyxis genomes, and plasmid-mediated gene transfer coupled with prophage- and transposon-mediated rearrangements might play prominent roles in the evolution of bacterial genomes. Collectively, our findings presented that Sphingopyxis isolates might be the promising candidates for biodegradation of aromatic compounds in pollution sites.}, } @article {pmid32596166, year = {2020}, author = {Sun, Z and Zhou, D and Zhang, X and Li, Q and Lin, H and Lu, W and Liu, H and Lu, J and Lin, X and Li, K and Xu, T and Bao, Q and Zhang, H}, title = {Determining the Genetic Characteristics of Resistance and Virulence of the "Epidermidis Cluster Group" Through Pan-Genome Analysis.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {274}, pmid = {32596166}, issn = {2235-2988}, mesh = {*Staphylococcus ; *Staphylococcus epidermidis/genetics ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Staphylococcus caprae, Staphylococcus capitis, and Staphylococcus epidermidis belong to the "Epidermidis Cluster Group" (ECG) and are generally opportunistic pathogens. In this work, whole genome sequencing, molecular cloning and pan-genome analysis were performed to investigate the genetic characteristics of the resistance, virulence and genome structures of 69 ECG strains, including a clinical isolate (S. caprae SY333) obtained in this work. 
Two resistance genes (blaZ and aadD2) encoded on the plasmids pSY333-41 and pSY333-45 of S. caprae SY333 were confirmed to be functional. The bla region in ECG exhibited three distinct structures, and these chromosome- and plasmid-encoded bla operons seemed to follow two different evolutionary paths. Pan-genome analysis revealed their pan-genomes tend to be "open." For the virulence-related factors, the genes involved in primary attachment were observed almost exclusively in S. epidermidis, while the genes associated with intercellular aggregation were observed more frequently in S. caprae and S. capitis. The type VII secretion system was present in all strains of S. caprae and some of S. epidermidis but not in S. capitis. Moreover, the isd locus (iron regulated surface determinant) was first found to be encoded on the genomes of S. caprae and S. capitis. These findings suggested that the plasmid and chromosome encoded bla operons of ECG species underwent different evolution paths, as well as they differed in the abundance of virulence genes associated with adherence, invasion, secretion system and immune evasion. Identification of isd loci in S. caprae and S. 
capitis indicated their ability to acquire heme as nutrient iron during infection.}, } @article {pmid32589566, year = {2020}, author = {Hillman, ET and Kozik, AJ and Hooker, CA and Burnett, JL and Heo, Y and Kiesel, VA and Nevins, CJ and Oshiro, JMKI and Robins, MM and Thakkar, RD and Wu, ST and Lindemann, SR}, title = {Comparative genomics of the genus Roseburia reveals divergent biosynthetic pathways that may influence colonic competition among species.}, journal = {Microbial genomics}, volume = {6}, number = {7}, pages = {}, pmid = {32589566}, issn = {2057-5858}, mesh = {Amino Acids/biosynthesis ; Bacterial Proteins/genetics ; *Biosynthetic Pathways ; Clostridiales/*classification/*genetics/metabolism ; Energy Metabolism ; Gastrointestinal Microbiome ; Genome, Bacterial ; Genomics/*methods ; Humans ; Phylogeny ; Species Specificity ; Vitamin B Complex/biosynthesis ; }, abstract = {Roseburia species are important denizens of the human gut microbiome that ferment complex polysaccharides to butyrate as a terminal fermentation product, which influences human physiology and serves as an energy source for colonocytes. Previous comparative genomics analyses of the genus Roseburia have examined polysaccharide degradation genes. Here, we characterize the core and pangenomes of the genus Roseburia with respect to central carbon and energy metabolism, as well as biosynthesis of amino acids and B vitamins using orthology-based methods, uncovering significant differences among species in their biosynthetic capacities. Variation in gene content among Roseburia species and strains was most significant for cofactor biosynthesis. Unlike all other species of Roseburia that we analysed, Roseburia inulinivorans strains lacked biosynthetic genes for riboflavin or pantothenate but possessed folate biosynthesis genes. Differences in gene content for B vitamin synthesis were matched with differences in putative salvage and synthesis strategies among species. 
For example, we observed extended biotin salvage capabilities in R. intestinalis strains, which further suggest that B vitamin acquisition strategies may impact fitness in the gut ecosystem. As differences in the functional potential to synthesize components of biomass (e.g. amino acids, vitamins) can drive interspecies interactions, variation in auxotrophies of the Roseburia spp. genomes may influence in vivo gut ecology. This study serves to advance our understanding of the potential metabolic interactions that influence the ecology of Roseburia spp. and, ultimately, may provide a basis for rational strategies to manipulate the abundances of these species.}, } @article {pmid32586267, year = {2020}, author = {Nishitsuji, K and Arimoto, A and Yonashiro, Y and Hisata, K and Fujie, M and Kawamitsu, M and Shoguchi, E and Satoh, N}, title = {Comparative genomics of four strains of the edible brown alga, Cladosiphon okamuranus.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {422}, pmid = {32586267}, issn = {1471-2164}, mesh = {Aquaculture ; Evolution, Molecular ; Gene Expression Profiling ; Gene Expression Regulation ; Genome Size ; Genomics/*methods ; High-Throughput Nucleotide Sequencing ; Mitochondrial Proteins/genetics ; Phaeophyta/*classification/genetics ; Phylogeny ; Seaweed/classification/*genetics ; }, abstract = {BACKGROUND: The brown alga, Cladosiphon okamuranus (Okinawa mozuku), is one of the most important edible seaweeds, and it is cultivated for market primarily in Okinawa, Japan. Four strains, denominated S, K, O, and C, with distinctively different morphologies, have been cultivated commercially since the early 2000s. We previously reported a draft genome of the S-strain. To facilitate studies of seaweed biology for future aquaculture, we here decoded and analyzed genomes of the other three strains (K, O, and C).

RESULTS: Here we improved the genome of the S-strain (ver. 2, 130 Mbp, 12,999 genes), and decoded the K-strain (135 Mbp, 12,511 genes), the O-strain (140 Mbp, 12,548 genes), and the C-strain (143 Mbp, 12,182 genes). Molecular phylogenies, using mitochondrial and nuclear genes, showed that the S-strain diverged first, followed by the K-strain, and most recently the C- and O-strains. Comparisons of genome architecture among the four strains document the frequent occurrence of inversions. In addition to gene acquisitions and losses, the S-, K-, O-, and C-strains possess 457, 344, 367, and 262 gene families unique to each strain, respectively. Comprehensive Blast searches showed that most genes have no sequence similarity to any entries in the non-redundant protein sequence database, although GO annotation suggested that they likely function in relation to molecular and biological processes and cellular components.

CONCLUSIONS: Our study compares the genomes of four strains of C. okamuranus and examines their phylogenetic relationships. Due to global environmental changes, including temperature increases, acidification, and pollution, brown algal aquaculture is facing critical challenges. Genomic and phylogenetic information reported by the present research provides useful tools for isolation of novel strains.}, } @article {pmid32584859, year = {2020}, author = {Collis, RM and Biggs, PJ and Midwinter, AC and Browne, AS and Wilkinson, DA and Irshad, H and French, NP and Brightwell, G and Cookson, AL}, title = {Genomic epidemiology and carbon metabolism of Escherichia coli serogroup O145 reflect contrasting phylogenies.}, journal = {PloS one}, volume = {15}, number = {6}, pages = {e0235066}, pmid = {32584859}, issn = {1932-6203}, mesh = {Animals ; Carbon/*metabolism ; *Escherichia coli Infections/epidemiology/genetics/metabolism ; *Genotype ; Humans ; Malates/metabolism ; New Zealand/epidemiology ; *Phylogeny ; Serine/genetics/metabolism ; *Serogroup ; *Shiga-Toxigenic Escherichia coli/genetics/isolation & purification/metabolism ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) are a leading cause of foodborne outbreaks of human disease, but they reside harmlessly as an asymptomatic commensal in the ruminant gut. STEC serogroup O145 are difficult to isolate as routine diagnostic methods are unable to distinguish non-O157 serogroups due to their heterogeneous metabolic characteristics, resulting in under-reporting which is likely to conceal their true prevalence. 
In light of these deficiencies, the purpose of this study was a twofold approach to investigate enhanced STEC O145 diagnostic culture-based methods: firstly, to use a genomic epidemiology approach to understand the genetic diversity and population structure of serogroup O145 at both a local (New Zealand) (n = 47) and global scale (n = 75) and, secondly, to identify metabolic characteristics that will help the development of a differential media for this serogroup. Analysis of a subset of E. coli serogroup O145 strains demonstrated considerable diversity in carbon utilisation, which varied in association with eae subtype and sequence type. Several carbon substrates, such as D-serine and D-malic acid, were utilised by the majority of serogroup O145 strains, which, when coupled with current molecular and culture-based methods, could aid in the identification of presumptive E. coli serogroup O145 isolates. These carbon substrates warrant subsequent testing with additional serogroup O145 strains and non-O145 strains. Serogroup O145 strains displayed extensive genetic heterogeneity that was correlated with sequence type and eae subtype, suggesting these genetic markers are good indicators for distinct E. coli phylogenetic lineages. Pangenome analysis identified a core of 3,036 genes and an open pangenome of >14,000 genes, which is consistent with the identification of distinct phylogenetic lineages. Overall, this study highlighted the phenotypic and genotypic heterogeneity within E. 
coli serogroup O145, suggesting that the development of a differential media targeting this serogroup will be challenging.}, } @article {pmid32580505, year = {2020}, author = {Menéndez, E and Flores-Félix, JD and Ramírez-Bahena, MH and Igual, JM and García-Fraile, P and Peix, A and Velázquez, E}, title = {Genome Analysis of Endobacterium cerealis, a Novel Genus and Species Isolated from Zea mays Roots in North Spain.}, journal = {Microorganisms}, volume = {8}, number = {6}, pages = {}, pmid = {32580505}, issn = {2076-2607}, support = {AGL2013-48098-P//Ministerio de Economía, Industria y Competitividad, Gobierno de España/ ; }, abstract = {In the present work, we analyse the genomic and phenotypic characteristics of a strain named RZME27[T] isolated from roots of a Zea mays plant grown in Spain. The phylogenetic analyses of 16S rRNA gene and whole genome sequences showed that the strain RZME27[T] clustered with the type strains of Neorhizobium galegae and Pseudorhizobium pelagicum from the family Rhizobiaceae. This family encompasses several genera establishing symbiosis with legumes, but the genes involved in nodulation and nitrogen fixation are absent in its genome. Nevertheless, genes related to plant colonization, such as those involved in motility, chemotaxis, quorum sensing, exopolysaccharide biosynthesis and hydrolytic enzymes production were found. The comparative pangenomic analyses showed that 78 protein clusters present in the strain RZME27[T] were not found in the type strains of its closest relatives N. galegae and P. pelagicum. The calculated average nucleotide identity (ANI) values between the strain RZME27[T] and the type strains of N. galegae and P. pelagicum were 75.61% and 75.1%, respectively, similar or lower than those found for other genera from family Rhizobiaceae. Several phenotypic differences were also found, highlighting the absence of the fatty acid C19:0 cyclo ω8c and propionate assimilation. 
These results support the definition of a novel genus and species named Endobacterium cerealis gen. nov. sp. nov. whose type strain is RZME27[T].}, } @article {pmid32571204, year = {2020}, author = {Vázquez-Rosas-Landa, M and Ponce-Soto, GY and Aguirre-Liguori, JA and Thakur, S and Scheinvar, E and Barrera-Redondo, J and Ibarra-Laclette, E and Guttman, DS and Eguiarte, LE and Souza, V}, title = {Population genomics of Vibrionaceae isolated from an endangered oasis reveals local adaptation after an environmental perturbation.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {418}, pmid = {32571204}, issn = {1471-2164}, support = {IG200215//PAPIIT/ ; 238245//CONACYT/ ; }, mesh = {Adaptation, Physiological ; Gene Transfer, Horizontal ; Genetics, Population ; Genome, Bacterial ; Multigene Family ; Mutation ; Phylogeny ; *Polymorphism, Single Nucleotide ; Population Density ; Selection, Genetic ; Vibrionaceae/*classification/genetics/isolation & purification/*physiology ; Whole Genome Sequencing/*methods ; }, abstract = {BACKGROUND: In bacteria, pan-genomes are the result of an evolutionary "tug of war" between selection and horizontal gene transfer (HGT). High rates of HGT increase the genetic pool and the effective population size (Ne), resulting in open pan-genomes. In contrast, selective pressures can lead to local adaptation by purging the variation introduced by HGT and mutation, resulting in closed pan-genomes and clonal lineages. In this study, we explored both hypotheses, elucidating the pan-genome of Vibrionaceae isolates after a perturbation event in the endangered oasis of Cuatro Ciénegas Basin (CCB), Mexico, and looking for signals of adaptation to the environments in their genomes.

RESULTS: We obtained 42 genomes of Vibrionaceae distributed in six lineages, two of them did not showed any close reference strain in databases. Five of the lineages showed closed pan-genomes and were associated to either water or sediment environment; their high Ne estimates suggest that these lineages are not from a recent origin. The only clade with an open pan-genome was found in both environments and was formed by ten genetic groups with low Ne, suggesting a recent origin. The recombination and mutation estimators (r/m) ranged from 0.005 to 2.725, which are similar to oceanic Vibrionaceae estimations. However, we identified 367 gene families with signals of positive selection, most of them found in the core genome; suggesting that despite recombination, natural selection moves the Vibrionaceae CCB lineages to local adaptation, purging the genomes and keeping closed pan-genome patterns. Moreover, we identify 598 SNPs associated with an unstructured environment; some of the genes associated with these SNPs were related to sodium transport.

CONCLUSIONS: Different lines of evidence suggest that the sampled Vibrionaceae, are part of the rare biosphere usually living under famine conditions. Two of these lineages were reported for the first time. Most Vibrionaceae lineages of CCB are adapted to their micro-habitats rather than to the sampled environments. This pattern of adaptation is concordant with the association of closed pan-genomes and local adaptation.}, } @article {pmid32562810, year = {2020}, author = {Anani, H and Zgheib, R and Hasni, I and Raoult, D and Fournier, PE}, title = {Interest of bacterial pangenome analyses in clinical microbiology.}, journal = {Microbial pathogenesis}, volume = {149}, number = {}, pages = {104275}, doi = {10.1016/j.micpath.2020.104275}, pmid = {32562810}, issn = {1096-1208}, mesh = {*Bacteria/genetics ; *Genome, Bacterial ; Humans ; Phylogeny ; Software ; Whole Genome Sequencing ; }, abstract = {Thanks to the progress and decreasing costs in genome sequencing technologies, more than 250,000 bacterial genomes are currently available in public databases, covering most, if not all, of the major human-associated phylogenetic groups of these microorganisms, pathogenic or not. In addition, for many of them, sequences from several strains of a given species are available, thus enabling to evaluate their genetic diversity and study their evolution. In addition, the significant cost reduction of bacterial whole genome sequencing as well as the rapid increase in the number of available bacterial genomes have prompted the development of pangenomic software tools. The study of bacterial pangenome has many applications in clinical microbiology. It can unveil the pathogenic potential and ability of bacteria to resist antimicrobials as well identify specific sequences and predict antigenic epitopes that allow molecular or serologic assays and vaccines to be designed. 
Bacterial pangenome constitutes a powerful method for understanding the history of human bacteria and relating these findings to diagnosis in clinical microbiology laboratories in order to optimize patient management.}, } @article {pmid32553274, year = {2020}, author = {Liu, Y and Du, H and Li, P and Shen, Y and Peng, H and Liu, S and Zhou, GA and Zhang, H and Liu, Z and Shi, M and Huang, X and Li, Y and Zhang, M and Wang, Z and Zhu, B and Han, B and Liang, C and Tian, Z}, title = {Pan-Genome of Wild and Cultivated Soybeans.}, journal = {Cell}, volume = {182}, number = {1}, pages = {162-176.e13}, doi = {10.1016/j.cell.2020.05.023}, pmid = {32553274}, issn = {1097-4172}, mesh = {Base Sequence ; Chromosomes, Plant/genetics ; Domestication ; Ecotype ; Gene Duplication ; Gene Expression Regulation, Plant ; Gene Fusion ; *Genome, Plant ; Geography ; Molecular Sequence Annotation ; Phylogeny ; Polymorphism, Single Nucleotide/genetics ; Polyploidy ; Soybeans/*genetics/*growth & development ; }, abstract = {Soybean is one of the most important vegetable oil and protein feed crops. To capture the entire genomic diversity, it is needed to construct a complete high-quality pan-genome from diverse soybean accessions. In this study, we performed individual de novo genome assemblies for 26 representative soybeans that were selected from 2,898 deeply sequenced accessions. Using these assembled genomes together with three previously reported genomes, we constructed a graph-based genome and performed pan-genome analysis, which identified numerous genetic variations that cannot be detected by direct mapping of short sequence reads onto a single reference genome. The structural variations from the 2,898 accessions that were genotyped based on the graph-based genome and the RNA sequencing (RNA-seq) data from the representative 26 accessions helped to link genetic variations to candidate genes that are responsible for important traits. 
This pan-genome resource will promote evolutionary and functional genomics studies in soybean.}, } @article {pmid32547597, year = {2020}, author = {Didelon, M and Khafif, M and Godiard, L and Barbacci, A and Raffaele, S}, title = {Patterns of Sequence and Expression Diversification Associate Members of the PADRE Gene Family With Response to Fungal Pathogens.}, journal = {Frontiers in genetics}, volume = {11}, number = {}, pages = {491}, pmid = {32547597}, issn = {1664-8021}, abstract = {Pathogen infection triggers extensive reprogramming of the plant transcriptome, including numerous genes the function of which is unknown. Due to their wide taxonomic distribution, genes encoding proteins with Domains of Unknown Function (DUFs) activated upon pathogen challenge likely play important roles in disease. In Arabidopsis thaliana, we identified thirteen genes harboring a DUF4228 domain in the top 10% most induced genes after infection by the fungal pathogen Sclerotinia sclerotiorum. Based on functional information collected through homology and contextual searches, we propose to refer to this domain as the pathogen and abiotic stress response, cadmium tolerance, disordered region-containing (PADRE) domain. Genome-wide and phylogenetic analyses indicated that PADRE is specific to plants and diversified into 10 subfamilies early in the evolution of Angiosperms. PADRE typically occurs in small single-domain proteins with a bipartite architecture. PADRE N-terminus harbors conserved sequence motifs, while its C-terminus includes an intrinsically disordered region with multiple phosphorylation sites. A pangenomic survey of PADRE genes expression upon S. sclerotiorum inoculation in Arabidopsis, castor bean, and tomato indicated consistent expression across species within phylogenetic groups. Multi-stress expression profiling and co-expression network analyses associated AtPADRE genes with the induction of anthocyanin biosynthesis and responses to chitin and to hypoxia. 
Our analyses reveal patterns of sequence and expression diversification consistent with the evolution of a role in disease resistance for an uncharacterized family of plant genes. These findings highlight PADRE genes as prime candidates for the functional dissection of mechanisms underlying plant disease resistance to fungi.}, } @article {pmid32533320, year = {2020}, author = {Xu, YY and Huang, CJ and Xu, L and Jiang, XW and Xu, XW and Xu, XW}, title = {Complete Genome Sequences of Leclercia sp. W6 and W17 Isolated from a Gastric Cancer Patient.}, journal = {Current microbiology}, volume = {77}, number = {10}, pages = {2775-2782}, doi = {10.1007/s00284-020-02075-3}, pmid = {32533320}, issn = {1432-0991}, support = {Y2017KY202//Zhejiang Provincial Department of Health/ ; LQ19C010006//Natural Science Foundation of Zhejiang Province/ ; 17042187-Y//Scientific Research Foundation of Zhejiang Sci-Tech University/ ; }, mesh = {Aged ; Drug Resistance, Multiple/genetics ; *Enterobacteriaceae/classification/genetics/isolation & purification ; Enterobacteriaceae Infections/complications/microbiology ; Female ; *Genome, Bacterial/genetics ; Humans ; Phylogeny ; *Stomach Neoplasms/complications ; }, abstract = {Leclercia sp. W6 and W17, which belong to the Enterobacteriaceae, were isolated from a stomach sample from a 78-year-old female gastric cancer patient, and genomic sequencing and analysis were performed. The genome of Leclercia sp. W6 consists of one chromosome with a size of 4,945,486 bp, while that of Leclercia sp. W17 contains one chromosome and two plasmids with a total size of 5,125,645 bp. Average nucleotide identity (ANI) calculations indicated that strains W6 and W17 exhibited similarities < 91.0% to other strains within the Enterobacteriaceae, except for six Leclercia strains. 
Phylogenomic analysis based on core-genome showed that strains W6 and W17 belong to the genus Leclercia, and phylogenetic analysis based on ANI values revealed that strains W6 and W17 formed an independent clade from those six Leclercia strains. Furthermore, comparative genomic analysis revealed that strains W6 and W17 had 5086 orthologous clusters (OCs) in their pan-genomes, and 59 exclusive OCs which were absent in their closest relatives. Genomic annotations revealed that the genomes of strains W6 and W17 encoded genes related to multidrug resistance clusters, multiple antibiotic resistance loci, and multidrug efflux pumps and had an identical urease gene cluster and a dissimilatory nitrate reduction pathway. Bioinformatic analyses indicated that strains W6 and W17 represented a novel species within the genus Leclercia. Genomic annotations revealed that these strains encoded genes related to multidrug resistance, nitrate reduction, and urease activity, which contribute to gastric malignant transformation. This will broaden our knowledge of the genetic mechanisms of the Enterobacteriaceae and help improve the clinical conditions of gastric cancer patients.}, } @article {pmid32531278, year = {2020}, author = {Ellegaard, KM and Suenami, S and Miyazaki, R and Engel, P}, title = {Vast Differences in Strain-Level Diversity in the Gut Microbiota of Two Closely Related Honey Bee Species.}, journal = {Current biology : CB}, volume = {30}, number = {13}, pages = {2520-2531.e7}, pmid = {32531278}, issn = {1879-0445}, mesh = {Animals ; Bacteria/classification/*genetics/isolation & purification ; Bees/*microbiology ; *Gastrointestinal Microbiome ; RNA, Bacterial/analysis ; RNA, Ribosomal, 16S/analysis ; Species Specificity ; }, abstract = {Most bacterial species encompass strains with vastly different gene content. Strain diversity in microbial communities is therefore considered to be of functional importance. 
Yet little is known about the extent to which related microbial communities differ in diversity at this level and which underlying mechanisms may constrain and maintain strain-level diversity. Here, we used shotgun metagenomics to characterize and compare the gut microbiota of two honey bee species, Apis mellifera and Apis cerana, which diverged about 6 mya. Although the host species are colonized largely by the same bacterial 16S rRNA phylotypes, we find that their communities are host specific when analyzed with genomic resolution. Moreover, despite their similar ecology, A. mellifera displayed a much higher diversity of strains and functional gene content in the microbiota compared to A. cerana, both per colony and per individual bee. In particular, the gene repertoire for polysaccharide degradation was massively expanded in the microbiota of A. mellifera relative to A. cerana. Bee management practices, divergent ecological adaptation, or habitat size may have contributed to the observed differences in microbiota genomic diversity of these key pollinator species. Our results illustrate that the gut microbiota of closely related animal hosts can differ vastly in genomic diversity while displaying similar levels of diversity based on the 16S rRNA gene. 
Such differences are likely to have consequences for gut microbiota functioning and host-symbiont interactions, highlighting the need for metagenomic studies to understand the ecology and evolution of microbial communities.}, } @article {pmid32522778, year = {2020}, author = {Crouse, A and Schramm, C and Emond-Rheault, JG and Herod, A and Kerhoas, M and Rohde, J and Gruenheid, S and Kukavica-Ibrulj, I and Boyle, B and Greenwood, CMT and Goodridge, LD and Garduno, R and Levesque, RC and Malo, D and Daigle, F}, title = {Combining Whole-Genome Sequencing and Multimodel Phenotyping To Identify Genetic Predictors of Salmonella Virulence.}, journal = {mSphere}, volume = {5}, number = {3}, pages = {}, pmid = {32522778}, issn = {2379-5042}, mesh = {Acanthamoeba/microbiology ; Animals ; Disease Models, Animal ; Epithelial Cells/*microbiology ; Female ; *Genome, Bacterial ; Genomics ; Humans ; Macrophages/microbiology ; Male ; Mice ; Mice, Inbred C57BL ; Phenotype ; Phylogeny ; Salmonella/classification/*genetics/pathogenicity ; Salmonella Infections, Animal/blood/*microbiology ; Serogroup ; THP-1 Cells ; *Virulence ; Whole Genome Sequencing ; }, abstract = {Salmonella comprises more than 2,600 serovars. Very few environmental and uncommon serovars have been characterized for their potential role in virulence and human infections. A complementary in vitro and in vivo systematic high-throughput analysis of virulence was used to elucidate the association between genetic and phenotypic variations across Salmonella isolates. The goal was to develop a strategy for the classification of isolates as a benchmark and predict virulence levels of isolates. Thirty-five phylogenetically distant strains of unknown virulence were selected from the Salmonella Foodborne Syst-OMICS (SalFoS) collection, representing 34 different serovars isolated from various sources. 
Isolates were evaluated for virulence in 4 complementary models of infection to compare virulence traits with the genomics data, including interactions with human intestinal epithelial cells, human macrophages, and amoeba. In vivo testing was conducted using the mouse model of Salmonella systemic infection. Significant correlations were identified between the different models. We identified a collection of novel hypothetical and conserved proteins associated with isolates that generate a high burden. We also showed that blind prediction of virulence of 33 additional strains based on the pan-genome was high in the mouse model of systemic infection (82% agreement) and in the human epithelial cell model (74% agreement). These complementary approaches enabled us to define virulence potential in different isolates and present a novel strategy for risk assessment of specific strains and for better monitoring and source tracking during outbreaks. IMPORTANCE: Salmonella species are bacteria that are a major source of foodborne disease through contamination of a diversity of foods, including meat, eggs, fruits, nuts, and vegetables. More than 2,600 different Salmonella enterica serovars have been identified, and only a few of them are associated with illness in humans. Despite the fact that they are genetically closely related, there is enormous variation in the virulence of different isolates of Salmonella enterica. Identification of foodborne pathogens is a lengthy process based on microbiological, biochemical, and immunological methods. Here, we worked toward new ways of integrating whole-genome sequencing (WGS) approaches into food safety practices. We used WGS to build associations between virulence and genetic diversity within 83 Salmonella isolates representing 77 different Salmonella serovars. 
Our work demonstrates the potential of combining a genomics approach and virulence tests to improve the diagnostics and assess risk of human illness associated with specific Salmonella isolates.}, } @article {pmid32518186, year = {2020}, author = {Gori, A and Harrison, OB and Mlia, E and Nishihara, Y and Chan, JM and Msefula, J and Mallewa, M and Dube, Q and Swarthout, TD and Nobbs, AH and Maiden, MCJ and French, N and Heyderman, RS}, title = {Pan-GWAS of Streptococcus agalactiae Highlights Lineage-Specific Genes Associated with Virulence and Niche Adaptation.}, journal = {mBio}, volume = {11}, number = {3}, pages = {}, pmid = {32518186}, issn = {2150-7511}, support = {MR/N023129/1/MRC_/Medical Research Council/United Kingdom ; /DH_/Department of Health/United Kingdom ; 106846/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Adaptation, Physiological/*genetics ; Animals ; Anti-Bacterial Agents/pharmacology ; *Genome, Bacterial ; *Genome-Wide Association Study ; Host Microbial Interactions/genetics ; Humans ; Internationality ; Phylogeny ; Streptococcal Infections/microbiology ; Streptococcus agalactiae/*genetics/pathogenicity/*physiology ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Streptococcus agalactiae (group B streptococcus; GBS) is a colonizer of the gastrointestinal and urogenital tracts, and an opportunistic pathogen of infants and adults. The worldwide population of GBS is characterized by clonal complexes (CCs) with different invasive potentials. CC17, for example, is a hypervirulent lineage commonly associated with neonatal sepsis and meningitis, while CC1 is less invasive in neonates and more commonly causes invasive disease in adults with comorbidities. The genetic basis of GBS virulence and the extent to which different CCs have adapted to different host environments remain uncertain. 
We have therefore applied a pan-genome-wide association study (GWAS) approach to 1,988 GBS strains isolated from different hosts and countries. Our analysis identified 279 CC-specific genes associated with virulence, disease, metabolism, and regulation of cellular mechanisms that may explain the differential virulence potential of particular CCs. In CC17 and CC23, for example, we have identified genes encoding pilus, quorum-sensing proteins, and proteins for the uptake of ions and micronutrients which are absent in less invasive lineages. Moreover, in CC17, carriage and disease strains were distinguished by the allelic variants of 21 of these CC-specific genes. Together our data highlight the lineage-specific basis of GBS niche adaptation and virulence. IMPORTANCE: GBS is a leading cause of mortality in newborn babies in high- and low-income countries worldwide. Different strains of GBS are characterized by different degrees of virulence, where some are harmlessly carried by humans or animals and others are much more likely to cause disease. The genome sequences of almost 2,000 GBS samples isolated from both animals and humans in high- and low-income countries were analyzed using a pan-genome-wide association study approach. This allowed us to identify 279 genes which are associated with different lineages of GBS, characterized by a different virulence and preferred host. 
Additionally, we propose that the GBS now carried in humans may have first evolved in animals before expanding clonally once adapted to the human host. These findings are essential to help understand what is causing GBS disease and how the bacteria have evolved and are transmitted.}, } @article {pmid32509595, year = {2020}, author = {Gonzales-Siles, L and Karlsson, R and Schmidt, P and Salvà-Serra, F and Jaén-Luchoro, D and Skovbjerg, S and Moore, ERB and Gomila, M}, title = {A Pangenome Approach for Discerning Species-Unique Gene Markers for Identifications of Streptococcus pneumoniae and Streptococcus pseudopneumoniae.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {222}, pmid = {32509595}, issn = {2235-2988}, mesh = {*Genomics ; Genotype ; *Streptococcus/genetics ; *Streptococcus pneumoniae/genetics ; }, abstract = {Correct identifications of isolates and strains of the Mitis-Group of the genus Streptococcus are particularly difficult, due to high genetic similarity, resulting from horizontal gene transfer and homologous recombination, and unreliable phenotypic and genotypic biomarkers for differentiating the species. Streptococcus pneumoniae and Streptococcus pseudopneumoniae are the most closely related species of the clade. In this study, publicly-available genome sequences for Streptococcus pneumoniae and S. pseudopneumoniae were analyzed, using a pangenomic approach, to find candidates for species-unique gene markers; ten species-unique genes for S. pneumoniae and nine for S. pseudopneumoniae were identified. These species-unique gene marker candidates were verified by PCR assays for identifying S. pneumoniae and S. pseudopneumoniae strains isolated from clinical samples. All determined species-level unique gene markers for S. pneumoniae were detected in all S. pneumoniae clinical isolates, whereas fewer of the unique S. pseudopneumoniae gene markers were present in more than 95% of the clinical isolates. 
In parallel, taxonomic identifications of the clinical isolates were confirmed, using conventional optochin sensitivity testing, targeted PCR-detection for the "Xisco" gene, as well as genomic ANIb similarity analyses for the genome sequences of selected strains. Using mass spectrometry-proteomics, species-specific peptide matches were observed for four of the S. pneumoniae gene markers and for three of the S. pseudopneumoniae gene markers. Application of multiple species-level unique biomarkers of S. pneumoniae and S. pseudopneumoniae, is proposed as a protocol for the routine clinical laboratory for improved, reliable differentiation, and identification of these pathogenic and commensal species.}, } @article {pmid32509458, year = {2020}, author = {Nasr Azadani, D and Zhang, D and Hatherill, JR and Silva, D and Turner, JW}, title = {Isolation, characterization, and comparative genomic analysis of a phage infecting high-level aminoglycoside-resistant (HLAR) Enterococcus faecalis.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e9171}, pmid = {32509458}, issn = {2167-8359}, abstract = {Enterococcus is a genus of Gram-positive bacteria that are commensal to the gastrointestinal tracts of humans but some species have been increasingly implicated as agents of nosocomial infections. The increase in infections and the spread of antibiotic-resistant strains have contributed to renewed interest in the discovery of Enterococcus phages. The aims of this study were (1) the isolation, characterization, and genome sequencing of a phage capable of infecting an antibiotic-resistant E. faecalis strain, and (2) the comparative genomic analysis of publicly-available Enterococcus phages. For this purpose, multiple phages were isolated from wastewater treatment plant (WWTP) influent using a high-level aminoglycoside-resistant (HLAR) E. faecalis strain as the host. One phage, phiNASRA1, demonstrated a high lytic efficiency (∼97.52%). 
Transmission electron microscopy (TEM) and whole-genome sequencing (WGS) showed that phiNASRA1 belongs to the Siphoviridae family of double-stranded DNA viruses. The phage was approximately 250 nm in length and its complete genome (40,139 bp, 34.7% GC) contained 62 open reading frames (ORFs). Phylogenetic comparisons of phiNASRA1 and 31 publicly-available Enterococcus phages, based on the large subunit terminase and portal proteins, grouped phage by provenance, size, and GC content. In particular, both phylogenies grouped phages larger than 100 kbp into distinct clades. A phylogeny based on a pangenome analysis of the same 32 phages also grouped phages by provenance, size, and GC content although agreement between the two single-locus phylogenies was higher. Per the pangenome phylogeny, phiNASRA1 was most closely related to phage LY0322 that was similar in size, GC content, and number of ORFs (40,139 and 40,934 bp, 34.77 and 34.80%, and 60 and 64 ORFs, respectively). The pangenome analysis did illustrate the high degree of sequence diversity and genome plasticity as no coding sequence was homologous across all 32 phages, and even 'conserved' structural proteins (e.g., the large subunit terminase and portal proteins) were homologous in no more than half of the 32 phage genomes. These findings contribute to a growing body of literature devoted to understanding phage biology and diversity. We propose that this high degree of diversity limited the value of the single-locus and pangenome phylogenies. By contrast, the high degree of homology between phages larger than 100 kbp suggests that pangenome analyses of more similar phages is a viable method for assessing subclade diversity. Future work is focused on validating phiNASRA1 as a potential therapeutic agent to eradicate antibiotic-resistant E. 
faecalis infections in an animal model.}, } @article {pmid32496181, year = {2020}, author = {Wang, LYR and Jokinen, CC and Laing, CR and Johnson, RP and Ziebell, K and Gannon, VPJ}, title = {Assessing the genomic relatedness and evolutionary rates of persistent verotoxigenic Escherichia coli serotypes within a closed beef herd in Canada.}, journal = {Microbial genomics}, volume = {6}, number = {6}, pages = {}, pmid = {32496181}, issn = {2057-5858}, mesh = {Animals ; Bayes Theorem ; Canada ; Cattle ; Cattle Diseases/*microbiology ; Escherichia coli Infections/*microbiology/veterinary ; Evolution, Molecular ; Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Humans ; Phylogeny ; *Polymorphism, Single Nucleotide ; Serogroup ; Shiga Toxin 2/genetics ; Shiga-Toxigenic Escherichia coli/*classification/genetics ; Whole Genome Sequencing/*methods ; }, abstract = {Verotoxigenic Escherichia coli (VTEC) are food- and water-borne pathogens associated with both sporadic illness and outbreaks of enteric disease. While it is known that cattle are reservoirs of VTEC, little is known about the genomic variation of VTEC in cattle, and whether the variation in genomes reported for human outbreak strains is consistent with individual animal or group/herd sources of infection. A previous study of VTEC prevalence identified serotypes carried persistently by three consecutive cohorts of heifers within a closed herd of cattle. This present study aimed to: (i) determine whether the genomic relatedness of bovine isolates is similar to that reported for human strains associated with single source outbreaks, (ii) estimate the rates of genome change among dominant serotypes over time within a cattle herd, and (iii) identify genomic features of serotypes associated with persistence in cattle. 
Illumina MiSeq genome sequencing and genotyping based on allelic and single nucleotide variations were completed, while genome change over time was measured using Bayesian evolutionary analysis sampling trees. The accessory genome, including the non-protein-encoding intergenic regions (IGRs), virulence factors, antimicrobial-resistance genes and plasmid gene content of representative persistent and sporadic cattle strains were compared using Fisher's exact test corrected for multiple comparisons. Herd strains from serotypes O6:H34 (n=22), O22:H8 (n=30), O108:H8 (n=39), O139:H19 (n=44) and O157:H7 (n=106) were readily distinguishable from epidemiologically unrelated strains of the same serotype using a similarity threshold of 10 or fewer allele differences between adjacent nodes. Temporal-cohort clustering within each serotype was supported by date randomization analysis. Substitutions per site per year were consistent with previously reported values for E. coli; however, there was low branch support for these values. Acquisition of the phage-encoded Shiga toxin 2 gene in serotype O22:H8 was observed. Pan-genome analyses identified accessory regions that were more prevalent in persistent serotypes (P≤0.05) than in sporadic serotypes. These results suggest that VTEC serotypes from a specific cattle population are highly clonal with a similar level of relatedness as human single-source outbreak-associated strains, but changes in the genome occur gradually over time. 
Additionally, elements in the accessory genomes may provide a selective advantage for persistence of VTEC within cattle herds.}, } @article {pmid32494685, year = {2020}, author = {Fan, X and Qiu, H and Han, W and Wang, Y and Xu, D and Zhang, X and Bhattacharya, D and Ye, N}, title = {Phytoplankton pangenome reveals extensive prokaryotic horizontal gene transfer of diverse functions.}, journal = {Science advances}, volume = {6}, number = {18}, pages = {eaba0111}, pmid = {32494685}, issn = {2375-2548}, abstract = {The extent and role of horizontal gene transfer (HGT) in phytoplankton and, more broadly, eukaryotic evolution remain controversial topics. Recent studies substantiate the importance of HGT in modifying or expanding functions such as metal or reactive species detoxification and buttressing halotolerance. Yet, the potential of HGT to significantly alter the fate of species in a major eukaryotic assemblage remains to be established. We provide such an example for the ecologically important lineages encompassed by cryptophytes, rhizarians, alveolates, stramenopiles, and haptophytes ("CRASH" taxa). We describe robust evidence of prokaryotic HGTs in these taxa affecting functions such as polysaccharide biosynthesis. Numbers of HGTs range from 0.16 to 1.44% of CRASH species gene inventories, comparable to the ca. 1% prokaryote-derived HGTs found in the genomes of extremophilic red algae. 
Our results substantially expand the impact of HGT in eukaryotes and define a set of general principles for prokaryotic gene fixation in phytoplankton genomes.}, } @article {pmid32493786, year = {2020}, author = {Wesevich, A and Sutton, G and Ruffin, F and Park, LP and Fouts, DE and Fowler, VG and Thaden, JT}, title = {Newly Named Klebsiella aerogenes (formerly Enterobacter aerogenes) Is Associated with Poor Clinical Outcomes Relative to Other Enterobacter Species in Patients with Bloodstream Infection.}, journal = {Journal of clinical microbiology}, volume = {58}, number = {9}, pages = {}, pmid = {32493786}, issn = {1098-660X}, support = {K24 AI093969/AI/NIAID NIH HHS/United States ; KL2 TR002554/TR/NCATS NIH HHS/United States ; U19 AI110819/AI/NIAID NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology/therapeutic use ; *Bacteremia/drug therapy ; Enterobacter ; *Enterobacter aerogenes/genetics ; Humans ; *Sepsis/drug therapy ; }, abstract = {Enterobacter aerogenes was recently renamed Klebsiella aerogenes. This study aimed to identify differences in clinical characteristics, outcomes, and bacterial genetics among patients with K. aerogenes versus Enterobacter species bloodstream infections (BSI). We prospectively enrolled patients with K. aerogenes or Enterobacter cloacae complex (Ecc) BSI from 2002 to 2015. We performed whole-genome sequencing (WGS) and pan-genome analysis on all bacteria. Overall, 150 patients with K. aerogenes (46/150 [31%]) or Ecc (104/150 [69%]) BSI were enrolled. The two groups had similar baseline characteristics. Neither total in-hospital mortality (13/46 [28%] versus 22/104 [21%]; P = 0.3) nor attributable in-hospital mortality (9/46 [20%] versus 13/104 [12%]; P = 0.3) differed between patients with K. aerogenes versus Ecc BSI, respectively. However, poor clinical outcome (death before discharge, recurrent BSI, and/or BSI complication) was higher for K. aerogenes than Ecc BSI (32/46 [70%] versus 42/104 [40%]; P = 0.001). 
In a multivariable regression model, K. aerogenes BSI, relative to Ecc BSI, was predictive of poor clinical outcome (odds ratio 3.3; 95% confidence interval 1.4 to 8.1; P = 0.008). Pan-genome analysis revealed 983 genes in 323 genomic islands unique to K. aerogenes isolates, including putative virulence genes involved in iron acquisition (n = 67), fimbriae/pili/flagella production (n = 117), and metal homeostasis (n = 34). Antibiotic resistance was largely found in Ecc lineage 1, which had a higher rate of multidrug resistant phenotype (23/54 [43%]) relative to all other bacterial isolates (23/96 [24%]; P = 0.03). K. aerogenes BSI was associated with poor clinical outcomes relative to Ecc BSI. Putative virulence factors in K. aerogenes may account for these differences.}, } @article {pmid32480355, year = {2020}, author = {Badet, T and Croll, D}, title = {The rise and fall of genes: origins and functions of plant pathogen pangenomes.}, journal = {Current opinion in plant biology}, volume = {56}, number = {}, pages = {65-73}, doi = {10.1016/j.pbi.2020.04.009}, pmid = {32480355}, issn = {1879-0356}, mesh = {*Plant Diseases/genetics ; *Plants/genetics ; Virulence ; }, abstract = {Plant pathogens can rapidly overcome resistance of their hosts by mutating key pathogenicity genes encoding for effectors. Pathogen adaptation is fuelled by extensive genetic variability in populations and different strains may not share the same set of genes. Recently, such an intra-specific variation in gene content became formalized as pangenomes distinguishing core genes (i.e. shared) and accessory genes (i.e. lineage or strain-specific). Across pathogens species, key effectors tend to be part of the rapidly evolving accessory genome. Here, we show how the construction and analysis of pathogen pangenomes provide deep insights into the dynamic host adaptation process. 
We also discuss how pangenomes should ideally be built and how geography, niche and lifestyle likely determine pangenome sizes.}, } @article {pmid32471418, year = {2020}, author = {Pilar, AVC and Petronella, N and Dussault, FM and Verster, AJ and Bekal, S and Levesque, RC and Goodridge, L and Tamber, S}, title = {Similar yet different: phylogenomic analysis to delineate Salmonella and Citrobacter species boundaries.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {377}, pmid = {32471418}, issn = {1471-2164}, support = {8505//Genome Canada/ ; }, mesh = {Citrobacter/*classification/*genetics ; Genome, Bacterial/genetics ; *Genomics ; *Phylogeny ; Salmonella enterica/*classification/*genetics ; }, abstract = {BACKGROUND: Salmonella enterica is a leading cause of foodborne illness worldwide resulting in considerable public health and economic costs. Testing for the presence of this pathogen in food is often hampered by the presence of background microflora that may present as Salmonella (false positives). False positive isolates belonging to the genus Citrobacter can be difficult to distinguish from Salmonella due to similarities in their genetics, cell surface antigens, and other phenotypes. In order to understand the genetic basis of these similarities, a comparative genomic approach was used to define the pan-, core, accessory, and unique coding sequences of a representative population of Salmonella and Citrobacter strains.

RESULTS: Analysis of the genomic content of 58 S. enterica strains and 37 Citrobacter strains revealed the presence of 31,130 and 1540 coding sequences within the pan- and core genome of this population. Amino acid sequences unique to either Salmonella (n = 1112) or Citrobacter (n = 195) were identified and revealed potential niche-specific adaptations. Phylogenetic network analysis of the protein families encoded by the pan-genome indicated that genetic exchange between Salmonella and Citrobacter may have led to the acquisition of similar traits and also diversification within the genera.

CONCLUSIONS: Core genome analysis suggests that the Salmonella enterica and Citrobacter populations investigated here share a common evolutionary history. Comparative analysis of the core and pan-genomes was able to define the genetic features that distinguish Salmonella from Citrobacter and highlight niche specific adaptations.}, } @article {pmid32469363, year = {2020}, author = {Li, M and Aye, SM and Ahmed, MU and Han, ML and Li, C and Song, J and Boyce, JD and Powell, DR and Azad, MAK and Velkov, T and Zhu, Y and Li, J}, title = {Pan-transcriptomic analysis identified common differentially expressed genes of Acinetobacter baumannii in response to polymyxin treatments.}, journal = {Molecular omics}, volume = {16}, number = {4}, pages = {327-338}, pmid = {32469363}, issn = {2515-4184}, support = {R01 AI132154/AI/NIAID NIH HHS/United States ; }, mesh = {Acinetobacter Infections/drug therapy/microbiology ; Acinetobacter baumannii/*drug effects/*genetics ; Anti-Bacterial Agents/*pharmacology/therapeutic use ; Computational Biology/methods ; Databases, Genetic ; *Gene Expression Profiling ; Gene Expression Regulation, Bacterial/*drug effects ; Gene Expression Regulation, Enzymologic ; Genomics/methods ; Humans ; Phylogeny ; Polymyxins/*pharmacology/therapeutic use ; Time Factors ; *Transcriptome ; }, abstract = {Multidrug-resistant Acinetobacter baumannii is a top-priority Gram-negative pathogen and polymyxins are a last-line therapeutic option. Previous systems pharmacological studies examining polymyxin killing and resistance usually focused on individual strains, and the derived knowledge could be limited by strain-specific genomic context. In this study, we examined the gene expression of five A. baumannii strains (34654, 1207552, 1428368, 1457504 and ATCC 19606) to determine the common differentially expressed genes in response to polymyxin treatments. A pan-genome containing 6061 genes was identified for 89 A. 
baumannii genomes from RefSeq database which included the five strains examined in this study; 2822 of the 6061 genes constituted the core genome. After 2 mg L[-1] or 0.75 × MIC polymyxin treatments for 15 min, 41 genes were commonly up-regulated, including those involved in membrane biogenesis and homeostasis, lipoprotein and phospholipid trafficking, efflux pump and poly-N-acetylglucosamine biosynthesis; six genes were commonly down-regulated, three of which were related to fatty acid biosynthesis. Additionally, comparison of the gene expression at 15 and 60 min in ATCC 19606 revealed that polymyxin treatment resulted in a rapid change in amino acid metabolism at 15 min and perturbations on envelope biogenesis at both time points. This is the first pan-transcriptomic study for polymyxin-treated A. baumannii and our results identified that the remodelled outer membrane, up-regulated efflux pumps and down-regulated fatty acid biosynthesis might be essential for early responses to polymyxins in A. baumannii. Our findings provide important mechanistic insights into bacterial responses to polymyxin killing and may facilitate the optimisation of polymyxin therapy against this problematic 'superbug'.}, } @article {pmid32468160, year = {2020}, author = {Tschoeke, D and Salazar, VW and Vidal, L and Campeão, M and Swings, J and Thompson, F and Thompson, C}, title = {Unlocking the Genomic Taxonomy of the Prochlorococcus Collective.}, journal = {Microbial ecology}, volume = {80}, number = {3}, pages = {546-558}, doi = {10.1007/s00248-020-01526-5}, pmid = {32468160}, issn = {1432-184X}, mesh = {*Genome, Bacterial ; Genomics ; *Life History Traits ; Prochlorococcus/*classification/genetics/physiology ; }, abstract = {Prochlorococcus is the most abundant photosynthetic prokaryote on our planet. 
The extensive ecological literature on the Prochlorococcus collective (PC) is based on the assumption that it comprises one single genus comprising the species Prochlorococcus marinus, containing itself a collective of ecotypes. Ecologists adopt the distributed genome hypothesis of an open pan-genome to explain the observed genomic diversity and evolution patterns of the ecotypes within PC. Novel genomic data for the PC prompted us to revisit this group, applying the current methods used in genomic taxonomy. As a result, we were able to distinguish the five genera: Prochlorococcus, Eurycolium, Prolificoccus, Thaumococcus, and Riococcus. The novel genera have distinct genomic and ecological attributes.}, } @article {pmid32466367, year = {2020}, author = {Sharma, P and Gupta, SK and Barrett, JB and Hiott, LM and Woodley, TA and Kariyawasam, S and Frye, JG and Jackson, CR}, title = {Comparison of Antimicrobial Resistance and Pan-Genome of Clinical and Non-Clinical Enterococcus cecorum from Poultry Using Whole-Genome Sequencing.}, journal = {Foods (Basel, Switzerland)}, volume = {9}, number = {6}, pages = {}, pmid = {32466367}, issn = {2304-8158}, support = {6040-32000-009-00D//Agricultural Research Service/ ; }, abstract = {Enterococcus cecorum is an emerging avian pathogen, particularly in chickens, but can be found in both diseased (clinical) and healthy (non-clinical) poultry. To better define differences between E. cecorum from the two groups, whole-genome sequencing (WGS) was used to identify and compare antimicrobial resistance genes as well as the pan-genome among the isolates. Eighteen strains selected from our previous study were subjected to WGS using Illumina MiSeq and comparatively analyzed. Assembled contigs were analyzed for resistance genes using ARG-ANNOT. Resistance to erythromycin was mediated by ermB, ermG, and mefA, in clinical isolates and ermB and mefA, in non-clinical isolates. 
Lincomycin resistance genes were identified as linB, lnuB, lnuC, and lnuD with lnuD found only in non-clinical E. cecorum; however, lnuB and linB were found in only one clinical isolate. For both groups of isolates, kanamycin resistance was mediated by aph3-III, while tetracycline resistance was conferred by tetM, tetO, and tetL. No mutations or known resistance genes were found for isolates resistant to either linezolid or chloramphenicol, suggesting possible new mechanisms of resistance to these drugs. A comparison of WGS results confirmed that non-clinical isolates contained more resistance genes than clinical isolates. The pan-genome of clinical and non-clinical isolates resulted in 3651 and 4950 gene families, respectively, whereas the core gene sets were comprised of 1559 and 1534 gene families in clinical and non-clinical isolates, respectively. Unique genes were found more frequently in non-clinical isolates than clinical. Phylogenetic analysis of the isolates and all the available complete and draft genomes showed no correlation between healthy and diseased poultry. Additional genomic comparison is required to elucidate genetic factors in E. cecorum that contribute to disease in poultry.}, } @article {pmid32463784, year = {2020}, author = {Liu, YH and Xie, YG and Li, L and Jiang, HC and Mohamad, OAA and Hozzein, W and Fang, BZ and Li, WJ}, title = {Cyclobacterium salsum sp. nov. and Cyclobacterium roseum sp. 
nov., isolated from a saline lake.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {6}, pages = {3785-3793}, doi = {10.1099/ijsem.0.004237}, pmid = {32463784}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Bacteroidetes/*classification/isolation & purification ; Base Composition ; China ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Geologic Sediments/*microbiology ; Lakes/*microbiology ; Nucleic Acid Hybridization ; *Phylogeny ; Pigmentation ; RNA, Ribosomal, 16S/genetics ; *Saline Waters ; Sequence Analysis, DNA ; }, abstract = {Two novel strains, designated SYSU L10167[T] and SYSU L10180[T], were isolated from sediment sampled at Dabancheng saline lake in Xinjiang, PR China. A polyphasic approach was used to clarify the taxonomic positions of the two strains. Cells of the isolates were curved ring-like, horseshoe-shaped or rod-shaped, non-motile and non-spore-forming. Cells were Gram-stain-negative, aerobic, heterotrophic and rose-pigmented. The phylogenetic trees based on 16S rRNA gene sequences showed that strains SYSU L10167[T] and SYSU L10180[T] formed a distinct lineage within the genus Cyclobacterium. Strains SYSU L10167[T] and SYSU L10180[T] showed highest similarities to Cyclobacterium jeungdonense KCTC 23150[T] (98.0 and 97.4%, respectively). Results of genomic analyses (including average nucleotide identity, digital DNA-DNA hybridization and the marker gene tree) and pan-genome analysis further confirmed that strains SYSU L10167[T] and SYSU L10180[T] were separate from each other and other species of the genus Cyclobacterium. The draft genomes of the isolates had sizes of 5.5-5.7 Mb and reflected their major physiological capabilities. Based on phenotypic, physiological, chemotaxonomic and genotypic characterization, we propose that the isolates represent two novel species, for which the names Cyclobacterium salsum sp. nov. and Cyclobacterium roseum sp. nov. are proposed. 
The type strains of the species are SYSU L10167[T] (=KCTC 72390[T]=CGMCC 1.17521[T]) and SYSU L10180[T] (=KCTC 72391[T]=CGMCC 1.17278[T]).}, } @article {pmid32455698, year = {2020}, author = {Garrido-Sanz, D and Redondo-Nieto, M and Martín, M and Rivilla, R}, title = {Comparative Genomics of the Rhodococcus Genus Shows Wide Distribution of Biodegradation Traits.}, journal = {Microorganisms}, volume = {8}, number = {5}, pages = {}, pmid = {32455698}, issn = {2076-2607}, support = {Greener Grant Agreement 826312//Horizon 2020/ ; RTI2018-0933991-B-I00//Ministerio de Ciencia e Innovación/ ; FPU14/03965//Ministerio de Educación, Cultura y Deporte/ ; }, abstract = {The genus Rhodococcus exhibits great potential for bioremediation applications due to its huge metabolic diversity, including biotransformation of aromatic and aliphatic compounds. Comparative genomic studies of this genus are limited to a small number of genomes, while the high number of sequenced strains to date could provide more information about the Rhodococcus diversity. Phylogenomic analysis of 327 Rhodococcus genomes and clustering of intergenomic distances identified 42 phylogenomic groups and 83 species-level clusters. Rarefaction models show that these numbers are likely to increase as new Rhodococcus strains are sequenced. The Rhodococcus genus possesses a small "hard" core genome consisting of 381 orthologous groups (OGs), while a "soft" core genome of 1253 OGs is reached with 99.16% of the genomes. Models of sequentially randomly added genomes show that a small number of genomes are enough to explain most of the shared diversity of the Rhodococcus strains, while the "open" pangenome and strain-specific genome evidence that the diversity of the genus will increase, as new genomes still add more OGs to the whole genomic set. 
Most rhodococci possess genes involved in the degradation of aliphatic and aromatic compounds, while short-chain alkane degradation is restricted to a certain number of groups, among which a specific particulate methane monooxygenase (pMMO) is only found in Rhodococcus sp. WAY2. The analysis of Rieske 2Fe-2S dioxygenases among rhodococci genomes revealed that most of these enzymes remain uncharacterized.}, } @article {pmid32453966, year = {2020}, author = {Eizenga, JM and Novak, AM and Sibbesen, JA and Heumos, S and Ghaffaari, A and Hickey, G and Chang, X and Seaman, JD and Rounthwaite, R and Ebler, J and Rautiainen, M and Garg, S and Paten, B and Marschall, T and Sirén, J and Garrison, E}, title = {Pangenome Graphs.}, journal = {Annual review of genomics and human genetics}, volume = {21}, number = {}, pages = {139-162}, pmid = {32453966}, issn = {1545-293X}, support = {U01 HL137183/HL/NHLBI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; U54 HG007990/HG/NHGRI NIH HHS/United States ; BB/S004661/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Algorithms ; Computational Biology/*methods ; *Computer Graphics ; *Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; }, abstract = {Low-cost whole-genome assembly has enabled the collection of haplotype-resolved pangenomes for numerous organisms. In turn, this technological change is encouraging the development of methods that can precisely address the sequence and variation described in large collections of related genomes. These approaches often use graphical models of the pangenome to support algorithms for sequence alignment, visualization, functional genomics, and association studies. The additional information provided to these methods by the pangenome allows them to achieve superior performance on a variety of bioinformatic tasks, including read alignment, variant calling, and genotyping. 
Pangenome graphs stand to become a ubiquitous tool in genomics. Although it is unclear whether they will replace linear reference genomes, their ability to harmoniously relate multiple sequence and coordinate systems will make them useful irrespective of which pangenomic models become most common in the future.}, } @article {pmid32451426, year = {2020}, author = {Kelly, LJ and Plumb, WJ and Carey, DW and Mason, ME and Cooper, ED and Crowther, W and Whittemore, AT and Rossiter, SJ and Koch, JL and Buggs, RJA}, title = {Convergent molecular evolution among ash species resistant to the emerald ash borer.}, journal = {Nature ecology & evolution}, volume = {4}, number = {8}, pages = {1116-1128}, pmid = {32451426}, issn = {2397-334X}, support = {BB/L012162/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; *Coleoptera/genetics ; Evolution, Molecular ; *Fraxinus/genetics ; Larva ; Phylogeny ; }, abstract = {Recent studies show that molecular convergence plays an unexpectedly common role in the evolution of convergent phenotypes. We exploited this phenomenon to find candidate loci underlying resistance to the emerald ash borer (EAB, Agrilus planipennis), the United States' most costly invasive forest insect to date, within the pan-genome of ash trees (the genus Fraxinus). We show that EAB-resistant taxa occur within three independent phylogenetic lineages. In genomes from these resistant lineages, we detect 53 genes with evidence of convergent amino acid evolution. Gene-tree reconstruction indicates that, for 48 of these candidates, the convergent amino acids are more likely to have arisen via independent evolution than by another process such as hybridization or incomplete lineage sorting. Seven of the candidate genes have putative roles connected to the phenylpropanoid biosynthesis pathway and 17 relate to herbivore recognition, defence signalling or programmed cell death. 
Evidence for loss-of-function mutations among these candidates is more frequent in susceptible species than in resistant ones. Our results on evolutionary relationships, variability in resistance, and candidate genes for defence response within the ash genus could inform breeding for EAB resistance, facilitating ecological restoration in areas invaded by this beetle.}, } @article {pmid32448920, year = {2020}, author = {Gao, S and Wu, J and Stiller, J and Zheng, Z and Zhou, M and Wang, YG and Liu, C}, title = {Identifying barley pan-genome sequence anchors using genetic mapping and machine learning.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {133}, number = {9}, pages = {2535-2544}, doi = {10.1007/s00122-020-03615-y}, pmid = {32448920}, issn = {1432-2242}, support = {R-10191-01//Commonwealth Scientific and Industrial Research Organisation/ ; DP160104292//Centre of Excellence for Mathematical and Statistical Frontiers, Australian Research Council/ ; }, mesh = {Algorithms ; *Chromosome Mapping ; *Genome, Plant ; Genotype ; Hordeum/*genetics ; Linkage Disequilibrium ; *Machine Learning ; }, abstract = {We identified 1.844 million barley pan-genome sequence anchors from 12,306 genotypes using genetic mapping and machine learning. There is increasing evidence that genes from a given crop genotype are far to cover all genes in that species; thus, building more comprehensive pan-genomes is of great importance in genetic research and breeding. Obtaining a thousand-genotype scale pan-genome using deep-sequencing data is currently impractical for species like barley which has a huge and highly repetitive genome. To this end, we attempted to identify barley pan-genome sequence anchors from a large quantity of genotype-by-sequencing (GBS) datasets by combining genetic mapping and machine learning algorithms. 
Based on the GBS sequences from 11,166 domesticated and 1140 wild barley genotypes, we identified 1.844 million pan-genome sequence anchors. Of them, 532,253 were identified as presence/absence variation (PAV) tags. Through aligning these PAV tags to the genome of hulless barley genotype Zangqing320, our analysis resulted in a validation of 83.6% of them from the domesticated genotypes and 88.6% from the wild barley genotypes. Association analyses against flowering time, plant height and kernel size showed that the relative importance of the PAV and non-PAV tags varied for different traits. The pan-genome sequence anchors based on GBS tags can facilitate the construction of a comprehensive pan-genome and greatly assist various genetic studies including identification of structural variation, genetic mapping and breeding in barley.}, } @article {pmid32443820, year = {2020}, author = {Oshkin, IY and Miroshnikov, KK and Grouzdev, DS and Dedysh, SN}, title = {Pan-Genome-Based Analysis as a Framework for Demarcating Two Closely Related Methanotroph Genera Methylocystis and Methylosinus.}, journal = {Microorganisms}, volume = {8}, number = {5}, pages = {}, pmid = {32443820}, issn = {2076-2607}, support = {18-74-00058//Russian Science Foundation/ ; 18-34-00363//Russian Foundation for Basic Research/ ; }, abstract = {The Methylocystis and Methylosinus are two of the five genera that were included in the first taxonomic framework of methanotrophic bacteria created half a century ago. Members of both genera are widely distributed in various environments and play a key role in reducing methane fluxes from soils and wetlands. The original separation of these methanotrophs in two distinct genera was based mainly on their differences in cell morphology. Further comparative studies that explored various single-gene-based phylogenies suggested the monophyletic nature of each of these genera. 
Current availability of genome sequences from members of the Methylocystis/Methylosinus clade opens the possibility for in-depth comparison of the genomic potentials of these methanotrophs. Here, we report the finished genome sequence of Methylocystis heyeri H2[T] and compare it to 23 currently available genomes of Methylocystis and Methylosinus species. The phylogenomic analysis confirmed that members of these genera form two separate clades. The Methylocystis/Methylosinus pan-genome core comprised 1,173 genes, with the accessory genome containing 4,941 and 11,192 genes in the shell and the cloud, respectively. Major differences between the genome-encoded environmental traits of these methanotrophs include a variety of enzymes for methane oxidation and dinitrogen fixation as well as genomic determinants for cell motility and photosynthesis.}, } @article {pmid32434538, year = {2020}, author = {Castillo, AI and Chacón-Díaz, C and Rodríguez-Murillo, N and Coletta-Filho, HD and Almeida, RPP}, title = {Impacts of local population history and ecology on the evolution of a globally dispersed pathogen.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {369}, pmid = {32434538}, issn = {1471-2164}, support = {European Union's Horizon 2020 research and innovation programme under grant agreement No 635646: POnTE (Pest Organisms Threatening Europe), the California Department of Food and Agriculture Pierce's Disease Research Program, and grant agreement No 727987: XF-ACTORS//California Department of Food and Agriculture (US)/ ; }, mesh = {Costa Rica ; *Evolution, Molecular ; Genetic Introgression ; Genetic Variation ; Genome, Bacterial/genetics ; Introduced Species ; Phylogeny ; Phylogeography ; Plant Diseases/microbiology ; Recombination, Genetic ; Species Specificity ; Xylella/classification/*genetics/isolation & purification ; }, abstract = {BACKGROUND: Pathogens with a global distribution face diverse biotic and abiotic conditions across populations. 
Moreover, the ecological and evolutionary history of each population is unique. Xylella fastidiosa is a xylem-dwelling bacterium infecting multiple plant hosts, often with detrimental effects. As a group, X. fastidiosa is divided into distinct subspecies with allopatric historical distributions and patterns of multiple introductions from numerous source populations. The capacity of X. fastidiosa to successfully colonize and cause disease in naïve plant hosts varies among subspecies, and potentially, among populations. Within Central America (i.e. Costa Rica) two X. fastidiosa subspecies coexist: the native subsp. fastidiosa and the introduced subsp. pauca. Using whole genome sequences, the patterns of gene gain/loss, genomic introgression, and genetic diversity were characterized within Costa Rica and contrasted to other X. fastidiosa populations.

RESULTS: Within Costa Rica, accessory and core genome analyses showed a highly malleable genome with numerous intra- and inter-subspecific gain/loss events. Likewise, variable levels of inter-subspecific introgression were found within and between both coexisting subspecies; nonetheless, the direction of donor/recipient subspecies to the recombinant segments varied. Some strains appeared to recombine more frequently than others; however, no group of genes or gene functions were overrepresented within recombinant segments. Finally, the patterns of genetic diversity of subsp. fastidiosa in Costa Rica were consistent with those of other native populations (i.e. subsp. pauca in Brazil).

CONCLUSIONS: Overall, this study shows the importance of characterizing local evolutionary and ecological history in the context of world-wide pathogen distribution.}, } @article {pmid32431712, year = {2020}, author = {Fiuza, TS and Lima, JPMS and de Souza, GA}, title = {EpitoCore: Mining Conserved Epitope Vaccine Candidates in the Core Proteome of Multiple Bacteria Strains.}, journal = {Frontiers in immunology}, volume = {11}, number = {}, pages = {816}, pmid = {32431712}, issn = {1664-3224}, mesh = {Alleles ; Antigens, Bacterial/immunology ; Bacterial Vaccines/*immunology ; Computational Biology/methods ; Epitopes/*immunology ; Genome, Bacterial ; Genomics/methods ; Histocompatibility Antigens Class I/genetics ; Humans ; Mycobacterium/genetics/*immunology/metabolism/*pathogenicity ; Mycobacterium Infections/*prevention & control ; Proteome/*immunology ; Vaccines, Subunit/immunology ; Vaccinology/methods ; Virulence/immunology ; }, abstract = {In reverse vaccinology approaches, complete proteomes of bacteria are submitted to multiple computational prediction steps in order to filter proteins that are possible vaccine candidates. Most available tools perform such analysis only in a single strain, or a very limited number of strains. But the vast amount of genomic data had shown that most bacteria contain pangenomes, i.e., their genomic information contains core, conserved genes, and random accessory genes specific to each strain. Therefore, in reverse vaccinology methods it is of the utmost importance to define core proteins and core epitopes. EpitoCore is a decision-tree pipeline developed to fulfill that need. It provides surfaceome prediction of proteins from related strains, defines core proteins within those, calculate their immunogenicity, predicts epitopes for a given set of MHC alleles defined by the user, and then reports if epitopes are located extracellularly and if they are conserved among the core homologs. 
Pipeline performance is illustrated by mining peptide vaccine candidates in Mycobacterium avium hominissuis strains. From a total proteome of ~4,800 proteins per strain, EpitoCore predicted 103 highly immunogenic core homologs located at cell surface, many of those related to virulence and drug resistance. Conserved epitopes identified among these homologs allows the users to define sets of peptides with potential to immunize the largest coverage of tested HLA alleles using peptide-based vaccines. Therefore, EpitoCore is able to provide automated identification of conserved epitopes in bacterial pangenomic datasets.}, } @article {pmid32428556, year = {2020}, author = {Gohil, K and Rajput, V and Dharne, M}, title = {Pan-genomics of Ochrobactrum species from clinical and environmental origins reveals distinct populations and possible links.}, journal = {Genomics}, volume = {112}, number = {5}, pages = {3003-3012}, doi = {10.1016/j.ygeno.2020.04.030}, pmid = {32428556}, issn = {1089-8646}, mesh = {Drug Resistance, Bacterial/genetics ; Environmental Microbiology ; Genes, Bacterial ; *Genome, Bacterial ; Genomics ; Humans ; Interspersed Repetitive Sequences ; Molecular Sequence Annotation ; Ochrobactrum/classification/*genetics/isolation & purification/pathogenicity ; Phylogeny ; Virulence Factors ; }, abstract = {Ochrobactrum genus is comprised of soil-dwelling Gram-negative bacteria mainly reported for bioremediation of toxic compounds. Since last few years, mainly two species of this genus, O. intermedium and O. anthropi were documented for causing infections mostly in the immunocompromised patients. Despite such ubiquitous presence, study of adaptation in various niches is still lacking. Thus, to gain insights into the niche adaptation strategies, pan-genome analysis was carried out by comparing 67 genome sequences belonging to Ochrobactrum species. Pan-genome analysis revealed it is an open pan-genome indicative of the continuously evolving nature of the genus. 
The presence/absence of gene clusters also illustrated the unique presence of antibiotic efflux transporter genes and type IV secretion system genes in the clinical strains while the genes of solvent resistance and exporter pumps in the environmental strains. A phylogenomic investigation based on 75 core genes depicted better and robust phylogenetic resolution and topology than the 16S rRNA gene. To support the pan-genome analysis, individual genomes were also investigated for the mobile genetic elements (MGE), antibiotic resistance genes (ARG), metal resistance genes (MRG) and virulence factors (VF). The analysis revealed the presence of MGE, ARG, and MRG in all the strains which play an important role in the species evolution which is in agreement with the pan-genome analysis. The average nucleotide identity (ANI) based on the genetic relatedness between the Ochrobactrum species indicated a distinction between individual species. Interestingly, the ANI tool was able to classify the Ochrobactrum genomes to the species level which were assigned till the genus level on the NCBI database.}, } @article {pmid32427945, year = {2020}, author = {Katiyar, A and Sharma, P and Dahiya, S and Singh, H and Kapil, A and Kaur, P}, title = {Genomic profiling of antimicrobial resistance genes in clinical isolates of Salmonella Typhi from patients infected with Typhoid fever in India.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {8299}, pmid = {32427945}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/*pharmacology ; Bacterial Proteins/*genetics ; Cefixime/pharmacology/therapeutic use ; Ceftriaxone/pharmacology/therapeutic use ; DNA Gyrase/genetics ; DNA Topoisomerase IV/genetics ; *Drug Resistance, Multiple, Bacterial ; Fluoroquinolones/pharmacology/therapeutic use ; Gene Expression Regulation, Bacterial/drug effects ; Humans ; India ; Microbial Sensitivity Tests ; Phenotype ; Salmonella typhi/drug effects/*genetics/isolation & purification ; Typhoid 
Fever/drug therapy/*microbiology ; Whole Genome Sequencing/*methods ; }, abstract = {The development of multidrug resistance in Salmonella enterica serovar Typhi currently forms a major roadblock for the treatment of enteric fever. This poses a major health problem in endemic regions and extends to travellers returning from developing countries. The appearance of fluoroquinolone non-susceptible strains has resulted in use of ceftriaxone as drug of choice with azithromycin being recommended for uncomplicated cases of typhoid fever. A recent sporadic instance of decreased susceptibility to the latest drug regime has necessitated a detailed analysis of antimicrobial resistance genes and possible relationships with their phenotypes to facilitate selection of future treatment regimes. Whole genome sequencing (WGS) was conducted for 133 clinical isolates from typhoid patients. Sequence output files were processed for pan-genome analysis and prediction of antimicrobial resistance genes. The WGS analyses disclosed the existence of fluoroquinolone resistance conferring mutations in gyrA, gyrB, parC and parE genes of all strains. Acquired resistance determining mechanisms observed included catA1 genes for chloramphenicol resistance, dfrA7, dfrA15, sul1 and sul2 for trimethoprim-sulfamethoxazole and blaTEM-116/blaTEM-1B genes for amoxicillin. No resistance determinants were found for ceftriaxone and cefixime. The genotypes were further correlated with their respective phenotypes for chloramphenicol, ampicillin, co-trimoxazole, ciprofloxacin and ceftriaxone. A high correlation was observed between genotypes and phenotypes in isolates of S. Typhi. The pan-genome analysis revealed that core genes were enriched in metabolic functions and accessory genes were majorly implicated in pathogenesis and antimicrobial resistance. The pan-genome of S. Typhi appears to be closed (Bpan = 0.09) as analysed by Heap's law. 
Simpson's diversity index of 0.51 showed a lower level of genetic diversity among isolates of S. Typhi. Overall, this study augments the present knowledge that WGS can help predict resistance genotypes and eventual correlation with phenotypes, enabling the chance to spot AMR determinants for fast diagnosis and prioritize antibiotic use directly from sequence.}, } @article {pmid32424209, year = {2020}, author = {Datta, S and Saha, D and Chattopadhyay, L and Majumdar, B}, title = {Genome Comparison Identifies Different Bacillus Species in a Bast Fibre-Retting Bacterial Consortium and Provides Insights into Pectin Degrading Genes.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {8169}, pmid = {32424209}, issn = {2045-2322}, mesh = {Bacillus/classification/enzymology/*genetics/metabolism ; Bacterial Proteins/genetics/*metabolism ; Biodegradation, Environmental ; Genome Size ; *Genome, Bacterial ; Genomics ; Microbial Consortia ; Pectins/*metabolism ; Phylogeny ; Polysaccharide-Lyases/genetics/metabolism ; }, abstract = {Retting of bast fibres requires removal of pectin, hemicellulose and other non-cellulosic materials from plant stem tissues by a complex microbial community. A microbial retting consortium with high-efficiency pectinolytic bacterial strains is effective in reducing retting-time and enhancing fibre quality. We report comprehensive genomic analyses of three bacterial strains (PJRB 1, 2 and 3) of the consortium and resolve their taxonomic status, genomic features, variations, and pan-genome dynamics. The genome sizes of the strains are ~3.8 Mb with 3729 to 4002 protein-coding genes. Detailed annotations of the protein-coding genes revealed different carbohydrate-degrading CAZy classes viz. PL1, PL9, GH28, CE8, and CE12. Phylogeny and structural features of pectate lyase proteins of PJRB strains divulge their functional uniqueness and evolutionary convergence with closely related Bacillus strains. 
Genome-wide prediction of genomic variations revealed 12461 to 67381 SNPs, and notably many unique SNPs were localized within the important pectin metabolism genes. The variations in the pectate lyase genes possibly contribute to their specialized pectinolytic function during the retting process. These findings encompass a strong foundation for fundamental and evolutionary studies on this unique microbial degradation of decaying plant material with immense industrial significance. These have preponderant implications in plant biomass research and food industry, and also posit application in the reclamation of water pollution from plant materials.}, } @article {pmid32421490, year = {2020}, author = {Huang, CH and Chen, CC and Liou, JS and Lee, AY and Blom, J and Lin, YC and Huang, L and Watanabe, K}, title = {Genome-based reclassification of Lactobacillus casei: emended classification and description of the species Lactobacillus zeae.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {6}, pages = {3755-3762}, doi = {10.1099/ijsem.0.003969}, pmid = {32421490}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Genes, Bacterial ; Lactobacillus/*classification ; Lacticaseibacillus casei/*classification ; Multilocus Sequence Typing ; Nucleic Acid Hybridization ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Taxonomic relationships between Lactobacillus casei, Lactobacillus paracasei and Lactobacillus zeae have long been debated. Results of previous analyses have shown that overall genome relatedness indices (such as average nucleotide identity and core nucleotide identity) between the type strains L. casei ATCC 393[T] and L. zeae ATCC 15820[T] were 94.6 and 95.3 %, respectively, which are borderline for species definition. 
However, the digital DNA‒DNA hybridization value was 57.3 %, which was clearly lower than the species delineation threshold of 70 %, and hence raised the possibility that L. casei could be reclassified into two species. To re-evaluate the taxonomic relationship of these taxa, multilocus sequence analysis (MLSA) based on the concatenated five housekeeping gene (dnaJ, dnaK, mutL, pheS and yycH) sequences, phylogenomic and core genome multilocus sequence typing analyses, gene presence and absence profiles using pan-genome analysis, matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF MS) profiling analysis, cellular fatty acid compositions, and phenotype analysis were carried out. The results of phenotypic characterization, MLSA, whole-genome sequence-based analyses and MALDI-TOF MS profiling justified an independent species designation for the L. zeae strains, and supported an emended description of the name of Lactobacillus zeae (ex Kuznetsov 1956) Dicks et al. 
1996, with ATCC 15820[T] (=DSM 20178[T]=BCRC 17942[T]) as the type strain.}, } @article {pmid32418154, year = {2020}, author = {García-Alfonso, P and García-Carbonero, R and García-Foncillas, J and Pérez-Segura, P and Salazar, R and Vera, R and Ramón Y Cajal, S and Hernández-Losa, J and Landolfi, S and Musulén, E and Cuatrecasas, M and Navarro, S}, title = {Update of the recommendations for the determination of biomarkers in colorectal carcinoma: National Consensus of the Spanish Society of Medical Oncology and the Spanish Society of Pathology.}, journal = {Clinical & translational oncology : official publication of the Federation of Spanish Oncology Societies and of the National Cancer Institute of Mexico}, volume = {22}, number = {11}, pages = {1976-1991}, pmid = {32418154}, issn = {1699-3055}, mesh = {Biomarkers, Tumor/*analysis ; Colorectal Neoplasms/*diagnosis/genetics/pathology ; Consensus ; High-Throughput Nucleotide Sequencing ; Humans ; Liquid Biopsy ; Medical Oncology ; Mutation ; Pathology ; Societies, Medical ; Spain ; }, abstract = {In this update of the consensus of the Spanish Society of Medical Oncology (Sociedad Española de Oncología Médica-SEOM) and the Spanish Society of Pathology (Sociedad Española de Anatomía Patológica-SEAP), advances in the analysis of biomarkers in advanced colorectal cancer (CRC) as well as susceptibility markers of hereditary CRC and molecular biomarkers of localized CRC are reviewed. Recently published information on the essential determination of KRAS, NRAS and BRAF mutations and the convenience of determining the amplification of human epidermal growth factor receptor 2 (HER2), the expression of proteins in the DNA repair pathway and the study of NTRK fusions are also evaluated. 
From the pathological point of view, the importance of analysing the tumour budding and poorly differentiated clusters, and its prognostic value in CRC is reviewed, as well as the impact of molecular lymph node analysis on lymph node staging in CRC. The incorporation of pan-genomic technologies, such as next-generation sequencing (NGS) and liquid biopsy in the clinical management of patients with CRC is also outlined. All these aspects are developed in this guide, which, like the previous one, will remain open to any necessary revision in the future.}, } @article {pmid32407252, year = {2021}, author = {Bakhshi Ganje, M and Mackay, J and Nicolaisen, M and Shams-Bakhsh, M}, title = {Comparative Genomics, Pangenome, and Phylogenomic Analyses of Brenneria spp., and Delineation of Brenneria izadpanahii sp. nov.}, journal = {Phytopathology}, volume = {111}, number = {1}, pages = {78-95}, doi = {10.1094/PHYTO-04-20-0129-FI}, pmid = {32407252}, issn = {0031-949X}, mesh = {DNA, Bacterial ; *Enterobacteriaceae/genetics ; Genomics ; Iran ; Phylogeny ; *Plant Diseases ; United Kingdom ; }, abstract = {Brenneria species are bacterial plant pathogens mainly affecting woody plants. Association of all members with devastating disorders (e.g., acute oak decline in Iran and United Kingdom) are due to adaptation and pathogenic behavior in response to host and environmental factors. Some species, including B. goodwinii, B. salicis, and B. nigrifluens, also show endophytic residence. Here we show that all species including novel Brenneria sp. are closely related. Gene-based and genome/pangenome-based phylogeny divide the genus into two distinct lineages, Brenneria clades A and B. The two clades were functionally distinct and were consistent with their common and special potential activities as determined via annotation of functional domains. 
Pangenome analysis demonstrated that the core pathogenicity factors were highly conserved, an hrp gene cluster encoding a type III secretion system was found in all species except B. corticis. An extensive repertoire of candidate virulence factors was identified. Comparative genomics indicated a repertoire of plant cell wall degrading enzymes, metabolites/antibiotics, and numerous prophages providing new insights into Brenneria-host interactions and appropriate targets for further characterization. This work not only documented the genetic differentiation of Brenneria species but also delineates a more functionally driven understanding of Brenneria by comparison with relevant Pectobacteriaceae thereby substantially enriching the extent of information available for functional genomic investigations.}, } @article {pmid32403359, year = {2020}, author = {Wang, M and Zhu, H and Kong, Z and Li, T and Ma, L and Liu, D and Shen, Q}, title = {Pan-Genome Analyses of Geobacillus spp. Reveal Genetic Characteristics and Composting Potential.}, journal = {International journal of molecular sciences}, volume = {21}, number = {9}, pages = {}, pmid = {32403359}, issn = {1422-0067}, support = {2018YFD0500201//National Key R&D Program of China/ ; 31972513//National Natural Science Foundation of China/ ; BK20150059//Jiangsu Province Natural Sciences Fund Subsidization Project/ ; KYZ201716//Fundamental Research Funds for the Central Universities/ ; }, mesh = {Agriculture/methods ; Biotechnology/methods ; Composting/*methods ; Evolution, Molecular ; *Gene Transfer, Horizontal ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; Geobacillus/classification/*genetics ; Phylogeny ; Species Specificity ; }, abstract = {The genus Geobacillus is abundant in ecological diversity and is also well-known as an authoritative source for producing various thermostable enzymes. 
Although it is clear now that Geobacillus evolved from Bacillus, relatively little knowledge has been obtained regarding its evolutionary mechanism, which might also contribute to its ecological diversity and biotechnology potential. Here, a statistical comparison of thirty-two Geobacillus genomes was performed with a specific focus on pan- and core genomes. The pan-genome of this set of Geobacillus strains contained 14,913 genes, and the core genome contained 940 genes. The Clusters of Orthologous Groups (COG) and Carbohydrate-Active Enzymes (CAZymes) analysis revealed that the Geobacillus strains had huge potential industrial application in composting for agricultural waste management. Detailed comparative analyses showed that basic functional classes and housekeeping genes were conserved in the core genome, while genes associated with environmental interaction or energy metabolism were more enriched in the pan-genome. Therefore, the evolution of Geobacillus seems to be guided by environmental parameters. In addition, horizontal gene transfer (HGT) events among different Geobacillus species were detected. Altogether, pan-genome analysis was a useful method for detecting the evolutionary mechanism, and Geobacillus' evolution was directed by the environment and HGT events.}, } @article {pmid32397147, year = {2020}, author = {Brunt, J and van Vliet, AHM and Stringer, SC and Carter, AT and Lindström, M and Peck, MW}, title = {Pan-Genomic Analysis of Clostridium botulinum Group II (Non-Proteolytic C. 
botulinum) Associated with Foodborne Botulism and Isolated from the Environment.}, journal = {Toxins}, volume = {12}, number = {5}, pages = {}, pmid = {32397147}, issn = {2072-6651}, support = {BB/J004529/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Botulinum Toxins/*genetics ; Botulism/epidemiology/*microbiology/prevention & control/transmission ; Clostridium botulinum/classification/*genetics/isolation & purification/pathogenicity ; *Evolution, Molecular ; Genome, Bacterial ; Genotype ; Neurotoxins/*genetics ; Phenotype ; Phylogeny ; *Polymorphism, Single Nucleotide ; Whole Genome Sequencing ; }, abstract = {The neurotoxin formed by Clostridium botulinum Group II is a major cause of foodborne botulism, a deadly intoxication. This study aims to understand the genetic diversity and spread of C. botulinum Group II strains and their neurotoxin genes. A comparative genomic study has been conducted with 208 highly diverse C. botulinum Group II strains (180 newly sequenced strains isolated from 16 countries over 80 years, 28 sequences from Genbank). Strains possessed a single type B, E, or F neurotoxin gene or were closely related strains with no neurotoxin gene. Botulinum neurotoxin subtype variants (including novel variants) with a unique amino acid sequence were identified. Core genome single-nucleotide polymorphism (SNP) analysis identified two major lineages-one with type E strains, and the second dominated by subtype B4 strains with subtype F6 strains. This study revealed novel details of population structure/diversity and established relationships between whole-genome lineage, botulinum neurotoxin subtype variant, association with foodborne botulism, epidemiology, and geographical source. Additionally, the genome sequences represent a valuable resource for the research community (e.g., understanding evolution of C. botulinum and its neurotoxin genes, dissecting key aspects of C. botulinum Group II biology). 
This may contribute to improved risk assessments and the prevention of foodborne botulism.}, } @article {pmid32393168, year = {2020}, author = {Chibani, CM and Roth, O and Liesegang, H and Wendling, CC}, title = {Genomic variation among closely related Vibrio alginolyticus strains is located on mobile genetic elements.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {354}, pmid = {32393168}, issn = {1471-2164}, support = {WE 5822/ 1-1//Deutsche Forschungsgemeinschaft/ ; WE5822/1-2//Deutsche Forschungsgemeinschaft/ ; RO462/4-2//Deutsche Forschungsgemeinschaft/ ; na//KAAD/ ; }, mesh = {Drug Resistance/genetics ; Evolution, Molecular ; Gene Transfer, Horizontal ; *Genetic Variation ; *Genome, Bacterial ; Genomic Islands ; Phylogeny ; Vibrio alginolyticus/classification/*genetics/isolation & purification/pathogenicity ; Virulence/genetics ; }, abstract = {BACKGROUND: Species of the genus Vibrio, one of the most diverse bacteria genera, have undergone niche adaptation followed by clonal expansion. Niche adaptation and ultimately the formation of ecotypes and speciation in this genus has been suggested to be mainly driven by horizontal gene transfer (HGT) through mobile genetic elements (MGEs). Our knowledge about the diversity and distribution of Vibrio MGEs is heavily biased towards human pathogens and our understanding of the distribution of core genomic signatures and accessory genes encoded on MGEs within specific Vibrio clades is still incomplete. We used nine different strains of the marine bacterium Vibrio alginolyticus isolated from pipefish in the Kiel-Fjord to perform a multiscale-comparative genomic approach that allowed us to investigate [1] those genomic signatures that characterize a habitat-specific ecotype and [2] the source of genomic variation within this ecotype.

RESULTS: We found that the nine isolates from the Kiel-Fjord have a closed-pangenome and did not differ based on core-genomic signatures. Unique genomic regions and a unique repertoire of MGEs within the Kiel-Fjord isolates suggest that the acquisition of gene-blocks by HGT played an important role in the evolution of this ecotype. Additionally, we found that ~ 90% of the genomic variation among the nine isolates is encoded on MGEs, which supports ongoing theory that accessory genes are predominately located on MGEs and shared by HGT. Lastly, we could show that these nine isolates share a unique virulence and resistance profile which clearly separates them from all other investigated V. alginolyticus strains and suggests that these are habitat-specific genes, required for a successful colonization of the pipefish, the niche of this ecotype.

CONCLUSION: We conclude that all nine V. alginolyticus strains from the Kiel-Fjord belong to a unique ecotype, which we named the Kiel-alginolyticus ecotype. The low sequence variation of the core-genome in combination with the presence of MGE-encoded relevant traits, as well as the presence of a suitable niche (here the pipefish), suggest that this ecotype might have evolved from a clonal expansion following HGT-driven niche-adaptation.}, } @article {pmid32386604, year = {2020}, author = {Molina, L and Segura, A and Duque, E and Ramos, JL}, title = {The versatility of Pseudomonas putida in the rhizosphere environment.}, journal = {Advances in applied microbiology}, volume = {110}, number = {}, pages = {149-180}, doi = {10.1016/bs.aambs.2019.12.002}, pmid = {32386604}, issn = {0065-2164}, mesh = {Bacterial Proteins/genetics/metabolism ; Biodegradation, Environmental ; Biofilms/growth & development ; Chemotaxis ; Plant Development ; Plants/microbiology ; Pseudomonas putida/genetics/growth & development/metabolism/*physiology ; *Rhizosphere ; Soil Microbiology ; Symbiosis ; }, abstract = {This article addresses the lifestyle of Pseudomonas and focuses on how Pseudomonas putida can be used as a model system for biotechnological processes in agriculture, and in the removal of pollutants from soils. In this chapter we aim to show how a deep analysis using genetic information and experimental tests has helped to reveal insights into the lifestyle of Pseudomonads. Pseudomonas putida is a Plant Growth Promoting Rhizobacteria (PGPR) that establishes commensal relationships with plants. The interaction involves a series of functions encoded by core genes which favor nutrient mobilization, prevention of pathogen development and efficient niche colonization. 
Certain Pseudomonas putida strains harbor accessory genes that confer specific biodegradative properties and because these microorganisms can thrive on the roots of plants they can be exploited to remove pollutants via rhizoremediation, making the consortium plant/Pseudomonas a useful tool to combat pollution.}, } @article {pmid32381322, year = {2020}, author = {Kim, YB and Kim, JY and Song, HS and Lee, SH and Shin, NR and Bae, JW and Myoung, J and Lee, KE and Cha, IT and Rhee, JK and Roh, SW}, title = {Haloplanus rubicundus sp. nov., an extremely halophilic archaeon isolated from solar salt.}, journal = {Systematic and applied microbiology}, volume = {43}, number = {3}, pages = {126085}, doi = {10.1016/j.syapm.2020.126085}, pmid = {32381322}, issn = {1618-0984}, mesh = {Bacterial Typing Techniques ; Base Composition ; Gene Library ; Genome, Archaeal ; Genomics/methods ; Halobacteriaceae/*classification/*genetics/isolation & purification ; Phenotype ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Two extremely halophilic archaea strains, CBA1112[T] and CBA1113, were isolated from solar salt in Korea. The genome sizes and G+C content of CBA1112[T] and CBA1113 were 3.77 and 3.53Mb, and 66.0 and 66.5mol%, respectively. Phylogenetic analysis based on closely related taxa and environmental Haloplanus sequences indicated that both CBA1112[T] and CBA1113 strains are grouped within the genus Haloplanus. OrthoANI and in silico DNA-DNA hybridization values were below the species delineation threshold. Pan-genomic analysis showed that the two novel strains and four reference strains had 6203 pan-orthologous groups in total. Six Haloplanus strains shared 1728 core pan-genome orthologous groups, which were mainly associated with amino acid transport and metabolism and translation, ribosomal structure and biogenesis categories, and amino acid metabolism and carbohydrate metabolism related categories. 
The novel strain-specific pan-genome orthologous groups were mainly involved with replication, recombination and repair category and replication and repair pathway or amino acid metabolism pathway. Cells of both strains were Gram-negative and pleomorphic, and colonies were red-pigmented. The major polar lipids of both strains were phosphatidylglycerol, phosphatidylglycerol phosphate methyl ester, phosphatidylglycerol sulfate, and one glycolipid, sulfated mannosyl glucosyl diether. Based on genomic, phylogenetic, phenotypic, and chemotaxonomic features, strains CBA1112[T] and CBA1113 are described as novel species of the genus Haloplanus. Thus, we propose the name Haloplanus rubicundus sp. nov. The type strain is CBA1112[T] (=KCCM 43224[T]=JCM 30475[T]).}, } @article {pmid32375991, year = {2020}, author = {Gladstone, RA and Lo, SW and Goater, R and Yeats, C and Taylor, B and Hadfield, J and Lees, JA and Croucher, NJ and van Tonder, AJ and Bentley, LJ and Quah, FX and Blaschke, AJ and Pershing, NL and Byington, CL and Balaji, V and Hryniewicz, W and Sigauque, B and Ravikumar, KL and Almeida, SCG and Ochoa, TJ and Ho, PL and du Plessis, M and Ndlangisa, KM and Cornick, JE and Kwambana-Adams, B and Benisty, R and Nzenze, SA and Madhi, SA and Hawkins, PA and Pollard, AJ and Everett, DB and Antonio, M and Dagan, R and Klugman, KP and von Gottberg, A and Metcalf, BJ and Li, Y and Beall, BW and McGee, L and Breiman, RF and Aanensen, DM and Bentley, SD}, title = {Visualizing variation within Global Pneumococcal Sequence Clusters (GPSCs) and country population snapshots to contextualize pneumococcal isolates.}, journal = {Microbial genomics}, volume = {6}, number = {5}, pages = {}, pmid = {32375991}, issn = {2057-5858}, support = {T32 AI055434/AI/NIAID NIH HHS/United States ; UL1 TR002538/TR/NCATS NIH HHS/United States ; 206194/WT_/Wellcome Trust/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; 098051/WT_/Wellcome Trust/United Kingdom ; }, 
mesh = {*DNA Transposable Elements ; Databases, Genetic ; Drug Resistance, Bacterial ; Evolution, Molecular ; High-Throughput Nucleotide Sequencing ; Phylogeny ; Phylogeography ; Poland ; Polysaccharides, Bacterial/*genetics ; Sequence Analysis, DNA/*methods ; Serogroup ; South Africa ; Streptococcus pneumoniae/*classification/genetics/isolation & purification ; Utah ; }, abstract = {Knowledge of pneumococcal lineages, their geographic distribution and antibiotic resistance patterns, can give insights into global pneumococcal disease. We provide interactive bioinformatic outputs to explore such topics, aiming to increase dissemination of genomic insights to the wider community, without the need for specialist training. We prepared 12 country-specific phylogenetic snapshots, and international phylogenetic snapshots of 73 common Global Pneumococcal Sequence Clusters (GPSCs) previously defined using PopPUNK, and present them in Microreact. Gene presence and absence defined using Roary, and recombination profiles derived from Gubbins are presented in Phandango for each GPSC. Temporal phylogenetic signal was assessed for each GPSC using BactDating. We provide examples of how such resources can be used. In our example use of a country-specific phylogenetic snapshot we determined that serotype 14 was observed in nine unrelated genetic backgrounds in South Africa. The international phylogenetic snapshot of GPSC9, in which most serotype 14 isolates from South Africa were observed, highlights that there were three independent sub-clusters represented by South African serotype 14 isolates. We estimated from the GPSC9-dated tree that the sub-clusters were each established in South Africa during the 1980s. We show how recombination plots allowed the identification of a 20 kb recombination spanning the capsular polysaccharide locus within GPSC97. This was consistent with a switch from serotype 6A to 19A estimated to have occurred in the 1990s from the GPSC97-dated tree. 
Plots of gene presence/absence of resistance genes (tet, erm, cat) across the GPSC23 phylogeny were consistent with acquisition of a composite transposon. We estimated from the GPSC23-dated tree that the acquisition occurred between 1953 and 1975. Finally, we demonstrate the assignment of GPSC31 to 17 externally generated pneumococcal serotype 1 assemblies from Utah via Pathogenwatch. Most of the Utah isolates clustered within GPSC31 in a USA-specific clade with the most recent common ancestor estimated between 1958 and 1981. The resources we have provided can be used to explore data, test hypotheses and generate new hypotheses. The accessible assignment of GPSCs allows others to contextualize their own collections beyond the data presented here.}, } @article {pmid32375781, year = {2020}, author = {Bu, QT and Li, YP and Xie, H and Wang, J and Li, ZY and Chen, XA and Mao, XM and Li, YQ}, title = {Comprehensive dissection of dispensable genomic regions in Streptomyces based on comparative analysis approach.}, journal = {Microbial cell factories}, volume = {19}, number = {1}, pages = {99}, pmid = {32375781}, issn = {1475-2859}, support = {2019YFA09005400//National Key Research and Development Program/ ; 31520103901//National Natural Science Foundation of China/ ; 3173002//National Natural Science Foundation of China/ ; }, mesh = {Bacterial Proteins/genetics ; *Genome, Bacterial ; Genomics/*methods ; Multigene Family ; Phylogeny ; Sequence Deletion ; Streptomyces/*genetics ; }, abstract = {BACKGROUND: Large-scale genome reduction has been performed to significantly improve the performance of microbial chassis. Identification of the essential or dispensable genes is pivotal for genome reduction to avoid synthetic lethality. Here, taking Streptomyces as an example, we developed a combinatorial strategy for systematic identification of large and dispensable genomic regions in Streptomyces based on multi-omics approaches.

RESULTS: Phylogenetic tree analysis revealed that the model strains including S. coelicolor A3(2), S. albus J1074 and S. avermitilis MA-4680 were the preferred references for comparative analysis of candidate genomes. Multiple genome alignment suggested that the Streptomyces genomes embodied a highly conserved core region and variable sub-telomeric regions, and may present a symmetric or asymmetric structure. Pan-genome and functional genome analyses showed that most conserved genes responsible for the fundamental functions of cell viability were concentrated in the core region and the vast majority of abundant genes were dispersed in the sub-telomeric regions. These results suggested that large-scale deletion can be performed in sub-telomeric regions to greatly streamline the Streptomyces genomes for developing versatile chassis.

CONCLUSIONS: The integrative approach of comparative genomics, functional genomics and pan-genomics can not only be applied to perform a multi-tiered dissection for Streptomyces genomes, but also work as a universal method for systematic analysis of removable regions in other microbial hosts in order to generate more miscellaneous and versatile chassis with minimized genome for drug discovery.}, } @article {pmid32375367, year = {2020}, author = {Maghembe, R and Damian, D and Makaranga, A and Nyandoro, SS and Lyantagaye, SL and Kusari, S and Hatti-Kaul, R}, title = {Omics for Bioprospecting and Drug Discovery from Bacteria and Microalgae.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {9}, number = {5}, pages = {}, pmid = {32375367}, issn = {2079-6382}, support = {IMB-2015/2020//Styrelsen för Internationellt Utvecklingssamarbete/ ; }, abstract = {"Omics" represent a combinatorial approach to high-throughput analysis of biological entities for various purposes. It broadly encompasses genomics, transcriptomics, proteomics, lipidomics, and metabolomics. Bacteria and microalgae exhibit a wide range of genetic, biochemical and concomitantly, physiological variations owing to their exposure to biotic and abiotic dynamics in their ecosystem conditions. Consequently, optimal conditions for adequate growth and production of useful bacterial or microalgal metabolites are critically unpredictable. Traditional methods employ microbe isolation and 'blind'-culture optimization with numerous chemical analyses making the bioprospecting process laborious, strenuous, and costly. Advances in the next generation sequencing (NGS) technologies have offered a platform for the pan-genomic analysis of microbes from community and strain downstream to the gene level. Changing conditions in nature or laboratory accompany epigenetic modulation, variation in gene expression, and subsequent biochemical profiles defining an organism's inherent metabolic repertoire. 
Proteome and metabolome analysis could further our understanding of the molecular and biochemical attributes of the microbes under research. This review provides an overview of recent studies that have employed omics as a robust, broad-spectrum approach for screening bacteria and microalgae to exploit their potential as sources of drug leads by focusing on their genomes, secondary metabolite biosynthetic pathway genes, transcriptomes, and metabolomes. We also highlight how recent studies have combined molecular biology with analytical chemistry methods, which further underscore the need for advances in bioinformatics and chemoinformatics as vital instruments in the discovery of novel bacterial and microalgal strains as well as new drug leads.}, } @article {pmid32373862, year = {2020}, author = {Zwarycz, AS and Livingstone, PG and Whitworth, DE}, title = {Within-species variation in OMV cargo proteins: the Myxococcus xanthus OMV pan-proteome.}, journal = {Molecular omics}, volume = {16}, number = {4}, pages = {387-397}, doi = {10.1039/d0mo00027b}, pmid = {32373862}, issn = {2515-4184}, mesh = {Bacterial Outer Membrane Proteins/genetics/metabolism ; Bacterial Proteins/genetics/*metabolism ; Chromatography, Liquid ; Extracellular Vesicles/*metabolism ; Genome, Bacterial ; Genomics/methods ; Myxococcus xanthus/genetics/*metabolism ; *Proteome ; *Proteomics/methods ; Tandem Mass Spectrometry ; }, abstract = {Extracellular membrane vesicles are produced by all domains of life (bacteria, archaea and eukaryotes). Bacterial extracellular vesicles (outer membrane vesicles or OMVs) are produced by outer membrane blebbing, and contain proteins, nucleic acids, virulence factors, lipids and metabolites. OMV functions depend on their internal composition, therefore understanding the proteome of OMVs, and how it varies between organisms, is imperative. 
Here, we report a comparative proteomic profiling of OMVs from strains of Myxococcus xanthus, a predatory species of Gram-negative myxobacteria whose secretions include secondary metabolites and hydrolytic enzymes, thought to be involved in prey lysis. Ten strains were chosen for study, of which seven had genome sequences available. The remaining three strains were genome sequenced allowing definition of the core and accessory genes and genome-derived proteins found within the pan-genome and pan-proteome respectively. OMVs were isolated from each strain and proteins identified using mass spectrometry. The M. xanthus OMV pan-proteome was found to contain tens of 'core' and hundreds of 'accessory' proteins. Properties of the OMV pan-proteome were compared with those of the pan-proteome deduced from the M. xanthus pan-genome. On average, 80% of 'core' OMV proteins are encoded by genes of the core genome, yet the OMV proteomes of individual strains contain subsets of core genome-derived proteins which only partially overlap. In addition, the distribution of characteristics of vesicle proteins does not correlate with the genome-derived proteome characteristic distribution. We hypothesize that M. 
xanthus cells package a personalized subset of proteins whose availability is only partially dictated by the presence/absence of encoding genes within the genome.}, } @article {pmid32373098, year = {2020}, author = {Garcia-Gutierrez, E and Walsh, CJ and Sayavedra, L and Diaz-Calvo, T and Thapa, D and Ruas-Madiedo, P and Mayer, MJ and Cotter, PD and Narbad, A}, title = {Genotypic and Phenotypic Characterization of Fecal Staphylococcus epidermidis Isolates Suggests Plasticity to Adapt to Different Human Body Sites.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {688}, pmid = {32373098}, issn = {1664-302X}, support = {BBS/E/F/00042241/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/00044453/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/000PR10356/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/OS/NW/000006/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Staphylococcus epidermidis is a commensal species that has been increasingly identified as a nosocomial agent. Despite the interest, little is known about the ability of S. epidermidis isolates to adapt to different ecological niches through comparisons at genotype or phenotype levels. One niche where S. epidermidis has been reported is the human gut. Here, we present three S. epidermidis strains isolated from feces and show that they are not phylogenetically distinct from S. epidermidis isolated from other human body sites. Both gut and skin strains harbored multiple genes associated with biofilm formation and showed similar levels of biofilm formation on abiotic surfaces. High-throughput physiological tests using the BIOLOG technology showed no major metabolic differences between isolates from stool, skin, or cheese, while an isolate from bovine mastitis showed more phenotypic variation. 
Gut and skin isolates showed the ability to metabolize glycine-conjugated bile acids and to grow in the presence of bile, but the gut isolates exhibited faster anaerobic growth compared to isolates of skin origin.}, } @article {pmid32368999, year = {2020}, author = {Dangel, A and Berger, A and Rau, J and Eisenberg, T and Kämpfer, P and Margos, G and Contzen, M and Busse, HJ and Konrad, R and Peters, M and Sting, R and Sing, A}, title = {Corynebacterium silvaticum sp. nov., a unique group of NTTB corynebacteria in wild boar and roe deer.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {6}, pages = {3614-3624}, doi = {10.1099/ijsem.0.004195}, pmid = {32368999}, issn = {1466-5034}, mesh = {Abscess/*microbiology ; Animals ; Bacterial Typing Techniques ; Base Composition ; Corynebacterium/*classification/isolation & purification ; DNA, Bacterial/genetics ; Deer/*microbiology ; Fatty Acids/chemistry ; Germany ; Glycolipids/chemistry ; Lymph Nodes/*microbiology/pathology ; Multilocus Sequence Typing ; Phospholipids/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Sus scrofa/*microbiology ; Swine ; Vitamin K 2/analogs & derivatives/chemistry ; Whole Genome Sequencing ; }, abstract = {A total of 34 Corynebacterium sp. strains were isolated from caseous lymph node abscesses of wild boar and roe deer in different regions of Germany. They showed slow growth on Columbia sheep blood agar and sparse growth on Hoyle's tellurite agar. Cellular fatty acid analysis allocated them in the C. diphtheriae group of genus Corynebacterium. MALDI-TOF MS using specific database extensions and rpoB sequencing resulted in classification as C. ulcerans. Their quinone system is similar to C. ulcerans, with major menaquinone MK-8(H2). 
Their complex polar lipid profile includes major lipids phosphatidylinositol, phosphatidylinositol-mannoside, diphosphatidylglycerol, but also unidentified glycolipids, distinguishing them clearly from C. ulcerans. They ferment glucose, ribose and maltose (like C. ulcerans), but do not utilise d-xylose, mannitol, lactose, sucrose and glycogen (like C. pseudotuberculosis). They showed activity of catalase, urease and phospholipase D, but variable results for alkaline phosphatase and alpha-glucosidase. All were non-toxigenic, tox gene bearing and susceptible to clindamycin, penicillin and erythromycin. In 16S rRNA gene and RpoB protein phylogenies the strains formed distinct branches with C. ulcerans as nearest relative. Whole genome sequencing revealed the unique sequence type 578, a distinct branch in pangenomic core genome MLST, average nucleotide identities <91%, enhanced genome sizes (2.55 Mbp) and G/C content (54.4 mol%) compared to related species. These results suggest that the strains represent a novel species, for which we propose the name Corynebacterium silvaticum sp. nov., based on their first isolation from forest-dwelling game animals. The type strain is KL0182[T] (= CVUAS 4292[T] = DSM 109166[T] = LMG 31313[T] = CIP 111 672[T]).}, } @article {pmid32363038, year = {2020}, author = {Zhang, Y and Wang, J and Yajun, C and Zhou, M and Wang, W and Geng, M and Xu, D and Xu, Z}, title = {Comparative Genomics Uncovers the Genetic Diversity and Synthetic Biology of Secondary Metabolite Production of Trametes.}, journal = {Mycobiology}, volume = {48}, number = {2}, pages = {104-114}, pmid = {32363038}, issn = {1229-8093}, abstract = {The carbohydrate-active enzyme (CAZyme) genes of Trametes contribute to polysaccharide degradation. However, the comprehensive analysis of the composition of CAZymes and the biosynthetic gene clusters (BGCs) of Trametes remain unclear. 
Here, we conducted comparative analysis, detected the CAZyme genes, and predicted the BGCs for nine Trametes strains. Among the 82,053 homologous clusters obtained for Trametes, we identified 8518 core genes, 60,441 accessory genes, and 13,094 specific genes. A large proportion of CAZyme genes were cataloged into glycoside hydrolases, glycosyltransferases, and carbohydrate esterases. The predicted BGCs of Trametes were divided into six strategies, and the nine Trametes strains harbored 47.78 BGCs on average. Our study revealed that Trametes exhibits an open pan-genome structure. These findings provide insights into the genetic diversity and explored the synthetic biology of secondary metabolite production for Trametes.}, } @article {pmid32360114, year = {2020}, author = {Farin, W and Oñate, FP and Plassais, J and Bonny, C and Beglinger, C and Woelnerhanssen, B and Nocca, D and Magoules, F and Le Chatelier, E and Pons, N and Cervino, ACL and Ehrlich, SD}, title = {Impact of laparoscopic Roux-en-Y gastric bypass and sleeve gastrectomy on gut microbiota: a metagenomic comparative analysis.}, journal = {Surgery for obesity and related diseases : official journal of the American Society for Bariatric Surgery}, volume = {16}, number = {7}, pages = {852-862}, doi = {10.1016/j.soard.2020.03.014}, pmid = {32360114}, issn = {1878-7533}, mesh = {France ; Gastrectomy ; *Gastric Bypass ; *Gastrointestinal Microbiome ; Humans ; *Laparoscopy ; *Obesity, Morbid/surgery ; Switzerland ; }, abstract = {BACKGROUND: Bariatric surgery is an effective therapeutic procedure for morbidly obese patients. The 2 most common interventions are sleeve gastrectomy (SG) and laparoscopic Roux-en-Y gastric bypass (LRYGB).

OBJECTIVES: The aim of this study was to compare the long-term microbiome after SG and LRYGB surgery in obese patients.

SETTING: University Hospital, France; University Hospital, United States; and University Hospital, Switzerland.

METHODS: Eighty-nine and 108 patients who underwent SG and LRYGB, respectively, were recruited. Stools were collected before and 6 months after surgery. Microbial DNA was analyzed with shotgun metagenomic sequencing (SOLiD 5500 xl Wildfire). MSPminer, a novel innovative tool to characterize new in silico biological entities, was used to identify 715 Metagenomic Species Pan-genomes. One hundred forty-eight functional modules were analyzed using GOmixer and KEGG database.

RESULTS: Both interventions resulted in a similar increase of Shannon's diversity index and gene richness of gut microbiota, in parallel with weight loss, but the changes of microbial composition were different. LRYGB led to higher relative abundance of aero-tolerant bacteria, such as Escherichia coli and buccal species, such as Streptococcus and Veillonella spp. In contrast, anaerobes, such as Clostridium, were more abundant after SG, suggesting better conservation of anaerobic conditions in the gut. Enrichment of Akkermansia muciniphila was also observed after both surgeries. Function-level changes included higher potential for bacterial use of supplements, such as vitamin B12, B1, and iron upon LRYGB.

CONCLUSION: Microbiota changes after bariatric surgery depend on the nature of the intervention. LRYGB induces greater taxonomic and functional changes in gut microbiota than SG. Possible long-term health consequences of these alterations remain to be established.}, } @article {pmid32354325, year = {2020}, author = {Li, J and Gu, T and Li, L and Wu, X and Shen, L and Yu, R and Liu, Y and Qiu, G and Zeng, W}, title = {Complete genome sequencing and comparative genomic analyses of Bacillus sp. S3, a novel hyper Sb(III)-oxidizing bacterium.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {106}, pmid = {32354325}, issn = {1471-2180}, support = {2019JJ40361//Natural Science Foundation of Hunan Province of China/International ; 31470230, 51320105006, 51604308//National Natural Science Foundation of China/International ; 2018WK2012//Key Research and Development Projects in Hunan Province/International ; 2019zzts687//Fundamental Research Funds for the Central University of Central South University/International ; 2017RS3003//Youth Talent Foundation of Hunan Province of China/International ; }, mesh = {Antimony/*metabolism ; Bacillus/*genetics/metabolism ; Base Composition ; Biodegradation, Environmental ; Chromosomes, Bacterial/genetics ; Evolution, Molecular ; Genome Size ; Genome, Bacterial ; Genomics ; Molecular Sequence Annotation ; Phylogeny ; Plasmids/genetics ; Whole Genome Sequencing/*methods ; }, abstract = {BACKGROUND: Antimonite [Sb(III)]-oxidizing bacterium has great potential in the environmental bioremediation of Sb-polluted sites. Bacillus sp. S3 that was previously isolated from antimony-contaminated soil displayed high Sb(III) resistance and Sb(III) oxidation efficiency. However, the genomic information and evolutionary feature of Bacillus sp. S3 are very scarce.

RESULTS: Here, we identified a 5,436,472 bp chromosome with 40.30% GC content and a 241,339 bp plasmid with 36.74% GC content in the complete genome of Bacillus sp. S3. Genomic annotation showed that Bacillus sp. S3 contained a key aioB gene potentially encoding As(III)/Sb(III) oxidase, which was not shared with other Bacillus strains. Furthermore, a wide variety of genes associated with Sb(III) and other heavy metal(loid)s were also ascertained in Bacillus sp. S3, reflecting its adaptive advantage for growth in the harsh eco-environment. Based on the analysis of phylogenetic relationship and the average nucleotide identities (ANI), Bacillus sp. S3 was proved to be a novel species within the Bacillus genus. The majority of mobile genetic elements (MGEs) were mainly distributed on chromosomes within the Bacillus genus. Pan-genome analysis showed that the 45 genomes contained 554 core genes and many unique genes were dissected in analyzed genomes. Whole genomic alignment showed that the Bacillus genus frequently underwent large-scale evolutionary events. In addition, the origin and evolution analysis of Sb(III)-resistance genes revealed the evolutionary relationships and horizontal gene transfer (HGT) events among the Bacillus genus. The assessment of functionality of heavy metal(loid)s resistance genes emphasized its indispensable role in the harsh eco-environment of Bacillus genus. Real-time quantitative PCR (RT-qPCR) analysis indicated that Sb(III)-related genes were all induced under the Sb(III) stress, while arsC gene was down-regulated.

CONCLUSIONS: The results in this study shed light on the molecular mechanisms of Bacillus sp. S3 coping with Sb(III), extended our understanding on the evolutionary relationships between Bacillus sp. S3 and other closely related species, and further enriched the Sb(III) resistance genetic data sources.}, } @article {pmid32336352, year = {2020}, author = {Kim, E and Yang, SM and Cho, EJ and Kim, HY}, title = {Novel real-time PCR assay for Lactobacillus casei group species using comparative genomics.}, journal = {Food microbiology}, volume = {90}, number = {}, pages = {103485}, doi = {10.1016/j.fm.2020.103485}, pmid = {32336352}, issn = {1095-9998}, mesh = {DNA Primers/genetics ; DNA, Bacterial/*genetics ; Genomics/*methods ; Lacticaseibacillus casei/classification/*genetics ; Probiotics ; Real-Time Polymerase Chain Reaction/*methods ; Sequence Analysis, DNA ; }, abstract = {The Lactobacillus casei group, which includes the closely related species L. casei, L. paracasei, L. rhamnosus, and L. chiayiensis, has been under debate regarding its taxonomy because of the difficulty in distinguishing the species from each other. In the present study, we developed a novel real-time PCR assay for distinguishing the L. casei group species. The pan-genome, as determined by the genomes of 44 strains, comprised 6789 genes, comparative genomic analysis showed that L. casei group strains were classified by species. Based on these results, species-specific genes were identified, and primers were designed from those genes. Real-time PCR clearly distinguished each species of the L. casei group and specifically amplified only to the target species. The method was applied to 29 probiotic products, and the detected results and label claims were compared. Total 23 products were in accordance with the label claims, and the remaining products contained species different from those stated in the label claims. Our method can rapidly and accurately distinguish the L. 
casei group species in a single reaction. Hence, our assay can be applied to identify L. casei group species from food or environmental samples and to accurately determine the nomenclature of the species.}, } @article {pmid32331872, year = {2020}, author = {Bickhart, DM and McClure, JC and Schnabel, RD and Rosen, BD and Medrano, JF and Smith, TPL}, title = {Symposium review: Advances in sequencing technology herald a new frontier in cattle genomics and genome-enabled selection.}, journal = {Journal of dairy science}, volume = {103}, number = {6}, pages = {5278-5290}, doi = {10.3168/jds.2019-17693}, pmid = {32331872}, issn = {1525-3198}, mesh = {Animals ; Cattle/*genetics ; *Genome ; Genomics/*instrumentation ; *Selection, Genetic ; Sequence Analysis, DNA/*veterinary ; }, abstract = {The cattle reference genome assembly has underpinned major innovations in beef and dairy genetics through genome-enabled selection, including removal of deleterious recessive variants and selection for favorable alleles affecting quantitative production traits. The initial reference assemblies, up to and including UMD3.1 and Btau4.1, were based on a combination of clone-by-clone sequencing of bacterial artificial chromosome clones generated from blood DNA of a Hereford bull and whole-genome shotgun sequencing of blood DNA from his inbred daughter/granddaughter named L1 Dominette 01449 (Dominette). The approach introduced assembly gaps, misassemblies, and errors, and it limited the ability to assemble regions that undergo rearrangement in blood cells, such as immune gene clusters. Nonetheless, the reference supported the creation of genotyping tools and provided a basis for many studies of gene expression. Recently, long-read sequencing technologies have emerged that facilitated a re-assembly of the reference genome, using lung tissue from Dominette to resolve many of the problems and providing a bridge to place historical studies in common context. 
The new reference, ARS-UCD1.2, successfully assembled germline immune gene clusters and improved overall continuity (i.e., reduction of gaps and inversions) by over 250-fold. This reference properly places nearly all of the legacy genetic markers used for over a decade in the industry. In this review, we discuss the improvements made to the cattle reference; remaining issues present in the assembly; tools developed to support genome-based studies in beef and dairy cattle; and the emergence of newer genome assembly methods that are producing even higher-quality assemblies for other breeds of cattle at a fraction of the cost. The new frontier for cattle genomics research will likely include a transition from the individual Hereford reference genome, to a "pan-genome" reference, representing all the DNA segments existing in commonly used cattle breeds, bringing the cattle reference into line with the current direction of human genome research.}, } @article {pmid32320376, year = {2020}, author = {Guillier, L and Gourmelon, M and Lozach, S and Cadel-Six, S and Vignaud, ML and Munck, N and Hald, T and Palma, F}, title = {AB_SA: Accessory genes-Based Source Attribution - tracing the source of Salmonella enterica Typhimurium environmental strains.}, journal = {Microbial genomics}, volume = {6}, number = {7}, pages = {}, pmid = {32320376}, issn = {2057-5858}, mesh = {Animals ; Bacterial Proteins/*genetics ; Computational Biology/*methods ; Databases, Genetic ; Food Microbiology ; Livestock/*classification/microbiology ; Logistic Models ; Models, Theoretical ; Salmonella typhimurium/*classification/genetics ; User-Computer Interface ; }, abstract = {The partitioning of pathogenic strains isolated in environmental or human cases to their sources is challenging. The pathogens usually colonize multiple animal hosts, including livestock, which contaminate the food-production chain and the environment (e.g. 
soil and water), posing an additional public-health burden and major challenges in the identification of the source. Genomic data opens up new opportunities for the development of statistical models aiming to indicate the likely source of pathogen contamination. Here, we propose a computationally fast and efficient multinomial logistic regression source-attribution classifier to predict the animal source of bacterial isolates based on 'source-enriched' loci extracted from the accessory-genome profiles of a pangenomic dataset. Depending on the accuracy of the model's self-attribution step, the modeller selects the number of candidate accessory genes that best fit the model for calculating the likelihood of (source) category membership. The Accessory genes-Based Source Attribution (AB_SA) method was applied to a dataset of strains of Salmonella enterica Typhimurium and its monophasic variant (S. enterica 1,4,[5],12:i:-). The model was trained on 69 strains with known animal-source categories (i.e. poultry, ruminant and pig). The AB_SA method helped to identify 8 genes as predictors among the 2802 accessory genes. The self-attribution accuracy was 80 %. The AB_SA model was then able to classify 25 of the 29 S. enterica Typhimurium and S. enterica 1,4,[5],12:i:- isolates collected from the environment (considered to be of unknown source) into a specific category (i.e. animal source), with more than 85 % of probability. The AB_SA method herein described provides a user-friendly and valuable tool for performing source-attribution studies in only a few steps. 
AB_SA is written in R and freely available at https://github.com/lguillier/AB_SA.}, } @article {pmid32307574, year = {2020}, author = {Teixeira, P and Tacão, M and Baraúna, RA and Silva, A and Henriques, I}, title = {Genomic analysis of Chromobacterium haemolyticum: insights into the species resistome, virulence determinants and genome plasticity.}, journal = {Molecular genetics and genomics : MGG}, volume = {295}, number = {4}, pages = {1001-1012}, doi = {10.1007/s00438-020-01676-8}, pmid = {32307574}, issn = {1617-4623}, support = {CEECIND/01304/2017//Fundação para a Ciência e a Tecnologia/ ; SFRH/BD/132046/2017//Fundação para a Ciência e a Tecnologia/ ; UID/AMB/50017/2019//Fundação para a Ciência e a Tecnologia/ ; }, mesh = {Anti-Bacterial Agents/adverse effects/therapeutic use ; Bacterial Infections/drug therapy/*genetics/microbiology ; Chromobacterium/classification/drug effects/*genetics/pathogenicity ; Drug Resistance, Multiple, Bacterial/*genetics ; Genome, Bacterial/drug effects/genetics ; Genomics ; Humans ; Microbial Sensitivity Tests ; *Phylogeny ; Virulence/genetics ; }, abstract = {The increasing number of Chromobacterium haemolyticum human infection reports, especially in tropical regions and connected with environmental sources, resulted in an urge to better describe this species. This study aimed to characterize the C. haemolyticum resistome, virulence determinants and genetic platforms related with genome plasticity. A comparative genomic analysis was conducted between clinical C. haemolyticum genomes publicly available and the genome of an environmental isolate obtained in this study. The pangenome of C. haemolyticum was calculated and a total of 3378 core genes were predicted in its core genome, corresponding to 51.7% of the pangenome. Genetic determinants putatively encoding resistance to beta-lactams, fosfomycin, aminoglycosides and trimethoprim were predicted in all genomes, possibly constituting the intrinsic resistome of this species. 
In terms of resistance to beta-lactams, 4 genes were predicted encoding beta-lactamases of classes A, C and D. Moreover, the analysis of Chromobacterium genomes and C. haemolyticum environmental isolates reinforced the role of this genus as progenitor of the blaKPC gene. Putative virulence factors (VFs) were predicted in all genomes, related to adherence, toxins production, colonization and cell invasion. Secretion systems, including type III, were detected. A significant number of transposases and genomic islands were predicted in C. haemolyticum, in some cases above the average reported for Gram-negative bacterial genomes. We conclude that C. haemolyticum strains, including those of environmental origin, present a noteworthy collection of antibiotic resistance genes and VFs. Furthermore, sequences related to gene mobility and genome plasticity suggest high adaptability potential and a possible role as disseminator of antibiotic resistance.}, } @article {pmid32302403, year = {2020}, author = {Gounot, JS and Neuvéglise, C and Freel, KC and Devillers, H and Piškur, J and Friedrich, A and Schacherer, J}, title = {High Complexity and Degree of Genetic Variation in Brettanomyces bruxellensis Population.}, journal = {Genome biology and evolution}, volume = {12}, number = {6}, pages = {795-807}, pmid = {32302403}, issn = {1759-6653}, mesh = {Brettanomyces/*genetics ; *Genetic Variation ; *Genome, Fungal ; Loss of Heterozygosity ; Phylogeny ; *Ploidies ; Whole Genome Sequencing ; }, abstract = {Genome-wide characterization of genetic variants of a large population of individuals within the same species is essential to have a deeper insight into its evolutionary history as well as the genotype-phenotype relationship. Population genomic surveys have been performed in multiple yeast species, including the two model organisms, Saccharomyces cerevisiae and Schizosaccharomyces pombe. 
In this context, we sought to characterize at the population level the Brettanomyces bruxellensis yeast species, which is a major cause of wine spoilage and can contribute to the specific flavor profile of some Belgium beers. We have completely sequenced the genome of 53 B. bruxellensis strains isolated worldwide. The annotation of the reference genome allowed us to define the gene content of this species. As previously suggested, our genomic data clearly highlighted that genetic diversity variation is related to ploidy level, which is variable in the B. bruxellensis species. Genomes are punctuated by multiple loss-of-heterozygosity regions, whereas aneuploidies as well as segmental duplications are uncommon. Interestingly, triploid genomes are more prone to gene copy number variation than diploids. Finally, the pangenome of the species was reconstructed and was found to be small with few accessory genes compared with S. cerevisiae. The pangenome is composed of 5,409 ORFs (open reading frames) among which 5,106 core ORFs and 303 ORFs that are variable within the population. All these results highlight the different trajectories of species evolution and consequently the interest of establishing population genomic surveys in more populations.}, } @article {pmid32299360, year = {2020}, author = {Dziadkiewicz, P and Dojer, N}, title = {Getting insight into the pan-genome structure with PangTree.}, journal = {BMC genomics}, volume = {21}, number = {Suppl 2}, pages = {274}, pmid = {32299360}, issn = {1471-2164}, mesh = {Algorithms ; Computational Biology ; Computer Simulation ; Databases, Genetic ; Ebolavirus/*genetics ; Genomics/*methods ; Models, Genetic ; Phylogeny ; Sequence Alignment ; Software ; }, abstract = {BACKGROUND: The term pan-genome was proposed to denominate collections of genomic sequences jointly analyzed or used as a reference. 
The constant growth of genomic data intensifies development of data structures and algorithms to investigate pan-genomes efficiently.

RESULTS: This work focuses on providing a tool for discovering and visualizing the relationships between the sequences constituting a pan-genome. A new structure to represent such relationships - called affinity tree - is proposed. Each node of this tree is assigned a subset of genomes, as well as their homogeneity level and averaged consensus sequence. Moreover, subsets assigned to sibling nodes form a partition of the genomes assigned to their parent.

CONCLUSIONS: Functionality of affinity tree is demonstrated on simulated data and on the Ebola virus pan-genome. Furthermore, two software packages are provided: PangTreeBuild constructs affinity tree, while PangTreeVis presents its result.}, } @article {pmid32298190, year = {2020}, author = {Momeni, SS and Beno, SM and Baker, JL and Edlund, A and Ghazal, T and Childers, NK and Wu, H}, title = {Caries-Associated Biosynthetic Gene Clusters in Streptococcus mutans.}, journal = {Journal of dental research}, volume = {99}, number = {8}, pages = {969-976}, pmid = {32298190}, issn = {1544-0591}, support = {R01 DE016684/DE/NIDCR NIH HHS/United States ; R01 DE017954/DE/NIDCR NIH HHS/United States ; R01 DE022350/DE/NIDCR NIH HHS/United States ; K12 GM088010/GM/NIGMS NIH HHS/United States ; T90 DE022736/DE/NIDCR NIH HHS/United States ; }, mesh = {Biofilms ; *Dental Caries ; Humans ; Multigene Family ; *Streptococcus mutans/genetics ; Virulence/genetics ; }, abstract = {Early childhood caries (ECC) is a chronic disease affecting the oral health of children globally. This disease is multifactorial, but a primary factor is cariogenic microorganisms such as Streptococcus mutans. Biosynthetic gene clusters (BGCs) encode small molecules with diverse biological activities that influence the development of many microbial diseases, including caries. The purpose of this study was to identify BGCs in S. mutans from a high-caries risk study population using whole-genome sequencing and assess their association with ECC. Forty representative S. mutans isolates were selected for genome sequencing from a large-scale epidemiological study of oral microbiology and dental caries in children from a localized Alabama population. A total of 252 BGCs were identified using the antiSMASH BGC-mining tool. Three types of BGCs identified herein-butyrolactone-like, ladderane-like, and butyrolactone-ladderane-like hybrid (BL-BGC)-have not been reported in S. mutans. 
These 3 BGCs were cross-referenced against public transcriptomics data, and were found to be highly expressed in caries subjects. Furthermore, based on a polymerase chain reaction screening for core BL genes, 93% of children with BL-BGC had ECC. The role of BL-BGC was further investigated by examining cariogenic traits and strain fitness in a deletion mutant using in vitro biofilm models. Deletion of the BL-BGC significantly increased biofilm pH as compared to the parent strain, while other virulence and fitness properties remained unchanged. Intriguingly, BL-BGC containing strains produced more acid, a key cariogenic feature, and less biofilm than the model cariogenic strain S. mutans UA159, suggesting the importance of this BL-BGC in S. mutans-mediated cariogenicity. The structure of any BL-BGC derived metabolites, their functions, and mechanistic connection with acid production remain to be elucidated. Nevertheless, this study is the first to report the clinical significance of a BL-BGC in S. mutans. 
This study also highlights pangenomic diversity, which is likely to affect phenotype and virulence.}, } @article {pmid32296571, year = {2020}, author = {Yu, Y and Wei, C}, title = {A powerful HUPAN on a pan-genome study: significance and perspectives.}, journal = {Cancer biology & medicine}, volume = {17}, number = {1}, pages = {1-5}, pmid = {32296571}, issn = {2095-3941}, mesh = {*Genome, Human ; *Human Genome Project ; Humans ; Neoplasms/*genetics ; *Software ; }, } @article {pmid32291353, year = {2020}, author = {Moulana, A and Anderson, RE and Fortunato, CS and Huber, JA}, title = {Selection Is a Significant Driver of Gene Gain and Loss in the Pangenome of the Bacterial Genus Sulfurovum in Geographically Distinct Deep-Sea Hydrothermal Vents.}, journal = {mSystems}, volume = {5}, number = {2}, pages = {}, pmid = {32291353}, issn = {2379-5077}, abstract = {Microbial genomes have highly variable gene content, and the evolutionary history of microbial populations is shaped by gene gain and loss mediated by horizontal gene transfer and selection. To evaluate the influence of selection on gene content variation in hydrothermal vent microbial populations, we examined 22 metagenome-assembled genomes (MAGs) (70 to 97% complete) from the ubiquitous vent Epsilonbacteraeota genus Sulfurovum that were recovered from two deep-sea hydrothermal vent regions, Axial Seamount in the northeastern Pacific Ocean (13 MAGs) and the Mid-Cayman Rise in the Caribbean Sea (9 MAGs). Genes involved in housekeeping functions were highly conserved across Sulfurovum lineages. However, genes involved in environment-specific functions, and in particular phosphate regulation, were found mostly in Sulfurovum genomes from the Mid-Cayman Rise in the low-phosphate Atlantic Ocean environment, suggesting that nutrient limitation is an important selective pressure for these bacteria. 
Furthermore, genes that were rare within the pangenome were more likely to undergo positive selection than genes that were highly conserved in the pangenome, and they also appeared to have experienced gene-specific sweeps. Our results suggest that selection is a significant driver of gene gain and loss for dominant microbial lineages in hydrothermal vents and highlight the importance of factors like nutrient limitation in driving microbial adaptation and evolution. IMPORTANCE: Microbes can alter their gene content through the gain and loss of genes. However, there is some debate as to whether natural selection or neutral processes play a stronger role in molding the gene content of microbial genomes. In this study, we examined variation in gene content for the Epsilonbacteraeota genus Sulfurovum from deep-sea hydrothermal vents, which are dynamic habitats known for extensive horizontal gene transfer within microbial populations. Our results show that natural selection is a strong driver of Sulfurovum gene content and that nutrient limitation in particular has shaped the Sulfurovum genome, leading to differences in gene content between ocean basins. Our results also suggest that recently acquired genes undergo stronger selection than genes that were acquired in the more distant past. Overall, our results highlight the importance of natural selection in driving the evolution of microbial populations in these dynamic habitats.}, } @article {pmid32279278, year = {2020}, author = {Oh, YJ and Kim, JY and Jo, HE and Park, HK and Lim, SK and Kwon, MS and Choi, HJ}, title = {Lentibacillus cibarius sp. 
nov., isolated from kimchi, a Korean fermented food.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {58}, number = {5}, pages = {387-394}, pmid = {32279278}, issn = {1976-3794}, mesh = {Bacillaceae/*classification/isolation & purification ; Bacterial Typing Techniques ; Base Composition ; Brassica/*microbiology ; DNA, Bacterial/genetics ; Diaminopimelic Acid/chemistry ; Fatty Acids/chemistry ; Fermented Foods/*microbiology ; *Food Microbiology ; Phospholipids/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Republic of Korea ; Sequence Analysis, DNA ; Vitamin K 2/analogs & derivatives/chemistry ; }, abstract = {Two bacterial strains designated NKC220-2[T] and NKC851-2 were isolated from commercial kimchi from different areas in Korea. The strains were Gram-positive, aerobic, oxidase- and catalase-positive, rod-shaped, spore-forming, non-motile, and halophilic bacteria. Both strains grew without NaCl, unlike type species in the genus Lentibacillus. The optimal pH for growth was 8.0, higher than that of the type species in the genus Lentibacillus, although growth was observed at pH 5.5-9.0. 16S rRNA gene sequence-based phylogenetic analysis indicated that the two strains (99.3-99.9% similarity) are grouped within the genus Lentibacillus and most closely related to Lentibacillus juripiscarius IS40-3[T] (97.4-97.6% similarity) isolated from fish sauce in Thailand. OrthoANI value between two novel strains and Lentibacillus lipolyticus SSKP1-9[T] (79.5-79.6% similarity) was far lower than the species demarcation threshold. Comparative genomic analysis displayed differences between the two strains as well as among other strains belonging to Lentibacillus. Furthermore, each isolate had strain-specific groups of orthologous genes based on pangenome analysis. Genomic G + C contents of strains NKC220-2[T] and NKC851-2 were 41.9 and 42.2 mol%, respectively. 
The strains contained meso-diaminopimelic acid in their cell walls, and the major menaquinone was menaquinone-7. Phosphatidylglycerol, diphosphatidylglycerol, and an unidentified glycolipid, aminophospholipid, and phospholipid were the major polar lipid components of both strains. The major cellular fatty acids of the strains were anteiso-C15:0 and anteiso-C17:0. Based on phenotypic, genomic, phylogenetic, and chemotaxonomic features, strains NKC220-2[T] and NKC851-2 represent novel species of the genus Lentibacillus, for which the name Lentibacillus cibarius sp. nov. is proposed. The type strain is NKC220-2[T] (= KACC 21232[T] = JCM 33390[T]).}, } @article {pmid32278144, year = {2020}, author = {Zeb, S and Gulfam, SM and Bokhari, H}, title = {Comparative core/pan genome analysis of Vibrio cholerae isolates from Pakistan.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {82}, number = {}, pages = {104316}, doi = {10.1016/j.meegid.2020.104316}, pmid = {32278144}, issn = {1567-7257}, mesh = {Base Composition ; Biological Evolution ; Cholera/epidemiology/microbiology ; Genes, Bacterial ; Genetic Variation ; *Genome, Bacterial ; Genome-Wide Association Study ; Humans ; Pakistan/epidemiology ; *Phylogeny ; Vibrio cholerae/*genetics/isolation & purification/pathogenicity ; }, abstract = {Cholera is an endemic disease in many regions of Asia including, Pakistan. Vibrio cholerae, the causative agent of cholera, is considered as one of the best adapted bacteria due to its ability to withstand severe environmental stresses. The V. cholerae genome is very plastic with many gene additions and deletions. In this study, we sought to understand the diversity of V. cholerae genes in two Pakistani subclades [e.g. Pakistani subclade I (PSC I) and Pakistani subclade II (PSC II)]. We have analyzed 44 PSC I and 56 PSC II strains, respectively. 
By analyzing our data, it was concluded that subclade group 2 (PSC II) has 2967 core genes repositories, while the PSC I group has just 1062 core genes. It was observed that the pangenome in the PSC II group is open while the pan-genome in PSC I is closed. It was also noted that the number of accessory genes (n = 2500) is higher in the PSC I group compared to the PSC II group (n = 550). Furthermore, analysis extended to the study of unique gene profiles suggested that all strains of the PSC II group have unique genes. One strain among the PSC II group had a high number of unique genes (n = 2612). However, in the PSC I group, only a few strains had unique genes with a maximum of 86 unique genes being found in a single strain. Core phylogeny of PSC I indicated that just three groups initially arose from a single common ancestor. At the same time, a complex pattern of evolution was found in the PSC II phylogenetic tree based on core gene information. This comparative genomic analysis has revealed 'waves' of V. cholerae evolution and information on its transmission and ability to modify its genetic content to survive in different environmental conditions. Here, we have investigated how the versatility of V. cholerae, a bacterium that persists across different habitats, is reflected in its genome. The data generated during the study should be extremely beneficial in defining the evolutionary relationship as well as diversity between V. cholerae subclades. 
It will also benefit epidemiological studies and the design of better treatment strategies for controlling epidemics.}, } @article {pmid32278068, year = {2020}, author = {Zhao, J and Liu, C and Liu, Y and Zhang, Y and Xiong, Z and Fan, Y and Zou, X and Lu, B and Cao, B}, title = {Genomic characteristics of clinically important ST11 Klebsiella pneumoniae strains worldwide.}, journal = {Journal of global antimicrobial resistance}, volume = {22}, number = {}, pages = {519-526}, doi = {10.1016/j.jgar.2020.03.023}, pmid = {32278068}, issn = {2213-7173}, mesh = {China ; Genomics ; Humans ; *Klebsiella Infections/epidemiology ; *Klebsiella pneumoniae/genetics ; Multilocus Sequence Typing ; Phylogeny ; beta-Lactamases/genetics ; }, abstract = {OBJECTIVES: ST11 Klebsiella pneumoniae is among the most important clinical pathogens in China, and KL47 and KL64 are the dominant K types of these strains. Understanding the genomic characteristics of these strains would be critical to their anti-infection treatment.

METHODS: There were 364 genome sequences of ST11 K. pneumoniae strains isolated and collected from 13 countries from 2003 to 2018. These genome sequences included 338 downloaded from the National Center for Biotechnology Information (NCBI) database and 26 newly sequenced. Phylogenetic analyses of pan-genome and unique genes, and resistance and virulence gene analyses, were carried out to elucidate the molecular characteristics of these strains.

RESULTS: A total of 19 732 genes were identified from the 364 ST11 strains, and the pan-genome was open, indicating the genetic diversity of ST11 K. pneumoniae. These strains were clustered into three clades. Clade 1 contained the most various K types (14/15, 93.3%) and unique genes. KL47 and KL64 were the dominant K types of clades 2 and 3, accounting for 100% and 99.4% of strains in each clade, respectively. KL64 strains contained the most virulence genes, including iucA and rmpA, and the two genes tend to coexist. In addition, strains in clade 1 were isolated from all 13 countries; the strains in clades 2 and 3 were isolated mainly from China.

CONCLUSIONS: The ST11 K. pneumoniae strain of KL64 is a newly emerging superbug, with more resistance and virulence genes in China; this was significantly different from other countries, and we should be alert to the dissemination of this subclone.}, } @article {pmid32276876, year = {2020}, author = {Hurtado, R and Maturrano, L and Azevedo, V and Aburjaile, F}, title = {Pathogenomics insights for understanding Pasteurella multocida adaptation.}, journal = {International journal of medical microbiology : IJMM}, volume = {310}, number = {4}, pages = {151417}, doi = {10.1016/j.ijmm.2020.151417}, pmid = {32276876}, issn = {1618-0607}, mesh = {Adaptation, Physiological ; Animals ; Evolution, Molecular ; *Genetic Variation ; Genome, Bacterial ; *Genomics ; Humans ; Interspersed Repetitive Sequences ; Pasteurella Infections/*pathology/transmission/*veterinary ; Pasteurella multocida/*genetics ; Phylogeny ; Polymorphism, Single Nucleotide ; Poultry/microbiology ; Virulence/genetics ; }, abstract = {Pasteurella multocida is an important veterinary pathogen able to infect a wide range of animals in a broad spectrum of diseases. P. multocida is a complex microorganism in relation to its genomic flexibility, host adaptation and pathogenesis. Epidemiological analysis based on multilocus sequence typing, serotyping, genotyping, association with virulence genes and single nucleotide polymorphisms (SNPs), enables assessment of intraspecies diversity, phylogenetic and strain-specific relationships associated with host predilection or disease. A high number of sequenced genomes provides us a more accurate genomic and epidemiological interpretation to determine whether certain lineages can infect a host or produce disease. Comparative genomic analysis and pan-genomic approaches have revealed a flexible genome for hosting mobile genetic elements (MGEs) and therefore significant variation in gene content. 
Moreover, it was possible to find lineage-specific MGEs from the same niche, showing acquisition probably due to an evolutionary convergence event or to a genetic group with infective capacity. Furthermore, diversification selection analysis exhibits proteins exposed on the surface subject to selection pressures with an interstrain heterogeneity related to their ability to adapt. This article is the first review describing the genomic relationship to elucidate the diversity and evolution of P. multocida.}, } @article {pmid32269101, year = {2020}, author = {Park, CJ and Li, J and Zhang, X and Gao, F and Benton, CS and Andam, CP}, title = {Genomic Epidemiology and Evolution of Diverse Lineages of Clinical Campylobacter jejuni Cocirculating in New Hampshire, USA, 2017.}, journal = {Journal of clinical microbiology}, volume = {58}, number = {6}, pages = {}, pmid = {32269101}, issn = {1098-660X}, mesh = {Anti-Bacterial Agents/pharmacology ; *Campylobacter Infections/epidemiology ; *Campylobacter jejuni/genetics ; Drug Resistance, Bacterial/genetics ; Genomics ; Humans ; New Hampshire/epidemiology ; Phylogeny ; }, abstract = {Campylobacter jejuni is one of the leading causes of bacterial gastroenteritis worldwide. In the United States, New Hampshire was one of the 18 states that reported cases in the 2016 to 2018 multistate outbreak of multidrug-resistant C. jejuni. Here, we aimed to elucidate the baseline diversity of the wider New Hampshire C. jejuni population during the outbreak. We used genome sequences of 52 clinical isolates sampled in New Hampshire in 2017, including 1 of the 2 isolates from the outbreak. Results revealed a remarkably diverse population composed of at least 28 sequence types, which are mostly represented by 1 or a few strains. A comparison of our isolates with 249 clinical C. jejuni from other states showed frequent phylogenetic intermingling, suggesting a lack of geographical structure and minimal local diversification within the state. 
Multiple independent acquisitions of resistance genes from 5 classes of antibiotics characterize the population, with 47/52 (90.4%) of the genomes carrying at least 1 horizontally acquired resistance gene. Frequently recombining genes include those associated with heptose biosynthesis, colonization, and stress resistance. We conclude that the diversity of clinical C. jejuni in New Hampshire in 2017 was driven mainly by the coexistence of phylogenetically diverse antibiotic-resistant lineages, widespread geographical mixing, and frequent recombination. This study provides an important baseline census of the standing pangenomic variation and drug resistance to aid the development of a statewide database for epidemiological studies and clinical decision making. Continued genomic surveillance will be necessary to accurately assess how the population of C. jejuni changes over the long term.}, } @article {pmid32265447, year = {2020}, author = {Zhou, Y and Chebotarov, D and Kudrna, D and Llaca, V and Lee, S and Rajasekar, S and Mohammed, N and Al-Bader, N and Sobel-Sorenson, C and Parakkal, P and Arbelaez, LJ and Franco, N and Alexandrov, N and Hamilton, NRS and Leung, H and Mauleon, R and Lorieux, M and Zuccolo, A and McNally, K and Zhang, J and Wing, RA}, title = {A platinum standard pan-genome resource that represents the population structure of Asian rice.}, journal = {Scientific data}, volume = {7}, number = {1}, pages = {113}, pmid = {32265447}, issn = {2052-4463}, mesh = {Crops, Agricultural/genetics ; Genetic Variation ; *Genome, Plant ; Genomics ; Oryza/*genetics ; }, abstract = {As the human population grows from 7.8 billion to 10 billion over the next 30 years, breeders must do everything possible to create crops that are highly productive and nutritious, while simultaneously having less of an environmental footprint. 
Rice will play a critical role in meeting this demand and thus, knowledge of the full repertoire of genetic diversity that exists in germplasm banks across the globe is required. To meet this demand, we describe the generation, validation and preliminary analyses of transposable element and long-range structural variation content of 12 near-gap-free reference genome sequences (RefSeqs) from representatives of 12 of 15 subpopulations of cultivated Asian rice. When combined with 4 existing RefSeqs, that represent the 3 remaining rice subpopulations and the largest admixed population, this collection of 16 Platinum Standard RefSeqs (PSRefSeq) can be used as a template to map resequencing data to detect virtually all standing natural variation that exists in the pan-genome of cultivated Asian rice.}, } @article {pmid32245763, year = {2020}, author = {Smith, EA and Miller, EA and Weber, BP and Munoz Aguayo, J and Flores Figueroa, C and Huisinga, J and Nezworski, J and Kromm, M and Wileman, B and Johnson, TJ}, title = {Genomic Landscape of Ornithobacterium rhinotracheale in Commercial Turkey Production in the United States.}, journal = {Applied and environmental microbiology}, volume = {86}, number = {11}, pages = {}, pmid = {32245763}, issn = {1098-5336}, mesh = {Animal Husbandry ; Animals ; Cross-Sectional Studies ; Flavobacteriaceae Infections/microbiology/veterinary ; *Genome, Bacterial ; Midwestern United States ; Ornithobacterium/*genetics ; Poultry Diseases/microbiology ; Retrospective Studies ; Southeastern United States ; Turkeys/*microbiology ; }, abstract = {Ornithobacterium rhinotracheale is a causative agent of respiratory tract infections in avian hosts worldwide but is a particular problem for commercial turkey production. Little is known about the ecologic and evolutionary dynamics of O. rhinotracheale, which makes prevention and control of this pathogen a challenge. The purpose of this study was to gain insight into the genetic relationships between O. 
rhinotracheale populations through comparative genomics of clinical isolates from different U.S. turkey producers. O. rhinotracheale clinical isolates were collected from four major U.S. turkey producers and several independent turkey growers from the upper Midwest and Southeast, and whole-genome sequencing was performed. Genomes were compared phylogenetically using single nucleotide polymorphism (SNP)-based analysis, and then assembly and annotations were performed to identify genes encoding putative virulence factors and antimicrobial resistance determinants. A pangenome approach was also used to establish a core set of genes consistently present in O. rhinotracheale and to highlight differences in gene content between phylogenetic clades. A total of 1,457 nonrecombinant SNPs were identified from 157 O. rhinotracheale genomes, and four distinct phylogenetic clades were identified. Isolates clustered by company on the phylogenetic tree, however, and each company had isolates in multiple clades with similar collection dates, indicating that there are multiple O. rhinotracheale strains circulating within each of the companies examined. Additionally, several antimicrobial resistance proteins, putative virulence factors, and the pOR1 plasmid were associated with particular clades and multilocus sequence types, which may explain why the same strains seem to have persisted in the same turkey operations for decades. IMPORTANCE: The whole-genome approach enhances our understanding of evolutionary relationships between clinical Ornithobacterium rhinotracheale isolates from different commercial turkey producers and allows for identification of genes associated with virulence, antimicrobial resistance, or mobile genetic elements that are often excluded using traditional typing methods. Additionally, differentiating O. 
rhinotracheale isolates at the whole-genome level may provide insight into selection of the most appropriate autogenous vaccine strain, or groups of strains, for a given population of clinical isolates.}, } @article {pmid32240724, year = {2020}, author = {Narsing Rao, MP and Dong, ZY and Jiao, JY and Zhou, Y and Zhao, J and Xiao, M and Li, WJ}, title = {Genome sequence and comparative analysis of DRQ-2, the type strain of Nonomuraea indica.}, journal = {Genomics}, volume = {112}, number = {4}, pages = {2842-2844}, doi = {10.1016/j.ygeno.2020.03.023}, pmid = {32240724}, issn = {1089-8646}, mesh = {Actinobacteria/*genetics/metabolism ; *Genome, Bacterial ; Osmotic Pressure ; Oxidative Stress/genetics ; Secondary Metabolism/genetics ; }, abstract = {Strain DRQ-2[T] (type strain of Nonomuraea indica) is worthy for genome sequencing, due to its ability to produce a wide variety of industrially important enzymes such as amylase, asparaginase, cellulase, gelatinase, glutaminase, and protease. Genome sequencing and comparison of strain DRQ-2[T] is described in the present work. The genome size was estimated to be 8,288,417 (bp) that consisted of 59 contigs. The G + C content of the genome was 72.4%. A total of 7730 genes were predicted with two rRNAs and 64 tRNAs. The genome analysis of the strain DRQ-2[T] showed the presence of a wide range of secondary metabolite gene clusters. Pan-Genomes Analysis Pipeline (PGAP) indicated that strain DRQ-2[T] had large numbers of unique genes. The majority of N. indica DRQ-2[T] genes encode for hypothetical proteins, indicating that the functions of these ortholog clusters remain to be determined.}, } @article {pmid32239329, year = {2020}, author = {Zhu, L and Zhao, M and Chen, M and Li, L and Jiang, Y and Liu, S and Jiang, Y and Wang, K and Wang, Y and Sun, C and Chen, J and Chen, P and Lei, J and Su, Y and Wang, Y and Zhang, M}, title = {The bHLH gene family and its response to saline stress in Jilin ginseng, Panax ginseng C.A. 
Meyer.}, journal = {Molecular genetics and genomics : MGG}, volume = {295}, number = {4}, pages = {877-890}, doi = {10.1007/s00438-020-01658-w}, pmid = {32239329}, issn = {1617-4623}, support = {2013AA102604-3//China 863 Project/ ; 20170101010JC//the Bureau of Science and Technology of Jilin Province/ ; 20180414077GH//the Bureau of Science and Technology of Jilin Province/ ; 20180101027JC//the Bureau of Science and Technology of Jilin Province/ ; 20190201264JC//the Bureau of Science and Technology of Jilin Province/ ; 20190103104JH//the Bureau of Science and Technology of Jilin Province/ ; 2016C064//the Development and Reform Commission of Jilin Province/ ; 2018C047-3//the Development and Reform Commission of Jilin Province/ ; }, mesh = {Alternative Splicing/genetics ; Basic Helix-Loop-Helix Transcription Factors/*genetics ; China ; *Evolution, Molecular ; Gene Expression Regulation, Plant/genetics ; Gene Ontology ; Multigene Family/genetics ; Panax/drug effects/*genetics/growth & development ; Phylogeny ; Saline Solution/toxicity ; Salt Stress/*genetics ; Transcription Factors ; }, abstract = {Basic helix-loop-helix (bHLH) gene family is a gene family of transcription factors that plays essential roles in plant growth and development, secondary metabolism and response to biotic and abiotic stresses. Therefore, a comprehensive knowledge of the bHLH gene family is paramount to understand the molecular mechanisms underlying these processes and develop advanced technologies to manipulate the processes efficiently. Ginseng, Panax ginseng C.A. Meyer, is a well-known medicinal herb; however, little is known about the bHLH genes (PgbHLH) in the species. Here, we identified 137 PgbHLH genes from Jilin ginseng cultivar, Damaya, widely cultivated in Jilin, China, of which 50 are newly identified by pan-genome analysis. These 137 PgbHLH genes were phylogenetically classified into 26 subfamilies, suggesting their sequence diversification. 
They are alternatively spliced into 366 transcripts in a 4-year-old plant and involved in 11 functional subcategories of the gene ontology, indicating their functional differentiation in ginseng. The expressions of the PgbHLH genes dramatically vary spatio-temporally and across 42 genotypes, but they are still somehow functionally correlated. Moreover, the PgbHLH gene family, at least some of its genes, is shown to have roles in plant response to the abiotic stress of saline. These results provide a new insight into the evolution and functional differentiation of the bHLH gene family in plants, new bHLH genes to the PgbHLH gene family, and saline stress-responsive genes for genetic improvement in ginseng and other plant species.}, } @article {pmid32228746, year = {2020}, author = {Niu, XK and Narsing Rao, MP and Dong, ZY and Kan, Y and Li, QR and Huang, J and Zhao, L and Wang, MZ and Shen, ZP and Kang, YQ and Li, WJ}, title = {Vulcaniibacterium gelatinicum sp. nov., a moderately thermophilic bacterium isolated from a hot spring.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {3}, pages = {1571-1577}, doi = {10.1099/ijsem.0.003934}, pmid = {32228746}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Genes, Bacterial ; Hot Springs/*microbiology ; Phospholipids/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Ubiquinone/chemistry ; Water Microbiology ; Xanthomonadaceae/*classification/isolation & purification ; }, abstract = {The present study aimed to determine the taxonomic positions of strains designated R-5-52-3[T], R-5-33-5-1-2, R-5-48-2 and R-5-51-4 isolated from hot spring water samples. Cells of these strains were Gram-stain-negative, non-motile and rod-shaped. The strains shared highest 16S rRNA gene sequence similarity with Vulcaniibacterium thermophilum KCTC 32020[T] (95.1%). 
Growth occurred at 28-55 °C, at pH 6-8 and with up to 3 % (w/v) NaCl. DNA fingerprinting, biochemical, phylogenetic and 16S rRNA gene sequence analyses suggested that R-5-52-3[T], R-5-33-5-1-2, R-5-48-2 and R-5-51-4 were different strains but belonged to the same species. Hence, R-5-52-3[T] was chosen for further analysis and R-5-33-5-1-2, R-5-48-2 and R-5-51-4 were considered as additional strains of this species. R-5-52-3[T] possessed Q-8 as the only quinone and iso-C15:0, iso-C11:0, C16 : 0 and iso-C17 : 0 as major fatty acids. The polar lipids were diphosphatidylglycerol, phosphatidylglycerol, phosphatidylethanolamine, unidentified polar lipids and two unidentified phospholipids. The genomic G+C content was 71.6 mol%. Heat shock proteins (e.g. Hsp20, GroEL, DnaK and Clp ATPases) were noted in the R-5-52-3[T] genome, which could suggest its protection in the hot spring environment. Pan-genome analysis showed the number of singleton gene clusters among Vulcaniibacterium members varied. Average nucleotide identity (ANI) values between R-5-52-3[T], Vulcaniibacterium tengchongense YIM 77520[T] and V. thermophilum KCTC 32020[T] were 80.1-85.8 %, which were below the cut-off level (95-96 %) recommended as the ANI criterion for interspecies identity. Thus, based on the above results, strain R-5-52-3[T] represents a novel species of the genus Vulcaniibacterium, for which the name Vulcaniibacterium gelatinicum sp. nov. is proposed. 
The type strain is R-5-52-3[T] (=KCTC 72061[T]=CGMCC 1.16678[T]).}, } @article {pmid32198762, year = {2020}, author = {Dunning, LT and Christin, PA}, title = {Reticulate evolution, lateral gene transfer, and innovation in plants.}, journal = {American journal of botany}, volume = {107}, number = {4}, pages = {541-544}, doi = {10.1002/ajb2.1452}, pmid = {32198762}, issn = {1537-2197}, mesh = {Evolution, Molecular ; *Gene Transfer, Horizontal ; Phylogeny ; *Plants ; }, } @article {pmid32196089, year = {2020}, author = {Muthukumarasamy, U and Preusse, M and Kordes, A and Koska, M and Schniederjans, M and Khaledi, A and Häussler, S}, title = {Single-Nucleotide Polymorphism-Based Genetic Diversity Analysis of Clinical Pseudomonas aeruginosa Isolates.}, journal = {Genome biology and evolution}, volume = {12}, number = {4}, pages = {396-406}, pmid = {32196089}, issn = {1759-6653}, mesh = {*Adaptation, Physiological ; *Genome, Bacterial ; Humans ; Phenotype ; Phylogeny ; *Polymorphism, Single Nucleotide ; Pseudomonas aeruginosa/*genetics/growth & development/*isolation & purification ; }, abstract = {Extensive use of next-generation sequencing has the potential to transform our knowledge on how genomic variation within bacterial species impacts phenotypic versatility. Because different environments have unique selection pressures, they drive divergent evolution. However, there is also parallel or convergent evolution of traits in independent bacterial isolates inhabiting similar environments. The application of tools to describe population-wide genomic diversity provides an opportunity to measure the predictability of genetic changes underlying adaptation. Here, we describe patterns of sequence variations in the core genome among 99 individual Pseudomonas aeruginosa clinical isolates and identified single-nucleotide polymorphisms that are the basis for branching of the phylogenetic tree. 
We also identified single-nucleotide polymorphisms that were acquired independently, in separate lineages, and not through inheritance from a common ancestor. Although our results demonstrate that the Pseudomonas aeruginosa core genome is highly conserved and in general, not subject to adaptive evolution, instances of parallel evolution will provide an opportunity to uncover genetic changes that underlie phenotypic diversity.}, } @article {pmid32191703, year = {2020}, author = {Gautreau, G and Bazin, A and Gachet, M and Planel, R and Burlot, L and Dubois, M and Perrin, A and Médigue, C and Calteau, A and Cruveiller, S and Matias, C and Ambroise, C and Rocha, EPC and Vallenet, D}, title = {PPanGGOLiN: Depicting microbial diversity via a partitioned pangenome graph.}, journal = {PLoS computational biology}, volume = {16}, number = {3}, pages = {e1007732}, pmid = {32191703}, issn = {1553-7358}, mesh = {Algorithms ; Bacteria/classification/genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; Multivariate Analysis ; *Software ; }, abstract = {The use of comparative genomics for functional, evolutionary, and epidemiological studies requires methods to classify gene families in terms of occurrence in a given species. These methods usually lack multivariate statistical models to infer the partitions and the optimal number of classes and don't account for genome organization. We introduce a graph structure to model pangenomes in which nodes represent gene families and edges represent genomic neighborhood. Our method, named PPanGGOLiN, partitions nodes using an Expectation-Maximization algorithm based on multivariate Bernoulli Mixture Model coupled with a Markov Random Field. This approach takes into account the topology of the graph and the presence/absence of genes in pangenomes to classify gene families into persistent, cloud, and one or several shell partitions. 
By analyzing the partitioned pangenome graphs of isolate genomes from 439 species and metagenome-assembled genomes from 78 species, we demonstrate that our method is effective in estimating the persistent genome. Interestingly, it shows that the shell genome is a key element to understand genome dynamics, presumably because it reflects how genes present at intermediate frequencies drive adaptation of species, and its proportion in genomes is independent of genome size. The graph-based approach proposed by PPanGGOLiN is useful to depict the overall genomic diversity of thousands of strains in a compact structure and provides an effective basis for very large scale comparative genomics. The software is freely available at https://github.com/labgem/PPanGGOLiN.}, } @article {pmid32188120, year = {2020}, author = {Hasni, I and Andréani, J and Colson, P and La Scola, B}, title = {Description of Virulent Factors and Horizontal Gene Transfers of Keratitis-Associated Amoeba Acanthamoeba Triangularis by Genome Analysis.}, journal = {Pathogens (Basel, Switzerland)}, volume = {9}, number = {3}, pages = {}, pmid = {32188120}, issn = {2076-0817}, support = {10-IAHU-03//Agence Nationale de la Recherche/ ; }, abstract = {Acanthamoeba triangularis strain SH 621 is a free-living amoeba belonging to Acanthamoeba ribo-genotype T4. This ubiquitous protist is among the free-living amoebas responsible for Acanthamoeba keratitis, a severe infection of human cornea. Genome sequencing and genomic comparison were carried out to explore the biological functions and to better understand the virulence mechanism related to the pathogenicity of Acanthamoeba keratitis. The genome assembly harbored a length of 66.43 Mb encompassing 13,849 scaffolds. The analysis of predicted proteins reported the presence of 37,062 ORFs. 
A complete annotation revealed 33,168 and 16,605 genes that matched with NCBI non-redundant protein sequence (nr) and Cluster of Orthologous Group of proteins (COG) databases, respectively. The Kyoto Encyclopedia of Genes and Genomes Pathway (KEGG) annotation reported a great number of genes related to carbohydrate, amino acid and lipid metabolic pathways. The pangenome performed with 8 available amoeba genomes belonging to genus Acanthamoeba revealed a core genome containing 843 clusters of orthologous genes with a ratio core genome/pangenome of less than 0.02. We detected 48 genes related to virulent factors of Acanthamoeba keratitis. Best hit analyses in nr database identified 99 homologous genes shared with amoeba-resisting microorganisms. This study allows the deciphering the genome of a free-living amoeba with medical interest and provides genomic data to better understand virulence-related Acanthamoeba keratitis.}, } @article {pmid32188055, year = {2020}, author = {Kim, YJ and Park, JY and Balusamy, SR and Huo, Y and Nong, LK and Thi Le, H and Yang, DC and Kim, D}, title = {Comprehensive Genome Analysis on the Novel Species Sphingomonas panacis DCY99[T] Reveals Insights into Iron Tolerance of Ginseng.}, journal = {International journal of molecular sciences}, volume = {21}, number = {6}, pages = {}, pmid = {32188055}, issn = {1422-0067}, support = {2019R1A2C1010428//National Research Foundation of Korea/ ; PJ0128132017//Rural Development Administration/ ; }, mesh = {DNA, Bacterial ; Drug Tolerance/*genetics ; Genes, Bacterial/genetics ; Genome Size ; *Genome, Bacterial ; Hydroxybenzoates ; Iron/*metabolism/toxicity ; Metals, Heavy ; Panax/*microbiology ; Plant Development ; Plant Roots/microbiology ; Soil Microbiology ; Sphingomonas/drug effects/*genetics/isolation & purification/*physiology ; Stress, Physiological ; }, abstract = {Plant growth-promoting rhizobacteria play vital roles not only in plant growth, but also in reducing biotic/abiotic stress. 
Sphingomonas panacis DCY99[T] is isolated from soil and root of Panax ginseng with rusty root disease, characterized by raised reddish-brown root and this is seriously affects ginseng cultivation. To investigate the relationship between 159 sequenced Sphingomonas strains, pan-genome analysis was carried out, which suggested genomic diversity of the Sphingomonas genus. Comparative analysis of S. panacis DCY99[T] with Sphingomonas sp. LK11 revealed plant growth-promoting potential of S. panacis DCY99[T] through indole acetic acid production, phosphate solubilizing, and antifungal abilities. Detailed genomic analysis has shown that S. panacis DCY99[T] contain various heavy metals resistance genes in its genome and the plasmid. Functional analysis with Sphingomonas paucimobilis EPA505 predicted that S. panacis DCY99[T] possess genes for degradation of polyaromatic hydrocarbon and phenolic compounds in rusty-ginseng root. Interestingly, when primed ginseng with S. panacis DCY99[T] during high concentration of iron exposure, iron stress of ginseng was suppressed. In order to detect S. panacis DCY99[T] in soil, biomarker was designed using spt gene. This study brings new insights into the role of S. 
panacis DCY99[T] as a microbial inoculant to protect ginseng plants against rusty root disease.}, } @article {pmid32182882, year = {2020}, author = {Kang, SM and Asaf, S and Khan, AL and {Lubna} and Khan, A and Mun, BG and Khan, MA and Gul, H and Lee, IJ}, title = {Complete Genome Sequence of Pseudomonas psychrotolerans CS51, a Plant Growth-Promoting Bacterium, Under Heavy Metal Stress Conditions.}, journal = {Microorganisms}, volume = {8}, number = {3}, pages = {}, pmid = {32182882}, issn = {2076-2607}, abstract = {In the current study, we aimed to elucidate the plant growth-promoting characteristics of Pseudomonas psychrotolerans CS51 under heavy metal stress conditions (Zn, Cu, and Cd) and determine the genetic makeup of the CS51 genome using the single-molecule real-time (SMRT) sequencing technology of Pacific Biosciences. The results revealed that inoculation with CS51 induced endogenous indole-3-acetic acid (IAA) and gibberellins (GAs), which significantly enhanced cucumber growth (root shoot length) and increased the heavy metal tolerance of cucumber plants. Moreover, genomic analysis revealed that the CS51 genome consisted of a circular chromosome of 5,364,174 base pairs with an average G+C content of 64.71%. There were around 4774 predicted protein-coding sequences (CDSs) in 4859 genes, 15 rRNA genes, and 67 tRNA genes. Around 3950 protein-coding genes with function prediction and 733 genes without function prediction were identified. Furthermore, functional analyses predicted that the CS51 genome could encode genes required for auxin biosynthesis, nitrate and nitrite ammonification, the phosphate-specific transport system, and the sulfate transport system, which are beneficial for plant growth promotion. The heavy metal resistance of CS51 was confirmed by the presence of genes responsible for cobalt-zinc-cadmium resistance, nickel transport, and copper homeostasis in the CS51 genome. 
The extrapolation of the curve showed that the core genome contained a minimum of 2122 genes (95% confidence interval = 2034.24 to 2080.215). Our findings indicated that the genome sequence of CS51 may be used as an eco-friendly bioresource to promote plant growth in heavy metal-contaminated areas.}, } @article {pmid32181684, year = {2020}, author = {Kuhnle, A and Mun, T and Boucher, C and Gagie, T and Langmead, B and Manzini, G}, title = {Efficient Construction of a Complete Index for Pan-Genomics Read Alignment.}, journal = {Journal of computational biology : a journal of computational molecular cell biology}, volume = {27}, number = {4}, pages = {500--513}, pmid = {32181684}, issn = {1557-8666}, support = {R01 AI141810/AI/NIAID NIH HHS/United States ; R01 GM118568/GM/NIGMS NIH HHS/United States ; }, mesh = {Algorithms ; Genome, Human/genetics ; Genomics/*methods ; High-Throughput Nucleotide Sequencing/*methods ; Humans ; Sequence Alignment/*methods ; Sequence Analysis, DNA/methods ; *Software ; }, abstract = {Short-read aligners predominantly use the FM-index, which is easily able to index one or a few human genomes. However, it does not scale well to indexing collections of thousands of genomes. Driving this issue are the two chief components of the index: (1) a rank data structure over the Burrows-Wheeler Transform (BWT) of the string that will allow us to find the interval in the string's suffix array (SA), and (2) a sample of the SA that—when used with the rank data structure—allows us to access the SA. The rank data structure can be kept small even for large genomic databases, by run-length compressing the BWT, but until recently there was no means known to keep the SA sample small without greatly slowing down access to the SA. Now that (SODA 2018) has defined an SA sample that takes about the same space as the run-length compressed BWT, we have the design for efficient FM-indexes of genomic databases but are faced with the problem of building them. 
In 2018, we showed how to build the BWT of large genomic databases efficiently (WABI 2018), but the problem of building the sample efficiently was left open. We compare our approach to state-of-the-art methods for constructing the SA sample, and demonstrate that it is the fastest and most space-efficient method on highly repetitive genomic databases. Lastly, we apply our method for indexing partial and whole human genomes and show that it improves over the FM-index-based Bowtie method with respect to both memory and time and over the hybrid index-based CHIC method with respect to query time and memory required for indexing.}, } @article {pmid32170080, year = {2020}, author = {Cazares, A and Moore, MP and Hall, JPJ and Wright, LL and Grimes, M and Emond-Rhéault, JG and Pongchaikul, P and Santanirand, P and Levesque, RC and Fothergill, JL and Winstanley, C}, title = {A megaplasmid family driving dissemination of multidrug resistance in Pseudomonas.}, journal = {Nature communications}, volume = {11}, number = {1}, pages = {1370}, pmid = {32170080}, issn = {2041-1723}, support = {MRF_MRF-091-0006-RG-FOTHE/MRF/MRF/United Kingdom ; 204822/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Anti-Bacterial Agents/pharmacology ; DNA, Bacterial/genetics ; Drug Resistance, Multiple, Bacterial/*genetics ; Evolution, Molecular ; Genes, Bacterial/*genetics ; Genomics ; Humans ; Microbial Sensitivity Tests ; Phylogeny ; Plasmids/classification/*genetics/isolation & purification ; Pseudomonas/*genetics ; Pseudomonas Infections/microbiology ; Pseudomonas aeruginosa/genetics ; Thailand ; Whole Genome Sequencing ; }, abstract = {Multidrug resistance (MDR) represents a global threat to health. Here, we used whole genome sequencing to characterise Pseudomonas aeruginosa MDR clinical isolates from a hospital in Thailand. 
Using long-read sequence data we obtained complete sequences of two closely related megaplasmids (>420 kb) carrying large arrays of antibiotic resistance genes located in discrete, complex and dynamic resistance regions, and revealing evidence of extensive duplication and recombination events. A comprehensive pangenomic and phylogenomic analysis indicates that: 1) these large plasmids comprise an emerging family present in different members of the Pseudomonas genus, and associated with multiple sources (geographical, clinical or environmental); 2) the megaplasmids encode diverse niche-adaptive accessory traits, including multidrug resistance; 3) the accessory genome of the megaplasmid family is highly flexible and diverse. The history of the megaplasmid family, inferred from our analysis of the available database, suggests that members carrying multiple resistance genes date back to at least the 1970s.}, } @article {pmid32169520, year = {2020}, author = {Satyam, R and Bhardwaj, T and Jha, NK and Jha, SK and Nand, P}, title = {Toward a chimeric vaccine against multiple isolates of Mycobacteroides - An integrative approach.}, journal = {Life sciences}, volume = {250}, number = {}, pages = {117541}, doi = {10.1016/j.lfs.2020.117541}, pmid = {32169520}, issn = {1879-0631}, mesh = {Alleles ; B-Lymphocytes/immunology ; Bacterial Vaccines/*chemistry ; Bacteriophages ; CRISPR-Cas Systems ; Computational Biology ; Drug Resistance, Bacterial ; Epitopes ; Epitopes, T-Lymphocyte/genetics ; Gastrointestinal Microbiome ; Genome, Bacterial ; Genomics ; Gram-Positive Bacterial Infections/*prevention & control ; Histocompatibility Antigens Class I/metabolism ; Histocompatibility Antigens Class II/metabolism ; Humans ; Immunotherapy ; Molecular Docking Simulation ; Molecular Dynamics Simulation ; Mycobacteriaceae/*genetics/pathogenicity ; Proteome ; *Vaccinology ; Virulence ; }, abstract = {AIM: Nontuberculous mycobacterial (NTM) infection such as endophthalmitis, dacryocystitis, 
and canaliculitis are pervasive across the globe and are currently managed by antibiotics. However, the recent cases of Mycobacteroides developing drug resistance reported along with the improper practice of medicine intrigued us to explore its genomic and proteomic canvas at a global scale and develop a chimeric vaccine against Mycobacteroides.

MAIN METHODS: We carried out a vivid genomic study on five recently sequenced strains of Mycobacteroides and explored their Pan-core genome/proteome in three different phases. The promiscuous antigenic proteins were identified via a subtractive proteomics approach that qualified for virulence causation, resistance and essentiality factors for this notorious bacterium. An integrated pipeline was developed for the identification of B-Cell, MHC (Major histocompatibility complex) class I and II epitopes.

KEY FINDINGS: Phase I identified the shreds of evidence of reductive evolution and propensity of the Pan-genome of Mycobacteroides getting closed soon. Phase II and Phase III produced 8 vaccine constructs. Our final vaccine construct, V6 qualified for all tests such as absence for allergenicity, presence of antigenicity, etc. V6 contains β-defensin as an adjuvant, linkers, Lysosomal-associated membrane protein 1 (LAMP1) signal peptide, and PADRE (Pan HLA-DR epitopes) amino acid sequence. Besides, V6 also interacts with a maximum number of MHC molecules and the TLR4/MD2 (Toll-like receptor 4/Myeloid differentiation factor 2) complex confirmed by docking and molecular dynamics simulation studies.

SIGNIFICANCE: The knowledge harnessed from the current study can help improve the current treatment regimens or in an event of an outbreak and propel further related studies.}, } @article {pmid32151246, year = {2020}, author = {Chen, M and Xu, CY and Wang, X and Ren, CY and Ding, J and Li, L}, title = {Comparative genomics analysis of c-di-GMP metabolism and regulation in Microcystis aeruginosa.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {217}, pmid = {32151246}, issn = {1471-2164}, support = {21577081//Natural Science Foundation of China/ ; }, mesh = {Computational Biology ; Cyclic GMP/*metabolism ; Escherichia coli Proteins/genetics/metabolism ; Gene Expression Regulation, Bacterial/*genetics ; Genomics ; Microcystis/classification/genetics/*metabolism ; Phosphorus-Oxygen Lyases/genetics/metabolism ; Phylogeny ; Protein Domains ; Signal Transduction ; }, abstract = {BACKGROUND: Cyanobacteria are of special concern because they proliferate in eutrophic water bodies worldwide and affect water quality. As an ancient photosynthetic microorganism, cyanobacteria can survive in ecologically diverse habitats because of their capacity to rapidly respond to environmental changes through a web of complex signaling networks, including using second messengers to regulate physiology or metabolism. A ubiquitous second messenger, bis-(3',5')-cyclic-dimeric-guanosine monophosphate (c-di-GMP), has been found to regulate essential behaviors in a few cyanobacteria but not Microcystis, which are the most dominant species in cyanobacterial blooms. In this study, comparative genomics analysis was performed to explore the genomic basis of c-di-GMP signaling in Microcystis aeruginosa.

RESULTS: Proteins involved in c-di-GMP metabolism and regulation, such as diguanylate cyclases, phosphodiesterases, and PilZ-containing proteins, were encoded in M. aeruginosa genomes. However, the number of identified protein domains involved in c-di-GMP signaling was not proportional to the size of M. aeruginosa genomes (4.97 Mb in average). Pan-genome analysis showed that genes involved in c-di-GMP metabolism and regulation are conservative in M. aeruginosa strains. Phylogenetic analysis showed good congruence between the two types of phylogenetic trees based on 31 highly conserved protein-coding genes and sensor domain-coding genes. Propensity for gene loss analysis revealed that most of genes involved in c-di-GMP signaling are stable in M. aeruginosa strains. Moreover, bioinformatics and structure analysis of c-di-GMP signal-related GGDEF and EAL domains revealed that they all possess essential conserved amino acid residues that bind the substrate. In addition, it was also found that all selected M. aeruginosa genomes encode PilZ domain containing proteins.

CONCLUSIONS: Comparative genomics analysis of c-di-GMP metabolism and regulation in M. aeruginosa strains helped elucidating the genetic basis of c-di-GMP signaling pathways in M. aeruginosa. Knowledge of c-di-GMP metabolism and relevant signal regulatory processes in cyanobacteria can enhance our understanding of their adaptability to various environments and bloom-forming mechanism.}, } @article {pmid32150870, year = {2020}, author = {Bhaskarla, V and Zinta, G and Ford, R and Jain, M and Varshney, RK and Mantri, N}, title = {Comparative Root Transcriptomics Provide Insights into Drought Adaptation Strategies in Chickpea (Cicer arietinum L.).}, journal = {International journal of molecular sciences}, volume = {21}, number = {5}, pages = {}, pmid = {32150870}, issn = {1422-0067}, support = {GCF010013//Australia-India Strategic Research Fund/ ; }, mesh = {*Adaptation, Physiological ; Cicer/*genetics/physiology ; Computational Biology ; Droughts ; Gene Expression Profiling ; *Gene Expression Regulation, Plant ; Plant Proteins/*genetics ; Plant Roots/*genetics/physiology ; *Stress, Physiological ; *Transcriptome ; }, abstract = {Drought adversely affects crop production across the globe. The root system immensely contributes to water management and the adaptability of plants to drought stress. In this study, drought-induced phenotypic and transcriptomic responses of two contrasting chickpea (Cicer arietinum L.) genotypes were compared at the vegetative, reproductive transition, and reproductive stages. At the vegetative stage, drought-tolerant genotype maintained higher root biomass, length, and surface area under drought stress as compared to sensitive genotype. However, at the reproductive stage, root length and surface area of tolerant genotype was lower but displayed higher root diameter than sensitive genotype. The shoot biomass of tolerant genotype was overall higher than the sensitive genotype under drought stress. 
RNA-seq analysis identified genotype- and developmental-stage specific differentially expressed genes (DEGs) in response to drought stress. At the vegetative stage, a total of 2161 and 1873 DEGs, and at reproductive stage 4109 and 3772 DEGs, were identified in the tolerant and sensitive genotypes, respectively. Gene ontology (GO) analysis revealed enrichment of biological categories related to cellular process, metabolic process, response to stimulus, response to abiotic stress, and response to hormones. Interestingly, the expression of stress-responsive transcription factors, kinases, ROS signaling and scavenging, transporters, root nodulation, and oxylipin biosynthesis genes were robustly upregulated in the tolerant genotype, possibly contributing to drought adaptation. Furthermore, activation/repression of hormone signaling and biosynthesis genes was observed. Overall, this study sheds new insights on drought tolerance mechanisms operating in roots with broader implications for chickpea improvement.}, } @article {pmid32149071, year = {2020}, author = {Aaltonen, K and Kant, R and Eklund, M and Raunio-Saarnisto, M and Paulin, L and Vapalahti, O and Grönthal, T and Rantala, M and Sironen, T}, title = {Streptococcus halichoeri: Comparative Genomics of an Emerging Pathogen.}, journal = {International journal of genomics}, volume = {2020}, number = {}, pages = {8708305}, pmid = {32149071}, issn = {2314-4378}, abstract = {Streptococcus halichoeri is an emerging pathogen with a variety of host species and zoonotic potential. It has been isolated from grey seals and other marine mammals as well as from human infections. Beginning in 2010, two concurrent epidemics were identified in Finland, in fur animals and domestic dogs, respectively. The fur animals suffered from a new disease fur animal epidemic necrotic pyoderma (FENP) and the dogs presented with ear infections with poor treatment response. S. 
halichoeri was isolated in both studies, albeit among other pathogens, indicating a possible role in the disease etiologies. The aim was to find a possible common origin of the fur animal and dog isolates and study the virulence factors to assess pathogenic potential. Isolates from seal, human, dogs, and fur animals were obtained for comparison. The whole genomes were sequenced from 20 different strains using the Illumina MiSeq platform and annotated using an automatic annotation pipeline RAST. The core and pangenomes were formed by comparing the genomes against each other in an all-against-all comparison. A phylogenetic tree was constructed using the genes of the core genome. Virulence factors were assessed using the Virulence Factor Database (VFDB) concentrating on the previously confirmed streptococcal factors. A core genome was formed which encompassed approximately half of the genes in Streptococcus halichoeri. The resulting core was nearly saturated and would not change significantly by adding more genomes. The remaining genes formed the pangenome which was highly variable and would still evolve after additional genomes. The results highlight the great adaptability of this bacterium possibly explaining the ease at which it switches hosts and environments. Virulence factors were also analyzed and were found primarily in the core genome. 
They represented many classes and functions, but the largest single category was adhesins which again supports the marine origin of this species.}, } @article {pmid32138767, year = {2020}, author = {Moustafa, AM and Planet, PJ}, title = {WhatsGNU: a tool for identifying proteomic novelty.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {58}, pmid = {32138767}, issn = {1474-760X}, support = {R01 AI137526/AI/NIAID NIH HHS/United States ; 1R01AI137526-01/NH/NIH HHS/United States ; 1K08AI101005/NH/NIH HHS/United States ; }, mesh = {Bacterial Proteins/*genetics ; Gene Frequency ; Genome, Bacterial ; Mycobacterium tuberculosis/genetics ; Proteomics/*methods ; Pseudomonas aeruginosa/genetics ; Salmonella enterica/genetics ; *Software ; Staphylococcus aureus/genetics ; }, abstract = {To understand diversity in enormous collections of genome sequences, we need computationally scalable tools that can quickly contextualize individual genomes based on their similarities and identify features of each genome that make them unique. We present WhatsGNU, a tool based on exact match proteomic compression that, in seconds, classifies any new genome and provides a detailed report of protein alleles that may have novel functional differences. We use this technique to characterize the total allelic diversity (panallelome) of Salmonella enterica, Mycobacterium tuberculosis, Pseudomonas aeruginosa, and Staphylococcus aureus. It could be extended to others. 
WhatsGNU is available from https://github.com/ahmedmagds/WhatsGNU.}, } @article {pmid32132208, year = {2020}, author = {Seif, Y and Choudhary, KS and Hefner, Y and Anand, A and Yang, L and Palsson, BO}, title = {Metabolic and genetic basis for auxotrophies in Gram-negative species.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {117}, number = {11}, pages = {6264--6273}, pmid = {32132208}, issn = {1091-6490}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Algorithms ; Computer Simulation ; Energy Metabolism/*genetics ; Genome, Bacterial/*physiology ; Genomics ; Gram-Negative Bacteria/*physiology ; Host Microbial Interactions/*physiology ; Interspersed Repetitive Sequences/genetics ; Metabolic Networks and Pathways/genetics ; Metabolomics ; *Models, Biological ; Nutrients/metabolism ; }, abstract = {Auxotrophies constrain the interactions of bacteria with their environment, but are often difficult to identify. Here, we develop an algorithm (AuxoFind) using genome-scale metabolic reconstruction to predict auxotrophies and apply it to a series of available genome sequences of over 1,300 Gram-negative strains. We identify 54 auxotrophs, along with the corresponding metabolic and genetic basis, using a pangenome approach, and highlight auxotrophies conferring a fitness advantage in vivo. We show that the metabolic basis of auxotrophy is species-dependent and varies with 1) pathway structure, 2) enzyme promiscuity, and 3) network redundancy. Various levels of complexity constitute the genetic basis, including 1) deleterious single-nucleotide polymorphisms (SNPs), in-frame indels, and deletions; 2) single/multigene deletion; and 3) movement of mobile genetic elements (including prophages) combined with genomic rearrangements. 
Fourteen out of 19 predictions agree with experimental evidence, with the remaining cases highlighting shortcomings of sequencing, assembly, annotation, and reconstruction that prevent predictions of auxotrophies. We thus develop a framework to identify the metabolic and genetic basis for auxotrophies in Gram-negatives.}, } @article {pmid32131884, year = {2020}, author = {Jin, Y and Zhou, J and Zhou, J and Hu, M and Zhang, Q and Kong, N and Ren, H and Liang, L and Yue, J}, title = {Genome-based classification of Burkholderia cepacia complex provides new insight into its taxonomic status.}, journal = {Biology direct}, volume = {15}, number = {1}, pages = {6}, pmid = {32131884}, issn = {1745-6150}, mesh = {Bacterial Proteins/analysis ; Burkholderia cepacia complex/*classification/genetics ; *Genome, Bacterial ; Multilocus Sequence Typing ; *Phylogeny ; RNA, Bacterial/analysis ; RNA, Ribosomal, 16S/analysis ; Rec A Recombinases/analysis ; }, abstract = {BACKGROUND: Accurate classification of different Burkholderia cepacia complex (BCC) species is essential for therapy, prognosis assessment and research. The taxonomic status of BCC remains problematic and an improved knowledge about the classification of BCC is in particular needed.

METHODS: We compared phylogenetic trees of BCC based on 16S rRNA, recA, hisA and MLSA (multilocus sequence analysis). Using the available whole genome sequences of BCC, we inferred a species tree based on estimated single-copy orthologous genes and demarcated species of BCC using dDDH/ANI clustering.

RESULTS: We showed that 16S rRNA, recA, hisA and MLSA have limited resolutions in the taxonomic study of closely related bacteria such as BCC. Our estimated species tree and dDDH/ANI clustering clearly separated 116 BCC strains into 36 clusters. With the appropriate reclassification of misidentified strains, these clusters corresponded to 22 known species as well as 14 putative novel species.

CONCLUSIONS: This is the first large-scale and systematic study of the taxonomic status of the BCC and could contribute to further insights into BCC taxonomy. Our study suggested that conjunctive use of core phylogeny based on single-copy orthologous genes, as well as pangenome-based dDDH/ANI clustering would provide a preferable framework for demarcating closely related species.

REVIEWER: This article was reviewed by Dr. Xianwen Ren.}, } @article {pmid32128256, year = {2020}, author = {Thukral, A and Ross, K and Hansen, C and Phanse, Y and Narasimhan, B and Steinberg, H and Talaat, AM}, title = {A single dose polyanhydride-based nanovaccine against paratuberculosis infection.}, journal = {NPJ vaccines}, volume = {5}, number = {1}, pages = {15}, pmid = {32128256}, issn = {2059-0105}, abstract = {Mycobacterium avium subsp. paratuberculosis (M. paratuberculosis) causes Johne's disease in ruminants and is characterized by chronic gastroenteritis leading to heavy economic losses to the dairy industry worldwide. The currently available vaccine (inactivated bacterin in oil base) is not effective in preventing pathogen shedding and is rarely used to control Johne's disease in dairy herds. To develop a better vaccine that can prevent the spread of Johne's disease, we utilized polyanhydride nanoparticles (PAN) to encapsulate mycobacterial antigens composed of whole cell lysate (PAN-Lysate) and culture filtrate (PAN-Cf) of M. paratuberculosis. These nanoparticle-based vaccines (i.e., nanovaccines) were well tolerated in mice causing no inflammatory lesions at the site of injection. Immunological assays demonstrated a substantial increase in the levels of antigen-specific T cell responses post-vaccination in the PAN-Cf vaccinated group as indicated by high percentages of triple cytokine (IFN-γ, IL-2, TNF-α) producing CD8[+] T cells. Following challenge, animals vaccinated with PAN-Cf continued to produce significant levels of double (IFN-γ, TNF-α) and single cytokine (IFN-γ) secreting CD8[+] T cells compared with animals vaccinated with an inactivated vaccine. A significant reduction in bacterial load was observed in multiple organs of animals vaccinated with PAN-Cf, which is a clear indication of protection. 
Overall, the use of polyanhydride nanovaccines resulted in development of protective and sustained immunity against Johne's disease, an approach that could be applied to counter other intracellular pathogens.}, } @article {pmid32119670, year = {2020}, author = {Hyun, JC and Kavvas, ES and Monk, JM and Palsson, BO}, title = {Machine learning with random subspace ensembles identifies antimicrobial resistance determinants from pan-genomes of three pathogens.}, journal = {PLoS computational biology}, volume = {16}, number = {3}, pages = {e1007608}, pmid = {32119670}, issn = {1553-7358}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Anti-Infective Agents ; Computational Biology/*methods ; Drug Resistance, Bacterial/*genetics ; Drug Resistance, Multiple, Bacterial/drug effects ; Escherichia coli/genetics ; Fluoroquinolones/pharmacology ; Genome, Bacterial/*genetics ; Humans ; Machine Learning ; Microbial Sensitivity Tests ; Pseudomonas aeruginosa/genetics ; Staphylococcus aureus/genetics ; Whole Genome Sequencing/methods ; }, abstract = {The evolution of antimicrobial resistance (AMR) poses a persistent threat to global public health. Sequencing efforts have already yielded genome sequences for thousands of resistant microbial isolates and require robust computational tools to systematically elucidate the genetic basis for AMR. Here, we present a generalizable machine learning workflow for identifying genetic features driving AMR based on constructing reference strain-agnostic pan-genomes and training random subspace ensembles (RSEs). This workflow was applied to the resistance profiles of 14 antimicrobials across three urgent threat pathogens encompassing 288 Staphylococcus aureus, 456 Pseudomonas aeruginosa, and 1588 Escherichia coli genomes. 
We find that feature selection by RSE detects known AMR associations more reliably than common statistical tests and previous ensemble approaches, identifying a total of 45 known AMR-conferring genes and alleles across the three organisms, as well as 25 candidate associations backed by domain-level annotations. Furthermore, we find that results from the RSE approach are consistent with existing understanding of fluoroquinolone (FQ) resistance due to mutations in the main drug targets, gyrA and parC, in all three organisms, and suggest the mutational landscape of those genes with respect to FQ resistance is simple. As larger datasets become available, we expect this approach to more reliably predict AMR determinants for a wider range of microbial pathogens.}, } @article {pmid32108566, year = {2020}, author = {Tekedar, HC and Blom, J and Kalindamar, S and Nho, S and Karsi, A and Lawrence, ML}, title = {Comparative genomics of the fish pathogens Edwardsiella ictaluri 93-146 and Edwardsiella piscicida C07-087.}, journal = {Microbial genomics}, volume = {6}, number = {2}, pages = {}, pmid = {32108566}, issn = {2057-5858}, mesh = {Animals ; Catfishes/microbiology ; Edwardsiella/*genetics/isolation & purification/metabolism ; Edwardsiella ictaluri/*genetics/isolation & purification/metabolism ; Enterobacteriaceae Infections/microbiology/*veterinary ; Fish Diseases/*microbiology ; *Genome, Bacterial ; Genomics ; Phylogeny ; }, abstract = {Edwardsiella ictaluri and Edwardsiella piscicida are important fish pathogens affecting cultured and wild fish worldwide. To investigate the genome-level differences and similarities between catfish-adapted strains in these two species, the complete E. ictaluri 93-146 and E. piscicida C07-087 genomes were evaluated by applying comparative genomics analysis. All available complete (10) and non-complete (19) genomes from five Edwardsiella species were also included in a systematic analysis. 
Average nucleotide identity and core-genome phylogenetic tree analyses indicated that the five Edwardsiella species were separated from each other. Pan-/core-genome analyses for the 29 strains from the five species showed that genus Edwardsiella members have 9474 genes in their pan genome, while the core genome consists of 1421 genes. Orthology cluster analysis showed that E. ictaluri and E. piscicida genomes have the greatest number of shared clusters. However, E. ictaluri and E. piscicida also have unique features; for example, the E. ictaluri genome encodes urease enzymes and cytochrome o ubiquinol oxidase subunits, whereas E. piscicida genomes encode tetrathionate reductase operons, capsular polysaccharide synthesis enzymes and vibrioferrin-related genes. Additionally, we report for what is believed to be the first time that E. ictaluri 93-146 and three other E. ictaluri genomes encode a type IV secretion system (T4SS), whereas none of the E. piscicida genomes encode this system. Additionally, the E. piscicida C07-087 genome encodes two different type VI secretion systems. E. ictaluri genomes tend to encode more insertion elements, phage regions and genomic islands than E. piscicida. We speculate that the T4SS could contribute to the increased number of mobilome elements in E. ictaluri compared to E. piscicida. Two of the E. piscicida genomes encode full CRISPR-Cas regions, whereas none of the E. ictaluri genomes encode Cas proteins. Overall, comparison of the E. ictaluri and E. piscicida genomes reveals unique features and provides new insights on pathogenicity that may reflect the host adaptation of the two species.}, } @article {pmid32106516, year = {2020}, author = {Li, Q and Cooper, RE and Wegner, CE and Küsel, K}, title = {Molecular Mechanisms Underpinning Aggregation in Acidiphilium sp. 
C61 Isolated from Iron-Rich Pelagic Aggregates.}, journal = {Microorganisms}, volume = {8}, number = {3}, pages = {}, pmid = {32106516}, issn = {2076-2607}, support = {Jena School for Microbial Communication (JSMC) graduate school//Deutsche Forschungsgemeinschaft/ ; SFB 1127 ChemBioSys//Deutsche Forschungsgemeinschaft/ ; German Centre for Integrative Biodiversity Research (iDiv) Halle-Jena-Leipzig//Deutsche Forschungsgemeinschaft/ ; Jena School for Microbial Communication (JSMC) graduate school//Carl-Zeiss-Stiftung/ ; }, abstract = {Iron-rich pelagic aggregates (iron snow) are hot spots for microbial interactions. Using iron snow isolates, we previously demonstrated that the iron-oxidizer Acidithrix sp. C25 triggers Acidiphilium sp. C61 aggregation by producing the infochemical 2-phenethylamine (PEA). Here, we showed slightly enhanced aggregate formation in the presence of PEA on different Acidiphilium spp. but not other iron-snow microorganisms, including Acidocella sp. C78 and Ferrovum sp. PN-J47. Next, we sequenced the Acidiphilium sp. C61 genome to reconstruct its metabolic potential. Pangenome analyses of Acidiphilium spp. genomes revealed the core genome contained 65 gene clusters associated with aggregation, including autoaggregation, motility, and biofilm formation. Screening the Acidiphilium sp. C61 genome revealed the presence of autotransporter, flagellar, and extracellular polymeric substances (EPS) production genes. RNA-seq analyses of Acidiphilium sp. C61 incubations (+/- 10 µM PEA) indicated genes involved in energy production, respiration, and genetic processing were the most upregulated differentially expressed genes in the presence of PEA. Additionally, genes involved in flagellar basal body synthesis were highly upregulated, whereas the expression pattern of biofilm formation-related genes was inconclusive. Our data shows aggregation is a common trait among Acidiphilium spp. 
and PEA stimulates the central cellular metabolism, potentially advantageous in aggregates rapidly falling through the water column.}, } @article {pmid32103378, year = {2020}, author = {González-Castillo, A and Enciso-Ibarra, J and Gomez-Gil, B}, title = {Genomic taxonomy of the Mediterranei clade of the genus Vibrio (Gammaproteobacteria).}, journal = {Antonie van Leeuwenhoek}, volume = {113}, number = {6}, pages = {851--859}, doi = {10.1007/s10482-020-01396-4}, pmid = {32103378}, issn = {1572-9699}, support = {CB-2009-01 132328//CONACYT/ ; }, mesh = {Aquatic Organisms/microbiology ; DNA, Bacterial/genetics ; Genome, Bacterial ; Geologic Sediments/microbiology ; Multilocus Sequence Typing ; Phylogeny ; Seawater/microbiology ; *Vibrio/classification/genetics/isolation & purification ; }, abstract = {The first genomic study of Mediterranei clade using five type strains (V. mediterranei, V. maritimus, V. variabilis, V. thalassae, and V. barjaei) and fourteen reference strains isolated from marine organisms, seawater, water and sediments of the sea was performed. These bacterial strains were characterised by means of a polyphasic approach comprising 16S rRNA gene, multilocus sequence analysis (MLSA) of 139 single-copy genes, the DNA G + C content, ANI, and in silico phenotypic characterisation. We found that the species of the Mediterranei clade formed two separate clusters based in 16S rRNA gene sequence similarity, MLSA, OrthoANI, and Codon and Amino Acid usage. The Mediterranei clade species showed values between 76 and 95% for ANIb, 84 and 95% for ANIm. The core genome consisted of 2057 gene families and the pan-genome of 13,094 gene families. Based on the genomic analyses performed, the Mediterranei clade can be divided in two clusters, one with the strains of V. maritimus, V. variabilis and two potential new species, and the other cluster with the strains of V. mediterranei, V. thalassae, and V. 
barjaei.}, } @article {pmid32100706, year = {2020}, author = {Whelan, FJ and Rusilowicz, M and McInerney, JO}, title = {Coinfinder: detecting significant associations and dissociations in pangenomes.}, journal = {Microbial genomics}, volume = {6}, number = {3}, pages = {}, pmid = {32100706}, issn = {2057-5858}, support = {BB/N018044/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Computational Biology ; *Genome ; Phylogeny ; *Software ; Streptococcus pneumoniae/genetics ; }, abstract = {The accessory genes of prokaryote and eukaryote pangenomes accumulate by horizontal gene transfer, differential gene loss, and the effects of selection and drift. We have developed Coinfinder, a software program that assesses whether sets of homologous genes (gene families) in pangenomes associate or dissociate with each other (i.e. are 'coincident') more often than would be expected by chance. Coinfinder employs a user-supplied phylogenetic tree in order to assess the lineage-dependence (i.e. the phylogenetic distribution) of each accessory gene, allowing Coinfinder to focus on coincident gene pairs whose joint presence is not simply because they happened to appear in the same clade, but rather that they tend to appear together more often than expected across the phylogeny. 
Coinfinder is implemented in C++, Python3 and R and is freely available under the GNU license from https://github.com/fwhelan/coinfinder.}, } @article {pmid32086304, year = {2020}, author = {Khan, AMAM and Hauk, VJ and Ibrahim, M and Raffel, TR and Blumer-Schuette, SE}, title = {Caldicellulosiruptor bescii Adheres to Polysaccharides via a Type IV Pilin-Dependent Mechanism.}, journal = {Applied and environmental microbiology}, volume = {86}, number = {9}, pages = {}, pmid = {32086304}, issn = {1098-5336}, mesh = {*Bacterial Adhesion ; Caldicellulosiruptor ; Fimbriae Proteins/*metabolism ; Firmicutes/metabolism/*physiology ; Polysaccharides, Bacterial/*metabolism ; }, abstract = {Biological hydrolysis of cellulose above 70°C involves microorganisms that secrete free enzymes and deploy separate protein systems to adhere to their substrate. Strongly cellulolytic Caldicellulosiruptor bescii is one such extreme thermophile, which deploys modular, multifunctional carbohydrate-acting enzymes to deconstruct plant biomass. Additionally, C. bescii also encodes noncatalytic carbohydrate binding proteins, which likely evolved as a mechanism to compete against other heterotrophs in carbon-limited biotopes that these bacteria inhabit. Analysis of the Caldicellulosiruptor pangenome identified a type IV pilus (T4P) locus encoded upstream of the tāpirins, that is encoded by all Caldicellulosiruptor species. In this study, we sought to determine if the C. bescii T4P plays a role in attachment to plant polysaccharides. The major C. bescii pilin (CbPilA) was identified by the presence of pilin-like protein domains, paired with transcriptomics and proteomics data. Using immuno-dot blots, we determined that the plant polysaccharide xylan induced production of CbPilA 10- to 14-fold higher than glucomannan or xylose. Furthermore, we are able to demonstrate that recombinant CbPilA directly interacts with xylan and cellulose at elevated temperatures. 
Localization of CbPilA at the cell surface was confirmed by immunofluorescence microscopy. Lastly, a direct role for CbPilA in cell adhesion was demonstrated using recombinant CbPilA or anti-CbPilA antibodies to reduce C. bescii cell adhesion to xylan and crystalline cellulose up to 4.5- and 2-fold, respectively. Based on these observations, we propose that CbPilA and, by extension, the T4P play a role in Caldicellulosiruptor cell attachment to plant biomass. IMPORTANCE: Most microorganisms are capable of attaching to surfaces in order to persist in their environment. Type IV (T4) pili produced by certain mesophilic Firmicutes promote adherence; however, a role for T4 pili encoded by thermophilic members of this phylum has yet to be demonstrated. Prior comparative genomics analyses identified a T4 pilus locus possessed by an extremely thermophilic genus within the Firmicutes. Here, we demonstrate that attachment to plant biomass-related carbohydrates by strongly cellulolytic Caldicellulosiruptor bescii is mediated by T4 pilins. Surprisingly, xylan but not cellulose induced expression of the major T4 pilin. Regardless, the C. bescii T4 pilin interacts with both polysaccharides at high temperatures and is located to the cell surface, where it is directly involved in C. bescii attachment. Adherence to polysaccharides is likely key to survival in environments where carbon sources are limiting, allowing C. 
bescii to compete against other plant-degrading microorganisms.}, } @article {pmid32076431, year = {2020}, author = {Romano, I and Ventorino, V and Pepe, O}, title = {Effectiveness of Plant Beneficial Microbes: Overview of the Methodological Approaches for the Assessment of Root Colonization and Persistence.}, journal = {Frontiers in plant science}, volume = {11}, number = {}, pages = {6}, pmid = {32076431}, issn = {1664-462X}, abstract = {Issues concerning the use of harmful chemical fertilizers and pesticides that have large negative impacts on environmental and human health have generated increasing interest in the use of beneficial microorganisms for the development of sustainable agri-food systems. A successful microbial inoculant has to colonize the root system, establish a positive interaction and persist in the environment in competition with native microorganisms living in the soil through rhizocompetence traits. Currently, several approaches based on culture-dependent, microscopic and molecular methods have been developed to follow bioinoculants in the soil and plant surface over time. Although culture-dependent methods are commonly used to estimate the persistence of bioinoculants, it is difficult to differentiate inoculated organisms from native populations based on morphological characteristics. Therefore, these methods should be used complementary to culture-independent approaches. Microscopy-based techniques (bright-field, electron and fluorescence microscopy) allow to obtain a picture of microbial colonization outside and inside plant tissues also at high resolution, but it is not possible to always distinguish living cells from dead cells by direct observation as well as distinguish bioinoculants from indigenous microbial populations living in soils. 
In addition, the development of metagenomic techniques, including the use of DNA probes, PCR-based methods, next-generation sequencing, whole-genome sequencing and pangenome methods, provides a complementary approach useful to understand plant-soil-microbe interactions. However, to ensure good results in microbiological analysis, the first fundamental prerequisite is correct soil sampling and sample preparation for the different methodological approaches that will be assayed. Here, we provide an overview of the advantages and limitations of the currently used methods and new methodological approaches that could be developed to assess the presence, plant colonization and soil persistence of bioinoculants in the rhizosphere. We further discuss the possibility of integrating multidisciplinary approaches to examine the variations in microbial communities after inoculation and to track the inoculated microbial strains.}, } @article {pmid32074720, year = {2020}, author = {Yu, YY and Wei, CC}, title = {[HUPAN promotes striding across of biomedical research from human genome to human pan-genome].}, journal = {Zhonghua bing li xue za zhi = Chinese journal of pathology}, volume = {49}, number = {2}, pages = {105--107}, doi = {10.3760/cma.j.issn.0529-5807.2020.02.001}, pmid = {32074720}, issn = {0529-5807}, mesh = {*Biomedical Research ; *Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; }, } @article {pmid32074104, year = {2020}, author = {O'Rourke, A and Lee, MD and Nierman, WC and Everroad, RC and Dupont, CL}, title = {Genomic and phenotypic characterization of Burkholderia isolates from the potable water system of the International Space Station.}, journal = {PloS one}, volume = {15}, number = {2}, pages = {e0227152}, pmid = {32074104}, issn = {1932-6203}, mesh = {*Burkholderia/classification/isolation & purification/pathogenicity ; Burkholderia Infections/*microbiology ; *Burkholderia cepacia/classification/isolation & 
purification/pathogenicity ; Drinking Water/*microbiology ; *Phylogeny ; *Spacecraft ; Virulence ; }, abstract = {The opportunistic pathogens Burkholderia cepacia and Burkholderia contaminans, both genomovars of the Burkholderia cepacia complex (BCC), are frequently cultured from the potable water dispenser (PWD) of the International Space Station (ISS). Here, we sequenced the genomes and conducted phenotypic assays to characterize these Burkholderia isolates. All recovered isolates of the two species fall within monophyletic clades based on phylogenomic trees of conserved single-copy core genes. Within species, the ISS-derived isolates all demonstrate greater than 99% average nucleotide identity (with 95-99% of genomes aligning) and share around 90% of the identified gene clusters from a pangenomic analysis-suggesting that the two groups are each composed of highly similar genomic lineages and their members may have all stemmed from the same two founding populations. The differences that can be observed between the recovered isolates at the pangenomic level are primarily located within putative plasmids. Phenotypically, macrophage intracellularization and lysis occurred at generally similar rates between all ISS-derived isolates, as well as with their respective type-terrestrial strain references. All ISS-derived isolates exhibited antibiotic sensitivity similar to that of the terrestrial reference strains, and minimal differences between isolates were observed. With a few exceptions, biofilm formation rates were generally consistent across each species. And lastly, though isolation date does not necessarily provide any insight into how long a given isolate had been aboard the ISS, none of the assayed physiology correlated with either date of isolation or distances based on nucleotide variation. 
Overall, we find that while the populations of Burkholderia present in the ISS PWS each maintain virulence, they are likely not more virulent than those that might be encountered on planet and remain susceptible to clinically used antibiotics.}, } @article {pmid32066773, year = {2020}, author = {Iversen, KH and Rasmussen, LH and Al-Nakeeb, K and Armenteros, JJA and Jensen, CS and Dargis, R and Lukjancenko, O and Justesen, US and Moser, C and Rosenvinge, FS and Nielsen, XC and Christensen, JJ and Rasmussen, S}, title = {Similar genomic patterns of clinical infective endocarditis and oral isolates of Streptococcus sanguinis and Streptococcus gordonii.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {2728}, pmid = {32066773}, issn = {2045-2322}, mesh = {Endocarditis/*microbiology/pathology ; Endocarditis, Bacterial/*microbiology/pathology ; Endocardium/microbiology/pathology ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Humans ; Machine Learning ; Mouth/microbiology/pathology ; Phylogeny ; Streptococcal Infections/*microbiology/pathology ; Streptococcus gordonii/classification/*genetics/isolation & purification/pathogenicity ; Streptococcus sanguis/classification/*genetics/isolation & purification/pathogenicity ; Symbiosis/physiology ; Virulence ; Virulence Factors/classification/*genetics/metabolism ; }, abstract = {Streptococcus gordonii and Streptococcus sanguinis belong to the Mitis group streptococci, which mostly are commensals in the human oral cavity. Though they are oral commensals, they can escape their niche and cause infective endocarditis, a severe infection with high mortality. Several virulence factors important for the development of infective endocarditis have been described in these two species. However, the background for how the commensal bacteria, in some cases, become pathogenic is still not known. 
To gain a greater understanding of the mechanisms of the pathogenic potential, we performed a comparative analysis of 38 blood culture strains, S. sanguinis (n = 20) and S. gordonii (n = 18) from patients with verified infective endocarditis, along with 21 publicly available oral isolates from healthy individuals, S. sanguinis (n = 12) and S. gordonii (n = 9). Using whole genome sequencing data of the 59 streptococci genomes, functional profiles were constructed, using protein domain predictions based on the translated genes. These functional profiles were used for clustering, phylogenetics and machine learning. A clear separation could be made between the two species. No clear differences between oral isolates and clinical infective endocarditis isolates were found in any of the 675 translated core-genes. Additionally, random forest-based machine learning and clustering of the pan-genome data as well as amino acid variations in the core-genome could not separate the clinical and oral isolates. A total of 151 different virulence genes was identified in the 59 genomes. Among these homologs of genes important for adhesion and evasion of the immune system were found in all of the strains. Based on the functional profiles and virulence gene content of the genomes, we believe that all analysed strains had the ability to become pathogenic.}, } @article {pmid32065216, year = {2021}, author = {Wu, H and Wang, D and Gao, F}, title = {Toward a high-quality pan-genome landscape of Bacillus subtilis by removal of confounding strains.}, journal = {Briefings in bioinformatics}, volume = {22}, number = {2}, pages = {1951--1971}, doi = {10.1093/bib/bbaa013}, pmid = {32065216}, issn = {1477-4054}, mesh = {Bacillus subtilis/classification/*genetics ; Chromosomes, Bacterial ; Genetic Variation ; *Genome, Bacterial ; Phylogeny ; Pseudogenes ; }, abstract = {Pan-genome analysis is widely used to study the evolution and genetic diversity of species, particularly in bacteria. 
However, the impact of strain selection on the outcome of pan-genome analysis is poorly understood. Furthermore, a standard protocol to ensure high-quality pan-genome results is lacking. In this study, we carried out a series of pan-genome analyses of different strain sets of Bacillus subtilis to understand the impact of various strains on the performance and output quality of pan-genome analyses. Consequently, we found that the results obtained by pan-genome analyses of B. subtilis can be influenced by the inclusion of incorrectly classified Bacillus subspecies strains, phylogenetically distinct strains, engineered genome-reduced strains, chimeric strains, strains with a large number of unique genes or a large proportion of pseudogenes, and multiple clonal strains. Since the presence of these confounding strains can seriously affect the quality and true landscape of the pan-genome, we should remove these deviations in the process of pan-genome analyses. Our study provides new insights into the removal of biases from confounding strains in pan-genome analyses at the beginning of data processing, which enables the achievement of a closer representation of a high-quality pan-genome landscape of B. subtilis that better reflects the performance and credibility of the B. subtilis pan-genome. This procedure could be added as an important quality control step in pan-genome analyses for improving the efficiency of analyses, and ultimately contributing to a better understanding of genome function, evolution and genome-reduction strategies for B. 
subtilis in the future.}, } @article {pmid32054757, year = {2020}, author = {Laflamme, B and Dillon, MM and Martel, A and Almeida, RND and Desveaux, D and Guttman, DS}, title = {The pan-genome effector-triggered immunity landscape of a host-pathogen interaction.}, journal = {Science (New York, N.Y.)}, volume = {367}, number = {6479}, pages = {763--768}, doi = {10.1126/science.aax4079}, pmid = {32054757}, issn = {1095-9203}, mesh = {Arabidopsis/genetics/*immunology/*microbiology ; Arabidopsis Proteins/genetics/physiology ; Bacterial Proteins/genetics/immunology ; Carrier Proteins/genetics/physiology ; Genome, Plant ; Host-Pathogen Interactions/genetics/*immunology ; Immunity, Innate/*genetics ; Plant Diseases/genetics/*immunology/*microbiology ; Plant Immunity/*genetics ; Pseudomonas syringae/genetics/*pathogenicity ; }, abstract = {Effector-triggered immunity (ETI), induced by host immune receptors in response to microbial effectors, protects plants against virulent pathogens. However, a systematic study of ETI prevalence against species-wide pathogen diversity is lacking. We constructed the Pseudomonas syringae Type III Effector Compendium (PsyTEC) to reduce the pan-genome complexity of 5127 unique effector proteins, distributed among 70 families from 494 strains, to 529 representative alleles. We screened PsyTEC on the model plant Arabidopsis thaliana and identified 59 ETI-eliciting alleles (11.2%) from 19 families (27.1%), with orthologs distributed among 96.8% of P. syringae strains. 
We also identified two previously undescribed host immune receptors, including CAR1, which recognizes the conserved effectors AvrE and HopAA1, and found that 94.7% of strains harbor alleles predicted to be recognized by either CAR1 or ZAR1.}, } @article {pmid32054452, year = {2020}, author = {Liao, F and Mo, Z and Gu, W and Xu, W and Fu, X and Zhang, Y}, title = {A comparative genomic analysis between methicillin-resistant Staphylococcus aureus strains of hospital acquired and community infections in Yunnan province of China.}, journal = {BMC infectious diseases}, volume = {20}, number = {1}, pages = {137}, pmid = {32054452}, issn = {1471-2334}, support = {2017ZX10103010//National Sci-Tech key project/ ; 2019LCZXKF-HX01//open subject of the first people's hospital of Yunnan province/ ; }, mesh = {Adolescent ; Adult ; Anti-Bacterial Agents/adverse effects/therapeutic use ; Child ; Child, Preschool ; China/epidemiology ; Community-Acquired Infections/microbiology ; Cross Infection/*microbiology ; Female ; Food Microbiology ; Genome, Bacterial/genetics ; Genomics/*methods ; Genotype ; Humans ; Male ; Methicillin/adverse effects/therapeutic use ; Methicillin-Resistant Staphylococcus aureus/*genetics/isolation & purification ; Microbial Sensitivity Tests ; Phylogeny ; Polymorphism, Single Nucleotide/genetics ; Staphylococcal Infections/drug therapy/*epidemiology/*genetics/microbiology ; Whole Genome Sequencing ; Young Adult ; }, abstract = {BACKGROUND: Currently, Staphylococcus aureus is one of the most important pathogens worldwide, especially for methicillin-resistant S. aureus (MRSA) infection. However, few reports referred to patients' MRSA infections in Yunnan province, southwest China.

METHODS: In this study, we selected representative MRSA strains from patients' systemic surveillance in Yunnan province of China, performed the genomic sequencing and compared their features, together with some food derived strains.

RESULTS: Among sixty selective isolates, forty strains were isolated from patients, and twenty isolated from food. Among the patients' strains, sixteen were recognized as community-acquired (CA), compared with 24 for hospital-acquired (HA). ST6-t701, ST59-t437 and ST239-t030 were the three major genotype profiles. ST6-t701 was predominated in food strains, while ST59-t437 and ST239-t030 were the primary clones in patients. The clinical features between CA and HA-MRSA of patients were statistical different. Compared the antibiotic resistant results between patients and food indicated that higher antibiotic resistant rates were found in patients' strains. Totally, the average genome sizes of 60 isolates were 2.79 ± 0.05 Mbp, with GC content 33% and 84.50 ± 0.20% of coding rate. The core genomes of these isolates were 1593 genes. Phylogenetic analysis based on pan-genome and SNP of strains showed that five clustering groups were generated. Clustering ST239-t030 contained all the HA-MRSA cases in this study; clustering ST6-t701 referred to food and CA-MRSA infections in community; clustering ST59-t437 showed the heterogeneity for provoking different clinical diseases in both community and hospital. Phylogenetic tree, incorporating 24 isolates from different regions, indicated ST239-t030 strains in this study were more closely related to T0131 isolate from Tianjin, China, belonged to 'Turkish clade' from Eastern Europe; two groups of ST59-t437 clones of MRSA in Yunnan province were generated, belonged to the 'Asian-Pacific' clone (AP) and 'Taiwan' clone (TW) respectively.

CONCLUSIONS: ST239-t030, ST59-t437 and ST6-t701 were the three major MRSA clones in Yunnan province of China. ST239-t030 clonal Yunnan isolates demonstrated the local endemic of clone establishment for a number of years, whereas ST59-t437 strains revealed the multi-origins of this clone. In general, genomic study on epidemic clones of MRSA in southwest China provided the features and evolution of this pathogen.}, } @article {pmid32052196, year = {2020}, author = {Dos Santos Silva, LK and Rodrigues, RAL and Dos Santos Pereira Andrade, AC and Hikida, H and Andreani, J and Levasseur, A and La Scola, B and Abrahão, JS}, title = {Isolation and genomic characterization of a new mimivirus of lineage B from a Brazilian river.}, journal = {Archives of virology}, volume = {165}, number = {4}, pages = {853--863}, doi = {10.1007/s00705-020-04542-5}, pmid = {32052196}, issn = {1432-8798}, mesh = {Brazil ; *Genome, Viral ; Genomics ; Mimiviridae/classification/genetics/*isolation & purification/physiology ; Phylogeny ; Rivers/*virology ; Virus Replication ; }, abstract = {Since its discovery, the first identified giant virus associated with amoebae, Acanthamoeba polyphaga mimivirus (APMV), has been rigorously studied to understand the structural and genomic complexity of this virus. In this work, we report the isolation and genomic characterization of a new mimivirus of lineage B, named "Borely moumouvirus". This new virus exhibits a structure and replicative cycle similar to those of other members of the family Mimiviridae. The genome of the new isolate is a linear double-strand DNA molecule of ~1.0 Mb, containing over 900 open reading frames. Genome annotation highlighted different translation system components encoded in the DNA of Borely moumouvirus, including aminoacyl-tRNA synthetases, translation factors, and tRNA molecules, in a distribution similar to that in other lineage B mimiviruses. 
Pan-genome analysis indicated an increase in the genetic arsenal of this group of viruses, showing that the family Mimiviridae is still expanding. Furthermore, phylogenetic analysis has shown that Borely moumouvirus is closely related to moumouvirus australiensis. This is the first mimivirus lineage B isolated from Brazilian territory to be characterized. Further prospecting studies are necessary for us to better understand the diversity of these viruses so a better classification system can be established.}, } @article {pmid32051000, year = {2020}, author = {Hickey, G and Heller, D and Monlong, J and Sibbesen, JA and Sirén, J and Eizenga, J and Dawson, ET and Garrison, E and Novak, AM and Paten, B}, title = {Genotyping structural variants in pangenome graphs using the vg toolkit.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {35}, pmid = {32051000}, issn = {1474-760X}, support = {T32 HG008345/HG/NHGRI NIH HHS/United States ; U01 HL137183/HL/NHLBI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; U54 HG007990/HG/NHGRI NIH HHS/United States ; }, mesh = {Genome, Fungal ; *Genomic Structural Variation ; Genotyping Techniques/*methods ; Saccharomyces cerevisiae ; *Software ; Whole Genome Sequencing/methods ; }, abstract = {Structural variants (SVs) remain challenging to represent and study relative to point mutations despite their demonstrated importance. We show that variation graphs, as implemented in the vg toolkit, provide an effective means for leveraging SV catalogs for short-read SV genotyping experiments. We benchmark vg against state-of-the-art SV genotypers using three sequence-resolved SV catalogs generated by recent long-read sequencing studies. 
In addition, we use assemblies from 12 yeast strains to show that graphs constructed directly from aligned de novo assemblies improve genotyping compared to graphs built from intermediate SV catalogs in the VCF format.}, } @article {pmid32049967, year = {2020}, author = {Han, H and McGivney, BA and Farries, G and Katz, LM and MacHugh, DE and Randhawa, IAS and Hill, EW}, title = {Selection in Australian Thoroughbred horses acts on a locus associated with early two-year old speed.}, journal = {PloS one}, volume = {15}, number = {2}, pages = {e0227212}, pmid = {32049967}, issn = {1932-6203}, mesh = {Animals ; Australia ; Genome ; Genome-Wide Association Study/methods ; Horses/*genetics ; Locomotion/*genetics ; Phenotype ; *Physical Conditioning, Animal ; }, abstract = {Thoroughbred horse racing is a global sport with major hubs in Europe, North America, Australasia and Japan. Regional preferences for certain traits have resulted in phenotypic variation that may result from adaptation to the local racing ecosystem. Here, we test the hypothesis that genes selected for regional phenotypic variation may be identified by analysis of selection signatures in pan-genomic SNP genotype data. Comparing Australian to non-Australian Thoroughbred horses (n = 99), the most highly differentiated loci in a composite selection signals (CSS) analysis were on ECA6 (34.75-34.85 Mb), ECA14 (33.2-33.52 Mb and 35.52-36.94 Mb) and ECA16 (24.28-26.52 Mb) in regions containing candidate genes for exercise adaptations including cardiac function (ARHGAP26, HBEGF, SRA1), synapse development and locomotion (APBB3, ATXN7, CLSTN3), stress response (NR3C1) and the skeletal muscle response to exercise (ARHGAP26, NDUFA2). In a genome-wide association study for field-measured speed in two-year-olds (n = 179) SNPs contained within the single association peak (33.2-35.6 Mb) overlapped with the ECA14 CSS signals and spanned a protocadherin gene cluster. 
Association tests using higher density SNP genotypes across the ECA14 locus identified a SNP within the PCDHGC5 gene associated with elite racing performance (n = 922). These results indicate that there may be differential selection for racing performance under racing and management conditions that are specific to certain geographic racing regions. In Australia breeders have principally selected horses for favourable genetic variants at loci containing genes that modulate behaviour, locomotion and skeletal muscle physiology that together appear to be contributing to early two-year-old speed.}, } @article {pmid32047279, year = {2020}, author = {Maistrenko, OM and Mende, DR and Luetge, M and Hildebrand, F and Schmidt, TSB and Li, SS and Rodrigues, JFM and von Mering, C and Pedro Coelho, L and Huerta-Cepas, J and Sunagawa, S and Bork, P}, title = {Disentangling the impact of environmental and phylogenetic constraints on prokaryotic within-species diversity.}, journal = {The ISME journal}, volume = {14}, number = {5}, pages = {1247-1259}, pmid = {32047279}, issn = {1751-7370}, support = {669830/ERC_/European Research Council/International ; BBS/E/F/000PR10353/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/000PR10355/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Biodiversity ; Ecosystem ; Genome Size ; Phylogeny ; *Prokaryotic Cells ; }, abstract = {Microbial organisms inhabit virtually all environments and encompass a vast biological diversity. The pangenome concept aims to facilitate an understanding of diversity within defined phylogenetic groups. Hence, pangenomes are increasingly used to characterize the strain diversity of prokaryotic species. 
To understand the interdependence of pangenome features (such as the number of core and accessory genes) and to study the impact of environmental and phylogenetic constraints on the evolution of conspecific strains, we computed pangenomes for 155 phylogenetically diverse species (from ten phyla) using 7,000 high-quality genomes to each of which the respective habitats were assigned. Species habitat ubiquity was associated with several pangenome features. In particular, core-genome size was more important for ubiquity than accessory genome size. In general, environmental preferences had a stronger impact on pangenome evolution than phylogenetic inertia. Environmental preferences explained up to 49% of the variance for pangenome features, compared with 18% by phylogenetic inertia. This observation was robust when the dataset was extended to 10,100 species (59 phyla). The importance of environmental preferences was further accentuated by convergent evolution of pangenome features in a given habitat type across different phylogenetic clades. For example, the soil environment promotes expansion of pangenome size, while host-associated habitats lead to its reduction. 
Taken together, we explored the global principles of pangenome evolution, quantified the influence of habitat, and phylogenetic inertia on the evolution of pangenomes and identified criteria governing species ubiquity and habitat specificity.}, } @article {pmid32046716, year = {2020}, author = {Badet, T and Oggenfuss, U and Abraham, L and McDonald, BA and Croll, D}, title = {A 19-isolate reference-quality global pangenome for the fungal wheat pathogen Zymoseptoria tritici.}, journal = {BMC biology}, volume = {18}, number = {1}, pages = {12}, pmid = {32046716}, issn = {1741-7007}, mesh = {Ascomycota/*genetics ; *DNA Transposable Elements ; *Genome, Fungal ; Plant Diseases/microbiology ; *Transcriptome ; Triticum/microbiology ; }, abstract = {BACKGROUND: The gene content of a species largely governs its ecological interactions and adaptive potential. A species is therefore defined by both core genes shared between all individuals and accessory genes segregating presence-absence variation. There is growing evidence that eukaryotes, similar to bacteria, show intra-specific variability in gene content. However, it remains largely unknown how functionally relevant such a pangenome structure is for eukaryotes and what mechanisms underlie the emergence of highly polymorphic genome structures.

RESULTS: Here, we establish a reference-quality pangenome of a fungal pathogen of wheat based on 19 complete genomes from isolates sampled across six continents. Zymoseptoria tritici causes substantial worldwide losses to wheat production due to rapidly evolved tolerance to fungicides and evasion of host resistance. We performed transcriptome-assisted annotations of each genome to construct a global pangenome. Major chromosomal rearrangements are segregating within the species and underlie extensive gene presence-absence variation. Conserved orthogroups account for only ~ 60% of the species pangenome. Investigating gene functions, we find that the accessory genome is enriched for pathogenesis-related functions and encodes genes involved in metabolite production, host tissue degradation and manipulation of the immune system. De novo transposon annotation of the 19 complete genomes shows that the highly diverse chromosomal structure is tightly associated with transposable element content. Furthermore, transposable element expansions likely underlie recent genome expansions within the species.

CONCLUSIONS: Taken together, our work establishes a highly complex eukaryotic pangenome providing an unprecedented toolbox to study how pangenome structure impacts crop-pathogen interactions.}, } @article {pmid32046654, year = {2020}, author = {Zwickl, NF and Stralis-Pavese, N and Schäffer, C and Dohm, JC and Himmelbauer, H}, title = {Comparative genome characterization of the periodontal pathogen Tannerella forsythia.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {150}, pmid = {32046654}, issn = {1471-2164}, mesh = {Codon Usage ; *Genome, Bacterial ; Genomic Islands ; Glycosylation ; Phylogeny ; Tannerella forsythia/classification/*genetics/pathogenicity ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: Tannerella forsythia is a bacterial pathogen implicated in periodontal disease. Numerous virulence-associated T. forsythia genes have been described, however, it is necessary to expand the knowledge on T. forsythia's genome structure and genetic repertoire to further elucidate its role within pathogenesis. Tannerella sp. BU063, a putative periodontal health-associated sister taxon and closest known relative to T. forsythia is available for comparative analyses. In the past, strain confusion involving the T. forsythia reference type strain ATCC 43037 led to discrepancies between results obtained from in silico analyses and wet-lab experimentation.

RESULTS: We generated a substantially improved genome assembly of T. forsythia ATCC 43037 covering 99% of the genome in three sequences. Using annotated genomes of ten Tannerella strains we established a soft core genome encompassing 2108 genes, based on orthologs present in ≥80% of the strains analysed. We used a set of known and hypothetical virulence factors for comparisons in pathogenic strains and the putative periodontal health-associated isolate Tannerella sp. BU063 to identify candidate genes promoting T. forsythia's pathogenesis. Searching for pathogenicity islands we detected 38 candidate regions in the T. forsythia genome. Only four of these regions corresponded to previously described pathogenicity islands. While the general protein O-glycosylation gene cluster of T. forsythia ATCC 43037 has been described previously, genes required for the initiation of glycan synthesis are yet to be discovered. We found six putative glycosylation loci which were only partially conserved in other bacteria. Lastly, we performed a comparative analysis of translational bias in T. forsythia and Tannerella sp. BU063 and detected highly biased genes.

CONCLUSIONS: We provide resources and important information on the genomes of Tannerella strains. Comparative analyses enabled us to assess the suitability of T. forsythia virulence factors as therapeutic targets and to suggest novel putative virulence factors. Further, we report on gene loci that should be addressed in the context of elucidating T. forsythia's protein O-glycosylation pathway. In summary, our work paves the way for further molecular dissection of T. forsythia biology in general and virulence of this species in particular.}, } @article {pmid32034321, year = {2020}, author = {Sherman, RM and Salzberg, SL}, title = {Pan-genomics in the human genome era.}, journal = {Nature reviews. Genetics}, volume = {21}, number = {4}, pages = {243-254}, pmid = {32034321}, issn = {1471-0064}, support = {R01 HG006677/HG/NHGRI NIH HHS/United States ; R01 HL129239/HL/NHLBI NIH HHS/United States ; R35 GM130151/GM/NIGMS NIH HHS/United States ; }, mesh = {Genome, Bacterial ; *Genome, Human ; Genome, Plant ; *Genomics ; Humans ; }, abstract = {Since the early days of the genome era, the scientific community has relied on a single 'reference' genome for each species, which is used as the basis for a wide range of genetic analyses, including studies of variation within and across species. As sequencing costs have dropped, thousands of new genomes have been sequenced, and scientists have come to realize that a single reference genome is inadequate for many purposes. By sampling a diverse set of individuals, one can begin to assemble a pan-genome: a collection of all the DNA sequences that occur in a species. Here we review efforts to create pan-genomes for a range of species, from bacteria to humans, and we further consider the computational methods that have been proposed in order to capture, interpret and compare pan-genome data. 
As scientists continue to survey and catalogue the genomic variation across human populations and begin to assemble a human pan-genome, these efforts will increase our power to connect variation to human diversity, disease and beyond.}, } @article {pmid32020732, year = {2020}, author = {Zhao, J and Bayer, PE and Ruperao, P and Saxena, RK and Khan, AW and Golicz, AA and Nguyen, HT and Batley, J and Edwards, D and Varshney, RK}, title = {Trait associations in the pangenome of pigeon pea (Cajanus cajan).}, journal = {Plant biotechnology journal}, volume = {18}, number = {9}, pages = {1946-1954}, pmid = {32020732}, issn = {1467-7652}, mesh = {Africa ; *Cajanus/genetics ; India ; Peas/genetics ; }, abstract = {Pigeon pea (Cajanus cajan) is an important orphan crop mainly grown by smallholder farmers in India and Africa. Here, we present the first pigeon pea pangenome based on 89 accessions mainly from India and the Philippines, showing that there is significant genetic diversity in Philippine individuals that is not present in Indian individuals. Annotation of variable genes suggests that they are associated with self-fertilization and response to disease. We identified 225 SNPs associated with nine agronomically important traits over three locations and two different time points, with SNPs associated with genes for transcription factors and kinases. These results will lead the way to an improved pigeon pea breeding programme.}, } @article {pmid32013858, year = {2020}, author = {Zhou, X and Yang, B and Stanton, C and Ross, RP and Zhao, J and Zhang, H and Chen, W}, title = {Comparative analysis of Lactobacillus gasseri from Chinese subjects reveals a new species-level taxa.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {119}, pmid = {32013858}, issn = {1471-2164}, support = {Nos. 
31530056, 31820103010, 31801521//National Natural Science Foundation of China/ ; JUSRP11733//Fundamental Research Funds for the Central Universities/ ; JUFSTR20180102//National First-Class Discipline Program of Food Science and Technology/ ; }, mesh = {Adult ; Asian People ; Bacterial Proteins/genetics ; Bacteriocins/genetics ; CRISPR-Cas Systems/genetics ; DNA, Bacterial/genetics ; Feces/microbiology ; Genome, Bacterial/genetics ; Genomics/methods ; Humans ; Infant ; Lactobacillus gasseri/*genetics ; Phylogeny ; Probiotics/metabolism ; RNA, Ribosomal, 16S/genetics ; }, abstract = {BACKGROUND: Lactobacillus gasseri as a probiotic has a history of safe consumption and is prevalent in infant and adult gut microbiota to maintain gut homeostasis.

RESULTS: In this study, to explore the genomic diversity and mine potential probiotic characteristics of L. gasseri, 92 strains of L. gasseri were isolated from Chinese human feces and identified based on 16S rDNA sequencing. After draft genome sequencing, further average nucleotide identity (ANI) value and phylogenetic analysis reclassified them as L. paragasseri (n = 79) and L. gasseri (n = 13), respectively. Their pan/core-genomes were determined, revealing that L. paragasseri had an open pan-genome. Comparative analysis was carried out to identify genetic features, and the results indicated that 39 strains of L. paragasseri harboured Type II-A CRISPR-Cas system while 12 strains of L. gasseri contained Type I-E and II-A CRISPR-Cas systems. Bacteriocin operons and the number of carbohydrate-active enzymes were significantly different between the two species.

CONCLUSIONS: This is the first time to study pan/core-genome of L. gasseri and L. paragasseri, and compare their genetic diversity, and all the results provided better understanding of the genetics of the two species.}, } @article {pmid32006709, year = {2020}, author = {Isidro, J and Ferreira, S and Pinto, M and Domingues, F and Oleastro, M and Gomes, JP and Borges, V}, title = {Virulence and antibiotic resistance plasticity of Arcobacter butzleri: Insights on the genomic diversity of an emerging human pathogen.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {80}, number = {}, pages = {104213}, doi = {10.1016/j.meegid.2020.104213}, pmid = {32006709}, issn = {1567-7257}, mesh = {Anti-Bacterial Agents/*pharmacology ; Arcobacter/*drug effects/*genetics/pathogenicity ; Communicable Diseases, Emerging/epidemiology/*microbiology ; *Drug Resistance, Bacterial ; Genetic Variation ; Genome, Bacterial ; Genomics ; Gram-Negative Bacterial Infections/epidemiology/*microbiology ; Humans ; Microbial Sensitivity Tests ; Virulence/*genetics ; Virulence Factors/genetics ; }, abstract = {Arcobacter butzleri is a foodborne emerging human pathogen, frequently displaying a multidrug resistant character. Still, the lack of comprehensive genome-scale comparative analysis has limited our knowledge on A. butzleri diversification and pathogenicity. Here, we performed a deep genome analysis of A. butzleri focused on decoding its core- and pan-genome diversity and specific genetic traits underlying its pathogenic potential and diverse ecology. A. butzleri (genome size 2.07-2.58 Mbp) revealed a large open pan-genome with 7474 genes (about 50% being singletons) and a small but diverse core-genome with 1165 genes. 
It presents a plastic virulome (including newly identified determinants), marked by the differential presence of multiple adaptation-related virulence factors, such as the urease cluster ureD(AB)CEFG (phenotypically confirmed), the hypervariable hemagglutinin-encoding hecA, a type I secretion system (T1SS) harboring another agglutinin and a novel VirB/D4 T4SS likely linked to interbacterial competition and cytotoxicity. In addition, A. butzleri harbors a large repertoire of efflux pumps (EPs) and other antibiotic resistance determinants. We unprecedentedly describe a genetic mechanism of A. butzleri macrolide resistance (inactivation of a TetR repressor likely regulating an EP). Fluoroquinolone resistance correlated with Thr-85-Ile in GyrA and ampicillin resistance was linked to an OXA-15-like β-lactamase. Remarkably, by decoding the polymorphism pattern of the main antigen PorA, we show that A. butzleri is able to exchange porA as a whole and/or hypervariable epitope-encoding regions separately, leading to a multitude of chimeric PorA presentations that can impact pathogen-host interaction during infection. Ultimately, our unprecedented screening of short sequence repeats indicates that phase variation likely modulates A. butzleri key adaptive functions. In summary, this study constitutes a turning point on A. 
butzleri comparative genomics revealing that this human gastrointestinal pathogen is equipped with vast and diverse virulence and antibiotic resistance arsenals that open a multitude of phenotypic fingerprints for environmental/host adaptation and pathogenicity.}, } @article {pmid31998346, year = {2019}, author = {Luo, M and Ríos, G and Sarnowski, TJ and Zhang, S and Mantri, N and Charron, JB and Libault, M}, title = {Editorial: New Insights Into Mechanisms of Epigenetic Modifiers in Plant Growth and Development.}, journal = {Frontiers in plant science}, volume = {10}, number = {}, pages = {1661}, doi = {10.3389/fpls.2019.01661}, pmid = {31998346}, issn = {1664-462X}, } @article {pmid31982844, year = {2020}, author = {Danilevicz, MF and Tay Fernandez, CG and Marsh, JI and Bayer, PE and Edwards, D}, title = {Plant pangenomics: approaches, applications and advancements.}, journal = {Current opinion in plant biology}, volume = {54}, number = {}, pages = {18-25}, doi = {10.1016/j.pbi.2019.12.005}, pmid = {31982844}, issn = {1879-0356}, mesh = {Biological Evolution ; *Genome, Plant ; *Plants ; }, abstract = {With the assembly of increasing numbers of plant genomes, it is becoming accepted that a single reference assembly does not reflect the gene diversity of a species. The production of pangenomes, which reflect the structural variation and polymorphisms in genomes, enables in depth comparisons of variation within species or higher taxonomic groups. In this review, we discuss the current and emerging approaches for pangenome assembly, analysis and visualisation. In addition, we consider the potential of pangenomes for applied crop improvement, evolutionary and biodiversity studies. 
To fully exploit the value of pangenomes it is important to integrate broad information such as phenotypic, environmental, and expression data to gain insights into the role of variable regions within genomes.}, } @article {pmid31981929, year = {2020}, author = {Michael, TP and VanBuren, R}, title = {Building near-complete plant genomes.}, journal = {Current opinion in plant biology}, volume = {54}, number = {}, pages = {26-33}, doi = {10.1016/j.pbi.2019.12.009}, pmid = {31981929}, issn = {1879-0356}, mesh = {DNA Transposable Elements ; *Genome, Plant ; Genomics ; *High-Throughput Nucleotide Sequencing ; Sequence Analysis, DNA ; }, abstract = {Plant genomes span several orders of magnitude in size, vary in levels of ploidy and heterozygosity, and contain old and recent bursts of transposable elements, which render them challenging but interesting to assemble. Recent advances in single molecule sequencing and physical mapping technologies have enabled high-quality, chromosome scale assemblies of plant species with increasing complexity and size. Single molecule reads can now exceed megabases in length, providing unprecedented opportunities to untangle genomic regions missed by short read technologies. However, polyploid and heterozygous plant genomes are still difficult to assemble but provide opportunities for new tools and approaches. 
Haplotype phasing, structural variant analysis and de novo pan-genomics are the emerging frontiers in plant genome assembly.}, } @article {pmid31980727, year = {2020}, author = {Talwar, C and Nagar, S and Kumar, R and Scaria, J and Lal, R and Negi, RK}, title = {Defining the Environmental Adaptations of Genus Devosia: Insights into its Expansive Short Peptide Transport System and Positively Selected Genes.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {1151}, pmid = {31980727}, issn = {2045-2322}, mesh = {Adaptation, Physiological ; Bacterial Proteins/*genetics ; Base Composition ; DNA, Bacterial/genetics/isolation & purification ; Environment ; Gene Ontology ; *Genes, Bacterial ; Genome, Bacterial ; Hyphomicrobiaceae/classification/*genetics/metabolism ; Membrane Transport Proteins/*genetics ; Metabolic Networks and Pathways/genetics ; Nutrients/metabolism ; Open Reading Frames ; Operon ; Peptides/metabolism ; Phylogeny ; Selection, Genetic ; Sequence Alignment ; Soil Microbiology ; Soil Pollutants ; Species Specificity ; }, abstract = {Devosia are well known for their dominance in soil habitats contaminated with various toxins and are best characterized for their bioremediation potential. In this study, we compared the genomes of 27 strains of Devosia with aim to understand their metabolic abilities. The analysis revealed their adaptive gene repertoire which was bared from 52% unique pan-gene content. A striking feature of all genomes was the abundance of oligo- and di-peptide permeases (oppABCDF and dppABCDF) with each genome harboring an average of 60.7 ± 19.1 and 36.5 ± 10.6 operon associated genes respectively. Apart from their primary role in nutrition, these permeases may help Devosia to sense environmental signals and in chemotaxis at stressed habitats. 
Through sequence similarity network analyses, we identified 29 Opp and 19 Dpp sequences that shared very little homology with any other sequence suggesting an expansive short peptidic transport system within Devosia. The substrate determining components of these permeases viz. OppA and DppA further displayed a large diversity that separated into 12 and 9 homologous clusters respectively in addition to large number of isolated nodes. We also dissected the genome scale positive evolution and found genes associated with growth (exopolyphosphatase, HesB_IscA_SufA family protein), detoxification (moeB, nifU-like domain protein, alpha/beta hydrolase), chemotaxis (cheB, luxR) and stress response (phoQ, uspA, luxR, sufE) were positively selected. The study highlights the genomic plasticity of the Devosia spp. for conferring adaptation, bioremediation and the potential to utilize a wide range of substrates. The widespread toxin-antitoxin loci and 'open' state of the pangenome provided evidence of plastic genomes and a much larger genetic repertoire of the genus which is yet uncovered.}, } @article {pmid31980014, year = {2020}, author = {Sanderson, H and Ortega-Polo, R and Zaheer, R and Goji, N and Amoako, KK and Brown, RS and Majury, A and Liss, SN and McAllister, TA}, title = {Comparative genomics of multidrug-resistant Enterococcus spp. 
isolated from wastewater treatment plants.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {20}, pmid = {31980014}, issn = {1471-2180}, support = {001//Genomics Research and Development Initiative/International ; }, mesh = {Bacterial Proteins/*genetics ; *Drug Resistance, Multiple, Bacterial ; Enterococcus faecium/*genetics ; Genome Size ; Genomics/*methods ; Interspersed Repetitive Sequences ; Multilocus Sequence Typing ; Phylogeny ; Vancomycin Resistance ; Virulence Factors/genetics ; Wastewater/*microbiology ; Whole Genome Sequencing ; }, abstract = {BACKGROUND: Wastewater treatment plants (WWTPs) are considered hotspots for the environmental dissemination of antimicrobial resistance (AMR) determinants. Vancomycin-Resistant Enterococcus (VRE) are candidates for gauging the degree of AMR bacteria in wastewater. Enterococcus faecalis and Enterococcus faecium are recognized indicators of fecal contamination in water. Comparative genomics of enterococci isolated from conventional activated sludge (CAS) and biological aerated filter (BAF) WWTPs was conducted.

RESULTS: VRE isolates, including E. faecalis (n = 24), E. faecium (n = 11), E. casseliflavus (n = 2) and E. gallinarum (n = 2) were selected for sequencing based on WWTP source, species and AMR phenotype. The pangenomes of E. faecium and E. faecalis were both open. The genomic fraction related to the mobilome was positively correlated with genome size in E. faecium (p < 0.001) and E. faecalis (p < 0.001) and with the number of AMR genes in E. faecium (p = 0.005). Genes conferring vancomycin resistance, including vanA and vanM (E. faecium), vanG (E. faecalis), and vanC (E. casseliflavus/E. gallinarum), were detected in 20 genomes. The most prominent functional AMR genes were efflux pumps and transporters. A minimum of 16, 6, 5 and 3 virulence genes were detected in E. faecium, E. faecalis, E. casseliflavus and E. gallinarum, respectively. Virulence genes were more common in E. faecalis and E. faecium than in E. casseliflavus and E. gallinarum. A number of mobile genetic elements were shared among species. Functional CRISPR/Cas arrays were detected in 13 E. faecalis genomes, with all but one also containing a prophage. The lack of functional CRISPR/Cas arrays was associated with multi-drug resistance in E. faecium. Phylogenetic analysis demonstrated differential clustering of isolates based on original source but not WWTP. Genes related to phage and CRISPR/Cas arrays could potentially serve as environmental biomarkers.

CONCLUSIONS: There was no discernible difference between enterococcal genomes from the CAS and BAF WWTPs. E. faecalis and E. faecium have smaller genomes and harbor more virulence, AMR, and mobile genetic elements than other Enterococcus spp.}, } @article {pmid31972312, year = {2020}, author = {Yun, BR and Malik, A and Kim, SB}, title = {Genome based characterization of Kitasatospora sp. MMS16-BH015, a multiple heavy metal resistant soil actinobacterium with high antimicrobial potential.}, journal = {Gene}, volume = {733}, number = {}, pages = {144379}, doi = {10.1016/j.gene.2020.144379}, pmid = {31972312}, issn = {1879-0038}, mesh = {Actinobacteria/classification/*drug effects/*genetics/isolation & purification ; Anti-Bacterial Agents/*pharmacology ; Bacterial Proteins/*genetics ; Drug Resistance, Bacterial/*genetics ; *Genome, Bacterial ; Metals, Heavy/*pharmacology ; Mining ; Multigene Family ; Phylogeny ; Soil Microbiology ; }, abstract = {An actinobacterial strain designated Kitasatospora sp. MMS16-BH015, exhibiting high level of heavy metal resistance, was isolated from soil of an abandoned metal mining site, and its potential for metal resistance and secondary metabolite production was studied. The strain was resistant to multiple heavy metals including zinc (up to 100 mM), nickel (up to 2 mM) and copper (up to 0.8 mM), and also showed antimicrobial potential against a broad group of microorganisms, in particular filamentous fungi. The genome of strain MMS16-BH015 was 8.96 Mbp in size with a G + C content of 72.7%, and contained 7270 protein-coding genes and 107 tRNA/rRNA genes. The genome analysis revealed presence of at least 121 metal resistance related genes, which was prominently higher in strain MMS16-BH015 compared to other genomes of Kitasatospora. 
The genes included those for proteins representing various families involved in the transport of heavy metals, for example dipeptide transport ATP-binding proteins, high-affinity nickel transport proteins, and P-type heavy metal-transporting ATPases. Additionally, 43 biosynthetic gene clusters (BGCs) for secondary metabolites, enriched with those for non-ribosomal peptides, were detected in this multiple heavy metal resistant actinobacterium, which was again the highest among the compared genomes of Kitasatospora. The pan-genome analysis also identified higher numbers of unique genes related to secondary metabolite production and metal resistance mechanism in strain MMS16-BH015. A high level of correlation between the biosynthetic potential and heavy metal resistance could be observed, thus indicating that heavy metal resistant actinobacteria can be a promising source of bioactive compounds.}, } @article {pmid31965706, year = {2020}, author = {Wang, L and Luo, Y and Zhao, Y and Gao, GF and Bi, Y and Qiu, HJ}, title = {Comparative genomic analysis reveals an 'open' pan-genome of African swine fever virus.}, journal = {Transboundary and emerging diseases}, volume = {67}, number = {4}, pages = {1553-1562}, doi = {10.1111/tbed.13489}, pmid = {31965706}, issn = {1865-1682}, support = {2018YFC0840401//National Key Research and Development Program of China/ ; XDB29010102//Strategic Priority Research Program of the Chinese Academy of Sciences (CAS)/ ; KJZD-SW-L06-01//Intramural Special Grants for African Swine Fever Research from the Chinese Academy of Sciences/ ; 31941003//National Natural Science Foundation (NSFC) of China/ ; Y2019YJ07-02//Central Public-interest Scientific Institution Basal Research Fund of China/ ; 81621091//NSFC Innovative Research Group/ ; 31822055//NSFC Outstanding Young Scholars/ ; 2017122//Youth Innovation Promotion Association of CAS/ ; }, mesh = {African Swine Fever/*virology ; African Swine Fever Virus/*genetics ; Animals ; DNA, Viral/*genetics 
; Genome, Viral/*genetics ; Genome-Wide Association Study ; Genomics ; Polymorphism, Genetic ; Sequence Analysis, DNA ; Swine ; Swine Diseases/*virology ; Viral Proteins/*genetics ; Virulence ; }, abstract = {The worldwide transmission of African swine fever virus (ASFV) drastically affects the pig industry and global trade. Development of vaccines is hindered by the lack of knowledge of the genomic characteristics of ASFV. In this study, we developed a pipeline for the de novo assembly of ASFV genome without virus isolation and purification. We then used a comparative genomics approach to systematically study 46 genomes of ASFVs to reveal the genomic characteristics. The analysis revealed that ASFV has an 'open' pan-genome based on both protein-coding genes and intergenic regions. Of the 151-174 genes found in the ASFV strains, only 86 were identified as core genes; the remainder were flexible accessory genes. Notably, 44 of the 86 core genes and 155 of the 324 accessory genes have been functionally annotated according to the known proteins. Interestingly, a dynamic number of taxis-related genes were identified in the accessory genes, and two potential virulence genes were identified in all ASFV isolates. 
The 'open' pan-genome of ASFV based on gene and intergenic regions reveals its pronounced natural diversity concerning genomic composition and regulation.}, } @article {pmid31964928, year = {2020}, author = {Koch, H and Germscheid, N and Freese, HM and Noriega-Ortega, B and Lücking, D and Berger, M and Qiu, G and Marzinelli, EM and Campbell, AH and Steinberg, PD and Overmann, J and Dittmar, T and Simon, M and Wietz, M}, title = {Genomic, metabolic and phenotypic variability shapes ecological differentiation and intraspecies interactions of Alteromonas macleodii.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {809}, pmid = {31964928}, issn = {2045-2322}, mesh = {Adaptation, Biological ; Alteromonas/metabolism/*physiology ; Biological Variation, Population ; Ecosystem ; Ecotype ; Genetic Variation ; Genome, Bacterial ; Iron/metabolism ; Pacific Ocean ; Phylogeny ; Plasmids ; Polysaccharides/metabolism ; Prochlorococcus/physiology ; Seawater/microbiology ; Seaweed/metabolism ; Secondary Metabolism ; }, abstract = {Ecological differentiation between strains of bacterial species is shaped by genomic and metabolic variability. However, connecting genotypes to ecological niches remains a major challenge. Here, we linked bacterial geno- and phenotypes by contextualizing pangenomic, exometabolomic and physiological evidence in twelve strains of the marine bacterium Alteromonas macleodii, illuminating adaptive strategies of carbon metabolism, microbial interactions, cellular communication and iron acquisition. In A. macleodii strain MIT1002, secretion of amino acids and the unique capacity for phenol degradation may promote associations with Prochlorococcus cyanobacteria. Strain 83-1 and three novel Pacific isolates, featuring clonal genomes despite originating from distant locations, have profound abilities for algal polysaccharide utilization but without detrimental implications for Ecklonia macroalgae. 
Degradation of toluene and xylene, mediated via a plasmid syntenic to terrestrial Pseudomonas, was unique to strain EZ55. Benzoate degradation by strain EC673 related to a chromosomal gene cluster shared with the plasmid of A. mediterranea EC615, underlining that mobile genetic elements drive adaptations. Furthermore, we revealed strain-specific production of siderophores and homoserine lactones, with implications for nutrient acquisition and cellular communication. Phenotypic variability corresponded to different competitiveness in co-culture and geographic distribution, indicating linkages between intraspecific diversity, microbial interactions and biogeography. The finding of "ecological microdiversity" helps understanding the widespread occurrence of A. macleodii and contributes to the interpretation of bacterial niche specialization, population ecology and biogeochemical roles.}, } @article {pmid31962204, year = {2020}, author = {Jiang, W and Wu, Z and Wang, T and Mantri, N and Huang, H and Li, H and Tao, Z and Guo, Q}, title = {Physiological and transcriptomic analyses of cadmium stress response in Dendrobium officinale seedling.}, journal = {Plant physiology and biochemistry : PPB}, volume = {148}, number = {}, pages = {152-165}, doi = {10.1016/j.plaphy.2020.01.010}, pmid = {31962204}, issn = {1873-2690}, mesh = {*Cadmium/toxicity ; *Dendrobium/drug effects/genetics ; Gene Expression Profiling ; Gene Expression Regulation, Plant/drug effects ; Plant Roots/drug effects/genetics ; *Seedlings/drug effects/genetics ; *Stress, Physiological/genetics ; *Transcriptome/drug effects ; }, abstract = {Dendrobium officinale is an economically important Chinese herb with ornamental and medicinal values. However, the mechanisms by which D. officinale adapts to cadmium (Cd) stress is unknown. Here, physiological changes in D. 
officinale roots and leaves exposed to increasing levels of Cd stress (CdSO4 concentration of 2, 5, 9, 14 mg L[-1]) were analyzed at 7, 15, 30, and 45 days after treatment. The Cd stress of 14 mg L[-1] significantly increased the levels of antioxidants and induced malondialdehyde and proline accumulation (P < 0.05). Cd subcellular distribution showed that Cd sequestration into soluble fraction is the major detoxification mechanism in D. officinale roots. Subsequently, the transcriptome profile of D. officinale roots treated with 14 mg L[-1] Cd for 15 and 30 days was analyzed. Compared to control, 2,469 differentially expressed genes (DEGs) were identified, comprising 1,486 up-regulated genes and 983 down-regulated genes. The DEGs associated with metabolic pathways for Cd uptake, transportation and detoxification were analyzed. Several processes such as metal transporter, sulfate glutathione metabolism, cell wall metabolism, phenylpropanoid metabolism were identified to be important for Cd stress adaptation. More genes were expressed at 15 days after treatment compared to 30 days. WRKY, Trihelix, NF-YC, MYB, bZIP and bHLH transcription factors were over-expressed at both time points. Furthermore, candidate genes from the glutathione metabolism pathway were identified, and qRT-PCR analysis of ten DEGs indicated a high correlation with RNA-seq expression profiles. Our findings provide significant information for further research of Cd stress responsive genes functions in D. 
officinale, especially the genes from the glutathione metabolism pathway.}, } @article {pmid31956321, year = {2019}, author = {Alexandraki, V and Kazou, M and Blom, J and Pot, B and Papadimitriou, K and Tsakalidou, E}, title = {Comparative Genomics of Streptococcus thermophilus Support Important Traits Concerning the Evolution, Biology and Technological Properties of the Species.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2916}, pmid = {31956321}, issn = {1664-302X}, abstract = {Streptococcus thermophilus is a major starter for the dairy industry with great economic importance. In this study we analyzed 23 fully sequenced genomes of S. thermophilus to highlight novel aspects of the evolution, biology and technological properties of this species. Pan/core genome analysis revealed that the species has an important number of conserved genes and that the pan genome is probably going to be closed soon. According to whole genome phylogeny and average nucleotide identity (ANI) analysis, most S. thermophilus strains were grouped in two major clusters (i.e., clusters A and B). More specifically, cluster A includes strains with chromosomes above 1.83 Mbp, while cluster B includes chromosomes below this threshold. This observation suggests that strains belonging to the two clusters may be differentiated by gene gain or gene loss events. Furthermore, certain strains of cluster A could be further subdivided in subgroups, i.e., subgroup I (ASCC 1275, DGCC 7710, KLDS SM, MN-BM-A02, and ND07), II (MN-BM-A01 and MN-ZLW-002), III (LMD-9 and SMQ-301), and IV (APC151 and ND03). In cluster B certain strains formed one distinct subgroup, i.e., subgroup I (CNRZ1066, CS8, EPS, and S9). Clusters and subgroups observed for S. thermophilus indicate the existence of lineages within the species, an observation which was further supported to a variable degree by the distribution and/or the architecture of several genomic traits. 
These would include exopolysaccharide (EPS) gene clusters, Clustered Regularly Interspaced Short Palindromic Repeats (CRISPRs)-CRISPR associated (Cas) systems, as well as restriction-modification (R-M) systems and genomic islands (GIs). Of note, the histidine biosynthetic cluster was found present in all cluster A strains (plus strain NCTC12958[T]) but was absent from all strains in cluster B. Other loci related to lactose/galactose catabolism and urea metabolism, aminopeptidases, the majority of amino acid and peptide transporters, as well as amino acid biosynthetic pathways were found to be conserved in all strains suggesting their central role for the species. Our study highlights the necessity of sequencing and analyzing more S. thermophilus complete genomes to further elucidate important aspects of strain diversity within this starter culture that may be related to its application in the dairy industry.}, } @article {pmid31954181, year = {2020}, author = {Lannes-Costa, PS and Baraúna, RA and Ramos, JN and Veras, JFC and Conceição, MVR and Vieira, VV and de Mattos-Guaraldi, AL and Ramos, RTJ and Doran, KS and Silva, A and Nagao, PE}, title = {Comparative genomic analysis and identification of pathogenicity islands of hypervirulent ST-17 Streptococcus agalactiae Brazilian strain.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {80}, number = {}, pages = {104195}, doi = {10.1016/j.meegid.2020.104195}, pmid = {31954181}, issn = {1567-7257}, mesh = {Brazil/epidemiology ; Computational Biology/methods ; *Genome, Bacterial ; *Genomics/methods ; Humans ; Molecular Sequence Annotation ; Phylogeny ; Public Health Surveillance ; Streptococcal Infections/*epidemiology/*microbiology ; Streptococcus agalactiae/*classification/*genetics/isolation & purification/pathogenicity ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Streptococcus agalactiae are important 
pathogenic bacteria that cause severe infections in humans, especially neonates. The mechanism by which ST-17 causes invasive infections than other STs is not well understood. In this study, we sequenced the first genome of a S. agalactiae ST-17 strain isolated in Brazil using the Illumina HiSeq 2500 technology. S. agalactiae GBS90356 ST-17 belongs to the capsular type III and was isolated from a neonatal with a fatal case of meningitis. The genome presented a size of 2.03 Mbp and a G + C content of 35.2%. S. agalactiae has 706 genes in its core genome and an open pan-genome with a size of 5.020 genes, suggesting a high genomic plasticity. GIPSy software was used to identify 10 Pathogenicity islands (PAIs) which corresponded to 15% of the genome size. IslandViewer4 corroborated the prediction of six PAIs. The pathogenicity islands showed important virulence factors genes for S. agalactiae e.g. neu, cps, dlt, fbs, cfb, lmb. SignalP detected 20 proteins with signal peptides among the 352 proteins found in PAIs, which 60% were located in the SagPAI_5. SagPAI_2 and 5 were mainly detected in ST-17 strains studied. Moreover, we identified 51 unique genes, 9 recombination regions and a large number of SNPs with an average of 760.3 polymorphisms, which can be related with high genomic plasticity and virulence during host-pathogen interactions. Our results showed implications for pathogenesis, evolution, concept of species and in silico analysis value to understand the epidemiology and genome plasticity of S. 
agalactiae.}, } @article {pmid31950028, year = {2019}, author = {Ying, J and Ye, J and Xu, T and Wang, Q and Bao, Q and Li, A}, title = {Comparative Genomic Analysis of Rhodococcus equi: An Insight into Genomic Diversity and Genome Evolution.}, journal = {International journal of genomics}, volume = {2019}, number = {}, pages = {8987436}, pmid = {31950028}, issn = {2314-4378}, abstract = {Rhodococcus equi, a member of the Rhodococcus genus, is a gram-positive pathogenic bacterium. Rhodococcus possesses an open pan-genome that constitutes the basis of its high genomic diversity and allows for adaptation to specific niche conditions and the changing host environments. Our analysis further showed that the core genome of R. equi contributes to the pathogenicity and niche adaptation of R. equi. Comparative genomic analysis revealed that the genomes of R. equi shared identical collinearity relationship, and heterogeneity was mainly acquired by means of genomic islands and prophages. Moreover, genomic islands in R. equi were always involved in virulence, resistance, or niche adaptation and possibly working with prophages to cause the majority of genome expansion. These findings provide an insight into the genomic diversity, evolution, and structural variation of R. 
equi and a valuable resource for functional genomic studies.}, } @article {pmid31949252, year = {2020}, author = {McGivney, BA and Han, H and Corduff, LR and Katz, LM and Tozaki, T and MacHugh, DE and Hill, EW}, title = {Genomic inbreeding trends, influential sire lines and selection in the global Thoroughbred horse population.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {466}, pmid = {31949252}, issn = {2045-2322}, support = {11/PI/1166//Science Foundation Ireland (SFI)/International ; }, mesh = {Animals ; *Genetics, Population ; *Genome ; Genomics ; Genotype ; Horses/*genetics ; *Inbreeding ; Phenotype ; *Polymorphism, Single Nucleotide ; *Selection, Genetic ; }, abstract = {The Thoroughbred horse is a highly valued domestic animal population under strong selection for athletic phenotypes. Here we present a high resolution genomics-based analysis of inbreeding in the population that may form the basis for evidence-based discussion amid concerns in the breeding industry over the increasing use of small numbers of popular sire lines, which may accelerate a loss of genetic diversity. In the most comprehensive globally representative sample of Thoroughbreds to-date (n = 10,118), including prominent stallions (n = 305) from the major bloodstock regions of the world, we show using pan-genomic SNP genotypes that there has been a highly significant decline in global genetic diversity during the last five decades (FIS R[2] = 0.942, P = 2.19 × 10[-13]; FROH R[2] = 0.88, P = 1.81 × 10[-10]) that has likely been influenced by the use of popular sire lines. Estimates of effective population size in the global and regional populations indicate that there is some level of regional variation that may be exploited to improve global genetic diversity. Inbreeding is often a consequence of selection, which in managed animal populations tends to be driven by preferences for cultural, aesthetic or economically advantageous phenotypes. 
Using a composite selection signals approach, we show that centuries of selection for favourable athletic traits among Thoroughbreds acts on genes with functions in behaviour, musculoskeletal conformation and metabolism. As well as classical selective sweeps at core loci, polygenic adaptation for functional modalities in cardiovascular signalling, organismal growth and development, cellular stress and injury, metabolic pathways and neurotransmitters and other nervous system signalling has shaped the Thoroughbred athletic phenotype. Our results demonstrate that genomics-based approaches to identify genetic outcrosses will add valuable objectivity to augment traditional methods of stallion selection and that genomics-based methods will be beneficial to actively monitor the population to address the marked inbreeding trend.}, } @article {pmid31948633, year = {2020}, author = {Mataragas, M}, title = {Investigation of genomic characteristics and carbohydrates' metabolic activity of Lactococcus lactis subsp. lactis during ripening of a Swiss-type cheese.}, journal = {Food microbiology}, volume = {87}, number = {}, pages = {103392}, doi = {10.1016/j.fm.2019.103392}, pmid = {31948633}, issn = {1095-9998}, mesh = {*Carbohydrate Metabolism ; Carbohydrates/chemistry ; Cheese/*microbiology ; Fermentation ; Food Microbiology ; Genetic Variation ; Genomics ; Lactococcus lactis/classification/*genetics/isolation & purification/*metabolism ; Phylogeny ; }, abstract = {Genetic diversity and metabolic properties of Lactococcus lactis subsp. lactis were explored using phylogenetic, pan-genomic and metatranscriptomic analysis. The genomes, used in the current study, were available and downloaded from the GenBank which were primarily related with microorganisms isolated from dairy products and secondarily from other foodstuffs. 
To study the genetic diversity of the microorganism, various bioinformatics tools were employed such as average nucleotide identity, digital DNA-DNA hybridization, phylogenetic analysis, clusters of orthologous groups analysis, KEGG orthology analysis and pan-genomic analysis. The results showed that Lc. lactis subsp. lactis strains cannot be sufficiently separated into phylogenetic lineages based on the 16S rRNA gene sequences and core genome-based phylogenetic analysis was more appropriate. Pan-genomic analysis of the strains indicated that the core, accessory and unique genome comprised of 1036, 3146 and 1296 genes, respectively. Considering the results of pan-genomic and KEGG orthology analyses, the metabolic network of Lc. lactis subsp. lactis was rebuilt regarding its carbohydrates' metabolic capabilities. Based on the metatranscriptomic data during the ripening of the Swiss-type Maasdam cheese at 20 °C and 5 °C, it was shown that the microorganism performed mixed acid fermentation producing lactate, formate, acetate, ethanol and 2,3-butanediol. Mixed acid fermentation was more pronounced at higher ripening temperatures. At lower ripening temperatures, the genes involved in mixed acid fermentation were repressed while lactate production remained unaffected resembling to a homolactic fermentation. 
Comparative genomics and metatranscriptomic analysis are powerful tools to gain knowledge on the genomic diversity of the lactic acid bacteria used as starter cultures as well as on the metabolic activities occurring in fermented dairy products.}, } @article {pmid31947593, year = {2020}, author = {Jia, Y and Yang, B and Ross, P and Stanton, C and Zhang, H and Zhao, J and Chen, W}, title = {Comparative Genomics Analysis of Lactobacillus mucosae from Different Niches.}, journal = {Genes}, volume = {11}, number = {1}, pages = {}, pmid = {31947593}, issn = {2073-4425}, mesh = {Animals ; *Clustered Regularly Interspaced Short Palindromic Repeats ; *Genome, Bacterial ; Humans ; Lactobacillus/*genetics/isolation & purification ; *Operon ; Species Specificity ; }, abstract = {The potential probiotic benefits of Lactobacillus mucosae have received increasing attention. To investigate the genetic diversity of L. mucosae, comparative genomic analyses of 93 strains isolated from different niches (human and animal gut, human vagina, etc.) and eight strains of published genomes were conducted. The results showed that the core genome of L. mucosae mainly encoded translation and transcription, amino acid biosynthesis, sugar metabolism, and defense function while the pan-genomic curve tended to be close. The genetic diversity of L. mucosae mainly reflected in carbohydrate metabolism and immune/competitive-related factors, such as exopolysaccharide (EPS), enterolysin A, and clustered regularly interspaced short palindromic repeats (CRISPR)-Cas. It was worth noting that this research firstly predicted the complete EPS operon shared among L. mucosae. Additionally, the type IIIA CRISPR-Cas system was discovered in L. mucosae for the first time. 
This work provided new ideas for the study of this species.}, } @article {pmid31943080, year = {2020}, author = {Yu, J and Xiang, X and Huang, J and Liang, X and Pan, X and Dong, Z and Petersen, TS and Qu, K and Yang, L and Zhao, X and Li, S and Zheng, T and Xu, Z and Liu, C and Han, P and Xu, F and Yang, H and Liu, X and Zhang, X and Bolund, L and Luo, Y and Lin, L}, title = {Haplotyping by CRISPR-mediated DNA circularization (CRISPR-hapC) broadens allele-specific gene editing.}, journal = {Nucleic acids research}, volume = {48}, number = {5}, pages = {e25}, pmid = {31943080}, issn = {1362-4962}, mesh = {Alleles ; Base Sequence ; CRISPR-Associated Protein 9/*genetics/metabolism ; *CRISPR-Cas Systems ; Cell Line, Tumor ; *Clustered Regularly Interspaced Short Palindromic Repeats ; DNA, Circular/*genetics/metabolism ; Gene Editing/methods ; HEK293 Cells ; Haplotypes ; Hep G2 Cells ; Humans ; Plasmids/chemistry/metabolism ; RNA, Guide, Kinetoplastida/*genetics/metabolism ; }, abstract = {Allele-specific protospacer adjacent motif (asPAM)-positioning SNPs and CRISPRs are valuable resources for gene therapy of dominant disorders. However, one technical hurdle is to identify the haplotype comprising the disease-causing allele and the distal asPAM SNPs. Here, we describe a novel CRISPR-based method (CRISPR-hapC) for haplotyping. Based on the generation (with a pair of CRISPRs) of extrachromosomal circular DNA in cells, the CRISPR-hapC can map haplotypes from a few hundred bases to over 200 Mb. To streamline and demonstrate the applicability of the CRISPR-hapC and asPAM CRISPR for allele-specific gene editing, we reanalyzed the 1000 human pan-genome and generated a high frequency asPAM SNP and CRISPR database (www.crispratlas.com/knockout) for four CRISPR systems (SaCas9, SpCas9, xCas9 and Cas12a). 
Using the huntingtin (HTT) CAG expansion and transthyretin (TTR) exon 2 mutation as examples, we showed that the asPAM CRISPRs can specifically discriminate active and dead PAMs for all 23 loci tested. Combination of the CRISPR-hapC and asPAM CRISPRs further demonstrated the capability for achieving highly accurate and haplotype-specific deletion of the HTT CAG expansion allele and TTR exon 2 mutation in human cells. Taken together, our study provides a new approach and an important resource for genome research and allele-specific (haplotype-specific) gene therapy.}, } @article {pmid31941435, year = {2020}, author = {He, Y and Zhou, X and Chen, Z and Deng, X and Gehring, A and Ou, H and Zhang, L and Shi, X}, title = {PRAP: Pan Resistome analysis pipeline.}, journal = {BMC bioinformatics}, volume = {21}, number = {1}, pages = {20}, pmid = {31941435}, issn = {1471-2105}, support = {2017YFC1601200//Key Technologies Research and Development Program/ ; 31601562//National Natural Science Foundation of China/ ; }, mesh = {Alleles ; China ; Drug Resistance, Microbial/*genetics ; Salmonella enterica/genetics ; *Software ; Whole Genome Sequencing ; }, abstract = {BACKGROUND: Antibiotic resistance genes (ARGs) can spread among pathogens via horizontal gene transfer, resulting in imparities in their distribution even within the same species. Therefore, a pan-genome approach to analyzing resistomes is necessary for thoroughly characterizing patterns of ARGs distribution within particular pathogen populations. Software tools are readily available for either ARGs identification or pan-genome analysis, but few exist to combine the two functions.

RESULTS: We developed Pan Resistome Analysis Pipeline (PRAP) for the rapid identification of antibiotic resistance genes from various formats of whole genome sequences based on the CARD or ResFinder databases. Detailed annotations were used to analyze pan-resistome features and characterize distributions of ARGs. The contribution of different alleles to antibiotic resistance was predicted by a random forest classifier. Results of analysis were presented in browsable files along with a variety of visualization options. We demonstrated the performance of PRAP by analyzing the genomes of 26 Salmonella enterica isolates from Shanghai, China.

CONCLUSIONS: PRAP was effective for identifying ARGs and visualizing pan-resistome features, therefore facilitating pan-genomic investigation of ARGs. This tool has the ability to further excavate potential relationships between antibiotic resistance genes and their phenotypic traits.}, } @article {pmid31937675, year = {2020}, author = {Park, CJ and Andam, CP}, title = {Distinct but Intertwined Evolutionary Histories of Multiple Salmonella enterica Subspecies.}, journal = {mSystems}, volume = {5}, number = {1}, pages = {}, pmid = {31937675}, issn = {2379-5077}, abstract = {Salmonella is responsible for many nontyphoidal foodborne infections and enteric (typhoid) fever in humans. Of the two Salmonella species, Salmonella enterica is highly diverse and includes 10 known subspecies and approximately 2,600 serotypes. Understanding the evolutionary processes that generate the tremendous diversity in Salmonella is important in reducing and controlling the incidence of disease outbreaks and the emergence of virulent strains. In this study, we aim to elucidate the impact of homologous recombination in the diversification of S. enterica subspecies. Using a data set of previously published 926 Salmonella genomes representing the 10 S. enterica subspecies and Salmonella bongori, we calculated a genus-wide pan-genome composed of 84,041 genes and the S. enterica pan-genome of 81,371 genes. The size of the accessory genomes varies between 12,429 genes in S. enterica subsp. arizonae (subsp. IIIa) to 33,257 genes in S. enterica subsp. enterica (subsp. I). A total of 12,136 genes in the Salmonella pan-genome show evidence of recombination, representing 14.44% of the pan-genome. We identified genomic hot spots of recombination that include genes associated with flagellin and the synthesis of methionine and thiamine pyrophosphate, which are known to influence host adaptation and virulence. 
Last, we uncovered within-species heterogeneity in rates of recombination and preferential genetic exchange between certain donor and recipient strains. Frequent but biased recombination within a bacterial species may suggest that lineages vary in their response to environmental selection pressure. Certain lineages, such as the more uncommon non-enterica subspecies (non-S. enterica subsp. enterica), may also act as a major reservoir of genetic diversity for the wider population. IMPORTANCE: S. enterica is a major foodborne pathogen, which can be transmitted via several distinct routes from animals and environmental sources to human hosts. Multiple subspecies and serotypes of S. enterica exhibit considerable differences in virulence, host specificity, and colonization. This study provides detailed insights into the dynamics of recombination and its contributions to S. enterica subspecies evolution. Widespread recombination within the species means that new adaptations arising in one lineage can be rapidly transferred to another lineage. 
We therefore predict that recombination has been an important factor in the emergence of several major disease-causing strains from diverse genomic backgrounds and their ability to adapt to disparate environments.}, } @article {pmid31935184, year = {2020}, author = {Nakamura, K and Murase, K and Sato, MP and Toyoda, A and Itoh, T and Mainil, JG and Piérard, D and Yoshino, S and Kimata, K and Isobe, J and Seto, K and Etoh, Y and Narimatsu, H and Saito, S and Yatsuyanagi, J and Lee, K and Iyoda, S and Ohnishi, M and Ooka, T and Gotoh, Y and Ogura, Y and Hayashi, T}, title = {Differential dynamics and impacts of prophages and plasmids on the pangenome and virulence factor repertoires of Shiga toxin-producing Escherichia coli O145:H28.}, journal = {Microbial genomics}, volume = {6}, number = {1}, pages = {}, pmid = {31935184}, issn = {2057-5858}, mesh = {*Genome, Bacterial ; Phylogeny ; *Plasmids ; Polymorphism, Single Nucleotide ; *Prophages ; Shiga-Toxigenic Escherichia coli/*genetics ; *Siphoviridae ; Virulence Factors/*genetics ; }, abstract = {Phages and plasmids play important roles in bacterial evolution and diversification. Although many draft genomes have been generated, phage and plasmid genomes are usually fragmented, limiting our understanding of their dynamics. Here, we performed a systematic analysis of 239 draft genomes and 7 complete genomes of Shiga toxin (Stx)-producing Escherichia coli O145:H28, the major virulence factors of which are encoded by prophages (PPs) or plasmids. The results indicated that PPs are more stably maintained than plasmids. A set of ancestrally acquired PPs was well conserved, while various PPs, including Stx phages, were acquired by multiple sublineages. In contrast, gains and losses of a wide range of plasmids have frequently occurred across the O145:H28 lineage, and only the virulence plasmid was well conserved. 
The different dynamics of PPs and plasmids have differentially impacted the pangenome of O145:H28, with high proportions of PP- and plasmid-associated genes in the variably present and rare gene fractions, respectively. The dynamics of PPs and plasmids have also strongly impacted virulence gene repertoires, such as the highly variable distribution of stx genes and the high conservation of a set of type III secretion effectors, which probably represents the core effectors of O145:H28 and the genes on the virulence plasmid in the entire O145:H28 population. These results provide detailed insights into the dynamics of PPs and plasmids, and show the application of genomic analyses using a large set of draft genomes and appropriately selected complete genomes.}, } @article {pmid31934876, year = {2020}, author = {Tetz, VV and Tetz, GV}, title = {A new biological definition of life.}, journal = {Biomolecular concepts}, volume = {11}, number = {1}, pages = {1-6}, doi = {10.1515/bmc-2020-0001}, pmid = {31934876}, issn = {1868-503X}, mesh = {Biological Evolution ; Genes/*physiology ; Heredity/*genetics ; *Life ; Models, Biological ; Models, Theoretical ; }, abstract = {Here we have proposed a new biological definition of life based on the function and reproduction of existing genes and creation of new ones, which is applicable to both unicellular and multicellular organisms. First, we coined a new term "genetic information metabolism" comprising functioning, reproduction, and creation of genes and their distribution among living and non-living carriers of genetic information. Encompassing this concept, life is defined as organized matter that provides genetic information metabolism. Additionally, we have articulated the general biological function of life as Tetz biological law: "General biological function of life is to provide genetic information metabolism" and formulated novel definition of life: "Life is an organized matter that provides genetic information metabolism". 
New definition of life and Tetz biological law allow to distinguish in a new way living and non-living objects on Earth and other planets based on providing genetic information metabolism.}, } @article {pmid31932676, year = {2020}, author = {Song, JM and Guan, Z and Hu, J and Guo, C and Yang, Z and Wang, S and Liu, D and Wang, B and Lu, S and Zhou, R and Xie, WZ and Cheng, Y and Zhang, Y and Liu, K and Yang, QY and Chen, LL and Guo, L}, title = {Eight high-quality genomes reveal pan-genome architecture and ecotype differentiation of Brassica napus.}, journal = {Nature plants}, volume = {6}, number = {1}, pages = {34-45}, pmid = {31932676}, issn = {2055-0278}, mesh = {Brassica napus/*genetics ; Chromosome Mapping ; *Ecotype ; Flowers/genetics ; *Genome, Plant ; Genome-Wide Association Study ; Polymorphism, Single Nucleotide ; Seeds/genetics ; }, abstract = {Rapeseed (Brassica napus) is the second most important oilseed crop in the world but the genetic diversity underlying its massive phenotypic variations remains largely unexplored. Here, we report the sequencing, de novo assembly and annotation of eight B. napus accessions. Using pan-genome comparative analysis, millions of small variations and 77.2-149.6 megabase presence and absence variations (PAVs) were identified. More than 9.4% of the genes contained large-effect mutations or structural variations. PAV-based genome-wide association study (PAV-GWAS) directly identified causal structural variations for silique length, seed weight and flowering time in a nested association mapping population with ZS11 (reference line) as the donor, which were not detected by single-nucleotide polymorphisms-based GWAS (SNP-GWAS), demonstrating that PAV-GWAS was complementary to SNP-GWAS in identifying associations to traits. Further analysis showed that PAVs in three FLOWERING LOCUS C genes were closely related to flowering time and ecotype differentiation. 
This study provides resources to support a better understanding of the genome architecture and acceleration of the genetic improvement of B. napus.}, } @article {pmid31924165, year = {2020}, author = {Jaiswal, AK and Tiwari, S and Jamal, SB and de Castro Oliveira, L and Alves, LG and Azevedo, V and Ghosh, P and Oliveira, CJF and Soares, SC}, title = {The pan-genome of Treponema pallidum reveals differences in genome plasticity between subspecies related to venereal and non-venereal syphilis.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {33}, pmid = {31924165}, issn = {1471-2164}, mesh = {Genome, Bacterial/genetics ; Genomic Islands/genetics ; Humans ; Phylogeny ; Syphilis/*microbiology ; Treponema pallidum/classification/*genetics ; }, abstract = {BACKGROUND: Spirochetal organisms of the Treponema genus are responsible for causing Treponematoses. Pathogenic treponemes is a Gram-negative, motile, spirochete pathogen that causes syphilis in human. Treponema pallidum subsp. endemicum (TEN) causes endemic syphilis (bejel); T. pallidum subsp. pallidum (TPA) causes venereal syphilis; T. pallidum subsp. pertenue (TPE) causes yaws; and T. pallidum subsp. carateum causes pinta. Out of these four high morbidity diseases, venereal syphilis is mediated by sexual contact; the other three diseases are transmitted by close personal contact. The global distribution of syphilis is alarming and there is an increasing need of proper treatment and preventive measures. Unfortunately, effective measures are limited.

RESULTS: Here, the genome sequences of 53 T. pallidum strains isolated from different parts of the world and a diverse range of hosts were comparatively analysed using pan-genomic strategy. Phylogenomic, pan-genomic, core genomic and singleton analysis disclosed the close connection among all strains of the pathogen T. pallidum, its clonal behaviour and showed increases in the sizes of the pan-genome. Based on the genome plasticity analysis of the subsets containing the subspecies T. pallidum subsp. pallidum, T. pallidum subsp. endemicum and T. pallidum subsp. pertenue, we found differences in the presence/absence of pathogenicity islands (PAIs) and genomic islands (GIs) on subsp.-based study.

CONCLUSIONS: In summary, we identified four pathogenicity islands (PAIs), eight genomic islands (GIs) in subsp. pallidum, whereas subsp. endemicum has three PAIs and seven GIs and subsp. pertenue harbours three PAIs and eight GIs. Concerning the presence of genes in PAIs and GIs, we found some genes related to lipid and amino acid biosynthesis that were only present in the subsp. of T. pallidum, compared to T. pallidum subsp. endemicum and T. pallidum subsp. pertenue.}, } @article {pmid31917358, year = {2020}, author = {Si-Tuan, N and Ngoc, HM and Nhat, LD and Nguyen, C and Pham, HQ and Huong, NT}, title = {Genomic features, whole-genome phylogenetic and comparative genomic analysis of extreme-drug-resistant ventilator-associated-pneumonia Acinetobacter baumannii strain in a Vietnam hospital.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {80}, number = {}, pages = {104178}, doi = {10.1016/j.meegid.2020.104178}, pmid = {31917358}, issn = {1567-7257}, mesh = {Acinetobacter Infections/*epidemiology/*microbiology ; Acinetobacter baumannii/*classification/drug effects/*genetics ; Anti-Bacterial Agents/pharmacology ; Computational Biology/methods ; *Cross Infection ; Drug Resistance, Bacterial ; *Genome, Bacterial ; *Genomics/methods ; Humans ; Microbial Sensitivity Tests ; Phylogeny ; Vietnam/epidemiology ; Virulence Factors/genetics ; }, abstract = {OBJECTIVES: Acinetobacter baumannii is a major cause of ventilator-associated-pneumonia (VAP) worldwide due to its impressive propensity to rapidly acquire resistance elements to a wide range of antibacterial agents. We sought to explore the genomic features of this pathogen from a sputum specimen of a VAP male patient.

METHODS: Whole genome analysis of A. baumannii DMS06670 included de novo assembly; functional annotation, whole-genome-phylogenetic analysis, antibiotics genes identification, prophage regions, virulent factor and pan-genome analysis.

RESULTS: Assembly of whole-genome shotgun sequences of strain DMS06670 yielded an estimated genome size of 3.8 Mb with Sequence Type 447. Functional annotation and orthologous protein cluster analysis identified several potential antibiotic resistance genes was conducted (with 1 novel gene), prophage regions, virulent factors. The clusters of orthologous groups (COGs) analysis in protein sequence of the A. baumannii strain was compared with the other five genomes showed that the orthologous protein clusters responsible for multi-drug exist inside highly antimicrobial resistant strains. Whole-genome phylogenetic and in silico MLST analysis revealed that this A. baumannii strain is in the same clade as strains LAC-4 and BJAB0715. Comparative analysis of 23 available genomes of A. baumannii revealed a pan-genome consisting of 15,883 genes.

CONCLUSION: Our findings provide insight into the virulence-associated genes and then compared with the genomes of other A. baumannii strains by calculation of ANI values and pan-genome analysis. Functional studies of these pathogens are required to validate these findings.}, } @article {pmid31915776, year = {2020}, author = {Zhai, Q and Shen, X and Cen, S and Zhang, C and Tian, F and Zhao, J and Zhang, H and Xue, Y and Chen, W}, title = {Screening of Lactobacillus salivarius strains from the feces of Chinese populations and the evaluation of their effects against intestinal inflammation in mice.}, journal = {Food & function}, volume = {11}, number = {1}, pages = {221-235}, doi = {10.1039/c9fo02116g}, pmid = {31915776}, issn = {2042-650X}, support = {BB/J004529/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; China ; Feces/*microbiology ; Gastrointestinal Tract/*microbiology ; Humans ; Inflammation/*metabolism ; Ligilactobacillus salivarius/classification/*isolation & purification ; Male ; Mice ; Mice, Inbred C57BL ; RAW 264.7 Cells ; T-Lymphocytes, Regulatory/cytology ; Th17 Cells/cytology ; }, abstract = {Lactobacillus salivarius is a species of lactic acid bacteria with probiotic potency. Compared to such well-known probiotics as L. rhamnosus and L. casei, the genomic characteristics and health-beneficial effects of L. salivarius are inadequately researched. For this study, a medium with enhanced selectivity for the isolation of L. salivarius was developed by optimizing the carbon source and antibiotics in the medium. Seventy-three L. salivarius strains were isolated from 472 fecal samples from Chinese populations, and their pan-genomic and phylogenetic characterizations were analyzed. Three strains (L. salivarius HN26-4, NT4-8, and FXJCJ7-2) that were clearly categorized in different sub-phylotypes of the phylogenetic tree were randomly selected for further studies. Compared to the other two tested strains, L. 
salivarius FXJCJ7-2 showed higher tolerance to simulated gastrointestinal tract conditions and more significant anti-inflammatory effects in lipopolysaccharides (LPS)-treated RAW264.7 murine macrophages. This strain was also more effective in reversing LPS-induced alterations in gut barrier function, colonic histopathology, Treg/Th-17 balance, immunomodulatory indicators, nuclear factor kappa B pathway activation, and the intestinal microenvironment of the mice than the other two tested strains. Comparative genomic analysis indicated that these protective effects may be related to the specific genes of L. salivarius FXJCJ7-2 that were involved in the tolerance to the gastrointestinal environment, short-chain fatty acid production, and host-bacterium interaction.}, } @article {pmid31914919, year = {2020}, author = {Rodriguez, CI and Martiny, JBH}, title = {Evolutionary relationships among bifidobacteria and their hosts and environments.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {26}, pmid = {31914919}, issn = {1471-2164}, support = {na//University of California, Irvine/ ; na//University of California, Irvine/ ; }, mesh = {Animals ; Bifidobacterium/classification/*genetics ; Biological Evolution ; Gastrointestinal Microbiome/genetics ; Genome, Bacterial/genetics ; Humans ; Phylogeny ; }, abstract = {BACKGROUND: The assembly of animal microbiomes is influenced by multiple environmental factors and host genetics, although the relative importance of these factors remains unclear. Bifidobacteria (genus Bifidobacterium, phylum Actinobacteria) are common first colonizers of gut microbiomes in humans and inhabit other mammals, social insects, food, and sewages. In humans, the presence of bifidobacteria in the gut has been correlated with health-promoting benefits. Here, we compared the genome sequences of a subset of the over 400 Bifidobacterium strains publicly available to investigate the adaptation of bifidobacteria diversity. 
We tested 1) whether bifidobacteria show a phylogenetic signal with their isolation sources (hosts and environments) and 2) whether key traits encoded by the bifidobacteria genomes depend on the host or environment from which they were isolated. We analyzed Bifidobacterium genomes available in the PATRIC and NCBI repositories and identified the hosts and/or environment from which they were isolated. A multilocus phylogenetic analysis was conducted to compare the genetic relatedness of the strains harbored by different hosts and environments. Furthermore, we examined differences in genomic traits and genes related to amino acid biosynthesis and degradation of carbohydrates.

RESULTS: We found that bifidobacteria diversity appears to have evolved with their hosts as strains isolated from the same host were non-randomly associated with their phylogenetic relatedness. Moreover, bifidobacteria isolated from different sources displayed differences in genomic traits such as genome size and accessory gene composition and on particular traits related to amino acid production and degradation of carbohydrates. In contrast, when analyzing diversity within human-derived bifidobacteria, we observed no phylogenetic signal or differences on specific traits (amino acid biosynthesis genes and CAZymes).

CONCLUSIONS: Overall, our study shows that bifidobacteria diversity is strongly adapted to specific hosts and environments and that several genomic traits were associated with their isolation sources. However, this signal is not observed in human-derived strains alone. Looking into the genomic signatures of bifidobacteria strains in different environments can give insights into how this bacterial group adapts to their environment and what types of traits are important for these adaptations.}, } @article {pmid31911493, year = {2020}, author = {Saw, JHW and Nunoura, T and Hirai, M and Takaki, Y and Parsons, R and Michelsen, M and Longnecker, K and Kujawinski, EB and Stepanauskas, R and Landry, Z and Carlson, CA and Giovannoni, SJ}, title = {Pangenomics Analysis Reveals Diversification of Enzyme Families and Niche Specialization in Globally Abundant SAR202 Bacteria.}, journal = {mBio}, volume = {11}, number = {1}, pages = {}, pmid = {31911493}, issn = {2150-7511}, mesh = {Biodiversity ; Chloroflexi/*enzymology/*genetics ; Computational Biology/methods ; *Genome, Bacterial ; Metabolic Networks and Pathways ; Metabolomics/methods ; *Metagenome ; *Metagenomics ; *Multigene Family ; Phylogeny ; Phylogeography ; }, abstract = {It has been hypothesized that the abundant heterotrophic ocean bacterioplankton in the SAR202 clade of the phylum Chloroflexi evolved specialized metabolisms for the oxidation of organic compounds that are resistant to microbial degradation via common metabolic pathways. Expansions of paralogous enzymes were reported and implicated in hypothetical metabolism involving monooxygenase and dioxygenase enzymes. In the proposed metabolic schemes, the paralogs serve the purpose of diversifying the range of organic molecules that cells can utilize. To further explore SAR202 evolution and metabolism, we reconstructed single amplified genomes and metagenome-assembled genomes from locations around the world that included the deepest ocean trenches. 
In an analysis of 122 SAR202 genomes that included seven subclades spanning SAR202 diversity, we observed additional evidence of paralog expansions that correlated with evolutionary history, as well as further evidence of metabolic specialization. Consistent with previous reports, families of flavin-dependent monooxygenases were observed mainly in the group III SAR202 genomes, and expansions of dioxygenase enzymes were prevalent in those of group VII. We found that group I SAR202 genomes encode expansions of racemases in the enolase superfamily, which we propose evolved for the degradation of compounds that resist biological oxidation because of chiral complexity. Supporting the conclusion that the paralog expansions indicate metabolic specialization, fragment recruitment and fluorescent in situ hybridization (FISH) with phylogenetic probes showed that SAR202 subclades are indigenous to different ocean depths and geographical regions. Surprisingly, some of the subclades were abundant in surface waters and contained rhodopsin genes, altering our understanding of the ecological role of SAR202 species in stratified water columns. IMPORTANCE: The oceans contain an estimated 662 Pg C in the form of dissolved organic matter (DOM). Information about microbial interactions with this vast resource is limited, despite broad recognition that DOM turnover has a major impact on the global carbon cycle. To explain patterns in the genomes of marine bacteria, we propose hypothetical metabolic pathways for the oxidation of organic molecules that are resistant to oxidation via common pathways. The hypothetical schemes we propose suggest new metabolic pathways and classes of compounds that could be important for understanding the distribution of organic carbon throughout the biosphere. These genome-based schemes will remain hypothetical until evidence from experimental cell biology can be gathered to test them. 
Our findings also fundamentally change our understanding of the ecology of SAR202 bacteria, showing that metabolically diverse variants of these cells occupy niches spanning all depths and are not relegated to the dark ocean.}, } @article {pmid31899322, year = {2020}, author = {Garcia Teijeiro, R and Belimov, AA and Dodd, IC}, title = {Microbial inoculum development for ameliorating crop drought stress: A case study of Variovorax paradoxus 5C-2.}, journal = {New biotechnology}, volume = {56}, number = {}, pages = {103-113}, doi = {10.1016/j.nbt.2019.12.006}, pmid = {31899322}, issn = {1876-4347}, mesh = {Agricultural Inoculants/*metabolism ; Comamonadaceae/*metabolism ; Crops, Agricultural/growth & development/metabolism/*microbiology ; *Droughts ; Rhizosphere ; *Stress, Physiological ; }, abstract = {Drought affects plant hormonal homeostasis, including root to shoot signalling. The plant is intimately connected below-ground with soil-dwelling microbes, including plant growth promoting rhizobacteria (PGPR) that can modulate plant hormonal homeostasis. Incorporating PGPR into the rhizosphere often delivers favourable results in greenhouse experiments, while field applications are much less predictable. We review the natural processes that affect the formation and dynamics of the rhizosphere, establishing a model for successful field application of PGPR utilizing an example microbial inoculum, Variovorax paradoxus 5C-2.}, } @article {pmid31897516, year = {2020}, author = {Rasheed, A and Takumi, S and Hassan, MA and Imtiaz, M and Ali, M and Morgunov, AI and Mahmood, T and He, Z}, title = {Appraisal of wheat genomics for gene discovery and breeding applications: a special emphasis on advances in Asia.}, journal = {TAG. Theoretical and applied genetics. 
Theoretische und angewandte Genetik}, volume = {133}, number = {5}, pages = {1503-1520}, doi = {10.1007/s00122-019-03523-w}, pmid = {31897516}, issn = {1432-2242}, support = {31950410563//National Natural Science Foundation of China-Yunnan Joint Fund/ ; 2019//National Natural Science Foundation of China/ ; 16H04862//Grant-in-Aids for Scientific Research/ ; 19H04863//Scientific Research on Innovative Areas/ ; }, mesh = {Asia ; *Chromosome Mapping ; *Gene Expression Regulation, Plant ; *Genome, Plant ; Genomics/*methods ; Plant Breeding/*methods ; *Quantitative Trait Loci ; Triticum/*genetics/growth & development ; }, abstract = {We discussed the most recent efforts in wheat functional genomics to discover new genes and their deployment in breeding with special emphasis on advances in Asian countries. Wheat research community is making significant progress to bridge genotype-to-phenotype gap and then applying this knowledge in genetic improvement. The advances in genomics and phenomics have intrigued wheat researchers in Asia to make best use of this knowledge in gene and trait discovery. These advancements include, but not limited to, map-based gene cloning, translational genomics, gene mapping, association genetics, gene editing and genomic selection. We reviewed more than 57 homeologous genes discovered underpinning important traits and multiple strategies used for their discovery. Further, the complementary advancements in wheat phenomics and analytical approaches to understand the genetics of wheat adaptability, resilience to climate extremes and resistance to pest and diseases were discussed. The challenge to build a gold standard reference genome sequence of bread wheat is now achieved and several de novo reference sequences from the cultivars representing different gene pools will be available soon. 
New pan-genome sequencing resources of wheat will strengthen the foundation required for accelerated gene discovery and provide more opportunities to practice the knowledge-based breeding.}, } @article {pmid31896243, year = {2019}, author = {Sulthana, A and Lakshmi, SG and Madempudi, RS}, title = {High-quality draft genome and characterization of commercially potent probiotic Lactobacillus strains.}, journal = {Genomics & informatics}, volume = {17}, number = {4}, pages = {e43}, pmid = {31896243}, issn = {1598-866X}, abstract = {Lactobacillus acidophilus UBLA-34, L. paracasei UBLPC-35, L. plantarum UBLP-40, and L. reuteri UBLRU-87 were isolated from different varieties of fermented foods. To determine the probiotic safety at the strain level, the whole genome of the respective strains was sequenced, assembled, and characterized. Both the core-genome and pan-genome phylogeny showed that L. reuteri was closer to L. plantarum than to L. acidophilus, which was closest to L. paracasei. The genomic analysis of all the strains confirmed the absence of genes encoding putative virulence factors, antibiotic resistance, and the plasmids.}, } @article {pmid31893442, year = {2020}, author = {Hu, H and Yuan, Y and Bayer, PE and Fernandez, CT and Scheben, A and Golicz, AA and Edwards, D}, title = {Legume Pangenome Construction Using an Iterative Mapping and Assembly Approach.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2107}, number = {}, pages = {35-47}, doi = {10.1007/978-1-0716-0235-5_3}, pmid = {31893442}, issn = {1940-6029}, mesh = {Chromosome Mapping/*methods ; Computational Biology/*methods ; Fabaceae/classification/*genetics ; Genetic Variation ; Genome, Plant ; High-Throughput Nucleotide Sequencing ; Species Specificity ; Whole Genome Sequencing ; }, abstract = {A pangenome is a collection of genomic sequences found in the entire species rather than a single individual. 
It allows for comprehensive, species-wide characterization of genetic variations and mining of variable genes which may play important roles in phenotypes of interest. Recent advances in sequencing technologies have facilitated draft genome sequence construction and have made pangenome constructions feasible. Here, we present a reference genome-based iterative mapping and assembly method to construct a pangenome for a legume species.}, } @article {pmid31891864, year = {2020}, author = {Kim, Y and Gu, C and Kim, HU and Lee, SY}, title = {Current status of pan-genome analysis for pathogenic bacteria.}, journal = {Current opinion in biotechnology}, volume = {63}, number = {}, pages = {54-62}, doi = {10.1016/j.copbio.2019.12.001}, pmid = {31891864}, issn = {1879-0429}, mesh = {*Bacteria/genetics ; *Genome ; Genome, Bacterial/genetics ; }, abstract = {Biological knowledge accumulated over the decades and advances in computational methods have facilitated the implementation of pan-genome analysis that aims at better understanding of genotype-phenotype associations of a specific group of organisms. Pan-genome analysis has been shown to be an effective approach to better understand a clade of pathogenic bacteria because it helps developing various and tailored therapeutic strategies on the basis of their biological similarities and differences. Here, we review recent progress in the pan-genome analysis of pathogenic bacteria. In particular, we focus on computational tools that allow streamlined pan-genome analysis. 
Also, various applications of pan-genome analysis including those relevant to devising strategies for the prevention and treatment of pathogenic bacteria are reviewed.}, } @article {pmid31884971, year = {2019}, author = {Coutinho, FH and Edwards, RA and Rodríguez-Valera, F}, title = {Charting the diversity of uncultured viruses of Archaea and Bacteria.}, journal = {BMC biology}, volume = {17}, number = {1}, pages = {109}, pmid = {31884971}, issn = {1741-7007}, mesh = {Archaea/*virology ; Bacteria/*virology ; Bacteriophages/*genetics ; *Genome, Viral ; *Microbiota ; Phylogeny ; }, abstract = {BACKGROUND: Viruses of Archaea and Bacteria are among the most abundant and diverse biological entities on Earth. Unraveling their biodiversity has been challenging due to methodological limitations. Recent advances in culture-independent techniques, such as metagenomics, shed light on the unknown viral diversity, revealing thousands of new viral nucleotide sequences at an unprecedented scale. However, these novel sequences have not been properly classified and the evolutionary associations between them were not resolved.

RESULTS: Here, we performed phylogenomic analysis of nearly 200,000 viral nucleotide sequences to establish GL-UVAB: Genomic Lineages of Uncultured Viruses of Archaea and Bacteria. The pan-genome content of the identified lineages shed light on some of their infection strategies, potential to modulate host physiology, and mechanisms to escape host resistance systems. Furthermore, using GL-UVAB as a reference database for annotating metagenomes revealed elusive habitat distribution patterns of viral lineages and environmental drivers of community composition.

CONCLUSIONS: These findings provide insights about the genomic diversity and ecology of viruses of prokaryotes. The source code used in these analyses is freely available at https://sourceforge.net/projects/gluvab/.}, } @article {pmid31883967, year = {2020}, author = {Neou, M and Villa, C and Armignacco, R and Jouinot, A and Raffin-Sanson, ML and Septier, A and Letourneur, F and Diry, S and Diedisheim, M and Izac, B and Gaspar, C and Perlemoine, K and Verjus, V and Bernier, M and Boulin, A and Emile, JF and Bertagna, X and Jaffrezic, F and Laloe, D and Baussart, B and Bertherat, J and Gaillard, S and Assié, G}, title = {Pangenomic Classification of Pituitary Neuroendocrine Tumors.}, journal = {Cancer cell}, volume = {37}, number = {1}, pages = {123-134.e5}, doi = {10.1016/j.ccell.2019.11.002}, pmid = {31883967}, issn = {1878-3686}, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Cell Lineage ; Chromosome Aberrations ; DNA Methylation ; Endopeptidases/metabolism ; Endosomal Sorting Complexes Required for Transport/metabolism ; Epigenesis, Genetic ; Epigenome ; Exome ; Female ; Humans ; Male ; Middle Aged ; Mutation ; Neoplasm Invasiveness ; Neuroendocrine Tumors/*diagnosis/*genetics/pathology ; Pituitary Gland/metabolism ; Pituitary Neoplasms/*diagnosis/*genetics/pathology ; Prognosis ; Transcriptome ; Ubiquitin Thiolesterase/metabolism ; Young Adult ; }, abstract = {Pituitary neuroendocrine tumors (PitNETs) are common, with five main histological subtypes: lactotroph, somatotroph, and thyrotroph (POU1F1/PIT1 lineage); corticotroph (TBX19/TPIT lineage); and gonadotroph (NR5A1/SF1 lineage). We report a comprehensive pangenomic classification of PitNETs. PitNETs from POU1F1/PIT1 lineage showed an epigenetic signature of diffuse DNA hypomethylation, with transposable elements expression and chromosomal instability (except for GNAS-mutated somatotrophs). 
In TPIT lineage, corticotrophs were divided into three classes: the USP8-mutated with overt secretion, the USP8-wild-type with increased invasiveness and increased epithelial-mesenchymal transition, and the large silent tumors with gonadotroph transdifferentiation. Unexpected expression of gonadotroph markers was also found in GNAS-wild-type somatotrophs (SF1 expression), challenging the current definition of SF1/gonadotroph lineage. This classification improves our understanding and affects the clinical stratification of patients with PitNETs.}, } @article {pmid31882191, year = {2020}, author = {Golicz, AA and Bayer, PE and Bhalla, PL and Batley, J and Edwards, D}, title = {Pangenomics Comes of Age: From Bacteria to Plant and Animal Applications.}, journal = {Trends in genetics : TIG}, volume = {36}, number = {2}, pages = {132-145}, doi = {10.1016/j.tig.2019.11.006}, pmid = {31882191}, issn = {0168-9525}, mesh = {Animals ; Bacteria/genetics ; *Biological Evolution ; Genome, Bacterial/*genetics ; *Genomics ; Humans ; Phylogeny ; Plants/*genetics ; }, abstract = {The pangenome refers to a collection of genomic sequence found in the entire species or population rather than in a single individual; the sequence can be core, present in all individuals, or accessory (variable or dispensable), found in a subset of individuals only. While pangenomic studies were first undertaken in bacterial species, developments in genome sequencing and assembly approaches have allowed construction of pangenomes for eukaryotic organisms, fungi, plants, and animals, including two large-scale human pangenome projects. 
Analysis of these pangenomes revealed key differences, most likely stemming from divergent evolutionary histories, but also surprising similarities.}, } @article {pmid31881843, year = {2019}, author = {Lee, IPA and Andam, CP}, title = {Pan-genome diversification and recombination in Cronobacter sakazakii, an opportunistic pathogen in neonates, and insights to its xerotolerant lifestyle.}, journal = {BMC microbiology}, volume = {19}, number = {1}, pages = {306}, pmid = {31881843}, issn = {1471-2180}, mesh = {Cronobacter sakazakii/*genetics/*pathogenicity ; *Genome, Bacterial ; Genomics ; Humans ; Multigene Family ; Phylogeny ; *Recombination, Genetic ; Species Specificity ; Virulence ; }, abstract = {BACKGROUND: Cronobacter sakazakii is an emerging opportunistic bacterial pathogen known to cause neonatal and pediatric infections, including meningitis, necrotizing enterocolitis, and bacteremia. Multiple disease outbreaks of C. sakazakii have been documented in the past few decades, yet little is known of its genomic diversity, adaptation, and evolution. Here, we analyzed the pan-genome characteristics and phylogenetic relationships of 237 genomes of C. sakazakii and 48 genomes of related Cronobacter species isolated from diverse sources.

RESULTS: The C. sakazakii pan-genome contains 17,158 orthologous gene clusters, and approximately 19.5% of these constitute the core genome. Phylogenetic analyses reveal the presence of at least ten deep branching monophyletic lineages indicative of ancestral diversification. We detected enrichment of functions involved in proton transport and rotational mechanism in accessory genes exclusively found in human-derived strains. In environment-exclusive accessory genes, we detected enrichment for those involved in tryptophan biosynthesis and indole metabolism. However, we did not find significantly enriched gene functions for those genes exclusively found in food strains. The most frequently detected virulence genes are those that encode proteins associated with chemotaxis, enterobactin synthesis, ferrienterobactin transporter, type VI secretion system, galactose metabolism, and mannose metabolism. The genes fos which encodes resistance against fosfomycin, a broad-spectrum cell wall synthesis inhibitor, and mdf(A) which encodes a multidrug efflux transporter were found in nearly all genomes. We found that a total of 2991 genes in the pan-genome have had a history of recombination. Many of the most frequently recombined genes are associated with nutrient acquisition, metabolism and toxin production.

CONCLUSIONS: Overall, our results indicate that the presence of a large accessory gene pool, ability to switch between ecological niches, a diverse suite of antibiotic resistance, virulence and niche-specific genes, and frequent recombination partly explain the remarkable adaptability of C. sakazakii within and outside the human host. These findings provide critical insights that can help define the development of effective disease surveillance and control strategies for Cronobacter-related diseases.}, } @article {pmid31875848, year = {2020}, author = {Song, X and Sun, X and Oh, SF and Wu, M and Zhang, Y and Zheng, W and Geva-Zatorsky, N and Jupp, R and Mathis, D and Benoist, C and Kasper, DL}, title = {Microbial bile acid metabolites modulate gut RORγ[+] regulatory T cell homeostasis.}, journal = {Nature}, volume = {577}, number = {7790}, pages = {410-415}, pmid = {31875848}, issn = {1476-4687}, support = {K01 DK102771/DK/NIDDK NIH HHS/United States ; R01 AI125603/AI/NIAID NIH HHS/United States ; R01 AT010268/AT/NCCIH NIH HHS/United States ; }, mesh = {Animals ; Bile Acids and Salts/chemistry/*metabolism ; *Gastrointestinal Microbiome ; *Homeostasis ; Intestines/*immunology/*microbiology ; Mice, Inbred C57BL ; Nuclear Receptor Subfamily 1, Group F, Member 3/genetics/*immunology ; T-Lymphocytes, Regulatory/*immunology ; }, abstract = {The metabolic pathways encoded by the human gut microbiome constantly interact with host gene products through numerous bioactive molecules[1]. Primary bile acids (BAs) are synthesized within hepatocytes and released into the duodenum to facilitate absorption of lipids or fat-soluble vitamins[2]. Some BAs (approximately 5%) escape into the colon, where gut commensal bacteria convert them into various intestinal BAs[2] that are important hormones that regulate host cholesterol metabolism and energy balance via several nuclear receptors and/or G-protein-coupled receptors[3,4]. 
These receptors have pivotal roles in shaping host innate immune responses[1,5]. However, the effect of this host-microorganism biliary network on the adaptive immune system remains poorly characterized. Here we report that both dietary and microbial factors influence the composition of the gut BA pool and modulate an important population of colonic FOXP3[+] regulatory T (Treg) cells expressing the transcription factor RORγ. Genetic abolition of BA metabolic pathways in individual gut symbionts significantly decreases this Treg cell population. Restoration of the intestinal BA pool increases colonic RORγ[+] Treg cell counts and ameliorates host susceptibility to inflammatory colitis via BA nuclear receptors. Thus, a pan-genomic biliary network interaction between hosts and their bacterial symbionts can control host immunological homeostasis via the resulting metabolites.}, } @article {pmid31870294, year = {2019}, author = {Wang, Y and Luo, L and Li, Q and Wang, H and Wang, Y and Sun, H and Xu, J and Lan, R and Ye, C}, title = {Genomic dissection of the most prevalent Listeria monocytogenes clone, sequence type ST87, in China.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {1014}, pmid = {31870294}, issn = {1471-2164}, support = {2018ZZKTB07//State Key Laboratory of Infectious Disease Prevention and Control/ ; 2018SKLID801//State Key Laboratory of Infectious Disease Prevention and Control/ ; 31800004//Young Scientists Fund/ ; }, mesh = {China ; Genome, Bacterial/genetics ; *Genomics ; Listeria monocytogenes/*genetics/pathogenicity/virology ; Multigene Family/genetics ; Phylogeny ; Plasmids/genetics ; Polymorphism, Single Nucleotide ; Prophages/physiology ; Virulence/genetics ; Whole Genome Sequencing/*methods ; }, abstract = {BACKGROUND: Listeria monocytogenes consists of four lineages that occupy a wide variety of ecological niches. 
Sequence type (ST) 87 (serotype 1/2b), belonging to lineage I, is one of the most common STs isolated from food products, food associated environments and sporadic listeriosis in China. Here, we performed a comparative genomic analysis of the L. monocytogenes ST87 clone by sequencing 71 strains representing a diverse range of sources, different geographical locations and isolation years.

RESULTS: The core genome and pan genome of ST87 contained 2667 genes and 3687 genes respectively. Phylogenetic analysis based on core genome SNPs divided the 71 strains into 10 clades. The clinical strains were distributed among multiple clades. Four clades contained strains from multiple geographic regions and showed high genetic diversity. The major gene content variation of ST87 genomes was due to putative prophages, with eleven hotspots of the genome that harbor prophages. All strains carry an intact CRISPR/Cas system. Two major CRISPR spacer profiles were found which were not clustered phylogenetically. A large plasmid of about 90 Kb, which carried heavy metal resistance genes, was found in 32.4% (23/71) of the strains. All ST87 strains harbored the Listeria pathogenicity island (LIPI)-4 and a unique 10-open reading frame (ORF) genomic island containing a novel restriction-modification system.

CONCLUSION: Whole genome sequence analysis of L. monocytogenes ST87 enabled a clearer understanding of the population structure and the evolutionary history of ST87 L. monocytogenes in China. The novel genetic elements identified may contribute to its virulence and adaptation to different environmental niches. Our findings will be useful for the development of effective strategies for the prevention and treatment of listeriosis caused by this prevalent clone.}, } @article {pmid31861401, year = {2019}, author = {Albert, K and Rani, A and Sela, DA}, title = {Comparative Pangenomics of the Mammalian Gut Commensal Bifidobacterium longum.}, journal = {Microorganisms}, volume = {8}, number = {1}, pages = {}, pmid = {31861401}, issn = {2076-2607}, abstract = {Bifidobacterium longum colonizes mammalian gastrointestinal tracts where it could metabolize host-indigestible oligosaccharides. Although B. longum strains are currently segregated into three subspecies that reflect common metabolic capacities and genetic similarity, heterogeneity within subspecies suggests that these taxonomic boundaries may not be completely resolved. To address this, the B. longum pangenome was analyzed from representative strains isolated from a diverse set of sources. As a result, the B. longum pangenome is open and contains almost 17,000 genes, with over 85% of genes found in ≤28 of 191 strains. B. longum genomes share a small core gene set of only ~500 genes, or ~3% of the total pangenome. Although the individual B. 
longum subspecies pangenomes share similar relative abundances of clusters of orthologous groups, strains show inter- and intrasubspecies differences with respect to carbohydrate utilization gene content and growth phenotypes.}, } @article {pmid31849336, year = {2019}, author = {Monat, C and Padmarasu, S and Lux, T and Wicker, T and Gundlach, H and Himmelbach, A and Ens, J and Li, C and Muehlbauer, GJ and Schulman, AH and Waugh, R and Braumann, I and Pozniak, C and Scholz, U and Mayer, KFX and Spannagl, M and Stein, N and Mascher, M}, title = {TRITEX: chromosome-scale sequence assembly of Triticeae genomes with open-source tools.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {284}, pmid = {31849336}, issn = {1474-760X}, mesh = {*Chromosomes, Plant ; *Genetic Techniques ; *Genome, Plant ; Hordeum/*genetics ; Software ; Triticum/*genetics ; }, abstract = {Chromosome-scale genome sequence assemblies underpin pan-genomic studies. Recent genome assembly efforts in the large-genome Triticeae crops wheat and barley have relied on the commercial closed-source assembly algorithm DeNovoMagic. We present TRITEX, an open-source computational workflow that combines paired-end, mate-pair, 10X Genomics linked-read with chromosome conformation capture sequencing data to construct sequence scaffolds with megabase-scale contiguity ordered into chromosomal pseudomolecules. 
We evaluate the performance of TRITEX on publicly available sequence data of tetraploid wild emmer and hexaploid bread wheat, and construct an improved annotated reference genome sequence assembly of the barley cultivar Morex as a community resource.}, } @article {pmid31848603, year = {2020}, author = {Sitto, F and Battistuzzi, FU}, title = {Estimating Pangenomes with Roary.}, journal = {Molecular biology and evolution}, volume = {37}, number = {3}, pages = {933-939}, pmid = {31848603}, issn = {1537-1719}, support = {R15 GM121981/GM/NIGMS NIH HHS/United States ; }, mesh = {Bacteria/*classification/genetics ; Bacterial Proteins/*genetics ; Computational Biology/*methods ; Phenotype ; Phylogeny ; Software ; Species Specificity ; }, abstract = {A description of the genetic makeup of a species based on a single genome is often insufficient because it ignores the variability in gene repertoire among multiple strains. The estimation of the pangenome of a species is a solution to this issue as it provides an overview of genes that are shared by all strains and genes that are present in only some of the genomes. These different sets of genes can then be analyzed functionally to explore correlations with unique phenotypes and adaptations. This protocol presents the usage of Roary, a Linux-native pangenome application. Roary is a straightforward software that provides 1) an overview about core and accessory genes for those interested in general trends and, also, 2) detailed information on gene presence/absence in each genome for in-depth analyses. 
Results are provided both in text and graphic format.}, } @article {pmid31847510, year = {2020}, author = {Heo, S and Lee, JS and Lee, JH and Jeong, DW}, title = {Comparative Genomic Analysis of Food-Originated Coagulase-Negative Staphylococcus: Analysis of Conserved Core Genes and Diversity of the Pan-Genome.}, journal = {Journal of microbiology and biotechnology}, volume = {30}, number = {3}, pages = {341-351}, pmid = {31847510}, issn = {1738-8872}, mesh = {Food Microbiology ; *Genome, Bacterial ; Genomics ; Phylogeny ; Plasmids ; Staphylococcus/*genetics/isolation & purification ; Transposases/metabolism ; }, abstract = {To shed light on the genetic differences among food-originated coagulase-negative Staphylococcus (CNS), we performed pan-genome analysis of five species: Staphylococcus carnosus (two strains), Staphylococcus equorum (two strains), Staphylococcus succinus (three strains), Staphylococcus xylosus (two strains), and Staphylococcus saprophyticus (one strain). The pan-genome size increases with each new strain and currently holds about 4,500 genes from 10 genomes. Specific genes were shown to be strain dependent but not species dependent. Most specific genes were of unknown function or encoded restriction-modification enzymes, transposases, or prophages. 
Our results indicate that unique genes have been acquired or lost by convergent evolution within individual strains.}, } @article {pmid31844108, year = {2019}, author = {Liang, CY and Yang, CH and Lai, CH and Huang, YH and Lin, JN}, title = {Comparative Genomics of 86 Whole-Genome Sequences in the Six Species of the Elizabethkingia Genus Reveals Intraspecific and Interspecific Divergence.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {19167}, pmid = {31844108}, issn = {2045-2322}, mesh = {Base Sequence ; Computer Simulation ; Drug Resistance, Bacterial/genetics ; Evolution, Molecular ; Flavobacteriaceae/*genetics/pathogenicity ; *Genetic Variation ; *Genome, Bacterial ; *Genomics ; Phylogeny ; Species Specificity ; Virulence Factors/genetics ; *Whole Genome Sequencing ; }, abstract = {Bacteria of the genus Elizabethkingia are emerging infectious agents that can cause infection in humans. The number of published whole-genome sequences of Elizabethkingia is rapidly increasing. In this study, we used comparative genomics to investigate the genomes of the six species in the Elizabethkingia genus, namely E. meningoseptica, E. anophelis, E. miricola, E. bruuniana, E. ursingii, and E. occulta. In silico DNA-DNA hybridization, whole-genome sequence-based phylogeny, pan genome analysis, and Kyoto Encyclopedia of Genes and Genomes (KEGG) analyses were performed, and clusters of orthologous groups were evaluated. Of the 86 whole-genome sequences available in GenBank, 21 were complete genome sequences and 65 were shotgun sequences. In silico DNA-DNA hybridization clearly delineated the six Elizabethkingia species. Phylogenetic analysis confirmed that E. bruuniana, E. ursingii, and E. occulta were closer to E. miricola than to E. meningoseptica and E. anophelis. A total of 2,609 clusters of orthologous groups were identified among the six type strains of the Elizabethkingia genus. 
Metabolism-related clusters of orthologous groups accounted for the majority of gene families in KEGG analysis. New genes were identified that substantially increased the total repertoire of the pan genome after the addition of 86 Elizabethkingia genomes, which suggests that Elizabethkingia has shown adaptive evolution to environmental change. This study presents a comparative genomic analysis of Elizabethkingia, and the results of this study provide knowledge that facilitates a better understanding of this microorganism.}, } @article {pmid31842745, year = {2019}, author = {D'Mello, A and Ahearn, CP and Murphy, TF and Tettelin, H}, title = {ReVac: a reverse vaccinology computational pipeline for prioritization of prokaryotic protein vaccine candidates.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {981}, pmid = {31842745}, issn = {1471-2164}, support = {R01 AI019641/AI/NIAID NIH HHS/United States ; UL1 TR001412/TR/NCATS NIH HHS/United States ; UL1TR001412//National Institutes of Health (US)/ ; R01AI019641//National Institutes of Health (US)/ ; }, mesh = {Bacteria/*genetics/immunology ; Bacterial Proteins/genetics/*immunology ; Bacterial Vaccines/genetics/immunology ; Computational Biology/*methods ; Humans ; Machine Learning ; Software ; Vaccines, Subunit/genetics/immunology ; Vaccinology/*methods ; }, abstract = {BACKGROUND: Reverse vaccinology accelerates the discovery of potential vaccine candidates (PVCs) prior to experimental validation. Current programs typically use one bacterial proteome to identify PVCs through a filtering architecture using feature prediction programs or a machine learning approach. Filtering approaches may eliminate potential antigens based on limitations in the accuracy of prediction tools used. Machine learning approaches are heavily dependent on the selection of training datasets with experimentally validated antigens (positive control) and non-protective-antigens (negative control). 
The use of one or few bacterial proteomes does not assess PVC conservation among strains, an important feature of vaccine antigens.

RESULTS: We present ReVac, which implements both a panoply of feature prediction programs without filtering out proteins, and scoring of candidates based on predictions made on curated positive and negative control PVCs datasets. ReVac surveys several genomes assessing protein conservation, as well as DNA and protein repeats, which may result in variable expression of PVCs. ReVac's orthologous clustering of conserved genes, identifies core and dispensable genome components. This is useful for determining the degree of conservation of PVCs among the population of isolates for a given pathogen. Potential vaccine candidates are then prioritized based on conservation and overall feature-based scoring. We present the application of ReVac, applied to 69 Moraxella catarrhalis and 270 non-typeable Haemophilus influenzae genomes, prioritizing 64 and 29 proteins as PVCs, respectively.

CONCLUSION: ReVac's use of a scoring scheme ranks PVCs for subsequent experimental testing. It employs a redundancy-based approach in its predictions of features using several prediction tools. The protein's features are collated, and each protein is ranked based on the scoring scheme. Multi-genome analyses performed in ReVac allow for a comprehensive overview of PVCs from a pan-genome perspective, as an essential pre-requisite for any bacterial subunit vaccine design. ReVac prioritized PVCs of two human respiratory pathogens, identifying both novel and previously validated PVCs.}, } @article {pmid31840817, year = {2020}, author = {Diaz-Hernandez, ME and Khan, NM and Trochez, CM and Yoon, T and Maye, P and Presciutti, SM and Gibson, G and Drissi, H}, title = {Derivation of notochordal cells from human embryonic stem cells reveals unique regulatory networks by single cell-transcriptomics.}, journal = {Journal of cellular physiology}, volume = {235}, number = {6}, pages = {5241-5255}, pmid = {31840817}, issn = {1097-4652}, support = {R21 AR067903/AR/NIAMS NIH HHS/United States ; }, mesh = {Biomarkers/metabolism ; Cell Differentiation/*genetics ; Fetal Proteins/genetics ; Forkhead Transcription Factors/genetics ; GPI-Linked Proteins/genetics ; Gene Regulatory Networks/genetics ; Growth Differentiation Factor 3/genetics ; Human Embryonic Stem Cells/cytology/*metabolism ; Humans ; Induced Pluripotent Stem Cells ; Intercellular Signaling Peptides and Proteins/genetics ; Intervertebral Disc/growth & development ; Intervertebral Disc Degeneration/*genetics/pathology ; Neoplasm Proteins/genetics ; Notochord/growth & development/metabolism ; Nucleus Pulposus/growth & development/metabolism ; PAX6 Transcription Factor/genetics ; Regeneration/genetics ; SOXD Transcription Factors/genetics ; Single-Cell Analysis ; T-Box Domain Proteins/genetics ; Transcriptome/*genetics ; }, abstract = {Intervertebral disc degeneration (IDD) is a public health dilemma as it is associated with 
low back and neck pain, a frequent reason for patients to visit the physician. During IDD, nucleus pulposus (NP), the central compartment of intervertebral disc (IVD), undergoes degeneration. Stem cells have been adopted as a promising biological source to regenerate the IVD and restore its function. Here, we describe a simple, two-step differentiation strategy using a cocktail of four factors (LDN, AGN, FGF, and CHIR) for efficient derivation of notochordal cells from human embryonic stem cells (hESCs). We employed a CRISPR/Cas9 based genome-editing approach to knock-in the mCherry reporter vector upstream of the 3' untranslated region of the Noto gene in H9-hESCs and monitored notochordal cell differentiation. Our data show that treatment of H9-hESCs with the above-mentioned four factors for 6 days successfully resulted in notochordal cells. These cells were characterized by morphology, immunostaining, and gene and protein expression analyses for established notochordal cell markers including FoxA2, SHH, and Brachyury. Additionally, pan-genomic high-throughput single cell RNA-sequencing revealed an efficient and robust notochordal differentiation. We further identified a key regulatory network consisting of eight candidate genes encoding transcription factors including PAX6, GDF3, FOXD3, TDGF1, and SOX5, which are considered as potential drivers of notochordal differentiation. This is the first single cell transcriptomic analysis of notochordal cells derived from hESCs. 
The ability to efficiently obtain notochordal cells from pluripotent stem cells provides an additional tool to develop new cell-based therapies for the treatment of IDD.}, } @article {pmid31840364, year = {2020}, author = {Haro-Moreno, JM and Rodriguez-Valera, F and Rosselli, R and Martinez-Hernandez, F and Roda-Garcia, JJ and Gomez, ML and Fornas, O and Martinez-Garcia, M and López-Pérez, M}, title = {Ecogenomics of the SAR11 clade.}, journal = {Environmental microbiology}, volume = {22}, number = {5}, pages = {1748-1763}, pmid = {31840364}, issn = {1462-2920}, support = {ACIF/2015/332//Generalitat Valenciana/International ; 5334//Gordon and Betty Moore Foundation/International ; BES-2014-067828//Ministerio de Economía y Competitividad/International ; CGL2013-40564-R//Ministerio de Economía y Competitividad/International ; CGL2016-76273-P//Ministerio de Economía y Competitividad/International ; IJCI-2017-34002//Ministerio de Economía y Competitividad/International ; SAF2013-49267-EXP//Ministerio de Economía y Competitividad/International ; }, mesh = {Genome, Bacterial/*genetics ; Genomics ; Hyphomicrobiaceae/classification/*genetics ; Mediterranean Region ; Metagenome/genetics ; Metagenomics ; Oceans and Seas ; Organophosphonates/metabolism ; Phylogeny ; Purines/metabolism ; Seawater/microbiology ; Water Microbiology ; }, abstract = {Members of the SAR11 clade, despite their high abundance, are often poorly represented by metagenome-assembled genomes. This fact has hampered our knowledge about their ecology and genetic diversity. Here we examined 175 SAR11 genomes, including 47 new single-amplified genomes. The presence of the first genomes associated with subclade IV suggests that, in the same way as subclade V, they might be outside the proposed Pelagibacterales order. 
An expanded phylogenomic classification together with patterns of metagenomic recruitment at a global scale have allowed us to define new ecogenomic units of classification (genomospecies), appearing at different, and sometimes restricted, metagenomic data sets. We detected greater microdiversity across the water column at a single location than in samples collected from similar depth across the global ocean, suggesting little influence of biogeography. In addition, pangenome analysis revealed that the flexible genome was essential to shape genomospecies distribution. In one genomospecies preferentially found within the Mediterranean, a set of genes involved in phosphonate utilization was detected. While another, with a more cosmopolitan distribution, was unique in having an aerobic purine degradation pathway. Together, these results provide a glimpse of the enormous genomic diversity within this clade at a finer resolution than the currently defined clades.}, } @article {pmid31838800, year = {2020}, author = {Choi, JY and Kim, SC and Lee, PC}, title = {Comparative Genome Analysis of Psychrobacillus Strain PB01, Isolated from an Iceberg.}, journal = {Journal of microbiology and biotechnology}, volume = {30}, number = {2}, pages = {237-243}, pmid = {31838800}, issn = {1738-8872}, mesh = {Bacillaceae/*genetics/isolation & purification/metabolism ; Citric Acid Cycle ; Energy Metabolism ; *Environmental Microbiology ; *Genome, Bacterial ; *Genomics/methods ; Glyoxylates/metabolism ; Ice Cover/*microbiology ; Metabolic Networks and Pathways ; Plasmids/genetics ; }, abstract = {A novel psychrotolerant Psychrobacillus strain PB01, isolated from an Antarctic iceberg, was comparatively analyzed with five related strains. The complete genome of strain PB01 consists of a single circular chromosome (4.3 Mb) and a plasmid (19 Kb). 
As potential low-temperature adaptation strategies, strain PB01 has four genes encoding cold-shock proteins, two genes encoding DEAD-box RNA helicases, and eight genes encoding transporters for glycine betaine, which can serve as a cryoprotectant, on the genome. The pan-genome structure of the six Psychrobacillus strains suggests that strain PB01 might have evolved to adapt to extreme environments by changing its genome content to gain higher capacity for DNA repair, translation, and membrane transport. Notably, strain PB01 possesses a complete TCA cycle consisting of eight enzymes as well as three additional Helicobacter pylori-type enzymes: ferredoxin-dependent 2-oxoglutarate synthase, succinyl-CoA/acetoacetyl-CoA transferase, and malate/quinone oxidoreductase. The co-existence of the genes for TCA cycle enzymes has also been identified in the other five Psychrobacillus strains.}, } @article {pmid31828586, year = {2020}, author = {Wang, Z and Jia, L and Li, J and Liu, H and Liu, D}, title = {Pan-Genomic Analysis of African Swine Fever Virus.}, journal = {Virologica Sinica}, volume = {35}, number = {5}, pages = {662-665}, pmid = {31828586}, issn = {1995-820X}, mesh = {*African Swine Fever ; *African Swine Fever Virus ; Animals ; Genomics ; Swine ; }, } @article {pmid31824466, year = {2019}, author = {Lee, BH and Cole, S and Badel-Berchoux, S and Guillier, L and Felix, B and Krezdorn, N and Hébraud, M and Bernardi, T and Sultan, I and Piveteau, P}, title = {Biofilm Formation of Listeria monocytogenes Strains Under Food Processing Environments and Pan-Genome-Wide Association Study.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2698}, pmid = {31824466}, issn = {1664-302X}, abstract = {Concerns about food contamination by Listeria monocytogenes are on the rise with increasing consumption of ready-to-eat foods. Biofilm production of L. 
monocytogenes is presumed to be one of the ways that confer its increased resistance and persistence in the food chain. In this study, a collection of isolates from foods and food processing environments (FPEs) representing persistent, prevalent, and rarely detected genotypes was evaluated for biofilm forming capacities including adhesion and sessile biomass production under diverse environmental conditions. The quantity of sessile biomass varied according to growth conditions, lineage, serotype as well as genotype but association of clonal complex (CC) 26 genotype with biofilm production was evidenced under cold temperature. In general, relative biofilm productivity of each strain varied inconsistently across growth conditions. Under our experimental conditions, there were no clear associations between biofilm formation efficiency and persistent or prevalent genotypes. Distinct extrinsic factors affected specific steps of biofilm formation. Sudden nutrient deprivation enhanced cellular adhesion while a prolonged nutrient deficiency impeded biofilm maturation. Salt addition increased biofilm production, moreover, nutrient limitation supplemented by salt significantly stimulated biofilm formation. Pan-genome-wide association study (Pan-GWAS) assessed genetic composition with regard to biofilm phenotypes for the first time. The number of reported genes differed depending on the growth conditions and the number of common genes was low. However, a broad overview of the ontology contents revealed similar patterns regardless of the conditions. 
Functional analysis showed that functions related to transformation/competence and surface proteins including Internalins were highly enriched.}, } @article {pmid31815935, year = {2019}, author = {Jandrasits, C and Kröger, S and Haas, W and Renard, BY}, title = {Computational pan-genome mapping and pairwise SNP-distance improve detection of Mycobacterium tuberculosis transmission clusters.}, journal = {PLoS computational biology}, volume = {15}, number = {12}, pages = {e1007527}, pmid = {31815935}, issn = {1553-7358}, mesh = {Chromosome Mapping ; Computational Biology ; Computer Simulation ; DNA, Bacterial/genetics ; Databases, Genetic/statistics & numerical data ; Disease Outbreaks/statistics & numerical data ; Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Humans ; Molecular Epidemiology/statistics & numerical data ; Mycobacterium tuberculosis/classification/*genetics ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; Tuberculosis/epidemiology/microbiology/*transmission ; Whole Genome Sequencing ; }, abstract = {Next-generation sequencing based base-by-base distance measures have become an integral complement to epidemiological investigation of infectious disease outbreaks. This study introduces PANPASCO, a computational pan-genome mapping based, pairwise distance method that is highly sensitive to differences between cases, even when located in regions of lineage specific reference genomes. 
We show that our approach is superior to previously published methods in several datasets and across different Mycobacterium tuberculosis lineages, as its characteristics allow the comparison of a high number of diverse samples in one analysis—a scenario that becomes more and more likely with the increased usage of whole-genome sequencing in transmission surveillance.}, } @article {pmid31804713, year = {2020}, author = {Emery, A and Marpaux, N and Naegelen, C and Valot, B and Morel, P and Hocquet, D}, title = {Genotypic study of Citrobacter koseri, an emergent platelet contaminant since 2012 in France.}, journal = {Transfusion}, volume = {60}, number = {2}, pages = {245-249}, doi = {10.1111/trf.15617}, pmid = {31804713}, issn = {1537-2995}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/genetics/metabolism ; Citrobacter koseri/drug effects/*genetics ; France ; Genotype ; Humans ; Phylogeny ; }, abstract = {BACKGROUND: Transfusion-transmitted bacterial infection is a rare occurrence but the most feared complication in transfusion practices. Between 2012 and 2017, five cases of platelet concentrates (PCs) contaminated with the bacterial pathogen Citrobacter koseri (PC-Ck) have been reported in France, with two leading to the death of the recipients. We tested the possibilities of the emergence of a PC-specific clone of C. koseri (Ck) and of specific bacterial genes associated with PC contamination.

STUDY DESIGN AND METHODS: The phylogenetic network, based on a homemade Ck core genome scheme, inferred from the genomes of 20 worldwide Ck isolates unrelated to PC contamination taken as controls (U-Ck) and the genomes of the five PC-Ck, explored the clonal relationship between the genomes and evaluated the distribution of PC-Ck throughout the species. Along with this core genome multilocus sequence typing approach, a Ck pan genome has been used to seek genes specific to PC-Ck isolates.

RESULTS: Our genomic approach suggested that the population of C. koseri is nonclonal, although it also identified a cluster containing three PC-Ck and eight U-Ck. Indeed, the PC-Ck did not share any specific genes.

CONCLUSION: The elevated incidence of PCs contaminated by C. koseri in France between 2012 and 2017 was not due to the dissemination of a clone. The determinants of the recent outbreaks of PC contamination with C. koseri are still unknown.}, } @article {pmid31803240, year = {2019}, author = {Li, R and Fu, W and Su, R and Tian, X and Du, D and Zhao, Y and Zheng, Z and Chen, Q and Gao, S and Cai, Y and Wang, X and Li, J and Jiang, Y}, title = {Towards the Complete Goat Pan-Genome by Recovering Missing Genomic Segments From the Reference Genome.}, journal = {Frontiers in genetics}, volume = {10}, number = {}, pages = {1169}, pmid = {31803240}, issn = {1664-8021}, abstract = {It is broadly expected that next generation sequencing will ultimately generate a complete genome as is the latest goat reference genome (ARS1), which is considered to be one of the most continuous assemblies in livestock. However, the rich diversity of worldwide goat breeds indicates that a genome from one individual would be insufficient to represent the whole genomic contents of goats. By comparing nine de novo assemblies from seven sibling species of domestic goat with ARS1 and using resequencing and transcriptome data from goats for verification, we identified a total of 38.3 Mb sequences that were absent in ARS1. The pan-sequences contain genic fractions with considerable expression. Using the pan-genome (ARS1 together with the pan-sequences) as a reference genome, variation calling efficacy can be appreciably improved. A total of 56,657 spurious SNPs per individual were repressed and 24,414 novel SNPs per individual on average were recovered as a result of better reads mapping quality. The transcriptomic mapping rate was also increased by ∼1.15%. Our study demonstrated that comparing de novo assemblies from closely related species is an efficient and reliable strategy for finding missing sequences from the reference genome and could be applicable to other species. 
Pan-genome can serve as an improved reference genome in animals for a better exploration of the underlying genomic variations and could increase the probability of finding genotype-phenotype associations assessed by a comprehensive variation database containing much more differences between individuals. We have constructed a goat pan-genome web interface for data visualization (http://animal.nwsuaf.edu.cn/panGoat).}, } @article {pmid31798566, year = {2019}, author = {Sutton, D and Livingstone, PG and Furness, E and Swain, MT and Whitworth, DE}, title = {Genome-Wide Identification of Myxobacterial Predation Genes and Demonstration of Formaldehyde Secretion as a Potentially Predation-Resistant Trait of Pseudomonas aeruginosa.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2650}, pmid = {31798566}, issn = {1664-302X}, abstract = {Despite widespread use in human biology, genome-wide association studies (GWAS) of bacteria are few and have, to date, focused primarily on pathogens. Myxobacteria are predatory microbes with large patchwork genomes, with individual strains secreting unique cocktails of predatory proteins and metabolites. We investigated whether a GWAS strategy could be applied to myxobacteria to identify genes associated with predation. Deduced proteomes from 29 myxobacterial genomes (including eight Myxococcus genomes sequenced for this study), were clustered into orthologous groups, and the presence/absence of orthologues assessed in superior and inferior predators of ten prey organisms. 139 'predation genes' were identified as being associated significantly with predation, including some whose annotation suggested a testable predatory mechanism. Formaldehyde dismutase (fdm) was associated with superior predation of Pseudomonas aeruginosa, and predatory activity of a strain lacking fdm could be increased by the exogenous addition of a formaldehyde detoxifying enzyme, suggesting that production of formaldehyde by P. 
aeruginosa acts as an anti-predation behaviour. This study establishes the utility of bacterial GWAS to investigate microbial processes beyond pathogenesis, giving plausible and verifiable associations between gene presence/absence and predatory phenotype. We propose that the slow growth rate of myxobacteria, coupled with their predatory mechanism of constitutive secretion, has rendered them relatively resistant to genome streamlining. The resultant genome expansion made possible their observed accumulation of prey-specific predatory genes, without requiring them to be selected for by frequent or recent predation on diverse prey, potentially explaining both the large pan-genome and broad prey range of myxobacteria.}, } @article {pmid31796803, year = {2019}, author = {Anand, S and Deighton, M and Livanos, G and Pang, ECK and Mantri, N}, title = {Agastache honey has superior antifungal activity in comparison with important commercial honeys.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {18197}, pmid = {31796803}, issn = {2045-2322}, mesh = {Agastache/*chemistry ; Antifungal Agents/*pharmacology ; Apitherapy/*methods ; Arthrodermataceae/drug effects ; Candida albicans/drug effects ; Dermatomyositis/microbiology/*therapy ; *Honey ; Humans ; Microbial Sensitivity Tests ; }, abstract = {There is an urgent need for new effective antifungal agents suitable for the treatment of superficial skin infections, since acquired resistance of fungi to currently available agents is increasing. The antifungal activity of mono-floral Agastache honey and commercially available honeys were tested against dermatophytes (T. mentagrophytes and T. rubrum) and C. albicans (ATCC 10231 and a clinical isolate) by agar well diffusion and micro-dilution (AWD and MD). In AWD and MD assays, Agastache honey was effective at 40% concentration against dermatophytes (zone diameter, 19.5-20 mm) and C. albicans with the same MIC and MFC values indicating fungicidal activity. 
Tea tree honey was effective at 80% concentration (zone diameter, 14 mm) against dermatophytes and at 40% concentration against T. mentagrophytes and C. albicans. Manuka was effective at 80% concentration only against T. mentagrophytes (zone diameter, 12 mm) and at 40% against T. rubrum and C. albicans with fungistatic activity. Similar to the AWD results, Jelly bush, Super Manuka, and Jarrah showed no activity against dermatophytes but showed some activity against C. albicans. Headspace volatiles of six honeys were isolated by SPME and identified by GC-MS. The characteristic chemical markers for each honey were as follows: Agastache- Phenol, 2,4-bis(1,1-dimethylethyl) and Estragole; Manuka and Tea-tree- Acetanisole and Methyl 3,5-dimethoxybenzoate; Jelly bush- Linalool and Nonanal; Super Manuka- Methyl 3,5-dimethoxybenzoate and Nonanal; Jarrah- Isophorone and Nonanoic acid. Overall, analysis of the bioactive compound content and antifungal activity of Agastache honey indicated possible use as an antifungal agent for management of superficial fungal infections.}, } @article {pmid31796569, year = {2019}, author = {Yuan, J and Li, YY and Xu, Y and Sun, BJ and Shao, J and Zhang, D and Li, K and Fan, DD and Xue, ZB and Chen, WH and Pak, C and Lou, YL and Su, JZ and Zheng, MQ}, title = {Molecular Signatures Related to the Virulence of Bacillus cereus Sensu Lato, a Leading Cause of Devastating Endophthalmitis.}, journal = {mSystems}, volume = {4}, number = {6}, pages = {}, pmid = {31796569}, issn = {2379-5077}, abstract = {Bacillus endophthalmitis is a devastating eye infection that causes rapid blindness through extracellular tissue-destructive exotoxins. Despite its importance, knowledge of the phylogenetic relationships and population structure of intraocular Bacillus spp. is lacking. 
In this study, we sequenced the whole genomes of eight Bacillus intraocular pathogens independently isolated from 8/52 patients with posttraumatic Bacillus endophthalmitis infections in the Eye Hospital of Wenzhou Medical University between January 2010 and December 2018. Phylogenetic analysis revealed that the pathogenic intraocular isolates belonged to Bacillus cereus, Bacillus thuringiensis and Bacillus toyonensis. To determine the virulence of the ocular isolates, three representative strains were injected into mouse models, and severe endophthalmitis leading to blindness was observed. Through incorporating publicly available genomes for Bacillus spp., we found that the intraocular pathogens could be isolated independently but displayed a similar genetic context. In addition, our data provide genome-wide support for intraocular and gastrointestinal sources of Bacillus spp. belonging to different lineages. Importantly, we identified five molecular signatures of virulence and motility genes associated with intraocular infection, namely, plcA-2, InhA-3, InhA-4, hblA-5, and fliD using pangenome-wide association studies. The characterization of overrepresented genes in the intraocular isolates holds value to predict bacterial evolution and for the design of future intervention strategies in patients with endophthalmitis. IMPORTANCE: In this study, we provided a detailed and comprehensive clinicopathological and pathogenic report of Bacillus endophthalmitis over the 8 years of the study period. We first reported the whole-genome sequence of Bacillus spp. causing devastating endophthalmitis and found that Bacillus toyonensis is able to cause endophthalmitis. Finally, we revealed significant endophthalmitis-associated virulence genes involved in hemolysis, immunity inhibition, and pathogenesis. 
Overall, as more sequencing data sets become available, these data will facilitate comparative research and will reveal the emergence of pathogenic "ocular bacteria."}, } @article {pmid31787539, year = {2020}, author = {Khan, AW and Garg, V and Roorkiwal, M and Golicz, AA and Edwards, D and Varshney, RK}, title = {Super-Pangenome by Integrating the Wild Side of a Species for Accelerated Crop Improvement.}, journal = {Trends in plant science}, volume = {25}, number = {2}, pages = {148-158}, pmid = {31787539}, issn = {1878-4372}, mesh = {*Genetic Variation ; *Genome, Plant ; Genomics ; }, abstract = {The pangenome provides genomic variations in the cultivated gene pool for a given species. However, as the crop's gene pool comprises many species, especially wild relatives with diverse genetic stock, here we suggest using accessions from all available species of a given genus for the development of a more comprehensive and complete pangenome, which we refer to as a super-pangenome. The super-pangenome provides a complete genomic variation repertoire of a genus and offers unprecedented opportunities for crop improvement. 
This opinion article focuses on recent developments in crop pangenomics, the need for a super-pangenome that should include wild species, and its application for crop improvement.}, } @article {pmid31785311, year = {2020}, author = {Chaudhry, V and Patil, PB}, title = {Evolutionary insights into adaptation of Staphylococcus haemolyticus to human and non-human niches.}, journal = {Genomics}, volume = {112}, number = {2}, pages = {2052-2062}, doi = {10.1016/j.ygeno.2019.11.018}, pmid = {31785311}, issn = {1089-8646}, mesh = {*Adaptation, Physiological ; Drug Resistance, Bacterial ; *Evolution, Molecular ; *Genome, Bacterial ; Humans ; Oryza/microbiology ; Phylogeny ; Staphylococcus haemolyticus/classification/*genetics/pathogenicity ; }, abstract = {Staphylococcus haemolyticus is a well-known member of human skin microbiome and an emerging opportunistic human pathogen. Presently, evolutionary studies are limited to human isolates even though it is reported from plants with beneficial properties and in environmental settings. In the present study, we report isolation of novel S. haemolyticus strains from surface sterilized rice seeds and compare their genome to other isolates from diverse niches available in public domain. The study showed expanding nature of pan-genome and revealed set of genes with putative functions related to its adaptability. This is seen by presence of type II lanthipeptide cluster in rice isolates, metal homeostasis genes in an isolate from copper coin and gene encoding methicillin resistance in human isolates. 
The present study on differential genome dynamics and role of horizontal gene transfers has provided novel insights into capability for ecological diversification of a bacterium of significance to human health.}, } @article {pmid31781066, year = {2019}, author = {Peeters, C and De Canck, E and Cnockaert, M and De Brandt, E and Snauwaert, C and Verheyde, B and Depoorter, E and Spilker, T and LiPuma, JJ and Vandamme, P}, title = {Comparative Genomics of Pandoraea, a Genus Enriched in Xenobiotic Biodegradation and Metabolism.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2556}, pmid = {31781066}, issn = {1664-302X}, support = {/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Comparative analysis of partial gyrB, recA, and gltB gene sequences of 84 Pandoraea reference strains and field isolates revealed several clusters that included no taxonomic reference strains. The gyrB, recA, and gltB phylogenetic trees were used to select 27 strains for whole-genome sequence analysis and for a comparative genomics study that also included 41 publicly available Pandoraea genome sequences. The phylogenomic analyses included a Genome BLAST Distance Phylogeny approach to calculate pairwise digital DNA-DNA hybridization values and their confidence intervals, average nucleotide identity analyses using the OrthoANIu algorithm, and a whole-genome phylogeny reconstruction based on 107 single-copy core genes using bcgTree. These analyses, along with subsequent chemotaxonomic and traditional phenotypic analyses, revealed the presence of 17 novel Pandoraea species among the strains analyzed, and allowed the identification of several unclassified Pandoraea strains reported in the literature. 
The genus Pandoraea has an open pan genome that includes many orthogroups in the 'Xenobiotics biodegradation and metabolism' KEGG pathway, which likely explains the enrichment of these species in polluted soils and participation in the biodegradation of complex organic substances. We propose to formally classify the 17 novel Pandoraea species as P. anapnoica sp. nov. (type strain LMG 31117[T] = CCUG 73385[T]), P. anhela sp. nov. (type strain LMG 31108[T] = CCUG 73386[T]), P. aquatica sp. nov. (type strain LMG 31011[T] = CCUG 73384[T]), P. bronchicola sp. nov. (type strain LMG 20603[T] = ATCC BAA-110[T]), P. capi sp. nov. (type strain LMG 20602[T] = ATCC BAA-109[T]), P. captiosa sp. nov. (type strain LMG 31118[T] = CCUG 73387[T]), P. cepalis sp. nov. (type strain LMG 31106[T] = CCUG 39680[T]), P. commovens sp. nov. (type strain LMG 31010[T] = CCUG 73378[T]), P. communis sp. nov. (type strain LMG 31110[T] = CCUG 73383[T]), P. eparura sp. nov. (type strain LMG 31012[T] = CCUG 73380[T]), P. horticolens sp. nov. (type strain LMG 31112[T] = CCUG 73379[T]), P. iniqua sp. nov. (type strain LMG 31009[T] = CCUG 73377[T]), P. morbifera sp. nov. (type strain LMG 31116[T] = CCUG 73389[T]), P. nosoerga sp. nov. (type strain LMG 31109[T] = CCUG 73390[T]), P. pneumonica sp. nov. (type strain LMG 31114[T] = CCUG 73388[T]), P. soli sp. nov. (type strain LMG 31014[T] = CCUG 73382[T]), and P. terrigena sp. nov. 
(type strain LMG 31013[T] = CCUG 73381[T]).}, } @article {pmid31778355, year = {2019}, author = {Lupolova, N and Lycett, SJ and Gally, DL}, title = {A guide to machine learning for bacterial host attribution using genome sequence data.}, journal = {Microbial genomics}, volume = {5}, number = {12}, pages = {}, pmid = {31778355}, issn = {2057-5858}, support = {BB/P02095X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Birds ; Cattle ; Genome, Bacterial ; Genome-Wide Association Study/methods ; *Host Specificity ; Humans ; *Machine Learning ; Salmonella Infections/*microbiology ; Salmonella Infections, Animal/*microbiology ; Salmonella typhimurium/*genetics/isolation & purification/pathogenicity ; Swine ; Whole Genome Sequencing/*methods ; }, abstract = {With the ever-expanding number of available sequences from bacterial genomes, and the expectation that this data type will be the primary one generated from both diagnostic and research laboratories for the foreseeable future, then there is both an opportunity and a need to evaluate how effectively computational approaches can be used within bacterial genomics to predict and understand complex phenotypes, such as pathogenic potential and host source. This article applied various quantitative methods such as diversity indexes, pangenome-wide association studies (GWAS) and dimensionality reduction techniques to better understand the data and then compared how well unsupervised and supervised machine learning (ML) methods could predict the source host of the isolates. The study uses the example of the pangenomes of 1203 Salmonella enterica serovar Typhimurium isolates in order to predict 'host of isolation' using these different methods. The article is aimed as a review of recent applications of ML in infection biology, but also, by working through this specific dataset, it allows discussion of the advantages and drawbacks of the different techniques. 
As with all such sub-population studies, the biological relevance will be dependent on the quality and diversity of the input data. Given this major caveat, we show that supervised ML has the potential to add real value to interpretation of bacterial genomic data, as it can provide probabilistic outcomes for important phenotypes, something that is very difficult to achieve with the other methods.}, } @article {pmid31776332, year = {2019}, author = {Eggertsson, HP and Kristmundsdottir, S and Beyter, D and Jonsson, H and Skuladottir, A and Hardarson, MT and Gudbjartsson, DF and Stefansson, K and Halldorsson, BV and Melsted, P}, title = {GraphTyper2 enables population-scale genotyping of structural variation using pangenome graphs.}, journal = {Nature communications}, volume = {10}, number = {1}, pages = {5402}, pmid = {31776332}, issn = {2041-1723}, mesh = {Computer Graphics ; Databases, Genetic ; Genetics, Population ; *Genome, Human ; *Genomic Structural Variation ; Genotyping Techniques/*methods/statistics & numerical data ; Humans ; Iceland ; Pedigree ; Polymorphism, Single Nucleotide ; Reproducibility of Results ; *Software ; Workflow ; }, abstract = {Analysis of sequence diversity in the human genome is fundamental for genetic studies. Structural variants (SVs) are frequently omitted in sequence analysis studies, although each has a relatively large impact on the genome. Here, we present GraphTyper2, which uses pangenome graphs to genotype SVs and small variants using short-reads. Comparison to the syndip benchmark dataset shows that our SV genotyping is sensitive and variant segregation in families demonstrates the accuracy of our approach. We demonstrate that incorporating public assembly data into our pipeline greatly improves sensitivity, particularly for large insertions. We validate 6,812 SVs on average per genome using long-read data of 41 Icelanders. 
We show that GraphTyper2 can simultaneously genotype tens of thousands of whole-genomes by characterizing 60 million small variants and half a million SVs in 49,962 Icelanders, including 80 thousand SVs with high-confidence.}, } @article {pmid31771309, year = {2019}, author = {Chernysheva, N and Bystritskaya, E and Stenkova, A and Golovkin, I and Nedashkovskaya, O and Isaeva, M}, title = {Comparative Genomics and CAZyme Genome Repertoires of Marine Zobellia amurskyensis KMM 3526[T] and Zobellia laminariae KMM 3676[T].}, journal = {Marine drugs}, volume = {17}, number = {12}, pages = {}, pmid = {31771309}, issn = {1660-3397}, support = {17-14-01065//Russian Science Foundation/ ; }, mesh = {Aquatic Organisms/enzymology/*genetics ; Bacterial Proteins/*genetics/metabolism ; Biotechnology/methods ; Carbohydrate Metabolism ; Flavobacteriaceae/enzymology/*genetics ; Genome, Bacterial/*genetics ; *Genomics ; Phylogeny ; Polysaccharides/metabolism ; Seaweed/chemistry/metabolism ; Sequence Analysis, DNA ; }, abstract = {We obtained two novel draft genomes of type Zobellia strains with estimated genome sizes of 5.14 Mb for Z. amurskyensis KMM 3526[T] and 5.16 Mb for Z. laminariae KMM 3676[T]. Comparative genomic analysis has been carried out between obtained and known genomes of Zobellia representatives. The pan-genome of Zobellia genus is composed of 4853 orthologous clusters and the core genome was estimated at 2963 clusters. The genus CAZome was represented by 775 GHs classified into 62 families, 297 GTs of 16 families, 100 PLs of 13 families, 112 CEs of 13 families, 186 CBMs of 18 families and 42 AAs of six families. A closer inspection of the carbohydrate-active enzyme (CAZyme) genomic repertoires revealed members of new putative subfamilies of GH16 and GH117, which can be biotechnologically promising for production of oligosaccharides and rare monomers with different bioactivities. 
We analyzed AA3s, among them putative FAD-dependent glycoside oxidoreductases (FAD-GOs) being of particular interest as promising biocatalysts for glycoside deglycosylation in food and pharmaceutical industries.}, } @article {pmid31768302, year = {2019}, author = {Cabrera-Contreras, R and Santamaría, RI and Bustos, P and Martínez-Flores, I and Meléndez-Herrada, E and Morelos-Ramírez, R and Barbosa-Amezcua, M and González-Covarrubias, V and Silva-Herzog, E and Soberón, X and González, V}, title = {Genomic diversity of prevalent Staphylococcus epidermidis multidrug-resistant strains isolated from a Children's Hospital in México City in an eight-years survey.}, journal = {PeerJ}, volume = {7}, number = {}, pages = {e8068}, pmid = {31768302}, issn = {2167-8359}, abstract = {Staphylococcus epidermidis is a human commensal and pathogen worldwide distributed. In this work, we surveyed for multi-resistant S. epidermidis strains in eight years at a children's health-care unit in México City. Multidrug-resistant S. epidermidis were present in all years of the study, including resistance to methicillin, beta-lactams, fluoroquinolones, and macrolides. To understand the genetic basis of antibiotic resistance and its association with virulence and gene exchange, we sequenced the genomes of 17 S. epidermidis isolates. Whole-genome nucleotide identities between all the pairs of S. epidermidis strains were about 97% to 99%. We inferred a clonal structure and eight Multilocus Sequence Types (MLSTs) in the S. epidermidis sequenced collection. The profile of virulence includes genes involved in biofilm formation and phenol-soluble modulins (PSMs). Half of the S. epidermidis analyzed lacked the ica operon for biofilm formation. Likely, they are commensal S. epidermidis strains but multi-antibiotic resistant. Uneven distribution of insertion sequences, phages, and CRISPR-Cas immunity phage systems suggest frequent horizontal gene transfer. Rates of recombination between S. 
epidermidis strains were more prevalent than the mutation rate and affected the whole genome. Therefore, the multidrug resistance, independently of the pathogenic traits, might explain the persistence of specific highly adapted S. epidermidis clonal lineages in nosocomial settings.}, } @article {pmid31767775, year = {2020}, author = {Sugrue, I and O'Connor, PM and Hill, C and Stanton, C and Ross, RP}, title = {Actinomyces Produces Defensin-Like Bacteriocins (Actifensins) with a Highly Degenerate Structure and Broad Antimicrobial Activity.}, journal = {Journal of bacteriology}, volume = {202}, number = {4}, pages = {}, pmid = {31767775}, issn = {1098-5530}, mesh = {Actinomyces/genetics/*metabolism ; Anti-Bacterial Agents/*biosynthesis/pharmacology ; Bacteriocins/*biosynthesis/chemistry ; Defensins/biosynthesis ; Drug Design ; }, abstract = {We identified a strain of Actinomyces ruminicola which produces a potent bacteriocin with activity against a broad range of Gram-positive bacteria, many of which are pathogenic to animals and humans. The bacteriocin was purified and found to have a mass of 4,091 ± 1 Da with a sequence of GFGCNLITSNPYQCSNHCKSVGYRGGYCKLRTVCTCY containing three disulfide bridges. Surprisingly, near relatives of actifensin were found to be a series of related eukaryotic defensins displaying greater than 50% identity to the bacteriocin. A pangenomic screen further revealed that production of actifensin-related bacteriocins is a common trait within the genus, with 47 being encoded in 161 genomes. Furthermore, these bacteriocins displayed a remarkable level of diversity with a mean amino acid identity of only 52% between strains/species. 
This level of redundancy suggests that this new class of bacteriocins may provide a very broad structural basis on which to deliver and design new broad-spectrum antimicrobials for treatment of animal and human infections. IMPORTANCE: Bacteriocins (ribosomally produced antimicrobial peptides) are potential alternatives to current antimicrobials given the global challenge of antimicrobial resistance. We identified a novel bacteriocin from Actinomyces ruminicola with no previously characterized antimicrobial activity. Using publicly available genomic data, we found a highly conserved yet divergent family of previously unidentified homologous peptide sequences within the genus Actinomyces with striking similarity to eukaryotic defensins. These actifensins may provide a potent line of antimicrobial defense/offense, and the machinery to produce them could be used for the design of new antimicrobials given the degeneracy that exists naturally in their structure.}, } @article {pmid31762508, year = {2019}, author = {Sujitha, S and Vishnu, US and Karthikeyan, R and Sankarasubramanian, J and Gunasekaran, P and Rajendhran, J}, title = {Genome Investigation of a Cariogenic Pathogen with Implications in Cardiovascular Diseases.}, journal = {Indian journal of microbiology}, volume = {59}, number = {4}, pages = {451-459}, pmid = {31762508}, issn = {0046-8991}, abstract = {The proportion of people suffering from cardiovascular diseases has risen by 34% in the last 15 years in India. Cardiomyopathy is among the many forms of CVDs present. Infection of heart muscles is the suspected etiological agent for the same. Oral pathogens gaining entry into the bloodstream are responsible for such infections. Streptococcus mutans is an oral pathogen with implications in cardiovascular diseases. Previous studies have shown certain strains of S. mutans are found predominantly within atherosclerotic plaques and extirpated valves. 
To decipher the genetic differences responsible for endothelial cell invasion, we have sequenced the genome of Streptococcus mutans B14. Pan-genome analysis, search for adhesion proteins through a special algorithm, and protein-protein interactions search through HPIDB have been done. Pan-genome analysis of 187 whole genomes, assemblies revealed 6965 genes in total and 918 genes forming the core gene cluster. Adhesion to the endothelial cell is a critical virulence factor distinguishing virulent and non-virulent strains. Overall, 4% of the total proteins in S. mutans B14 were categorized as adhesion proteins. Protein-protein interaction between putative adhesion proteins and Human extracellular matrix components was predicted, revealing novel interactions. A conserved gene catalyzing the synthesis of branched-chain amino acids in S. mutans B14 shows possible interaction with isoforms of cathepsin protein of the ECM. This genome sequence analysis indicates towards other proteins in the S. 
mutans genome, which might have a specific role to play in host cell interaction.}, } @article {pmid31758048, year = {2019}, author = {Decano, AG and Downing, T}, title = {An Escherichia coli ST131 pangenome atlas reveals population structure and evolution across 4,071 isolates.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {17394}, pmid = {31758048}, issn = {2045-2322}, mesh = {Drug Resistance, Multiple, Bacterial/genetics ; Epidemics ; Escherichia coli/classification/*genetics/isolation & purification/pathogenicity ; Escherichia coli Infections/epidemiology/*microbiology ; Escherichia coli Proteins/genetics ; *Evolution, Molecular ; Genome, Bacterial/*genetics ; Genomics ; Genotype ; Geography ; Humans ; Molecular Epidemiology ; Phylogeny ; Plasmids/genetics ; Sequence Analysis, DNA ; beta-Lactamases/genetics ; }, abstract = {Escherichia coli ST131 is a major cause of infection with extensive antimicrobial resistance (AMR) facilitated by widespread beta-lactam antibiotic use. This drug pressure has driven extended-spectrum beta-lactamase (ESBL) gene acquisition and evolution in pathogens, so a clearer resolution of ST131's origin, adaptation and spread is essential. E. coli ST131's ESBL genes are typically embedded in mobile genetic elements (MGEs) that aid transfer to new plasmid or chromosomal locations, which are mobilised further by plasmid conjugation and recombination, resulting in a flexible ESBL, MGE and plasmid composition with a conserved core genome. We used population genomics to trace the evolution of AMR in ST131 more precisely by extracting all available high-quality Illumina HiSeq read libraries to investigate 4,071 globally-sourced genomes, the largest ST131 collection examined so far. We applied rigorous quality-control, genome de novo assembly and ESBL gene screening to resolve ST131's population structure across three genetically distinct Clades (A, B, C) and abundant subclades from the dominant Clade C. 
We reconstructed their evolutionary relationships across the core and accessory genomes using published reference genomes, long read assemblies and k-mer-based methods to contextualise pangenome diversity. The three main C subclades have co-circulated globally at relatively stable frequencies over time, suggesting attaining an equilibrium after their origin and initial rapid spread. This contrasted with their ESBL genes, which had stronger patterns across time, geography and subclade, and were located at distinct locations across the chromosomes and plasmids between isolates. Within the three C subclades, the core and accessory genome diversity levels were not correlated due to plasmid and MGE activity, unlike patterns between the three main clades, A, B and C. This population genomic study highlights the dynamic nature of the accessory genomes in ST131, suggesting that surveillance should anticipate genetically variable outbreaks with broader antibiotic resistance levels. Our findings emphasise the potential of evolutionary pangenomics to improve our understanding of AMR gene transfer, adaptation and transmission to discover accessory genome changes linked to novel subtypes.}, } @article {pmid31756773, year = {2020}, author = {Amoroso, L and Ognibene, M and Morini, M and Conte, M and Di Cataldo, A and Tondo, A and D'Angelo, P and Castellano, A and Garaventa, A and Lasorsa, VA and Podestà, M and Capasso, M and Pezzolo, A}, title = {Genomic coamplification of CDK4/MDM2/FRS2 is associated with very poor prognosis and atypical clinical features in neuroblastoma patients.}, journal = {Genes, chromosomes & cancer}, volume = {59}, number = {5}, pages = {277-285}, doi = {10.1002/gcc.22827}, pmid = {31756773}, issn = {1098-2264}, mesh = {Adaptor Proteins, Signal Transducing/*genetics ; Biomarkers, Tumor/genetics ; Child ; Chromosomes, Human, Pair 12 ; Comparative Genomic Hybridization/methods ; Cyclin-Dependent Kinase 4/*genetics ; Gene Amplification ; Humans ; Membrane 
Proteins/*genetics ; Neuroblastoma/*genetics/mortality/pathology ; Prognosis ; Proto-Oncogene Proteins c-mdm2/*genetics ; Retrospective Studies ; Survival Rate ; Exome Sequencing/methods ; }, abstract = {Neuroblastoma (NB) is the most common extracranial malignant tumor of childhood and is characterized by a broad heterogeneity in clinical presentation and evolution. Recent advances in pangenomic analysis of NB have revealed different recurrent chromosomal aberrations. Indeed, it is now well established that the overall genomic profile is important for treatment stratification. In previous studies, 11 genes were shown to be recurrently amplified (ODC1, ALK, GREB1, NTSR2, LIN28B, MDM2, CDK4, MYEOV, CCND1, TERT, and MYC) besides MYCN, with poor survival of NB patients harboring these amplifications being suggested. Genomic profiles of 628 NB samples analyzed by array-comparative genome hybridization (a-CGH) were re-examined to identify gene amplifications other than MYCN amplification. Clinical data were retrospectively collected. We additionally evaluated the association of FRS2 gene expression with NB patient outcome using the public R2 Platform. We found eight NB samples with high grade amplification of one or two loci on chromosome arm 12q. The regional amplifications were located on bands 12q13.3-q14.1 and 12q15-q21.1 involving the genes CDK4, MDM2, and the potential oncogenic gene FRS2. The CDK4, MDM2, and FRS2 loci were coamplified in 8/8 samples. The 12q amplifications were associated with very poor prognosis and atypical clinical features of NB patients. 
Further functional and clinical investigations are needed to confirm or refute these associations.}, } @article {pmid31749118, year = {2020}, author = {de Fátima Rauber Würfel, S and Jorge, S and de Oliveira, NR and Kremer, FS and Sanchez, CD and Campos, VF and da Silva Pinto, L and da Silva, WP and Dellagostin, OA}, title = {Campylobacter jejuni isolated from poultry meat in Brazil: in silico analysis and genomic features of two strains with different phenotypes of antimicrobial susceptibility.}, journal = {Molecular biology reports}, volume = {47}, number = {1}, pages = {671-681}, pmid = {31749118}, issn = {1573-4978}, support = {Finance Code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; 0//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; 0//Fundação de Amparo à Pesquisa do Estado do Rio Grande do Sul/ ; }, mesh = {Animals ; Anti-Bacterial Agents/*pharmacology ; Brazil ; *Campylobacter jejuni/drug effects/genetics/isolation & purification/pathogenicity ; Drug Resistance, Multiple, Bacterial/*genetics ; Genome, Bacterial/genetics ; Genomics ; Meat/*microbiology ; Multilocus Sequence Typing ; Plasmids/genetics ; Poultry ; Virulence Factors/genetics ; }, abstract = {Campylobacter jejuni is the most common bacterial cause of foodborne diarrheal disease worldwide and is among the antimicrobial resistant "priority pathogens" that pose greatest threat to public health. The genomes of two C. jejuni isolated from poultry meat sold on the retail market in Southern Brazil phenotypically characterized as multidrug-resistant (CJ100) and susceptible (CJ104) were sequenced and analyzed by bioinformatic tools. The isolates CJ100 and CJ104 showed distinct multilocus sequence types (MLST). Comparative genomic analysis revealed a large number of single nucleotide polymorphisms, rearrangements, and inversions in both genomes, in addition to virulence factors, genomic islands, prophage sequences, and insertion sequences. 
A circular 103-kilobase megaplasmid carrying virulence factors was identified in the genome of CJ100, in addition to resistance mechanisms to aminoglycosides, beta-lactams, macrolides, quinolones, and tetracyclines. The molecular characterization of distinct phenotypes of foodborne C. jejuni and the discovery of a novel virulence megaplasmid provide useful data for pan-genome and large-scale studies to monitor the virulent C. jejuni in poultry meat is warranted.}, } @article {pmid31745243, year = {2019}, author = {Chapeton-Montes, D and Plourde, L and Bouchier, C and Ma, L and Diancourt, L and Criscuolo, A and Popoff, MR and Brüggemann, H}, title = {Author Correction: The population structure of Clostridium tetani deduced from its pan-genome.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {17409}, doi = {10.1038/s41598-019-53688-z}, pmid = {31745243}, issn = {2045-2322}, abstract = {An amendment to this paper has been published and can be accessed via a link at the top of the paper.}, } @article {pmid31740752, year = {2020}, author = {Lawson, MAE and O'Neill, IJ and Kujawska, M and Gowrinadh Javvadi, S and Wijeyesekera, A and Flegg, Z and Chalklen, L and Hall, LJ}, title = {Breast milk-derived human milk oligosaccharides promote Bifidobacterium interactions within a single ecosystem.}, journal = {The ISME journal}, volume = {14}, number = {2}, pages = {635-648}, pmid = {31740752}, issn = {1751-7370}, support = {BB/J004529/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012490/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 100/974/C/13/Z/WT_/Wellcome Trust/United Kingdom ; BBS/E/F/000PR10353/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/000PR10356/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/M011216/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 100974//Wellcome Trust/United 
Kingdom ; BBS/E/F/00044409/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bifidobacterium/genetics/isolation & purification/physiology ; Breast Feeding ; Carbohydrate Metabolism/*genetics ; Ecosystem ; Female ; Genes, Bacterial ; Genetic Variation ; Genome, Bacterial ; Humans ; Infant ; Metagenome/genetics/physiology ; Microbial Interactions ; Microbiota ; *Milk, Human/chemistry ; Oligosaccharides/*genetics/metabolism ; }, abstract = {Diet-microbe interactions play an important role in modulating the early-life microbiota, with Bifidobacterium strains and species dominating the gut of breast-fed infants. Here, we sought to explore how infant diet drives distinct bifidobacterial community composition and dynamics within individual infant ecosystems. Genomic characterisation of 19 strains isolated from breast-fed infants revealed a diverse genomic architecture enriched in carbohydrate metabolism genes, which was distinct to each strain, but collectively formed a pangenome across infants. Presence of gene clusters implicated in digestion of human milk oligosaccharides (HMOs) varied between species, with growth studies indicating that within single infants there were differences in the ability to utilise 2'FL and LNnT HMOs between strains. Cross-feeding experiments were performed with HMO degraders and non-HMO users (using spent or 'conditioned' media and direct co-culture). Further [1]H-NMR analysis identified fucose, galactose, acetate, and N-acetylglucosamine as key by-products of HMO metabolism; as demonstrated by modest growth of non-HMO users on spent media from HMO metabolism. These experiments indicate how HMO metabolism permits the sharing of resources to maximise nutrient consumption from the diet and highlights the cooperative nature of bifidobacterial strains and their role as 'foundation' species in the infant ecosystem. 
The intra- and inter-infant bifidobacterial community behaviour may contribute to the diversity and dominance of Bifidobacterium in early life and suggests avenues for future development of new diet and microbiota-based therapies to promote infant health.}, } @article {pmid31738764, year = {2019}, author = {Robertson, J and Lin, J and Wren-Hedgus, A and Arya, G and Carrillo, C and Nash, JHE}, title = {Development of a multi-locus typing scheme for an Enterobacteriaceae linear plasmid that mediates inter-species transfer of flagella.}, journal = {PloS one}, volume = {14}, number = {11}, pages = {e0218638}, pmid = {31738764}, issn = {1932-6203}, mesh = {Enterobacteriaceae/classification/*genetics ; Flagella/*genetics ; Gene Transfer, Horizontal ; Genes, Bacterial ; Humans ; Multilocus Sequence Typing/*methods ; Phylogeny ; Plasmids/classification/genetics ; Salmonella typhi/classification/genetics ; Serogroup ; Species Specificity ; }, abstract = {Due to the public health importance of flagellar genes for typing, it is important to understand mechanisms that could alter their expression or presence. Phenotypic novelty in flagellar genes arise predominately through accumulation of mutations but horizontal transfer is known to occur. A linear plasmid termed pBSSB1 previously identified in Salmonella Typhi, was found to encode a flagellar operon that can mediate phase variation, which results in the rare z66 flagella phenotype. The identification and tracking of homologs of pBSSB1 is limited because it falls outside the normal replicon typing schemes for plasmids. Here we report the generation of nine new pBSSB1-family sequences using Illumina and Nanopore sequence data. Homologs of pBSSB1 were identified in 154 genomes representing 25 distinct serotypes from 67,758 Salmonella public genomes. Pangenome analysis of pBSSB1-family contigs was performed using roary and we identified three core genes amenable to a minimal pMLST scheme. 
Population structure analysis based on the newly developed pMLST scheme identified three major lineages representing 35 sequence types, and the distribution of these sequence types was found to span multiple serovars across the globe. This in silico pMLST scheme has shown utility in tracking and subtyping pBSSB1-family plasmids and it has been incorporated into the plasmid MLST database under the name "pBSSB1-family".}, } @article {pmid31736915, year = {2019}, author = {Suresh, G and Lodha, TD and Indu, B and Sasikala, C and Ramana, CV}, title = {Taxogenomics Resolves Conflict in the Genus Rhodobacter: A Two and Half Decades Pending Thought to Reclassify the Genus Rhodobacter.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2480}, pmid = {31736915}, issn = {1664-302X}, abstract = {The genus Rhodobacter is taxonomically well studied, and some members are model organisms. However, this genus is comprised of a heterogeneous group of members. 16S rRNA gene-based phylogeny of the genus Rhodobacter indicates a motley assemblage of anoxygenic phototrophic bacteria (genus Rhodobacter) with interspersing members of other genera (chemotrophs) making the genus polyphyletic. Taxogenomics was performed to resolve the taxonomic conflicts of the genus Rhodobacter using twelve type strains. The phylogenomic analysis showed that Rhodobacter spp. can be grouped into four monophyletic clusters with interspersing chemotrophs. Genomic indices (ANI and dDDH) confirmed that all the current species are well defined, except Rhodobacter megalophilus. The average amino acid identity values between the monophyletic clusters of Rhodobacter members, as well as with the chemotrophic genera, are less than 80% whereas the percentage of conserved proteins values were below 70%, which has been observed among several genera related to Rhodobacter. The pan-genome analysis has shown that there are only 1239 core genes shared between the 12 species of the genus Rhodobacter. 
The polyphasic taxonomic analysis supports the phylogenomic and genomic studies in distinguishing the four Rhodobacter clusters. Each cluster is comprised of one to seven species according to the current Rhodobacter taxonomy. Therefore, to address this taxonomic discrepancy we propose to reclassify the members of the genus Rhodobacter into three new genera, Luteovulum gen. nov., Phaeovulum gen. nov. and Fuscovulum gen. nov., and provide an emended description of the genus Rhodobacter sensu stricto. Also, we propose reclassification of Rhodobacter megalophilus as a sub-species of Rhodobacter sphaeroides.}, } @article {pmid31731444, year = {2019}, author = {Ghosh, S and Sarangi, AN and Mukherjee, M and Bhowmick, S and Tripathy, S}, title = {Reanalysis of Lactobacillus paracasei Lbs2 Strain and Large-Scale Comparative Genomics Places Many Strains into Their Correct Taxonomic Position.}, journal = {Microorganisms}, volume = {7}, number = {11}, pages = {}, pmid = {31731444}, issn = {2076-2607}, abstract = {Lactobacillus paracasei are diverse Gram-positive bacteria that are very closely related to Lactobacillus casei, belonging to the Lactobacillus casei group. Due to extreme genome similarities between L. casei and L. paracasei, many strains have been cross placed in the other group. We had earlier sequenced and analyzed the genome of Lactobacillus paracasei Lbs2, but mistakenly identified it as L. casei. We re-analyzed Lbs2 reads into a 2.5 MB genome that is 91.28% complete with 0.8% contamination, which is now suitably placed under L. paracasei based on Average Nucleotide Identity and Average Amino Acid Identity. We took 74 sequenced genomes of L. paracasei from GenBank with assembly sizes ranging from 2.3 to 3.3 MB and genome completeness between 88% and 100% for comparison. The pan-genome of 75 L. 
paracasei strains hold 15,945 gene families (215,232 genes), while the core genome contained about 8.4% of the total genes (243 gene families with 18,225 genes) of pan-genome. Phylogenomic analysis based on core gene families revealed that the Lbs2 strain has a closer relationship with L. paracasei subsp. tolerans DSM20258. Finally, the in-silico analysis of the L. paracasei Lbs2 genome revealed an important pathway that could underpin the production of thiamin, which may contribute to the host energy metabolism.}, } @article {pmid31728978, year = {2020}, author = {Seribelli, AA and Gonzales, JC and de Almeida, F and Benevides, L and Cazentini Medeiros, MI and Dos Prazeres Rodrigues, D and de C Soares, S and Allard, MW and Falcão, JP}, title = {Phylogenetic analysis revealed that Salmonella Typhimurium ST313 isolated from humans and food in Brazil presented a high genomic similarity.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {51}, number = {1}, pages = {53-64}, pmid = {31728978}, issn = {1678-4405}, support = {2017/06633-6//Fundação de Amparo à Pesquisa do Estado de São Paulo/ ; 2016/24716-3//Fundação de Amparo à Pesquisa do Estado de São Paulo/ ; }, mesh = {Bacterial Typing Techniques ; Brazil ; Feces/microbiology ; *Food Microbiology ; *Genome, Bacterial ; Genomics ; Genotype ; Humans ; Multilocus Sequence Typing ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Salmonella Infections/microbiology ; Salmonella typhimurium/*classification ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Salmonella Typhimurium sequence type 313 (S. Typhimurium ST313) has caused invasive disease mainly in sub-Saharan Africa. In Brazil, ST313 strains have been recently described, and there is a lack of studies that assessed by whole genome sequencing (WGS)-the relationship of these strains. The aims of this work were to study the phylogenetic relationship of 70 S. 
Typhimurium genomes comparing strains of ST313 (n = 9) isolated from humans and food in Brazil among themselves, with other STs isolated in this country (n = 31) and in other parts of the globe (n = 30) by 16S rRNA sequences, the Gegenees software, whole genome multilocus sequence typing (wgMLST), and average nucleotide identity (ANI) for the genomes of ST313. Additionally, pangenome analysis was performed to verify the heterogeneity of these genomes. The phylogenetic analyses showed that the ST313 genomes were very similar among themselves. However, the ST313 genomes were usually clustered more distantly to other STs of strains isolated in Brazil and in other parts of the world. By pangenome calculation, the core genome was 2,880 CDSs and 4,171 CDSs singletons for all the 70 S. Typhimurium genomes studied. Considering the 10 ST313 genomes analyzed the core genome was 4,112 CDSs and 76 CDSs singletons. In conclusion, the ST313 genomes from Brazil showed a high similarity among them which information might eventually help in the development of vaccines and antibiotics. The pangenome analysis showed that the S. 
Typhimurium genomes studied presented an open pangenome, but specifically tending to become close for the ST313 strains.}, } @article {pmid31719113, year = {2020}, author = {Chhotaray, C and Wang, S and Tan, Y and Ali, A and Shehroz, M and Fang, C and Liu, Y and Lu, Z and Cai, X and Hameed, HMA and Islam, MM and Surineni, G and Tan, S and Liu, J and Zhang, T}, title = {Comparative Analysis of Whole-Genome and Methylome Profiles of a Smooth and a Rough Mycobacterium abscessus Clinical Strain.}, journal = {G3 (Bethesda, Md.)}, volume = {10}, number = {1}, pages = {13-22}, pmid = {31719113}, issn = {2160-1836}, mesh = {DNA Methylation ; *Epigenome ; *Genome, Bacterial ; Mycobacterium abscessus/*genetics ; *Phenotype ; Polymorphism, Genetic ; }, abstract = {Mycobacterium abscessus is a fast growing Mycobacterium species mainly causing skin and respiratory infections in human. M. abscessus is resistant to numerous drugs, which is a major challenge for the treatment. In this study, we have sequenced the genomes of two clinical M. abscessus strains having rough and smooth morphology, using the single molecule real-time and Illumina HiSeq sequencing technology. In addition, we reported the first comparative methylome profiles of a rough and a smooth M. abscessus clinical strains. The number of N4-methylcytosine (4mC) and N6-methyladenine (6mA) modified bases obtained from smooth phenotype were two-fold and 1.6 fold respectively higher than that of rough phenotype. We have also identified 4 distinct novel motifs in two clinical strains and genes encoding antibiotic-modifying/targeting enzymes and genes associated with intracellular survivability having different methylation patterns. To our knowledge, this is the first report about genome-wide methylation profiles of M. abscessus strains and identification of a natural linear plasmid (15 kb) in this critical pathogen harboring methylated bases. The pan-genome analysis of 25 M. 
abscessus strains including two clinical strains revealed an open pan genome comprises of 7596 gene clusters. Likewise, structural variation analysis revealed that the genome of rough phenotype strain contains more insertions and deletions than the smooth phenotype and that of the reference strain. A total of 391 single nucleotide variations responsible for the non-synonymous mutations were detected in clinical strains compared to the reference genome. The comparative genomic analysis elucidates the genome plasticity in this emerging pathogen. Furthermore, the detection of genome-wide methylation profiles of M. abscessus clinical strains may provide insight into the significant role of DNA methylation in pathogenicity and drug resistance in this opportunistic pathogen.}, } @article {pmid31703875, year = {2020}, author = {Kim, KH and Chun, BH and Baek, JH and Roh, SW and Lee, SH and Jeon, CO}, title = {Genomic and metabolic features of Lactobacillus sakei as revealed by its pan-genome and the metatranscriptome of kimchi fermentation.}, journal = {Food microbiology}, volume = {86}, number = {}, pages = {103341}, doi = {10.1016/j.fm.2019.103341}, pmid = {31703875}, issn = {1095-9998}, mesh = {Biogenic Amines/metabolism ; Brassica/*microbiology ; Fermentation ; Fermented Foods/microbiology ; Food Microbiology ; Gene Expression Profiling ; *Genome, Bacterial ; Genomics ; Glucuronic Acid/metabolism ; Lactic Acid/metabolism ; Latilactobacillus sakei/*genetics/isolation & purification/*metabolism ; Metabolic Networks and Pathways ; Vegetables/*microbiology ; }, abstract = {The genomic and metabolic features of Lactobacillus sakei were investigated using its pan-genome and by analyzing the metatranscriptome of kimchi fermentation. In the genome-based relatedness analysis, the strains were divided into the Lb. sakei ssp. sakei and Lb. sakei ssp. carnosus lineage groups. Genomic and metabolic pathway analysis revealed that all Lb. 
sakei strains have the capability of producing d/l-lactate, ethanol, acetate, CO2, formate, l-malate, diacetyl, acetoin, and 2,3-butanediol from d-glucose, d-fructose, d-galactose, sucrose, d-lactose, l-arabinose, cellobiose, d-mannose, d-gluconate, and d-ribose through homolactic and heterolactic fermentation, whereas their capability of d-maltose, d-xylose, l-xylulose, d-galacturonate, and d-glucuronate metabolism is strain-specific. All strains carry genes for the biosynthesis of folate and thiamine, whereas genes for biogenic amine and toxin production, hemolysis, and antibiotic resistance were not identified. The metatranscriptomic analysis showed that the expression of Lb. sakei transcripts involved in carbohydrate metabolism increased as kimchi fermentation progressed, suggesting that Lb. sakei is more competitive during late fermentation stage. Homolactic fermentation pathway was highly expressed and generally constant during kimchi fermentation, whereas expression of heterolactic fermentation pathway increased gradually as fermentation progressed. l-Lactate dehydrogenase was more highly expressed than d-lactate dehydrogenase, suggesting that l-lactate is the major lactate metabolized by Lb. sakei.}, } @article {pmid31703394, year = {2019}, author = {Spaety, ME and Gries, A and Badie, A and Venkatasamy, A and Romain, B and Orvain, C and Yanagihara, K and Okamoto, K and Jung, AC and Mellitzer, G and Pfeffer, S and Gaiddon, C}, title = {HDAC4 Levels Control Sensibility toward Cisplatin in Gastric Cancer via the p53-p73/BIK Pathway.}, journal = {Cancers}, volume = {11}, number = {11}, pages = {}, pmid = {31703394}, issn = {2072-6694}, support = {#2016AD//ARC/ ; #2017ZE//Ligue contre le Cancer/ ; IDEX#2018//Strasbourg University/ ; }, abstract = {Gastric cancer (GC) remains a health issue due to the low efficiency of therapies, such as cisplatin. This unsatisfactory situation highlights the necessity of finding factors impacting GC sensibility to therapies. 
We analyzed the cisplatin pangenomic response in cancer cells and found HDAC4 as a major epigenetic regulator being inhibited. HDAC4 mRNA repression was partly mediated by the cisplatin-induced expression of miR-140. At a functional level, HDAC4 inhibition favored cisplatin cytotoxicity and reduced tumor growth. Inversely, overexpression of HDAC4 inhibits cisplatin cytotoxicity. Importantly, HDAC4 expression was found to be elevated in gastric tumors compared to healthy tissues, and in particular in specific molecular subgroups. Furthermore, mutations in HDAC4 correlate with good prognosis. Pathway analysis of genes whose expression in patients correlated strongly with HDAC4 highlighted DNA damage, p53 stabilization, and apoptosis as processes downregulated by HDAC4. This was further confirmed by silencing of HDAC4, which favored cisplatin-induced apoptosis characterized by cleavage of caspase 3 and induction of proapoptotic genes, such as BIK, in part via a p53-dependent mechanism. Altogether, these results reveal HDAC4 as a resistance factor for cisplatin in GC cells that impacts on patients' survival.}, } @article {pmid31695182, year = {2020}, author = {Bernheim, A and Sorek, R}, title = {The pan-immune system of bacteria: antiviral defence as a community resource.}, journal = {Nature reviews. Microbiology}, volume = {18}, number = {2}, pages = {113-119}, pmid = {31695182}, issn = {1740-1534}, mesh = {Bacteria/immunology/*virology ; Bacteriophages/*physiology ; }, abstract = {Viruses and their hosts are engaged in a constant arms race leading to the evolution of antiviral defence mechanisms. Recent studies have revealed that the immune arsenal of bacteria against bacteriophages is much more diverse than previously envisioned. 
These discoveries have led to seemingly contradictory observations: on one hand, individual microorganisms often encode multiple distinct defence systems, some of which are acquired by horizontal gene transfer, alluding to their fitness benefit. On the other hand, defence systems are frequently lost from prokaryotic genomes on short evolutionary time scales, suggesting that they impose a fitness cost. In this Perspective article, we present the 'pan-immune system' model in which we suggest that, although a single strain cannot carry all possible defence systems owing to their burden on fitness, it can employ horizontal gene transfer to access immune defence mechanisms encoded by closely related strains. Thus, the 'effective' immune system is not the one encoded by the genome of a single microorganism but rather by its pan-genome, comprising the sum of all immune systems available for a microorganism to horizontally acquire and use.}, } @article {pmid31694533, year = {2019}, author = {Vila Nova, M and Durimel, K and La, K and Felten, A and Bessières, P and Mistou, MY and Mariadassou, M and Radomski, N}, title = {Genetic and metabolic signatures of Salmonella enterica subsp. enterica associated with animal sources at the pangenomic scale.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {814}, pmid = {31694533}, issn = {1471-2164}, support = {Typautobac//Agence Nationale de Sécurité Sanitaire de l'Alimentation, de l'Environnement et du Travail/ ; Typautobac//Institut National de la Recherche Agronomique/ ; 643476//COMPARE/ ; }, mesh = {Animals ; Genome-Wide Association Study ; *Genomics ; Mutation ; Phylogeny ; Salmonella enterica/*genetics/*metabolism ; }, abstract = {BACKGROUND: Salmonella enterica subsp. enterica is a public health issue related to food safety, and its adaptation to animal sources remains poorly described at the pangenome scale. 
Firstly, serovars presenting potential mono- and multi-animal sources were selected from a curated and synthetized subset of Enterobase. The corresponding sequencing reads were downloaded from the European Nucleotide Archive (ENA) providing a balanced dataset of 440 Salmonella genomes in terms of serovars and sources (i). Secondly, the coregenome variants and accessory genes were detected (ii). Thirdly, single nucleotide polymorphisms and small insertions/deletions from the coregenome, as well as the accessory genes were associated to animal sources based on a microbial Genome Wide Association Study (GWAS) integrating an advanced correction of the population structure (iii). Lastly, a Gene Ontology Enrichment Analysis (GOEA) was applied to emphasize metabolic pathways mainly impacted by the pangenomic mutations associated to animal sources (iv).

RESULTS: Based on a genome dataset including Salmonella serovars from mono- and multi-animal sources (i), 19,130 accessory genes and 178,351 coregenome variants were identified (ii). Among these pangenomic mutations, 52 genomic signatures (iii) and 9 over-enriched metabolic signatures (iv) were associated to avian, bovine, swine and fish sources by GWAS and GOEA, respectively.

CONCLUSIONS: Our results suggest that the genetic and metabolic determinants of Salmonella adaptation to animal sources may have been driven by the natural feeding environment of the animal, distinct livestock diets modified by human, environmental stimuli, physiological properties of the animal itself, and work habits for health protection of livestock.}, } @article {pmid31690629, year = {2020}, author = {Lauer, V and Grampp, S and Platt, J and Lafleur, V and Lombardi, O and Choudhry, H and Kranz, F and Hartmann, A and Wullich, B and Yamamoto, A and Coleman, ML and Ratcliffe, PJ and Mole, DR and Schödel, J}, title = {Hypoxia drives glucose transporter 3 expression through hypoxia-inducible transcription factor (HIF)-mediated induction of the long noncoding RNA NICI.}, journal = {The Journal of biological chemistry}, volume = {295}, number = {13}, pages = {4065-4078}, pmid = {31690629}, issn = {1083-351X}, support = {078333/Z/05/Z/WT_/Wellcome Trust/United Kingdom ; MR/N021053/1/MRC_/Medical Research Council/United Kingdom ; FC001501/CRUK_/Cancer Research UK/United Kingdom ; RG/11/1/28684/BHF_/British Heart Foundation/United Kingdom ; WT091857MA/WT_/Wellcome Trust/United Kingdom ; FC001501/WT_/Wellcome Trust/United Kingdom ; FC001501/MRC_/Medical Research Council/United Kingdom ; A416016/CRUK_/Cancer Research UK/United Kingdom ; 088182/Z/09/Z/WT_/Wellcome Trust/United Kingdom ; RP-2015-06-004/DH_/Department of Health/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {CRISPR-Cas Systems/genetics ; Carcinoma, Renal Cell/*genetics/pathology ; Cell Line, Tumor ; Cell Proliferation/genetics ; DNA-Binding Proteins/genetics ; Gene Expression Regulation, Neoplastic/genetics ; Gene Knockout Techniques ; Glucose Transporter Type 3/*genetics ; Humans ; Hypoxia-Inducible Factor 1, alpha Subunit/genetics ; Promoter Regions, Genetic/genetics ; RNA Polymerase II/genetics ; RNA, Long Noncoding/*genetics ; Transcriptional Activation/genetics ; Tumor Hypoxia/genetics ; 
Von Hippel-Lindau Tumor Suppressor Protein/*genetics ; }, abstract = {Hypoxia-inducible transcription factors (HIFs) directly dictate the expression of multiple RNA species including novel and as yet uncharacterized long noncoding transcripts with unknown function. We used pan-genomic HIF-binding and transcriptomic data to identify a novel long noncoding RNA Noncoding Intergenic Co-Induced transcript (NICI) on chromosome 12p13.31 which is regulated by hypoxia via HIF-1 promoter-binding in multiple cell types. CRISPR/Cas9-mediated deletion of the hypoxia-response element revealed co-regulation of NICI and the neighboring protein-coding gene, solute carrier family 2 member 3 (SLC2A3) which encodes the high-affinity glucose transporter 3 (GLUT3). Knockdown or knockout of NICI attenuated hypoxic induction of SLC2A3, indicating a direct regulatory role of NICI in SLC2A3 expression, which was further evidenced by CRISPR/Cas9-VPR-mediated activation of NICI expression. We also demonstrate that regulation of SLC2A3 is mediated through transcriptional activation rather than posttranscriptional mechanisms because knockout of NICI leads to reduced recruitment of RNA polymerase 2 to the SLC2A3 promoter. Consistent with this we observe NICI-dependent regulation of glucose consumption and cell proliferation. 
Furthermore, NICI expression is regulated by the von Hippel-Lindau (VHL) tumor suppressor and is highly expressed in clear cell renal cell carcinoma (ccRCC), where SLC2A3 expression is associated with patient prognosis, implying an important role for the HIF/NICI/SLC2A3 axis in this malignancy.}, } @article {pmid31666129, year = {2019}, author = {Aguirre de Cárcer, D}, title = {A conceptual framework for the phylogenetically constrained assembly of microbial communities.}, journal = {Microbiome}, volume = {7}, number = {1}, pages = {142}, pmid = {31666129}, issn = {2049-2618}, mesh = {Ecosystem ; *Microbial Interactions ; *Microbiota ; *Phylogeny ; }, abstract = {Microbial communities play essential and preponderant roles in all ecosystems. Understanding the rules that govern microbial community assembly will have a major impact on our ability to manage microbial ecosystems, positively impacting, for instance, human health and agriculture. Here, I present a phylogenetically constrained community assembly principle grounded on the well-supported facts that deterministic processes have a significant impact on microbial community assembly, that microbial communities show significant phylogenetic signal, and that microbial traits and ecological coherence are, to some extent, phylogenetically conserved. From these facts, I derive a few predictions which form the basis of the framework. Chief among them is the existence, within most microbial ecosystems, of phylogenetic core groups (PCGs), defined as discrete portions of the phylogeny of varying depth present in all instances of the given ecosystem, and related to specific niches whose occupancy requires a specific phylogenetically conserved set of traits. The predictions are supported by the recent literature, as well as by dedicated analyses. 
Integrating the effect of ecosystem patchiness, microbial social interactions, and scale sampling pitfalls takes us to a comprehensive community assembly model that recapitulates the characteristics most commonly observed in microbial communities. PCGs' identification is relatively straightforward using high-throughput 16S amplicon sequencing, and subsequent bioinformatic analysis of their phylogeny, estimated core pan-genome, and intra-group co-occurrence should provide valuable information on their ecophysiology and niche characteristics. Such a priori information for a significant portion of the community could be used to prime complementing analyses, boosting their usefulness. Thus, the use of the proposed framework could represent a leap forward in our understanding of microbial community assembly and function.}, } @article {pmid31661016, year = {2019}, author = {Alonge, M and Soyk, S and Ramakrishnan, S and Wang, X and Goodwin, S and Sedlazeck, FJ and Lippman, ZB and Schatz, MC}, title = {RaGOO: fast and accurate reference-guided scaffolding of draft genomes.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {224}, pmid = {31661016}, issn = {1474-760X}, support = {S10 OD020122/OD/NIH HHS/United States ; R01-HG006677/NH/NIH HHS/United States ; UM1 HG008898/NH/NIH HHS/United States ; }, mesh = {Arabidopsis/genetics ; Genome, Plant ; Genomic Structural Variation ; Genomics/*methods ; Solanum lycopersicum/genetics ; *Software ; }, abstract = {We present RaGOO, a reference-guided contig ordering and orienting tool that leverages the speed and sensitivity of Minimap2 to accurately achieve chromosome-scale assemblies in minutes. After the pseudomolecules are constructed, RaGOO identifies structural variants, including those spanning sequencing gaps. We show that RaGOO accurately orders and orients 3 de novo tomato genome assemblies, including the widely used M82 reference cultivar. 
We then demonstrate the scalability and utility of RaGOO with a pan-genome analysis of 103 Arabidopsis thaliana accessions by examining the structural variants detected in the newly assembled pseudomolecules. RaGOO is available open source at https://github.com/malonge/RaGOO .}, } @article {pmid31659686, year = {2019}, author = {Oh, YJ and Kim, JY and Park, HK and Jang, JY and Lim, SK and Kwon, MS and Choi, HJ}, title = {Salicibibacter halophilus sp. nov., a moderately halophilic bacterium isolated from kimchi.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {57}, number = {11}, pages = {997-1002}, pmid = {31659686}, issn = {1976-3794}, mesh = {Bacillaceae/*classification/genetics/*isolation & purification/physiology ; Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Diaminopimelic Acid/metabolism ; Fatty Acids/chemistry ; Fermented Foods/*microbiology ; Genes, Bacterial/genetics ; Genomics ; Halobacteriales ; Hydrogen-Ion Concentration ; Peptidoglycan/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Republic of Korea ; Salt Tolerance ; Sequence Analysis, DNA ; Sodium Chloride/metabolism ; Vitamin K 2/analogs & derivatives/chemistry ; Whole Genome Sequencing ; }, abstract = {A Gram-stain-positive, rod-shaped, alkalitolerant, and halophilic bacterium-designated as strain NKC3-5[T]-was isolated from kimchi that was collected from the Geumsan area in the Republic of Korea. Cells of isolated strain NKC3-5[T] were 0.5-0.7 μm wide and 1.4-2.8 μm long. The strain NKC3-5[T] could grow at up to 20.0% (w/v) NaCl (optimum 10%), pH 6.5-10.0 (optimum pH 9.0), and 25-40°C (optimum 35°C). The cells were able to reduce nitrate under aerobic conditions, which is the first report in the genus Salicibibacter. The genome size and genomic G + C content of strain NKC3-5[T] were 3,754,174 bp and 45.9 mol%, respectively; it contained 3,630 coding sequences, 16 rRNA genes (six 16S, five 5S, and five 23S), and 59 tRNA genes. 
Phylogenetic analysis based on 16S rRNA showed that strain NKC3-5[T] clustered with bacterium Salicibibacter kimchii NKC1-1[T], with a similarity of 96.2-97.6%, but formed a distinct branch with other published species of the family Bacillaceae. In addition, OrthoANI value between strain NKC3-5[T] and Salicibibacter kimchii NKC1-1[T] was far lower than the species demarcation threshold. Using functional genome annotation, the result found that carbohydrate, amino acid, and vitamin metabolism related genes were highly distributed in the genome of strain NKC3-5[T]. Comparative genomic analysis revealed that strain NKC3-5[T] had 716 pan-genome orthologous groups (POGs), dominated with carbohydrate metabolism. Phylogenomic analysis based on the concatenated core POGs revealed that strain NKC3-5[T] was closely related to Salicibibacter kimchii. The predominant polar lipids were phosphatidylglycerol and two unidentified lipids. Anteiso-C15:0, iso-C17:0, anteiso-C17:0, and iso-C15:0 were the major cellular fatty acids, and menaquinone-7 was the major isoprenoid quinone present in strain NKC3-5[T]. Cell wall peptidoglycan analysis of strain NKC3-5[T] showed that meso-diaminopimelic acid was the diagnostic diamino acid. The phenotypic, genomic, phylogenetic, and chemotaxonomic properties reveal that the strain represents a novel species of the genus Salicibibacter, for which the name Salicibibacter halophilus sp. nov. 
is proposed, with the type strain NKC3-5[T] (= KACC 21230[T] = JCM 33437[T]).}, } @article {pmid31654228, year = {2020}, author = {Zhu, D and Yang, Z and Xu, J and Wang, M and Jia, R and Chen, S and Liu, M and Zhao, X and Yang, Q and Wu, Y and Zhang, S and Liu, Y and Zhang, L and Yu, Y and Chen, X and Cheng, A}, title = {Pan-genome analysis of Riemerella anatipestifer reveals its genomic diversity and acquired antibiotic resistance associated with genomic islands.}, journal = {Functional & integrative genomics}, volume = {20}, number = {3}, pages = {307-320}, pmid = {31654228}, issn = {1438-7948}, support = {2017YFD050080//National Key Research and Development Program of China/ ; CARS-42-17//China Agricultural Research System/ ; 2017HH0026//International S&T Cooperation Program of Sichuan Province/ ; (2017)03//Science and Technology Innovation Program of Guizhou Academy of Agricultural Science/ ; CARS-SVDIP//Sichuan Veterinary Medicine and Drug Innovation Group of China Agricultural Research System/ ; }, mesh = {*Drug Resistance, Bacterial ; Flavobacteriaceae/classification/*genetics ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Phylogeny ; Virulence Factors/genetics ; }, abstract = {Riemerella anatipestifer is a gram-negative bacterium that leads to severe contagious septicemia in ducks, turkeys, chickens, and wild waterfowl. Here, a pan-genome with 32 R. anatipestifer genomes is re-established, and the mathematical model is calculated to evaluate the expansion of R. anatipestifer genomes, which were determined to be open. Average nucleotide identity (ANI) and phylogenetic analysis preliminarily clarify intraspecies variation and distance. Comparative genomic analysis of R. anatipestifer found that horizontal gene transfer events, which provide an expressway for the recruitment of novel functionalities and facilitate genetic diversity in microbial genomes, play a key role in the process of acquiring and transmitting antibiotic-resistance genes in R. 
anatipestifer. Furthermore, a new antibiotic-resistance gene cluster was identified in the same loci in 14 genomes. The uneven distribution of virulence factors was also confirmed by our results. Our study suggests that the ability to acquire foreign genes (such as antibiotic-resistance genes) increases the adaptability of R. anatipestifer, and the virulence genes with little mobility are highly conserved in R. anatipestifer.}, } @article {pmid31649123, year = {2020}, author = {Roy, S and Liu, W and Nandety, RS and Crook, A and Mysore, KS and Pislariu, CI and Frugoli, J and Dickstein, R and Udvardi, MK}, title = {Celebrating 20 Years of Genetic Discoveries in Legume Nodulation and Symbiotic Nitrogen Fixation.}, journal = {The Plant cell}, volume = {32}, number = {1}, pages = {15-41}, pmid = {31649123}, issn = {1532-298X}, mesh = {Bacteria ; Cell Division ; Fabaceae/*genetics ; Flavonoids ; Gene Editing ; Gene Expression Regulation, Plant ; Genes, Plant/*genetics ; Genetic Association Studies/*history ; Genomics/history ; History, 20th Century ; History, 21st Century ; Homeostasis ; Host Microbial Interactions/genetics/physiology ; Lotus/genetics ; Medicago truncatula/genetics ; Nitrogen Fixation/*genetics/physiology ; Organogenesis ; Oxygen ; Phaseolus/genetics ; Plant Growth Regulators ; Plant Proteins/genetics ; Plant Root Nodulation/*genetics/physiology ; Signal Transduction ; Soybeans/genetics ; Symbiosis/*genetics/physiology ; }, abstract = {Since 1999, various forward- and reverse-genetic approaches have uncovered nearly 200 genes required for symbiotic nitrogen fixation (SNF) in legumes. 
These discoveries advanced our understanding of the evolution of SNF in plants and its relationship to other beneficial endosymbioses, signaling between plants and microbes, the control of microbial infection of plant cells, the control of plant cell division leading to nodule development, autoregulation of nodulation, intracellular accommodation of bacteria, nodule oxygen homeostasis, the control of bacteroid differentiation, metabolism and transport supporting symbiosis, and the control of nodule senescence. This review catalogs and contextualizes all of the plant genes currently known to be required for SNF in two model legume species, Medicago truncatula and Lotus japonicus, and two crop species, Glycine max (soybean) and Phaseolus vulgaris (common bean). We also briefly consider the future of SNF genetics in the era of pan-genomics and genome editing.}, } @article {pmid31647104, year = {2020}, author = {Vallenet, D and Calteau, A and Dubois, M and Amours, P and Bazin, A and Beuvin, M and Burlot, L and Bussell, X and Fouteau, S and Gautreau, G and Lajus, A and Langlois, J and Planel, R and Roche, D and Rollin, J and Rouy, Z and Sabatet, V and Médigue, C}, title = {MicroScope: an integrated platform for the annotation and exploration of microbial gene functions through genomic, pangenomic and metabolic comparative analysis.}, journal = {Nucleic acids research}, volume = {48}, number = {D1}, pages = {D579-D589}, pmid = {31647104}, issn = {1362-4962}, mesh = {Databases, Genetic ; *Genes, Archaeal ; *Genes, Bacterial ; Genomics/*methods ; Metabolic Networks and Pathways ; Molecular Sequence Annotation/*methods ; *Software ; }, abstract = {Large-scale genome sequencing and the increasingly massive use of high-throughput approaches produce a vast amount of new information that completely transforms our understanding of thousands of microbial species. 
However, despite the development of powerful bioinformatics approaches, full interpretation of the content of these genomes remains a difficult task. Launched in 2005, the MicroScope platform (https://www.genoscope.cns.fr/agc/microscope) has been under continuous development and provides analysis for prokaryotic genome projects together with metabolic network reconstruction and post-genomic experiments allowing users to improve the understanding of gene functions. Here we present new improvements of the MicroScope user interface for genome selection, navigation and expert gene annotation. Automatic functional annotation procedures of the platform have also been updated and we added several new tools for the functional annotation of genes and genomic regions. We finally focus on new tools and pipeline developed to perform comparative analyses on hundreds of genomes based on pangenome graphs. To date, MicroScope contains data for >11 800 microbial genomes, part of which are manually curated and maintained by microbiologists (>4500 personal accounts in September 2019). 
The platform enables collaborative work in a rich comparative genomic context and improves community-based curation efforts.}, } @article {pmid31647096, year = {2020}, author = {Mende, DR and Letunic, I and Maistrenko, OM and Schmidt, TSB and Milanese, A and Paoli, L and Hernández-Plaza, A and Orakov, AN and Forslund, SK and Sunagawa, S and Zeller, G and Huerta-Cepas, J and Coelho, LP and Bork, P}, title = {proGenomes2: an improved database for accurate and consistent habitat, taxonomic and functional annotations of prokaryotic genomes.}, journal = {Nucleic acids research}, volume = {48}, number = {D1}, pages = {D621-D625}, pmid = {31647096}, issn = {1362-4962}, mesh = {Computational Biology/methods ; *Databases, Genetic ; Ecosystem ; *Genome, Archaeal ; *Genome, Bacterial ; *Genomics ; Internet ; Molecular Sequence Annotation ; Polymorphism, Single Nucleotide ; Prokaryotic Cells ; Reproducibility of Results ; Software ; }, abstract = {Microbiology depends on the availability of annotated microbial genomes for many applications. Comparative genomics approaches have been a major advance, but consistent and accurate annotations of genomes can be hard to obtain. In addition, newer concepts such as the pan-genome concept are still being implemented to help answer biological questions. Hence, we present proGenomes2, which provides 87 920 high-quality genomes in a user-friendly and interactive manner. Genome sequences and annotations can be retrieved individually or by taxonomic clade. Every genome in the database has been assigned to a species cluster and most genomes could be accurately assigned to one or multiple habitats. In addition, general functional annotations and specific annotations of antibiotic resistance genes and single nucleotide variants are provided. In short, proGenomes2 provides threefold more genomes, enhanced habitat annotations, updated taxonomic and functional annotation and improved linkage to the NCBI BioSample database. 
The database is available at http://progenomes.embl.de/.}, } @article {pmid31646960, year = {2019}, author = {Yin, Z and Yuan, C and Du, Y and Yang, P and Qian, C and Wei, Y and Zhang, S and Huang, D and Liu, B}, title = {Comparative genomic analysis of the Hafnia genus reveals an explicit evolutionary relationship between the species alvei and paralvei and provides insights into pathogenicity.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {768}, pmid = {31646960}, issn = {1471-2164}, support = {No. 81471904, 81772148, and 81611530714//National Natural Science Foundation of China/ ; }, mesh = {Bacterial Secretion Systems/genetics ; Comparative Genomic Hybridization ; Drug Resistance, Bacterial/genetics ; *Genome, Bacterial ; Genotype ; Hafnia/*classification/*pathogenicity ; Phylogeny ; Species Specificity ; *Virulence ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: The Hafnia genus is an opportunistic pathogen that has been implicated in both nosocomial and community-acquired infections. Although Hafnia is fairly often isolated from clinical material, its taxonomy has remained an unsolved riddle, and the involvement and importance of Hafnia in human disease is also uncertain. Here, we used comparative genomic analysis to define the taxonomy of Hafnia, identify species-specific genes that may be the result of ecological and pathogenic specialization, and reveal virulence-related genetic profiles that may contribute to pathogenesis.

RESULTS: One complete genome sequence and 19 draft genome sequences for Hafnia strains were generated and combined with 27 publicly available genomes. We provided high-resolution typing methods by constructing phylogeny and population structure based on single-copy core genes in combination with whole genome average nucleotide identity to identify two distant Hafnia species (alvei and paralvei) and one mislabeled strain. The open pan-genome and the presence of numerous mobile genetic elements reveal that Hafnia has undergone massive gene rearrangements. Presence of species-specific core genomes associated with metabolism and transport suggests the putative niche differentiation between alvei and paralvei. We also identified possession of diverse virulence-related profiles in both Hafnia species, including the macromolecular secretion system, virulence, and antimicrobial resistance. In the macromolecular system, T1SS, Flagellum 1, Tad pilus and T6SS-1 were conserved in Hafnia, whereas T4SS, T5SS, and other T6SSs exhibited the evolution of diversity. The virulence factors in Hafnia are related to adherence, toxin, iron uptake, stress adaptation, and efflux pump. The identified resistance genes are associated with aminoglycoside, beta-lactam, bacitracin, cationic antimicrobial peptide, fluoroquinolone, and rifampin. These virulence-related profiles identified at the genomic level provide insights into Hafnia pathogenesis and the differentiation between alvei and paralvei.

CONCLUSIONS: Our research using core genome phylogeny and comparative genomics analysis of a larger collection of strains provides a comprehensive view of the taxonomy and species-specific traits between Hafnia species. Deciphering the genome of Hafnia strains possessing a reservoir of macromolecular secretion systems, virulence factors, and resistance genes related to pathogenicity may provide insights into addressing its numerous infections and devising strategies to combat the pathogen.}, } @article {pmid31645966, year = {2019}, author = {Chen, F and Song, Y and Li, X and Chen, J and Mo, L and Zhang, X and Lin, Z and Zhang, L}, title = {Genome sequences of horticultural plants: past, present, and future.}, journal = {Horticulture research}, volume = {6}, number = {}, pages = {112}, pmid = {31645966}, issn = {2662-6810}, abstract = {Horticultural plants play various and critical roles for humans by providing fruits, vegetables, materials for beverages, and herbal medicines and by acting as ornamentals. They have also shaped human art, culture, and environments and thereby have influenced the lifestyles of humans. With the advent of sequencing technologies, there has been a dramatic increase in the number of sequenced genomes of horticultural plant species in the past decade. The genomes of horticultural plants are highly diverse and complex, often with a high degree of heterozygosity and a high ploidy due to their long and complex history of evolution and domestication. Here we summarize the advances in the genome sequencing of horticultural plants, the reconstruction of pan-genomes, and the development of horticultural genome databases. We also discuss past, present, and future studies related to genome sequencing, data storage, data quality, data sharing, and data visualization to provide practical guidance for genomic studies of horticultural plants. 
Finally, we propose a horticultural plant genome project as well as the roadmap and technical details toward three goals of the project.}, } @article {pmid31642484, year = {2020}, author = {Lu, F and Wei, Z and Luo, Y and Guo, H and Zhang, G and Xia, Q and Wang, Y}, title = {SilkDB 3.0: visualizing and exploring multiple levels of data for silkworm.}, journal = {Nucleic acids research}, volume = {48}, number = {D1}, pages = {D749-D755}, pmid = {31642484}, issn = {1362-4962}, mesh = {Animals ; Bayes Theorem ; Bombyx/*genetics ; Chromosome Mapping ; Chromosomes/genetics ; Computational Biology/*methods ; Computer Graphics ; *Databases, Genetic ; Exons ; Gene Expression Profiling ; Genetic Variation ; *Genome, Insect ; Genomics ; Introns ; Phylogeny ; *Transcriptome ; User-Computer Interface ; }, abstract = {SilkDB is an open-accessibility database and powerful platform that provides comprehensive information on the silkworm (Bombyx mori) genome. Since SilkDB 2.0 was released 10 years ago, vast quantities of data about multiple aspects of the silkworm have been generated, including genome, transcriptome, Hi-C and pangenome. To visualize data at these different biological levels, we present SilkDB 3.0 (https://silkdb.bioinfotoolkits.net), a visual analytic tool for exploring silkworm data through an interactive user interface. The database contains a high-quality chromosome-level assembly of the silkworm genome, and its coding sequences and gene sets are more accurate than those in the previous version. SilkDB 3.0 provides a view of the information for each gene at the levels of sequence, protein structure, gene family, orthology, synteny, genome organization and gives access to gene expression information, genetic variation and genome interaction map. A set of visualization tools are available to display the abundant information in the above datasets. 
With an improved interactive user interface for the integration of large data sets, the updated SilkDB 3.0 database will be a valuable resource for the silkworm and insect research community.}, } @article {pmid31642169, year = {2019}, author = {Nanayakkara, BS and O'Brien, CL and Gordon, DM}, title = {Phenotypic characteristics contributing to the enhanced growth of Escherichia coli bloom strains.}, journal = {Environmental microbiology reports}, volume = {11}, number = {6}, pages = {817-824}, doi = {10.1111/1758-2229.12801}, pmid = {31642169}, issn = {1758-2229}, support = {LP120100327//Australian Research Council/International ; Project 1101//Water Research Australia/International ; //Fitzroy River Water and Victorian Department of Health/International ; //Sydney Catchment Authority/International ; //South Australian Water Corporation/International ; //Sydney Water Corporation/International ; //City West Water/International ; //Yarra Valley Water/International ; //South East Water/International ; //Hunter Water Corporation/International ; //Queensland Bulk Water Authority/International ; //Water Corporation of Western Australia/International ; //Melbourne Water/International ; }, mesh = {Bacterial Capsules/metabolism ; Biological Transport ; Escherichia coli/classification/genetics/*growth & development/isolation & purification ; Ferric Compounds/metabolism ; Phylogeny ; *Water Microbiology ; }, abstract = {During bloom events, Escherichia coli cell counts increase to between 10,000 and 100,000 cfu/100 ml of water. The strains responsible for bloom events belong to E. coli phylogenetic groups A and B1, and all have acquired a capsule from Klebsiella. A pan-genome comparison of phylogroup A E. coli revealed that the ferric citrate uptake system (fecIRABCDE) was overrepresented in phylogroup A bloom strains compared with non-bloom E. coli. 
A series of experiments were carried out to investigate if the capsule together with ferric citrate uptake system could confer a growth rate advantage on E. coli. Capsulated strains had a growth rate advantage regardless of the media composition and the presence/absence of the fec operon, and they had a shorter lag phase compared with capsule-negative strains. The results suggest that the Klebsiella capsule may facilitate nutrient uptake or utilization by a strain. This, together with the protective roles played by the capsule and the shorter lag phase of capsule-positive strains, may explain why it is only capsule-positive strains that produce elevated counts in response to nutrient influx.}, } @article {pmid31641046, year = {2019}, author = {Zhong, C and Han, M and Yang, P and Chen, C and Yu, H and Wang, L and Ning, K}, title = {Comprehensive Analysis Reveals the Evolution and Pathogenicity of Aeromonas, Viewed from Both Single Isolated Species and Microbial Communities.}, journal = {mSystems}, volume = {4}, number = {5}, pages = {}, pmid = {31641046}, issn = {2379-5077}, abstract = {The genus Aeromonas is a common gastrointestinal pathogen associated with human and animal infections. Due to the high level of cross-species similarity, their evolutionary dynamics and genetic diversity are still fragmented. Hereby, we investigated the pan-genomes of 29 Aeromonas species, as well as Aeromonas species in microbial communities, to clarify their evolutionary dynamics and genetic diversity, with special focus on virulence factors and horizontal gene transfer events. Our study revealed an open pan-genome of Aeromonas containing 10,144 gene families. These Aeromonas species exhibited different functional constraints, with the single-copy core genes and most accessory genes experiencing purifying selection. The significant congruence between core genome and pan-genome trees revealed that core genes mainly affected evolutionary divergences of Aeromonas species. 
Gene gains and losses revealed a high level of genome plasticity, exhibited by hundreds of gene expansions and contractions, horizontally transferred genes, and mobile genetic elements. The selective constraints shaped virulence gene pools of these Aeromonas strains, where genes encoding hemolysin were ubiquitous. Of these strains, Aeromonas aquatica MX16A seemed to be more resistant, as it harbored most resistance genes. Finally, the virulence factors of Aeromonas in microbial communities were quite dynamic in response to environment changes. For example, the virulence diversity of Aeromonas in microbial communities could reach levels that match some of the most virulent Aeromonas species (such as A. hydrophila) in penetrated-air and modified-air packaging. Our work shed some light onto genetic diversity, evolutionary history, and functional features of Aeromonas, which could facilitate the detection and prevention of infections. IMPORTANCE: Aeromonas has long been known as a gastrointestinal pathogen, yet it has many species whose evolutionary dynamics and genetic diversity had been unclear until now. We have conducted pan-genome analysis for 29 Aeromonas species and revealed a high level of genome plasticity exhibited by hundreds of gene expansions and contractions, horizontally transferred genes, and mobile genetic elements. These species also contained many virulence factors both identified from single isolated species and microbial community. 
This pan-genome study could elevate the level for detection and prevention of Aeromonas infections.}, } @article {pmid31639358, year = {2019}, author = {Brockhurst, MA and Harrison, E and Hall, JPJ and Richards, T and McNally, A and MacLean, C}, title = {The Ecology and Evolution of Pangenomes.}, journal = {Current biology : CB}, volume = {29}, number = {20}, pages = {R1094-R1103}, doi = {10.1016/j.cub.2019.08.012}, pmid = {31639358}, issn = {1879-0445}, support = {BB/R006253/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R014884/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 106918/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; BB/R006261/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Biological Evolution ; Evolution, Molecular ; *Genome, Bacterial ; *Metagenome ; Phylogeny ; }, abstract = {Since the first genome-scale comparisons, it has been evident that the genomes of many species are unbound by strict vertical descent: Large differences in gene content can occur among genomes belonging to the same prokaryotic species, with only a fraction of genes being universal to all genomes. These insights gave rise to the pangenome concept. The pangenome is defined as the set of all the genes present in a given species and can be subdivided into the accessory genome, present in only some of the genomes, and the core genome, present in all the genomes. Pangenomes arise due to gene gain by genomes from other species through horizontal gene transfer and differential gene loss among genomes, and have been described in both prokaryotes and eukaryotes. Our current view of pangenome variation is phenomenological and incomplete. In this review, we outline the mechanistic, ecological and evolutionary drivers of and barriers to horizontal gene transfer that are likely to structure pangenomes. 
We highlight the key role of conflict between the host chromosome(s) and the mobile genetic elements that mediate gene exchange. We identify shortcomings in our current models of pangenome evolution and suggest directions for future research to allow a more complete understanding of how and why pangenomes evolve.}, } @article {pmid31635385, year = {2019}, author = {Boursier, G and Rittore, C and Georgin-Lavialle, S and Belot, A and Galeotti, C and Hachulla, E and Hentgen, V and Rossi-Semerano, L and Sarrabay, G and Touitou, I}, title = {Positive Impact of Expert Reference Center Validation on Performance of Next-Generation Sequencing for Genetic Diagnosis of Autoinflammatory Diseases.}, journal = {Journal of clinical medicine}, volume = {8}, number = {10}, pages = {}, pmid = {31635385}, issn = {2077-0383}, support = {No grant. Funding as National Reference Center.//Ministère des Affaires Sociales et de la Santé/International ; }, abstract = {Monogenic autoinflammatory diseases (AIDs) are caused by variants in genes that regulate innate immunity. The current diagnostic performance of targeted next-generation sequencing (NGS) for AIDs is low. We assessed whether pre-analytic advice from expert clinicians could help improve NGS performance from our 4 years of experience with the sequencing of a panel of 55 AIDs genes. The study included all patients who underwent routine NGS testing between September 2014 and January 2019 at the laboratory of autoinflammatory diseases (Montpellier, France). Before March 2018, all medical requests for testing were accepted. After this time, we required validation by a reference center before NGS: the positive advice could be obtained after a face-to-face consultation with the patient or presentation of the patient's case at a multidisciplinary staff meeting. Targeted NGS resulted in an overall 7% genetic confirmation, which is consistent with recent reports. 
The diagnostic performance before and after implementation of the new pre-requisite increased from 6% to 10% (p = 0.021). Our study demonstrated, for the first time, the beneficial effect of a two-step strategy (clinical expert advice, then genetic testing) for AIDs diagnosis and stressed the possible usefulness of the strategy in anticipation of the development of pan-genomic analyses in routine settings.}, } @article {pmid31630297, year = {2020}, author = {Zhao, S and Ci, J and Xue, J and Wang, Y and Li, X and Hao, L and Tian, L and Guo, H and Xin, C and Zhao, Y}, title = {Cutibacterium acnes Type II strains are associated with acne in Chinese patients.}, journal = {Antonie van Leeuwenhoek}, volume = {113}, number = {3}, pages = {377-388}, doi = {10.1007/s10482-019-01344-x}, pmid = {31630297}, issn = {1572-9699}, mesh = {Acne Vulgaris/*microbiology ; Actinomycetales Infections/*microbiology ; China ; Genome, Bacterial ; Genomics/methods ; Humans ; Multilocus Sequence Typing ; Phylogeny ; Propionibacterium/*classification/genetics ; Whole Genome Sequencing ; }, abstract = {Acne is a common inflammatory skin disease, especially in adolescents. Certain Cutibacterium acnes subtypes are associated with acne, although more than one subtype of C. acnes strains may simultaneously reside on the surface of the skin of an individual. To better understand the relationship between the genomic characteristics of C. acnes subtypes and acne, we collected 50 C. acnes strains from the facial skin of 10 people (5 healthy individuals, 5 patients with acne) in Liaoning, China and performed whole genome sequencing of all strains. We demonstrated that the six potential pathogenic C. acnes strains were all Type II subtype, and discovered 90 unique genes of the six strains related to acne using pan-genome analysis. 
The distribution of 2 of the 90 genes was identified by PCR in bacterial cultures collected from the facial skin of 171 individuals (55 healthy individuals, 52 patients with mild acne and 64 patients with moderate to severe acne). Both the genes were significantly associated with acne (Chi square test, P < 0.01). We conclude that Type II strains are associated with acne in Chinese patients.}, } @article {pmid31626589, year = {2019}, author = {Mangas, EL and Rubio, A and Álvarez-Marín, R and Labrador-Herrera, G and Pachón, J and Pachón-Ibáñez, ME and Divina, F and Pérez-Pulido, AJ}, title = {Pangenome of Acinetobacter baumannii uncovers two groups of genomes, one of them with genes involved in CRISPR/Cas defence systems associated with the absence of plasmids and exclusive genes for biofilm formation.}, journal = {Microbial genomics}, volume = {5}, number = {11}, pages = {}, pmid = {31626589}, issn = {2057-5858}, mesh = {Acinetobacter baumannii/*genetics ; Bacteria/genetics ; Bacterial Proteins/genetics ; Biofilms ; CRISPR-Cas Systems ; Clustered Regularly Interspaced Short Palindromic Repeats ; Genome, Bacterial/genetics ; Genomics ; Phylogeny ; Plasmids/*genetics ; }, abstract = {Acinetobacter baumannii is an opportunistic bacterium that causes hospital-acquired infections with a high mortality and morbidity, since there are strains resistant to virtually any kind of antibiotic. The chase to find novel strategies to fight against this microbe can be favoured by knowledge of the complete catalogue of genes of the species, and their relationship with the specific characteristics of different isolates. In this work, we performed a genomics analysis of almost 2500 strains. Two different groups of genomes were found based on the number of shared genes. One of these groups rarely has plasmids, and bears clustered regularly interspaced short palindromic repeat (CRISPR) sequences, in addition to CRISPR-associated genes (cas genes) or restriction-modification system genes. 
This fact strongly supports the lack of plasmids. Furthermore, the scarce plasmids in this group also bear CRISPR sequences, and specifically contain genes involved in prokaryotic toxin-antitoxin systems that could either act as the still little known CRISPR type IV system or be the precursors of other novel CRISPR/Cas systems. In addition, a limited set of strains present a new cas9-like gene, which may complement the other cas genes in inhibiting the entrance of new plasmids into the bacteria. Finally, this group has exclusive genes involved in biofilm formation, which would connect CRISPR systems to the biogenesis of these bacterial resistance structures.}, } @article {pmid31623351, year = {2019}, author = {Wan, X}, title = {Comparative Genome Analyses Reveal the Genomic Traits and Host Plant Adaptations of Flavobacterium akiainvivens IK-1[T].}, journal = {International journal of molecular sciences}, volume = {20}, number = {19}, pages = {}, pmid = {31623351}, issn = {1422-0067}, support = {63191440//This work was supported by the Fundamental Research Funds for the Central Universities, Nankai University/ ; }, mesh = {*Adaptation, Physiological ; Bacterial Physiological Phenomena ; Biological Evolution ; Computational Biology/methods ; Flavobacterium/classification/*physiology ; *Genome, Bacterial ; *Genomics/methods ; *Host-Pathogen Interactions ; Plant Diseases/microbiology ; Plants/*microbiology ; Quorum Sensing ; Synteny ; }, abstract = {The genus Flavobacterium contains a large group of commensal bacteria identified in diverse terrestrial and aquatic habitats. We compared the genome of a new species Flavobacterium akiainvivens IK-1[T] to publicly available genomes of Flavobacterium species to reveal the genomic traits and ecological roles of IK-1[T]. Principal component analysis (PCA) of carbohydrate-active enzyme classes suggests that IK-1[T] belongs to a terrestrial clade of Flavobacterium. 
In addition, type 2 and type 9 secretion systems involved in bacteria-environment interactions were identified in the IK-1[T] genome. The IK-1[T] genome encodes eukaryotic-like domain containing proteins including ankyrin repeats, von Willebrand factor type A domain, and major royal jelly proteins, suggesting that IK-1[T] may alter plant host physiology by secreting eukaryotic-like proteins that mimic host proteins. A novel two-component system FaRpfC-FaYpdB was identified in the IK-1[T] genome, which may mediate quorum sensing to regulate global gene expressions. Our findings suggest that comparative genome analyses of Flavobacterium spp. reveal that IK-1[T] has adapted to a terrestrial niche. Further functional characterizations of IK-1[T] secreted proteins and their regulation systems will shed light on molecular basis of bacteria-plant interactions in environments.}, } @article {pmid31621921, year = {2020}, author = {Paulsson, JO and Backman, S and Wang, N and Stenman, A and Crona, J and Thutkawkorapin, J and Ghaderi, M and Tham, E and Stålberg, P and Zedenius, J and Juhlin, CC}, title = {Whole-genome sequencing of synchronous thyroid carcinomas identifies aberrant DNA repair in thyroid cancer dedifferentiation.}, journal = {The Journal of pathology}, volume = {250}, number = {2}, pages = {183-194}, doi = {10.1002/path.5359}, pmid = {31621921}, issn = {1096-9896}, mesh = {Aged ; Cell Dedifferentiation/genetics ; DNA Copy Number Variations ; DNA Mutational Analysis/methods ; DNA Repair/*genetics ; DNA, Neoplasm/genetics ; Disease Progression ; Female ; Gene Frequency ; Humans ; Lymphatic Metastasis ; Microsatellite Instability ; Mutation ; Neoplasms, Multiple Primary/*genetics/pathology ; Thyroid Carcinoma, Anaplastic/*genetics/pathology/secondary ; Thyroid Neoplasms/*genetics/pathology ; Whole Genome Sequencing/methods ; }, abstract = {The genetics underlying thyroid cancer dedifferentiation is only partly understood and has not yet been characterised using 
comprehensive pan-genomic analyses. We investigated a unique case with synchronous follicular thyroid carcinoma (FTC), poorly differentiated thyroid carcinoma (PDTC), and anaplastic thyroid carcinoma (ATC), as well as regional lymph node metastases from the PDTC and ATC from a single patient using whole-genome sequencing (WGS). The FTC displayed mutations in CALR, RB1, and MSH2, and the PDTC exhibited mutations in TP53, DROSHA, APC, TERT, and additional DNA repair genes - associated with an immense increase in sub-clonal somatic mutations. All components displayed an overrepresentation of C>T transitions with associated microsatellite instability (MSI) in the PDTC and ATC, with borderline MSI in the FTC. Clonality analyses pinpointed a shared ancestral clone enriched for mutations in TP53-associated regulation of DNA repair and identified important sub-clones for each tumour component already present in the corresponding preceding lesion. This genomic characterisation of the natural progression of thyroid cancer reveals several novel genes of interest for future studies. Moreover, the findings support the theory of a stepwise dedifferentiation process and suggest that defects in DNA repair could play an important role in the clonal evolution of thyroid cancer. © 2019 Pathological Society of Great Britain and Ireland. 
Published by John Wiley & Sons, Ltd.}, } @article {pmid31620779, year = {2020}, author = {Zhang, Y and Zhang, Z and Zhang, H and Zhao, Y and Zhang, Z and Xiao, J}, title = {PADS Arsenal: a database of prokaryotic defense systems related genes.}, journal = {Nucleic acids research}, volume = {48}, number = {D1}, pages = {D590-D598}, pmid = {31620779}, issn = {1362-4962}, mesh = {Archaea/*genetics/virology ; Archaeal Viruses/pathogenicity ; Bacteria/*genetics/virology ; Bacteriophages/pathogenicity ; CRISPR-Cas Systems ; DNA Restriction-Modification Enzymes ; *Databases, Genetic ; *Host-Pathogen Interactions ; *Software ; }, abstract = {Defense systems are vital weapons for prokaryotes to resist heterologous DNA and survive from the constant invasion of viruses, and they are widely used in biochemistry investigation and antimicrobial drug research. So far, numerous types of defense systems have been discovered, but there is no comprehensive defense systems database to organize prokaryotic defense gene datasets. To fill this gap, we unveil the prokaryotic antiviral defense system (PADS) Arsenal (https://bigd.big.ac.cn/padsarsenal), a public database dedicated to gathering, storing, analyzing and visualizing prokaryotic defense gene datasets. The initial version of PADS Arsenal integrates 18 distinctive categories of defense system with the annotation of 6 600 264 genes retrieved from 63,701 genomes across 33 390 species of archaea and bacteria. PADS Arsenal provides various ways to retrieve defense systems related genes information and visualize them with multifarious function modes. Moreover, an online analysis pipeline is integrated into PADS Arsenal to facilitate annotation and evolutionary analysis of defense genes. PADS Arsenal can also visualize the dynamic variation information of defense genes from pan-genome analysis. 
Overall, PADS Arsenal is a state-of-the-art open comprehensive resource to accelerate the research of prokaryotic defense systems.}, } @article {pmid31619167, year = {2019}, author = {Li, R and Tian, X and Yang, P and Fan, Y and Li, M and Zheng, H and Wang, X and Jiang, Y}, title = {Recovery of non-reference sequences missing from the human reference genome.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {746}, pmid = {31619167}, issn = {1471-2164}, support = {31822052//National Natural Science Foundation of China/ ; 31802027//National Natural Science Foundation of China/ ; 2018M631209//Doctoral Program Foundation of Institutions of Higher Education of China/ ; 2452018127//Fundamental Research Funds for the Central Universities/ ; }, mesh = {Alleles ; Chromosome Mapping ; Genetic Variation/genetics ; Genome, Human/*genetics ; Humans ; Mutagenesis, Insertional ; Sequence Alignment ; Sequence Analysis, DNA ; Tandem Repeat Sequences ; }, abstract = {BACKGROUND: The non-reference sequences (NRS) represent structure variations in human genome with potential functional significance. However, besides the known insertions, it is currently unknown whether other types of structure variations with NRS exist.

RESULTS: Here, we compared 31 human de novo assemblies with the current reference genome to identify the NRS and their location. We resolved the precise location of 6113 NRS adding up to 12.8 Mb. Besides 1571 insertions, we detected 3041 alternate alleles, which were defined as having less than 90% (or none) identity with the reference alleles. These alternate alleles overlapped with 1143 protein-coding genes including a putative novel MHC haplotype. Further, we demonstrated that the alternate alleles and their flanking regions had high content of tandem repeats, indicating that their origin was associated with tandem repeats.

CONCLUSIONS: Our study detected a large number of NRS including many alternate alleles which are previously uncharacterized. We suggested that the origin of alternate alleles was associated with tandem repeats. Our results enriched the spectrum of genetic variations in human genome.}, } @article {pmid31611653, year = {2020}, author = {Hoarfrost, A and Nayfach, S and Ladau, J and Yooseph, S and Arnosti, C and Dupont, CL and Pollard, KS}, title = {Global ecotypes in the ubiquitous marine clade SAR86.}, journal = {The ISME journal}, volume = {14}, number = {1}, pages = {178-188}, pmid = {31611653}, issn = {1751-7370}, mesh = {Ecotype ; Gammaproteobacteria/*classification/genetics ; Genes, Bacterial ; Metagenome ; Oceans and Seas ; Phylogeography ; }, abstract = {SAR86 is an abundant and ubiquitous heterotroph in the surface ocean that plays a central role in the function of marine ecosystems. We hypothesized that despite its ubiquity, different SAR86 subgroups may be endemic to specific ocean regions and functionally specialized for unique marine environments. However, the global biogeographical distributions of SAR86 genes, and the manner in which these distributions correlate with marine environments, have not been investigated. We quantified SAR86 gene content across globally distributed metagenomic samples and modeled these gene distributions as a function of 51 environmental variables. We identified five distinct clusters of genes within the SAR86 pangenome, each with a unique geographic distribution associated with specific environmental characteristics. Gene clusters are characterized by the strong taxonomic enrichment of distinct SAR86 genomes and partial assemblies, as well as differential enrichment of certain functional groups, suggesting differing functional and ecological roles of SAR86 ecotypes. 
We then leveraged our models and high-resolution, remote sensing-derived environmental data to predict the distributions of SAR86 gene clusters across the world's oceans, creating global maps of SAR86 ecotype distributions. Our results reveal that SAR86 exhibits previously unknown, complex biogeography, and provide a framework for exploring geographic distributions of genetic diversity from other microbial clades.}, } @article {pmid31609418, year = {2019}, author = {Tralamazza, SM and Rocha, LO and Oggenfuss, U and Corrêa, B and Croll, D}, title = {Complex Evolutionary Origins of Specialized Metabolite Gene Cluster Diversity among the Plant Pathogenic Fungi of the Fusarium graminearum Species Complex.}, journal = {Genome biology and evolution}, volume = {11}, number = {11}, pages = {3106-3122}, pmid = {31609418}, issn = {1759-6653}, mesh = {DNA Transposable Elements ; Evolution, Molecular ; Fungi/genetics ; Fusariosis/*microbiology ; Fusarium/*genetics ; Gene Transfer, Horizontal ; *Genome, Fungal ; *Multigene Family ; Plant Diseases/microbiology ; Secondary Metabolism/*genetics ; Triticum/microbiology ; }, abstract = {Fungal genomes encode highly organized gene clusters that underlie the production of specialized (or secondary) metabolites. Gene clusters encode key functions to exploit plant hosts or environmental niches. Promiscuous exchange among species and frequent reconfigurations make gene clusters some of the most dynamic elements of fungal genomes. Despite evidence for high diversity in gene cluster content among closely related strains, the microevolutionary processes driving gene cluster gain, loss, and neofunctionalization are largely unknown. We analyzed the Fusarium graminearum species complex (FGSC) composed of plant pathogens producing potent mycotoxins and causing Fusarium head blight on cereals. We de novo assembled genomes of previously uncharacterized FGSC members (two strains of F. austroamericanum, F. cortaderiae, and F. meridionale). 
Our analyses of 8 species of the FGSC in addition to 15 other Fusarium species identified a pangenome of 54 gene clusters within FGSC. We found that multiple independent losses were a key factor generating extant cluster diversity within the FGSC and the Fusarium genus. We identified a modular gene cluster conserved among distantly related fungi, which was likely reconfigured to encode different functions. We also found strong evidence that a rare cluster in FGSC was gained through an ancient horizontal transfer between bacteria and fungi. Chromosomal rearrangements underlying cluster loss were often complex and were likely facilitated by an enrichment in specific transposable elements. Our findings identify important transitory stages in the birth and death process of specialized metabolism gene clusters among very closely related species.}, } @article {pmid31607556, year = {2019}, author = {Tett, A and Huang, KD and Asnicar, F and Fehlner-Peach, H and Pasolli, E and Karcher, N and Armanini, F and Manghi, P and Bonham, K and Zolfo, M and De Filippis, F and Magnabosco, C and Bonneau, R and Lusingu, J and Amuasi, J and Reinhard, K and Rattei, T and Boulund, F and Engstrand, L and Zink, A and Collado, MC and Littman, DR and Eibach, D and Ercolini, D and Rota-Stabelli, O and Huttenhower, C and Maixner, F and Segata, N}, title = {The Prevotella copri Complex Comprises Four Distinct Clades Underrepresented in Westernized Populations.}, journal = {Cell host \& microbe}, volume = {26}, number = {5}, pages = {666-679.e7}, pmid = {31607556}, issn = {1934-6069}, support = {TL1 TR001447/TR/NCATS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; U54 DE023798/DE/NIDCR NIH HHS/United States ; 716575/ERC_/European Research Council/International ; R24 DK110499/DK/NIDDK NIH HHS/United States ; R01 DK103358/DK/NIDDK NIH HHS/United States ; R01 HG005220/HG/NHGRI NIH HHS/United States ; }, mesh = {Diet ; Ethiopia ; Feces/microbiology ; Fossils/*microbiology 
; Gastrointestinal Microbiome/*genetics ; Genetic Variation ; Genome, Bacterial/*genetics ; Ghana ; Humans ; Prevotella/*classification/*genetics/isolation & purification ; Tanzania ; }, abstract = {Prevotella copri is a common human gut microbe that has been both positively and negatively associated with host health. In a cross-continent meta-analysis exploiting >6,500 metagenomes, we obtained >1,000 genomes and explored the genetic and population structure of P. copri. P. copri encompasses four distinct clades (>10% inter-clade genetic divergence) that we propose constitute the P. copri complex, and all clades were confirmed by isolate sequencing. These clades are nearly ubiquitous and co-present in non-Westernized populations. Genomic analysis showed substantial functional diversity in the complex with notable differences in carbohydrate metabolism, suggesting that multi-generational dietary modifications may be driving reduced prevalence in Westernized populations. Analysis of ancient metagenomes highlighted patterns of P. copri presence consistent with modern non-Westernized populations and a clade delineation time pre-dating human migratory waves out of Africa. These findings reveal that P. 
copri exhibits a high diversity that is underrepresented in Western-lifestyle populations.}, } @article {pmid34386196, year = {2019}, author = {Llamas, B and Narzisi, G and Schneider, V and Audano, PA and Biederstedt, E and Blauvelt, L and Bradbury, P and Chang, X and Chin, CS and Fungtammasan, A and Clarke, WE and Cleary, A and Ebler, J and Eizenga, J and Sibbesen, JA and Markello, CJ and Garrison, E and Garg, S and Hickey, G and Lazo, GR and Lin, MF and Mahmoud, M and Marschall, T and Minkin, I and Monlong, J and Musunuri, RL and Sagayaradj, S and Novak, AM and Rautiainen, M and Regier, A and Sedlazeck, FJ and Siren, J and Souilmi, Y and Wagner, J and Wrightsman, T and Yokoyama, TT and Zeng, Q and Zook, JM and Paten, B and Busby, B}, title = {A strategy for building and using a human reference pangenome.}, journal = {F1000Research}, volume = {8}, number = {}, pages = {1751}, pmid = {34386196}, issn = {2046-1402}, support = {U41 HG007234/HG/NHGRI NIH HHS/United States ; UM1 HG008898/HG/NHGRI NIH HHS/United States ; }, abstract = {In March 2019, 45 scientists and software engineers from around the world converged at the University of California, Santa Cruz for the first pangenomics codeathon. The purpose of the meeting was to propose technical specifications and standards for a usable human pangenome as well as to build relevant tools for genome graph infrastructures. During the meeting, the group held several intense and productive discussions covering a diverse set of topics, including advantages of graph genomes over a linear reference representation, design of new methods that can leverage graph-based data structures, and novel visualization and annotation approaches for pangenomes. Additionally, the participants self-organized themselves into teams that worked intensely over a three-day period to build a set of pipelines and tools for specific pangenomic applications. 
A summary of the questions raised and the tools developed are reported in this manuscript.}, } @article {pmid31600234, year = {2019}, author = {Chen, S and Soehnlen, M and Blom, J and Terrapon, N and Henrissat, B and Walker, ED}, title = {Comparative genomic analyses reveal diverse virulence factors and antimicrobial resistance mechanisms in clinical Elizabethkingia meningoseptica strains.}, journal = {PloS one}, volume = {14}, number = {10}, pages = {e0222648}, pmid = {31600234}, issn = {1932-6203}, support = {R37 AI021884/AI/NIAID NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/therapeutic use ; Biofilms/growth & development ; Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; Comparative Genomic Hybridization ; DNA-Binding Proteins/genetics ; Drug Resistance, Bacterial/*genetics ; Flavobacteriaceae/*genetics/pathogenicity ; Flavobacteriaceae Infections/drug therapy/epidemiology/*genetics/microbiology ; Genome, Bacterial/*genetics ; Genomics/methods ; Humans ; Phylogeny ; Transcription Factors/genetics ; Virulence Factors/genetics ; }, abstract = {Three human clinical isolates of bacteria (designated strains Em1, Em2 and Em3) had high average nucleotide identity (ANI) to Elizabethkingia meningoseptica. Their genome sizes (3.89, 4.04 and 4.04 Mb) were comparable to those of other Elizabethkingia species and strains, and exhibited open pan-genome characteristics, with two strains being nearly identical and the third divergent. These strains were susceptible only to trimethoprim/sulfamethoxazole and ciprofloxacin amongst 16 antibiotics in minimum inhibitory tests. The resistome exhibited a high diversity of resistance genes, including 5 different lactamase- and 18 efflux protein- encoding genes. Forty-four genes encoding virulence factors were conserved among the strains. Sialic acid transporters and curli synthesis genes were well conserved in E. meningoseptica but absent in E. anophelis and E. miricola. E. 
meningoseptica carried several genes contributing to biofilm formation. 58 glycoside hydrolases (GH) and 25 putative polysaccharide utilization loci (PULs) were found. The strains carried numerous genes encoding two-component system proteins (56), transcription factor proteins (187-191), and DNA-binding proteins (6-7). Several prophages and CRISPR/Cas elements were uniquely present in the genomes.}, } @article {pmid31598686, year = {2019}, author = {Bayliss, SC and Thorpe, HA and Coyle, NM and Sheppard, SK and Feil, EJ}, title = {PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.}, journal = {GigaScience}, volume = {8}, number = {10}, pages = {}, pmid = {31598686}, issn = {2047-217X}, support = {MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; MR/R00241X/1/MRC_/Medical Research Council/United Kingdom ; BB/M026388/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Cluster Analysis ; *Genome, Bacterial ; Genomics/*methods ; }, abstract = {BACKGROUND: Cataloguing the distribution of genes within natural bacterial populations is essential for understanding evolutionary processes and the genetic basis of adaptation. Advances in whole genome sequencing technologies have led to a vast expansion in the amount of bacterial genomes deposited in public databases. There is a pressing need for software solutions which are able to cluster, catalogue and characterise genes, or other features, in increasingly large genomic datasets.

RESULTS: Here we present a pangenomics toolbox, PIRATE (Pangenome Iterative Refinement and Threshold Evaluation), which identifies and classifies orthologous gene families in bacterial pangenomes over a wide range of sequence similarity thresholds. PIRATE builds upon recent scalable software developments to allow for the rapid interrogation of thousands of isolates. PIRATE clusters genes (or other annotated features) over a wide range of amino acid or nucleotide identity thresholds and uses the clustering information to rapidly identify paralogous gene families and putative fission/fusion events. Furthermore, PIRATE orders the pangenome using a directed graph, provides a measure of allelic variation, and estimates sequence divergence for each gene family.

CONCLUSIONS: We demonstrate that PIRATE scales linearly with both number of samples and computation resources, allowing for analysis of large genomic datasets, and compares favorably to other popular tools. PIRATE provides a robust framework for analysing bacterial pangenomes, from largely clonal to panmictic species.}, } @article {pmid31589296, year = {2019}, author = {John, J and George, S and Nori, SRC and Nelson-Sathi, S}, title = {Phylogenomic Analysis Reveals the Evolutionary Route of Resistant Genes in Staphylococcus aureus.}, journal = {Genome biology and evolution}, volume = {11}, number = {10}, pages = {2917-2926}, pmid = {31589296}, issn = {1759-6653}, mesh = {Drug Resistance, Bacterial/*genetics ; *Evolution, Molecular ; Gene Transfer, Horizontal ; Genome, Bacterial ; Phylogeny ; Staphylococcus aureus/classification/drug effects/*genetics ; }, abstract = {Multidrug-resistant Staphylococcus aureus is a leading concern worldwide. Coagulase-Negative Staphylococci are claimed to be the reservoir and source of important resistant elements in S. aureus. However, the origin and evolutionary route of resistant genes in S. aureus are still remaining unknown. Here, we performed a detailed phylogenomic analysis of 152 completely sequenced S. aureus strains in comparison with 7,529 non-Staphylococcus aureus reference bacterial genomes. Our results reveal that S. aureus has a large open pan-genome where 97 (55%) of its known resistant-related genes belonging to its accessory genome. Among these genes, 47 (27%) were located within the Staphylococcal Cassette Chromosome mec (SCCmec), a transposable element responsible for resistance against major classes of antibiotics including beta-lactams, macrolides, and aminoglycosides. However, the physically linked mec-box genes (MecA-MecR-MecI) that are responsible for the maintenance of SCCmec elements is not unique to S. aureus, instead it is widely distributed within Staphylococcaceae family. 
The phyletic patterns of SCCmec-encoded resistant genes in Staphylococcus species are significantly different from that of its core genes indicating frequent exchange of these genes between Staphylococcus species. Our in-depth analysis of SCCmec-resistant gene phylogenies reveals that genes such as blaZ, ble, kmA, and tetK that are responsible for beta-lactam, bleomycin, kanamycin, and tetracycline resistance in S. aureus were laterally transferred from non-Staphylococcus sources. In addition, at least 11 non-SCCmec-encoded resistant genes in S. aureus, were laterally acquired from distantly related species. Our study evidently shows that gene transfers played a crucial role in shaping the evolution of antibiotic resistance in S. aureus.}, } @article {pmid31584869, year = {2020}, author = {Zhao, L and Chen, H and Didelot, X and Li, Z and Li, Y and Chen, M and Du, Y and Zhao, H and Li, J and Hu, Q and Kan, B and Chen, M and Pang, B}, title = {Co-existence of multiple distinct lineages in Vibrio parahaemolyticus serotype O4:K12.}, journal = {Microbial genomics}, volume = {6}, number = {12}, pages = {}, pmid = {31584869}, issn = {2057-5858}, support = {MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {China ; DNA, Bacterial/genetics ; Genetic Variation ; Genomic Islands ; High-Throughput Nucleotide Sequencing ; Humans ; Phylogeny ; Principal Component Analysis ; Serotyping ; Vibrio Infections/*microbiology ; Vibrio parahaemolyticus/*classification/genetics ; Whole Genome Sequencing/*methods ; }, abstract = {Vibrio parahaemolyticus is an important cause of foodborne gastroenteritis globally. Thermostable direct haemolysin (TDH) and the TDH-related haemolysin are the two key virulence factors in V. parahaemolyticus. Vibrio pathogenicity islands harbour the genes encoding these two haemolysins. The serotyping of V. parahaemolyticus is based on the combination of O and K antigens. Frequent recombination has been observed in V. 
parahaemolyticus, including in the genomic regions encoding the O and K antigens. V. parahaemolyticus serotype O4:K12 has caused gastroenteritis outbreaks in the USA and Spain. Recently, outbreaks caused by this serotype of V. parahaemolyticus have been reported in China. However, the relationships among this serotype of V. parahaemolyticus strains isolated in different regions have not been addressed. Here, we investigated the genome variation of the V. parahaemolyticus serotype O4:K12 using the whole-genome sequences of 29 isolates. We determined five distinct lineages in this strain collection. We observed frequent recombination among different lineages. In contrast, little recombination was observed within each individual lineage. We showed that the lineage of this serotype of V. parahaemolyticus isolated in America was different from those isolated in Asia and identified genes that exclusively existed in the strains isolated in America. Pan-genome analysis showed that strain-specific and cluster-specific genes were mostly located in the genomic islands. Pan-genome analysis also showed that the vast majority of the accessory genes in the O4:K12 serotype of V. parahaemolyticus were acquired from within the genus Vibrio. Hence, we have shown that multiple distinct lineages exist in V. parahaemolyticus serotype O4:K12 and have provided more evidence about the gene segregation found in V. 
parahaemolyticus isolated in different continents.}, } @article {pmid31584649, year = {2019}, author = {Li, G and Ji, B and Nielsen, J}, title = {The pan-genome of Saccharomyces cerevisiae.}, journal = {FEMS yeast research}, volume = {19}, number = {7}, pages = {}, doi = {10.1093/femsyr/foz064}, pmid = {31584649}, issn = {1567-1364}, mesh = {Gene Expression Regulation, Fungal ; *Genome, Fungal ; Genotype ; High-Throughput Nucleotide Sequencing ; *Machine Learning ; Phenotype ; Saccharomyces cerevisiae/*genetics ; }, abstract = {Understanding genotype-phenotype relationship is fundamental in biology. With the benefit from next-generation sequencing and high-throughput phenotyping methodologies, there have been generated much genome and phenome data for Saccharomyces cerevisiae. This makes it an excellent model system to understand the genotype-phenotype relationship. In this paper, we presented the reconstruction and application of the yeast pan-genome in resolving genotype-phenotype relationship by a machine learning-assisted approach.}, } @article {pmid31584605, year = {2020}, author = {Ferrés, I and Fresia, P and Iraola, G}, title = {simurg: simulate bacterial pangenomes in R.}, journal = {Bioinformatics (Oxford, England)}, volume = {36}, number = {4}, pages = {1273-1274}, doi = {10.1093/bioinformatics/btz735}, pmid = {31584605}, issn = {1367-4811}, mesh = {Bacteria ; Biological Evolution ; *Genome ; *Software ; }, abstract = {MOTIVATION: The pangenome concept describes genetic variability as the union of genes shared in a set of genomes and constitutes the current paradigm for comparative analysis of bacterial populations. However, there is a lack of tools to simulate pangenome variability and structure using defined evolutionary models.

RESULTS: We developed simurg, an R package that allows to simulate bacterial pangenomes using different combinations of evolutionary constraints such as gene gain, gene loss and mutation rates. Our tool allows the straightforward and reproducible simulation of bacterial pangenomes using real sequence data, providing a valuable tool for benchmarking of pangenome software or comparing evolutionary hypotheses.

AVAILABILITY AND IMPLEMENTATION: The simurg package is released under the GPL-3 license, and is freely available for download from GitHub (https://github.com/iferres/simurg).

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid31579561, year = {2019}, author = {Sabbagh, CRR and Carrere, S and Lonjon, F and Vailleau, F and Macho, AP and Genin, S and Peeters, N}, title = {Pangenomic type III effector database of the plant pathogenic Ralstonia spp.}, journal = {PeerJ}, volume = {7}, number = {}, pages = {e7346}, pmid = {31579561}, issn = {2167-8359}, abstract = {BACKGROUND: The bacterial plant pathogenic Ralstonia species belong to the beta-proteobacteria class and are soil-borne pathogens causing vascular bacterial wilt disease, affecting a wide range of plant hosts. These bacteria form a heterogeneous group considered as a "species complex" gathering three newly defined species. Like many other Gram negative plant pathogens, Ralstonia pathogenicity relies on a type III secretion system, enabling bacteria to secrete/inject a large repertoire of type III effectors into their plant host cells. Type III-secreted effectors (T3Es) are thought to participate in generating a favorable environment for the pathogen (countering plant immunity and modifying the host metabolism and physiology).

METHODS: Expert genome annotation, followed by specific type III-dependent secretion, allowed us to improve our Hidden-Markov-Model and Blast profiles for the prediction of type III effectors.

RESULTS: We curated the T3E repertoires of 12 plant pathogenic Ralstonia strains, representing a total of 12 strains spread over the different groups of the species complex. This generated a pangenome repertoire of 102 T3E genes and 16 hypothetical T3E genes. Using this database, we scanned for the presence of T3Es in the 155 available genomes representing 140 distinct plant pathogenic Ralstonia strains isolated from different host plants in different areas of the globe. All this information is presented in a searchable database. A presence/absence analysis, modulated by a strain sequence/gene annotation quality score, enabled us to redefine core and accessory T3E repertoires.}, } @article {pmid31574353, year = {2019}, author = {Rani, A and Donovan, N and Mantri, N}, title = {Review: The future of plant pathogen diagnostics in a nursery production system.}, journal = {Biosensors \& bioelectronics}, volume = {145}, number = {}, pages = {111631}, doi = {10.1016/j.bios.2019.111631}, pmid = {31574353}, issn = {1873-4235}, mesh = {*Biosensing Techniques ; *Lab-On-A-Chip Devices ; Plant Development/genetics ; Plant Diseases/genetics/*microbiology ; Plants/genetics/*microbiology ; Robotics/trends ; }, abstract = {Plant diseases have a global economic impact through the loss of productivity and trade restrictions. Production of disease free plants in nurseries is crucial for plant survival and productivity in the field environment. Accurate diagnosis of plant pathogens helps to identify appropriate management practices to reduce production losses. Current diagnostic methods for plant pathogens include evaluation of disease symptoms, identification of culturable organisms or direct analysis of infected tissue by serological and molecular methods. Laboratory methods can be laborious, expensive and require specific technical expertise. 
There is a strong demand for the development of rapid, specific, sensitive and cost-effective tests that can be used at the point-of-care in nurseries. This review summarises disease diagnostic methods that have been successfully applied in other fields, and have the potential to transform production in the nursery industry. Emerging technologies include isothermal amplification, nanomaterial-based detection, biosensors, robotics, lab-on-chip, and paper-based analytical devices.}, } @article {pmid31574156, year = {2019}, author = {Song, B and Song, Y and Fu, Y and Kizito, EB and Kamenya, SN and Kabod, PN and Liu, H and Muthemba, S and Kariba, R and Njuguna, J and Maina, S and Stomeo, F and Djikeng, A and Hendre, PS and Chen, X and Chen, W and Li, X and Sun, W and Wang, S and Cheng, S and Muchugi, A and Jamnadass, R and Shapiro, HY and Van Deynze, A and Yang, H and Wang, J and Xu, X and Odeny, DA and Liu, X}, title = {Draft genome sequence of Solanum aethiopicum provides insights into disease resistance, drought tolerance, and the evolution of the genome.}, journal = {GigaScience}, volume = {8}, number = {10}, pages = {}, pmid = {31574156}, issn = {2047-217X}, mesh = {Acclimatization/genetics ; Disease Resistance/genetics ; Droughts ; Evolution, Molecular ; *Genome, Plant ; Phylogeny ; Polymorphism, Single Nucleotide ; Retroelements ; Solanum/*genetics ; Terminal Repeat Sequences ; }, abstract = {BACKGROUND: The African eggplant (Solanum aethiopicum) is a nutritious traditional vegetable used in many African countries, including Uganda and Nigeria. It is thought to have been domesticated in Africa from its wild relative, Solanum anguivi. S. aethiopicum has been routinely used as a source of disease resistance genes for several Solanaceae crops, including Solanum melongena. A lack of genomic resources has meant that breeding of S. aethiopicum has lagged behind other vegetable crops.

RESULTS: We assembled a 1.02-Gb draft genome of S. aethiopicum, which contained predominantly repetitive sequences (78.9%). We annotated 37,681 gene models, including 34,906 protein-coding genes. Expansion of disease resistance genes was observed via 2 rounds of amplification of long terminal repeat retrotransposons, which may have occurred ∼1.25 and 3.5 million years ago, respectively. By resequencing 65 S. aethiopicum and S. anguivi genotypes, 18,614,838 single-nucleotide polymorphisms were identified, of which 34,171 were located within disease resistance genes. Analysis of domestication and demographic history revealed active selection for genes involved in drought tolerance in both "Gilo" and "Shum" groups. A pan-genome of S. aethiopicum was assembled, containing 51,351 protein-coding genes; 7,069 of these genes were missing from the reference genome.

CONCLUSIONS: The genome sequence of S. aethiopicum enhances our understanding of its biotic and abiotic resistance. The single-nucleotide polymorphisms identified are immediately available for use by breeders. The information provided here will accelerate selection and breeding of the African eggplant, as well as other crops within the Solanaceae family.}, } @article {pmid31572328, year = {2019}, author = {Wang, D and Gao, F}, title = {Comprehensive Analysis of Replication Origins in Saccharomyces cerevisiae Genomes.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2122}, pmid = {31572328}, issn = {1664-302X}, abstract = {DNA replication initiates from multiple replication origins (ORIs) in eukaryotes. Discovery and characterization of replication origins are essential for a better understanding of the molecular mechanism of DNA replication. In this study, the features of autonomously replicating sequences (ARSs) in Saccharomyces cerevisiae have been comprehensively analyzed as follows. Firstly, we carried out the analysis of the ARSs available in S. cerevisiae S288C. By evaluating the sequence similarity of experimentally established ARSs, we found that 94.32% of ARSs are unique across the whole genome of S. cerevisiae S288C and those with high sequence similarity are prone to locate in subtelomeres. Subsequently, we built a non-redundant dataset with a total of 520 ARSs, which are based on ARSs annotation of S. cerevisiae S288C from SGD and then supplemented with those from OriDB and DeOri databases. We conducted a large-scale comparison of ORIs among the diverse budding yeast strains from a population genomics perspective. We found that 82.7% of ARSs are not only conserved in genomic sequence but also relatively conserved in chromosomal position. The non-conserved ARSs tend to distribute in the subtelomeric regions. We also conducted a pan-genome analysis of ARSs among the S. 
cerevisiae strains, and a total of 183 core ARSs existing in all yeast strains were determined. We extracted the genes adjacent to replication origins among the 104 yeast strains to examine whether there are differences in their gene functions. The result showed that the genes involved in the initiation of DNA replication, such as orc3, mcm2, mcm4, mcm6, and cdc45, are conservatively located adjacent to the replication origins. Furthermore, we found the genes adjacent to conserved ARSs are significantly enriched in DNA binding, enzyme activity, transportation, and energy, whereas for the genes adjacent to non-conserved ARSs are significantly enriched in response to environmental stress, metabolites biosynthetic process and biosynthesis of antibiotics. In general, we characterized the replication origins from the genome-wide and population genomics perspectives, which would provide new insights into the replication mechanism of S. cerevisiae and facilitate the design of algorithms to identify genome-wide replication origins in yeast.}, } @article {pmid31569751, year = {2019}, author = {Khadke, SP and Kuvalekar, AA and Harsulkar, AM and Mantri, N}, title = {High Energy Intake Induced Overexpression of Transcription Factors and Its Regulatory Genes Involved in Acceleration of Hepatic Lipogenesis: A Rat Model for Type 2 Diabetes.}, journal = {Biomedicines}, volume = {7}, number = {4}, pages = {}, pmid = {31569751}, issn = {2227-9059}, abstract = {Type 2 diabetes mellitus (T2DM) is a metabolic disorder characterized by impaired insulin action and its secretion. The objectives of the present study were to establish an economical and efficient animal model, mimicking pathophysiology of human T2DM to understand probable molecular mechanisms in context with lipid metabolism. In the present study, male Wistar rats were randomly divided into three groups. Animals were fed with high fat diet (HFD) except healthy control (HC) for 12 weeks. 
After eight weeks, intra peritoneal glucose tolerance test was performed. After confirmation of glucose intolerance, diabetic control (DC) group was injected with streptozotocin (STZ) (35 mg/kg b.w., i.p.). HFD fed rats showed increase (p ≤ 0.001) in glucose tolerance and HOMA-IR as compared to HC. Diabetes rats showed abnormal (p ≤ 0.001) lipid profile as compared to HC. The hepatocyte expression of transcription factors SREBP-1c and NFκβ, and their target genes were found to be upregulated, while PPAR-γ, CPT1A and FABP expressions were downregulated as compared to the HC. A number of animal models have been raised for studying T2DM, but the study has been restricted to only the biochemical level. The model is validated at biochemical, molecular and histopathological levels, which can be used for screening new therapeutics for the effective management of T2DM.}, } @article {pmid31561016, year = {2020}, author = {Lecoquierre, F and Cassinari, K and Chambon, P and Nicolas, G and Malsa, S and Marlin, R and Assouline, Y and Fléjou, JF and Frebourg, T and Houdayer, C and Bera, O and Baert-Desurmont, S}, title = {Patients with 10q22.3q23.1 recurrent deletion syndrome are at risk for juvenile polyposis.}, journal = {European journal of medical genetics}, volume = {63}, number = {4}, pages = {103773}, doi = {10.1016/j.ejmg.2019.103773}, pmid = {31561016}, issn = {1878-0849}, mesh = {Adult ; Bone Morphogenetic Protein Receptors, Type I/*genetics ; *Chromosome Deletion ; Chromosome Disorders/*complications ; Chromosomes, Human, Pair 10 ; Female ; Gene Dosage ; Humans ; Intestinal Polyposis/*congenital/diagnosis/etiology ; Neoplastic Syndromes, Hereditary/*diagnosis/etiology ; PTEN Phosphohydrolase/*genetics ; *Point Mutation ; Recurrence ; }, abstract = {Juvenile polyposis syndrome (JPS) is a rare autosomal dominant predisposition to hamartomatous polyps within the gastrointestinal tract, at high risk for malignant transformation. 
BMPR1A and SMAD4 loss-of-function variants account for 50% of the cases. More specifically, point mutations and structural abnormalities in BMPR1A lead to a highly penetrant yet variable phenotype of JPS. Intriguingly, in the developmental disorder caused by a recurrent 10q22.3q23.1 7 Mb deletion which includes BMPR1A, juvenile polyps have never been reported. We present the case of a young adult harboring this recurrent deletion, in a context of intellectual disability, ventricular septal defect and severe juvenile polyposis syndrome diagnosed at the age of 25 years, requiring a surgical preventive colectomy. She developed a gastric adenocarcinoma from which she died at the age of 32. We hypothesize that with the current available pangenomic CNV arrays, the diagnosis of 10q22.3q23.1 deletion is often made several years before the onset of the digestive phenotype, which could explain the absence of reports for juvenile polyps. This observation highlights the importance of an active digestive surveillance of patients with 10q22.3q23.1 deletion.}, } @article {pmid31553100, year = {2020}, author = {Dolatabadian, A and Bayer, PE and Tirnaz, S and Hurgobin, B and Edwards, D and Batley, J}, title = {Characterization of disease resistance genes in the Brassica napus pangenome reveals significant structural variation.}, journal = {Plant biotechnology journal}, volume = {18}, number = {4}, pages = {969-982}, pmid = {31553100}, issn = {1467-7652}, mesh = {Brassica napus/*genetics ; DNA Copy Number Variations ; Disease Resistance/*genetics ; *Genes, Plant ; Polymorphism, Single Nucleotide ; Quantitative Trait Loci ; }, abstract = {Methods based on single nucleotide polymorphism (SNP), copy number variation (CNV) and presence/absence variation (PAV) discovery provide a valuable resource to study gene structure and evolution. However, as a result of these structural variations, a single reference genome is unable to cover the entire gene content of a species. 
Therefore, pangenomics analysis is needed to ensure that the genomic diversity within a species is fully represented. Brassica napus is one of the most important oilseed crops in the world and exhibits variability in its resistance genes across different cultivars. Here, we characterized resistance gene distribution across 50 B. napus lines. We identified a total of 1749 resistance gene analogs (RGAs), of which 996 are core and 753 are variable, 368 of which are not present in the reference genome (cv. Darmor-bzh). In addition, a total of 15 318 SNPs were predicted within 1030 of the RGAs. The results showed that core R-genes harbour more SNPs than variable genes. More nucleotide binding site-leucine-rich repeat (NBS-LRR) genes were located in clusters than as singletons, with variable genes more likely to be found in clusters. We identified 106 RGA candidates linked to blackleg resistance quantitative trait locus (QTL). This study provides a better understanding of resistance genes to target for genomics-based improvement and improved disease resistance.}, } @article {pmid31552103, year = {2019}, author = {Zhang, W and Wang, J and Zhang, D and Liu, H and Wang, S and Wang, Y and Ji, H}, title = {Complete Genome Sequencing and Comparative Genome Characterization of Lactobacillus johnsonii ZLJ010, a Potential Probiotic With Health-Promoting Properties.}, journal = {Frontiers in genetics}, volume = {10}, number = {}, pages = {812}, pmid = {31552103}, issn = {1664-8021}, abstract = {Lactobacillus johnsonii ZLJ010 is a probiotic strain isolated from the feces of a healthy sow and has putative health-promoting properties. To determine the molecular basis underlying the probiotic potential of ZLJ010 and the genes involved in the same, complete genome sequencing and comparative genome analysis with L. johnsonii ZLJ010 were performed. 
The ZLJ010 genome was found to contain a single circular chromosome of 1,999,879 bp with a guanine-cytosine (GC) content of 34.91% and encoded 18 ribosomal RNA (rRNA) genes and 77 transfer RNA (tRNA) genes. From among the 1,959 protein coding sequences (CDSs), genes known to confer probiotic properties were identified, including genes related to stress adaptation, biosynthesis, metabolism, transport of amino acid, secretion, and the defense machinery. ZLJ010 lacked complete or partial biosynthetic pathways for amino acids but was predicted to compensate for this with an enhanced transport system and some unique amino acid permeases and peptidases that allow it to acquire amino acids and other precursors exogenously. The comparative genomic analysis of L. johnsonii ZLP001 and seven other available L. johnsonii strains, including L. johnsonii NCC533, FI9785, DPC6026, N6.2, BS15, UMNLJ22, and PF01, revealed 2,732 pan-genome orthologous gene clusters and 1,324 core-genome orthologous gene clusters. Phylogenomic analysis based on 1,288 single copy genes showed that ZLJ010 had a closer relationship with the BS15 from yogurt and DPC6026 from the porcine intestinal tract but was located on a relatively standalone branch. The number of clusters of unique, strain-specific genes ranged from 42 to 185. A total of 219 unique genes present in the genome of L. johnsonii ZLJ010 primarily encoded proteins that are putatively involved in replication, recombination and repair, defense mechanisms, transcription, amino acid transport and metabolism, and carbohydrate transport and metabolism. Two unique prophages were predicted in the ZLJ010 genome. The present study helps us understand the ability of L. 
johnsonii ZLJ010 to better adapt to the gut environment and also its probiotic functionalities.}, } @article {pmid31552006, year = {2019}, author = {Pain, M and Hjerde, E and Klingenberg, C and Cavanagh, JP}, title = {Comparative Genomic Analysis of Staphylococcus haemolyticus Reveals Key to Hospital Adaptation and Pathogenicity.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2096}, pmid = {31552006}, issn = {1664-302X}, abstract = {Staphylococcus haemolyticus is a skin commensal gaining increased attention as an emerging pathogen of nosocomial infections. However, knowledge about the transition from a commensal to an invasive lifestyle remains sparse and there is a paucity of studies comparing pathogenicity traits between commensal and clinical isolates. In this study, we used a pan-genomic approach to identify factors important for infection and hospital adaptation by exploring the genomic variability of 123 clinical isolates and 46 commensal S. haemolyticus isolates. Phylogenetic reconstruction grouped the 169 isolates into six clades with a distinct distribution of clinical and commensal isolates in the different clades. Phenotypically, multi-drug antibiotic resistance was detected in 108/123 (88%) of the clinical isolates and 5/46 (11%) of the commensal isolates (p < 0.05). In the clinical isolates, we commonly identified a homolog of the serine-rich repeat glycoproteins sraP. Additionally, three novel capsular polysaccharide operons were detected, with a potential role in S. haemolyticus virulence. Clinical S. haemolyticus isolates showed specific signatures associated with successful hospital adaption. Biofilm forming S. haemolyticus isolates that are resistant to oxacillin (mecA) and aminoglycosides (aacA-aphD) are most likely invasive isolates whereas absence of these traits strongly indicates a commensal isolate. 
We conclude that our data show a clear segregation of isolates of commensal origin, and specific genetic signatures distinguishing the clinical isolates from the commensal isolates. The widespread use of antimicrobial agents has probably promoted the development of successful hospital adapted clones of S. haemolyticus clones through acquisition of mobile genetic elements or beneficial point mutations and rearrangements in surface associated genes.}, } @article {pmid31546297, year = {2019}, author = {Heo, S and Lee, J and Lee, JH and Jeong, DW}, title = {Genomic Insight into the Salt Tolerance of Enterococcus faecium, Enterococcus faecalis and Tetragenococcus halophilus.}, journal = {Journal of microbiology and biotechnology}, volume = {29}, number = {10}, pages = {1591--1602}, doi = {10.4014/jmb.1908.08015}, pmid = {31546297}, issn = {1738-8872}, mesh = {Bacterial Proteins/genetics ; Enterococcaceae/genetics/physiology ; Enterococcus faecalis/genetics/*physiology ; Enterococcus faecium/genetics/physiology ; Genome, Bacterial/*genetics ; Membrane Transport Proteins ; Salt Tolerance/*genetics ; Species Specificity ; }, abstract = {To shed light on the genetic basis of salt tolerance in Enterococcus faecium, Enterococcus faecalis, and Tetragenococcus halophilus, we performed comparative genome analysis of 10 E. faecalis, 11 E. faecium, and three T. halophilus strains. Factors involved in salt tolerance that could be used to distinguish the species were identified. Overall, T. halophilus contained a greater number of potassium transport and osmoprotectant synthesis genes compared with the other two species. In particular, our findings suggested that T. halophilus may be the only one among the three species capable of synthesizing glycine betaine from choline, cardiolipin from glycerol and proline from citrate. These molecules are well-known osmoprotectants; thus, we propose that these genes confer the salt-tolerance of T. 
halophilus.}, } @article {pmid31544971, year = {2019}, author = {Hatje, K and Mühlhausen, S and Simm, D and Kollmar, M}, title = {The Protein-Coding Human Genome: Annotating High-Hanging Fruits.}, journal = {BioEssays : news and reviews in molecular, cellular and developmental biology}, volume = {41}, number = {11}, pages = {e1900066}, doi = {10.1002/bies.201900066}, pmid = {31544971}, issn = {1521-1878}, mesh = {Algorithms ; Alternative Splicing/genetics ; Animals ; Exons/genetics ; Genome, Human/*genetics ; Genomics/methods ; Humans ; Proteins/*genetics ; RNA Splicing/genetics ; Transcriptome/genetics ; }, abstract = {The major transcript variants of human protein-coding genes are annotated to a certain degree of accuracy combining manual curation, transcript data, and proteomics evidence. However, there is considerable disagreement on the annotation of about 2000 genes-they can be protein-coding, noncoding, or pseudogenes-and on the annotation of most of the predicted alternative transcripts. Pure transcriptome mapping approaches seem to be limited in discriminating functional expression from noise. These limitations have partially been overcome by dedicated algorithms to detect alternative spliced micro-exons and wobble splice variants. Recently, knowledge about splice mechanism and protein structure are incorporated into an algorithm to predict neighboring homologous exons, often spliced in a mutually exclusive manner. Predicted exons are evaluated by transcript data, structural compatibility, and evolutionary conservation, revealing hundreds of novel coding exons and splice mechanism re-assignments. 
The emerging human pan-genome is necessitating distinctive annotations incorporating differences between individuals and between populations.}, } @article {pmid31529373, year = {2021}, author = {Erwin, DH}, title = {Tempos and modes of collectivity in the history of life.}, journal = {Theory in biosciences = Theorie in den Biowissenschaften}, volume = {140}, number = {4}, pages = {343--351}, pmid = {31529373}, issn = {1611-7530}, support = {NNA13AA90A//NASA Astrobiology Institute/ ; }, mesh = {Animals ; *Biological Evolution ; *Insecta ; Phylogeny ; }, abstract = {Collective integration and processing of information have increased through the history of life, through both the formation of aggregates in which the entities may have very different properties and which jointly coarse-grained environmental variables (ranging from widely varying metabolism in microbial consortia to the ecological diversity of species on reefs) and through collectives of similar entities (such as cells within an organism or social groups). Such increases have been implicated in significant transitions in the history of life, including aspects of the origin of life, the generation of pangenomes among microbes and microbial communities such as stromatolites, multicellularity and social insects. 
This contribution provides a preliminary overview of the dominant modes of collective information processing in the history of life, their phylogenetic distribution and extent of convergence, and the effects of new modes for integrating and acting upon information on the tempo of evolutionary change.}, } @article {pmid31523509, year = {2019}, author = {Yeoman, CJ and Brutscher, LM and Esen, ÖC and Ibaoglu, F and Fowler, C and Eren, AM and Wanner, K and Weaver, DK}, title = {Genome-resolved insights into a novel Spiroplasma symbiont of the Wheat Stem Sawfly (Cephus cinctus).}, journal = {PeerJ}, volume = {7}, number = {}, pages = {e7548}, pmid = {31523509}, issn = {2167-8359}, abstract = {Arthropods often have obligate relationships with symbiotic microbes, and recent investigations have demonstrated that such host-microbe relationships could be exploited to suppress natural populations of vector carrying mosquitos. Strategies that target the interplay between agricultural pests and their symbionts could decrease the burden caused by agricultural pests; however, the lack of comprehensive genomic insights into naturally occurring microbial symbionts presents a significant bottleneck. Here we employed amplicon surveys, genome-resolved metagenomics, and scanning electron microscopy to investigate symbionts of the wheat stem sawfly (Cephus cinctus), a major pest that causes an estimated $350 million dollars or more in wheat yield losses in the northwestern United States annually. Through 16S rRNA gene sequencing of two major haplotypes and life stages of wheat stem sawfly, we show a novel Spiroplasma species is ever-present and predominant, with phylogenomic analyses placing it as a member of the ixodetis clade of mollicutes. Using state-of-the-art metagenomic assembly and binning strategies we were able to reconstruct a 714 Kb, 72.7%-complete Spiroplasma genome, which represents just the second draft genome from the ixodetis clade of mollicutes. 
Functional annotation of the Spiroplasma genome indicated carbohydrate-metabolism involved PTS-mediated import of glucose and fructose followed by glycolysis to lactate, acetate, and propionoate. The bacterium also encoded biosynthetic pathways for essential vitamins B2, B3, and B9. We identified putative Spiroplasma virulence genes: cardiolipin and chitinase. These results identify a previously undescribed symbiosis between wheat stem sawfly and a novel Spiroplasma sp., availing insight into their molecular relationship, and may yield new opportunities for microbially-mediated pest control strategies.}, } @article {pmid31510914, year = {2019}, author = {Sigalova, OM and Chaplin, AV and Bochkareva, OO and Shelyakin, PV and Filaretov, VA and Akkuratov, EE and Burskaia, V and Gelfand, MS}, title = {Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {710}, pmid = {31510914}, issn = {1471-2164}, mesh = {Adaptation, Physiological/*genetics ; Chlamydia/*genetics/*physiology ; Evolution, Molecular ; Genome, Bacterial/genetics ; *Genomics ; Host-Pathogen Interactions/*genetics ; Molecular Sequence Annotation ; *Selection, Genetic ; }, abstract = {BACKGROUND: Chlamydia are ancient intracellular pathogens with reduced, though strikingly conserved genome. Despite their parasitic lifestyle and isolated intracellular environment, these bacteria managed to avoid accumulation of deleterious mutations leading to subsequent genome degradation characteristic for many parasitic bacteria.

RESULTS: We report pan-genomic analysis of sixteen species from genus Chlamydia including identification and functional annotation of orthologous genes, and characterization of gene gains, losses, and rearrangements. We demonstrate the overall genome stability of these bacteria as indicated by a large fraction of common genes with conserved genomic locations. On the other hand, extreme evolvability is confined to several paralogous gene families such as polymorphic membrane proteins and phospholipase D, and likely is caused by the pressure from the host immune system.

CONCLUSIONS: This combination of a large, conserved core genome and a small, evolvable periphery likely reflect the balance between the selective pressure towards genome reduction and the need to adapt to escape from the host immunity.}, } @article {pmid31510650, year = {2019}, author = {Ghaffaari, A and Marschall, T}, title = {Fully-sensitive seed finding in sequence graphs using a hybrid index.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {14}, pages = {i81--i89}, pmid = {31510650}, issn = {1367-4811}, mesh = {*Algorithms ; Alleles ; Diploidy ; *Genome, Human ; Humans ; Sequence Analysis, DNA ; *Software ; }, abstract = {MOTIVATION: Sequence graphs are versatile data structures that are, for instance, able to represent the genetic variation found in a population and to facilitate genome assembly. Read mapping to sequence graphs constitutes an important step for many applications and is usually done by first finding exact seed matches, which are then extended by alignment. Existing methods for finding seed hits prune the graph in complex regions, leading to a loss of information especially in highly polymorphic regions of the genome. While such complex graph structures can indeed lead to a combinatorial explosion of possible alleles, the query set of reads from a diploid individual realizes only two alleles per locus-a property that is not exploited by extant methods.

RESULTS: We present the Pan-genome Seed Index (PSI), a fully-sensitive hybrid method for seed finding, which takes full advantage of this property by combining an index over selected paths in the graph with an index over the query reads. This enables PSI to find all seeds while eliminating the need to prune the graph. We demonstrate its performance with different parameter settings on both simulated data and on a whole human genome graph constructed from variants in the 1000 Genome Project dataset. On this graph, PSI outperforms GCSA2 in terms of index size, query time and sensitivity.

The C++ implementation is publicly available at: https://github.com/cartoonist/psi.}, } @article {pmid31507574, year = {2019}, author = {Espadinha, D and Sobral, RG and Mendes, CI and Méric, G and Sheppard, SK and Carriço, JA and de Lencastre, H and Miragaia, M}, title = {Distinct Phenotypic and Genomic Signatures Underlie Contrasting Pathogenic Potential of Staphylococcus epidermidis Clonal Lineages.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1971}, pmid = {31507574}, issn = {1664-302X}, support = {MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Background: Staphylococcus epidermidis is a common skin commensal that has emerged as a pathogen in hospitals, mainly related to medical devices-associated infections. Noteworthy, infection rates by S. epidermidis have the tendency to rise steeply in next decades together with medical devices use and immunocompromized population growth. Staphylococcus epidermidis population structure includes two major clonal lineages (A/C and B) that present contrasting pathogenic potentials. To address this distinction and explore the basis of increased pathogenicity of A/C lineage, we performed a detailed comparative analysis using phylogenetic and integrated pangenome-wide-association study (panGWAS) approaches and compared the lineages's phenotypes in in vitro conditions mimicking carriage and infection. Results: Each S. epidermidis lineage had distinct phenotypic signatures in skin and infection conditions and differed in genomic content. Combination of phenotypic and genotypic data revealed that both lineages were well adapted to skin environmental cues. 
However, they appear to occupy different skin niches, perform distinct biological functions in the skin and use different mechanisms to complete the same function: lineage B strains showed evidence of specialization to survival in microaerobic and lipid rich environment, characteristic of hair follicle and sebaceous glands; lineage A/C strains showed evidence for adaption to diverse osmotic and pH conditions, potentially allowing them to occupy a broader and more superficial skin niche. In infection conditions, A/C strains had an advantage, having the potential to bind blood-associated host matrix proteins, form biofilms at blood pH, resist antibiotics and macrophage acidity and to produce proteases. These features were observed to be rare in the lineage B strains. PanGWAS analysis produced a catalog of putative S. epidermidis virulence factors and identified an epidemiological molecular marker for the more pathogenic lineage. Conclusion: The prevalence of A/C lineage in infection is probably related to a higher metabolic and genomic versatility that allows rapid adaptation during transition from a commensal to a pathogenic lifestyle. The putative virulence and phenotypic factors associated to A/C lineage constitute a reliable framework for future studies on S. epidermidis pathogenesis and the finding of an epidemiological marker for the more pathogenic lineage is an asset for the management of S. 
epidermidis infections.}, } @article {pmid31506467, year = {2019}, author = {Fariq, A and Blazier, JC and Yasmin, A and Gentry, TJ and Deng, Y}, title = {Whole genome sequence analysis reveals high genetic variation of newly isolated Acidithiobacillus ferrooxidans IO-2C.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {13049}, pmid = {31506467}, issn = {2045-2322}, mesh = {Acidithiobacillus/*genetics ; Environmental Microbiology ; *Genetic Variation ; *Genome, Bacterial ; *Genomics/methods ; Iron/metabolism ; Oxidation-Reduction ; Phylogeny ; Sequence Analysis ; Whole Genome Sequencing ; }, abstract = {Acidithiobacillus ferrooxidans, a chemolithoautotrophic bacterium, is well known for its mineral oxidizing properties. The current study combines experimental and whole genome sequencing approaches to investigate an iron oxidizing, extreme acidophilic bacterium, A. ferrooxidans isolate (IO-2C) from an acid seep area near Carlos, TX, USA. Strain IO-2C was capable of oxidizing iron i.e. iron sulphate and iron ammonium sulphate yielding shwertmannite and jarosite minerals. Further, the bacterium's genome was sequenced, assembled and annotated to study its general features, structure and functions. To determine genetic heterogeneity, it was compared with the genomes of other published A. ferrooxidans strains. Pan-genome analysis displayed low gene conservation and significant genetic diversity in A. ferrooxidans species comprising of 6926 protein coding sequences with 23.04% (1596) core genes, 46.13% (3195) unique and 30.82% (2135) accessory genes. Variant analysis showed >75,000 variants, 287 of them with a predicted high impact, in A. ferrooxidans IO-2C genome compared to the reference strain, resulting in abandonment of some important functional key genes. 
The genome contains numerous functional genes for iron and sulphur metabolism, nitrogen fixation, secondary metabolites, degradation of aromatic compounds, and multidrug and heavy metal resistance. This study demonstrated the bio-oxidation of iron by newly isolated A. ferrooxidans IO-2C under acidic conditions, which was further supported by genomic analysis. Genomic analysis of this strain provided valuable information about the complement of genes responsible for the utilization of iron and tolerance of other metals.}, } @article {pmid31500174, year = {2019}, author = {Kaminski, MA and Sobczak, A and Dziembowski, A and Lipinski, L}, title = {Genomic Analysis of γ-Hexachlorocyclohexane-Degrading Sphingopyxis lindanitolerans WS5A3p Strain in the Context of the Pangenome of Sphingopyxis.}, journal = {Genes}, volume = {10}, number = {9}, pages = {}, pmid = {31500174}, issn = {2073-4425}, mesh = {Bacterial Proteins/genetics/metabolism ; Biodegradation, Environmental ; *Genome, Bacterial ; Hexachlorocyclohexane/*metabolism ; Pesticides/*metabolism ; Sphingomonadaceae/enzymology/*genetics/metabolism ; }, abstract = {Sphingopyxis inhabit diverse environmental niches, including marine, freshwater, oceans, soil and anthropogenic sites. The genus includes 20 phylogenetically distinct, valid species, but only a few with a sequenced genome. In this work, we analyzed the nearly complete genome of the newly described species, Sphingopyxis lindanitolerans, and compared it to the other available Sphingopyxis genomes. The genome included 4.3 Mbp in total and consists of a circular chromosome, and two putative plasmids. Among the identified set of lin genes responsible for γ-hexachlorocyclohexane pesticide degradation, we discovered a gene coding for a new isoform of the LinA protein. 
The significant potential of this species in the remediation of contaminated soil is also correlated with the fact that its genome encodes a higher number of enzymes potentially involved in aromatic compound degradation than for most other Sphingopyxis strains. Additional analysis of 44 Sphingopyxis representatives provides insights into the pangenome of Sphingopyxis and revealed a core of 734 protein clusters and between four and 1667 unique proteins per genome.}, } @article {pmid31484962, year = {2019}, author = {Safari, M and Yakhchali, B and Shariati J, V}, title = {Comprehensive genomic analysis of an indigenous Pseudomonas pseudoalcaligenes degrading phenolic compounds.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {12736}, pmid = {31484962}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/pharmacology ; Biodegradation, Environmental ; Drug Resistance, Bacterial ; Genome, Bacterial ; Genomics ; Iran ; Phenols/chemistry/*metabolism ; Phylogeny ; Pseudomonas pseudoalcaligenes/drug effects/*genetics/isolation & purification/*metabolism ; Soil Pollutants/chemistry/metabolism ; }, abstract = {Environmental contamination with aromatic compounds is a universal challenge. Aromatic-degrading microorganisms isolated from the same or similar polluted environments seem to be more suitable for bioremediation. Moreover, microorganisms adapted to contaminated environments are able to use toxic compounds as the sole sources of carbon and energy. An indigenous strain of Pseudomonas, isolated from the Mahshahr Petrochemical plant in the Khuzestan province, southwest of Iran, was studied genetically. It was characterized as a novel Gram-negative, aerobic, halotolerant, rod-shaped bacterium designated Pseudomonas YKJ, which was resistant to chloramphenicol and ampicillin. Genome of the strain was completely sequenced using Illumina technology to identify its genetic characteristics. 
MLST analysis revealed that the YKJ strain belongs to the genus Pseudomonas indicating the highest sequence similarity with Pseudomonas pseudoalcaligenes strain CECT 5344 (99% identity). Core- and pan-genome analysis indicated that P. pseudoalcaligenes contains 1,671 core and 3,935 unique genes for coding DNA sequences. The metabolic and degradation pathways for aromatic pollutants were investigated using the NCBI and KEGG databases. Genomic and experimental analyses showed that the YKJ strain is able to degrade certain aromatic compounds including bisphenol A, phenol, benzoate, styrene, xylene, benzene and chlorobenzene. Moreover, antibiotic resistance and chemotaxis properties of the YKJ strain were found to be controlled by two-component regulatory systems.}, } @article {pmid31481382, year = {2019}, author = {Tidjani, AR and Lorenzi, JN and Toussaint, M and van Dijk, E and Naquin, D and Lespinet, O and Bontemps, C and Leblond, P}, title = {Massive Gene Flux Drives Genome Diversity between Sympatric Streptomyces Conspecifics.}, journal = {mBio}, volume = {10}, number = {5}, pages = {}, pmid = {31481382}, issn = {2150-7511}, mesh = {Actinobacteria/genetics ; Biosynthetic Pathways/genetics ; Chromosomes, Bacterial ; Conjugation, Genetic ; DNA, Bacterial/genetics ; *Gene Transfer, Horizontal ; Genes, Bacterial/*genetics ; *Genetic Variation ; Genome, Bacterial ; Multigene Family ; Multilocus Sequence Typing ; Phylogeny ; Plasmids ; Streptomyces/*genetics ; }, abstract = {In this work, by comparing genomes of closely related individuals of Streptomyces isolated at a spatial microscale (millimeters or centimeters), we investigated the extent and impact of horizontal gene transfer in the diversification of a natural Streptomyces population. We show that despite these conspecific strains sharing a recent common ancestor, all harbored significantly different gene contents, implying massive and rapid gene flux. 
The accessory genome of the strains was distributed across insertion/deletion events (indels) ranging from one to several hundreds of genes. Indels were preferentially located in the arms of the linear chromosomes (ca. 12 Mb) and appeared to form recombination hot spots. Some of them harbored biosynthetic gene clusters (BGCs) whose products confer an inhibitory capacity and may constitute public goods that can favor the cohesiveness of the bacterial population. Moreover, a significant proportion of these variable genes were either plasmid borne or harbored signatures of actinomycete integrative and conjugative elements (AICEs). We propose that conjugation is the main driver for the indel flux and diversity in Streptomyces populations. IMPORTANCE: Horizontal gene transfer is a rapid and efficient way to diversify bacterial gene pools. Currently, little is known about this gene flux within natural soil populations. Using comparative genomics of Streptomyces strains belonging to the same species and isolated at microscale, we reveal frequent transfer of a significant fraction of the pangenome. We show that it occurs at a time scale enabling the population to diversify and to cope with its changing environment, notably, through the production of public goods.}, } @article {pmid31474554, year = {2019}, author = {Fernie, AR and Aharoni, A}, title = {Pan-Genomic Illumination of Tomato Identifies Novel Gene-Trait Interactions.}, journal = {Trends in plant science}, volume = {24}, number = {10}, pages = {882--884}, doi = {10.1016/j.tplants.2019.08.001}, pmid = {31474554}, issn = {1878-4372}, mesh = {Alleles ; Fruit ; Genome, Plant ; Genomics ; Lighting ; Solanum lycopersicum/*genetics ; }, abstract = {A recent study by Gao et al., (Nat. Genet., 2019) presents a tomato pan-genome that was constructed using genome sequences of 725 phylogenetically and geographically representative accessions. 
The study revealed 4873 genes that are absent from the reference genome, including important genes associated with both disease resistance and flavor, thereby providing an important breeding resource.}, } @article {pmid31462565, year = {2019}, author = {Zeng, C and Gilcrease, EB and Hendrix, RW and Xie, Y and Jalfon, MJ and Gill, JJ and Casjens, SR}, title = {DNA Packaging and Genomics of the Salmonella 9NA-Like Phages.}, journal = {Journal of virology}, volume = {93}, number = {22}, pages = {}, pmid = {31462565}, issn = {1098-5514}, support = {R01 GM051975/GM/NIGMS NIH HHS/United States ; R01 GM114817/GM/NIGMS NIH HHS/United States ; }, mesh = {DNA Packaging/*genetics/physiology ; DNA Replication ; DNA, Viral/genetics ; Genome/genetics ; Genome, Viral/genetics ; Genomics/methods ; Phylogeny ; Salmonella/genetics/metabolism/*virology ; Salmonella Phages/*genetics ; Siphoviridae/genetics/metabolism ; Viral Proteins/genetics ; Virion/genetics ; }, abstract = {We present the genome sequences of Salmonella enterica tailed phages Sasha, Sergei, and Solent. These phages, along with Salmonella phages 9NA, FSL_SP-062, and FSL_SP-069 and the more distantly related Proteus phage PmiS-Isfahan, have similarly sized genomes of between 52 and 57 kbp in length that are largely syntenic. Their genomes also show substantial genome mosaicism relative to one another, which is common within tailed phage clusters. Their gene content ranges from 80 to 99 predicted genes, of which 40 are common to all seven and form the core genome, which includes all identifiable virion assembly and DNA replication genes. The total number of gene types (pangenome) in the seven phages is 176, and 59 of these are unique to individual phages. 
Their core genomes are much more closely related to one another than to the genome of any other known phage, and they comprise a well-defined cluster within the family Siphoviridae. To begin to characterize this group of phages in more experimental detail, we identified the genes that encode the major virion proteins and examined the DNA packaging of the prototypic member, phage 9NA. We show that it uses a pac site-directed headful packaging mechanism that results in virion chromosomes that are circularly permuted and about 13% terminally redundant. We also show that its packaging series initiates with double-stranded DNA cleavages that are scattered across a 170-bp region and that its headful measuring device has a precision of ±1.8%. IMPORTANCE: The 9NA-like phages are clearly highly related to each other but are not closely related to any other known phage type. This work describes the genomes of three new 9NA-like phages and the results of experimental analysis of the proteome of the 9NA virion and DNA packaging into the 9NA phage head. There is increasing interest in the biology of phages because of their potential for use as antibacterial agents and for their ecological roles in bacterial communities. 9NA-like phages that infect two bacterial genera have been identified to date, and related phages infecting additional Gram-negative bacterial hosts are likely to be found in the future. 
This work provides a foundation for the study of these phages, which will facilitate their study and potential use.}, } @article {pmid31461668, year = {2020}, author = {Lee, K and Kim, MS and Lee, JS and Bae, DN and Jeong, N and Yang, K and Lee, JD and Park, JH and Moon, JK and Jeong, SC}, title = {Chromosomal features revealed by comparison of genetic maps of Glycine max and Glycine soja.}, journal = {Genomics}, volume = {112}, number = {2}, pages = {1481--1489}, doi = {10.1016/j.ygeno.2019.08.019}, pmid = {31461668}, issn = {1089-8646}, mesh = {Chromosomes, Plant/*genetics ; Crossing Over, Genetic ; Gene Rearrangement ; *Genetic Linkage ; Plant Breeding ; Quantitative Trait Loci ; Sequence Alignment ; Soybeans/classification/*genetics ; }, abstract = {Recombination is a crucial component of evolution and breeding. New combinations of variation on chromosomes are shaped by recombination. Recombination is also involved in chromosomal rearrangements. However, recombination rates vary tremendously among chromosome segments. Genome-wide genetic maps are one of the best tools to study variation of recombination. Here, we describe high density genetic maps of Glycine max and Glycine soja constructed from four segregating populations. The maps were used to identify chromosomal rearrangements and find the highly predictable pattern of cross-overs on the broad scale in soybean. Markers on these genetic maps were used to evaluate assembly quality of the current soybean reference genome sequence. We find a strong inversion candidate larger than 3 Mb based on patterns of cross-overs. We also identify quantitative trait loci (QTL) that control number of cross-overs. 
This study provides fundamental insights relevant to practical strategy for breeding programs and for pan-genome researches.}, } @article {pmid31455646, year = {2019}, author = {Seif, Y and Monk, JM and Machado, H and Kavvas, E and Palsson, BO}, title = {Systems Biology and Pangenome of Salmonella O-Antigens.}, journal = {mBio}, volume = {10}, number = {4}, pages = {}, pmid = {31455646}, issn = {2150-7511}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Genetic Variation ; Genome, Bacterial/*genetics ; Lipopolysaccharides/*immunology ; Metabolic Networks and Pathways ; O Antigens/biosynthesis/*genetics/immunology ; Salmonella/genetics/*immunology ; Serogroup ; Serotyping ; *Systems Biology ; }, abstract = {O-antigens are glycopolymers in lipopolysaccharides expressed on the cell surface of Gram-negative bacteria. Variability in the O-antigen structure constitutes the basis for the establishment of the serotyping schema. We pursued a two-pronged approach to define the basis for O-antigen structural diversity. First, we developed a bottom-up systems biology approach to O-antigen metabolism by building a reconstruction of Salmonella O-antigen biosynthesis and used it to (i) update 410 existing Salmonella strain-specific metabolic models, (ii) predict a strain's serogroup and its O-antigen glycan synthesis capability (yielding 98% agreement with experimental data), and (iii) extend our workflow to more than 1,400 Gram-negative strains. Second, we used a top-down pangenome analysis to elucidate the genetic basis for intraserogroup O-antigen structural variations. 
We assembled a database of O-antigen gene islands from over 11,000 sequenced Salmonella strains, revealing (i) that gene duplication, pseudogene formation, gene deletion, and bacteriophage insertion elements occur ubiquitously across serogroups; (ii) novel serotypes in the group O:4 B2 variant, as well as an additional genotype variant for group O:4, and (iii) two novel O-antigen gene islands in understudied subspecies. We thus comprehensively defined the genetic basis for O-antigen diversity. IMPORTANCE: Lipopolysaccharides are a major component of the outer membrane in Gram-negative bacteria. They are composed of a conserved lipid structure that is embedded in the outer leaflet of the outer membrane and a polysaccharide known as the O-antigen. O-antigens are highly variable in structure across strains of a species and are crucial to a bacterium's interactions with its environment. They constitute the first line of defense against both the immune system and bacteriophage infections and have been shown to mediate antimicrobial resistance. The significance of our research is in identifying the metabolic and genetic differences within and across O-antigen groups in Salmonella strains. 
Our effort constitutes a first step toward characterizing the O-antigen metabolic network across Gram-negative organisms and a comprehensive overview of genetic variations in Salmonella.}, } @article {pmid31454987, year = {2019}, author = {Xu, L and Ye, KX and Dai, WH and Sun, C and Xu, LH and Han, BN}, title = {Comparative Genomic Insights into Secondary Metabolism Biosynthetic Gene Cluster Distributions of Marine Streptomyces.}, journal = {Marine drugs}, volume = {17}, number = {9}, pages = {}, pmid = {31454987}, issn = {1660-3397}, support = {LGN18C190011//Special Fund for Agro-scientific Research in the Public Interest of Zhejiang Province/ ; LQ19C010006//Natural Science Foundation of Zhejiang Province/ ; 2019M652042//China Postdoctoral Science Foundation/ ; 17042058-Y//Science Foundation of Zhejiang Sci-Tech University/ ; 17042187-Y//Science Foundation of Zhejiang Sci-Tech University/ ; 18-CX-1//Project for Jiaozhou Excellent Innovation Team/ ; }, mesh = {Aquatic Organisms/*genetics/metabolism ; Biological Products/metabolism ; Biosynthetic Pathways/genetics ; *Genes, Bacterial ; Genomics ; *Multigene Family ; Phylogeny ; Secondary Metabolism/*genetics ; Streptomyces/*genetics/metabolism ; }, abstract = {Bacterial secondary metabolites have huge application potential in multiple industries. Biosynthesis of bacterial secondary metabolites are commonly encoded in a set of genes that are organized in the secondary metabolism biosynthetic gene clusters (SMBGCs). The development of genome sequencing technology facilitates mining bacterial SMBGCs. Marine Streptomyces is a valuable resource of bacterial secondary metabolites. In this study, 87 marine Streptomyces genomes were obtained and carried out into comparative genomic analysis, which revealed their high genetic diversity due to pan-genomes owning 123,302 orthologous clusters. 
Phylogenomic analysis indicated that the majority of Marine Streptomyces were classified into three clades named Clade I, II, and III, containing 23, 38, and 22 strains, respectively. Genomic annotations revealed that SMBGCs in the genomes of marine Streptomyces ranged from 16 to 84. Statistical analysis pointed out that phylotypes and ecotypes were both associated with SMBGCs distribution patterns. The Clade I and marine sediment-derived Streptomyces harbored more specific SMBGCs, which consisted of several common ones; whereas the Clade II and marine invertebrate-derived Streptomyces have more SMBGCs, acting as more plentiful resources for mining secondary metabolites. This study is beneficial for broadening our knowledge about SMBGC distribution patterns in marine Streptomyces and developing their secondary metabolites in the future.}, } @article {pmid31426439, year = {2019}, author = {Guo, M and Ren, L and Xu, Y and Liao, B and Song, J and Li, Y and Mantri, N and Guo, B and Chen, S and Pang, X}, title = {Development of Plastid Genomic Resources for Discrimination and Classification of Epimedium wushanense (Berberidaceae).}, journal = {International journal of molecular sciences}, volume = {20}, number = {16}, pages = {}, pmid = {31426439}, issn = {1422-0067}, support = {81573541//National Natural Science Foundation of China/ ; 2017-I2M-1-013//CAMS Innovation Fund for Medical Sciences (CIFMS)/ ; 201808110108//State Scholarship Fund of China/ ; }, mesh = {Codon Usage ; DNA, Plant/genetics ; Epimedium/classification/*genetics ; *Genome, Plastid ; Genomics ; Phylogeny ; Plastids/classification/genetics ; }, abstract = {Epimedium wushanense (Berberidaceae) is recorded as the source plant of Epimedii Wushanensis Folium in the Chinese Pharmacopoeia. However, controversies exist on the classification of E. wushanense and its closely related species, namely, E. pseudowushanense, E. chlorandrum, E. mikinorii, E. ilicifolium, and E. borealiguizhouense. 
These species are often confused with one another because of their highly similar morphological characteristics. This confusion leads to misuse in the medicinal market threatening efficiency and safety. Here, we studied the plastid genomes of these Epimedium species. Results show that the plastid genomes of E. wushanense and its relative species are typical circular tetramerous structure, with lengths of 156,855-158,251 bp. A total of 112 genes were identified from the Epimedium plastid genomes, including 78 protein-coding, 30 tRNA, and 4 rRNA genes. A loss of rpl32 gene in E. chlorandrum was found for the first time in this study. The phylogenetic trees constructed indicated that E. wushanense can be distinguished from its closely related species. E. wushanense shows a closer relationship to species in ser. Dolichocerae. In conclusion, the use of plastid genomes contributes useful genetic information for identifying medicinally important species E. wushanense and provides new evidence for understanding phylogenetic relationships within the Epimedium genus.}, } @article {pmid31409021, year = {2019}, author = {Dar, HA and Zaheer, T and Shehroz, M and Ullah, N and Naz, K and Muhammad, SA and Zhang, T and Ali, A}, title = {Immunoinformatics-Aided Design and Evaluation of a Potential Multi-Epitope Vaccine against Klebsiella Pneumoniae.}, journal = {Vaccines}, volume = {7}, number = {3}, pages = {}, pmid = {31409021}, issn = {2076-393X}, abstract = {Klebsiella pneumoniae is an opportunistic gram-negative bacterium that causes nosocomial infection in healthcare settings. Despite the high morbidity and mortality rate associated with these bacterial infections, no effective vaccine is available to counter the pathogen. In this study, the pangenome of a total of 222 available complete genomes of K. pneumoniae was explored to obtain the core proteome. A reverse vaccinology strategy was applied to the core proteins to identify four antigenic proteins. 
These proteins were then subjected to epitope mapping and prioritization steps to shortlist nine B-cell derived T-cell epitopes which were linked together using GPGPG linkers. An adjuvant (Cholera Toxin B) was also added at the N-terminal of the vaccine construct to improve its immunogenicity and a stabilized multi-epitope protein structure was obtained using molecular dynamics simulation. The designed vaccine exhibited sustainable and strong bonding interactions with Toll-like receptor 2 and Toll-like receptor 4. In silico reverse translation and codon optimization also confirmed its high expression in E. coli K12 strain. The computer-aided analyses performed in this study imply that the designed multi-epitope vaccine can elicit specific immune responses against K. pneumoniae. However, wet lab validation is necessary to further verify the effectiveness of this proposed vaccine candidate.}, } @article {pmid31399846, year = {2019}, author = {Xing, J and Li, X and Sun, Y and Zhao, J and Miao, S and Xiong, Q and Zhang, Y and Zhang, G}, title = {Comparative genomic and functional analysis of Akkermansia muciniphila and closely related species.}, journal = {Genes \& genomics}, volume = {41}, number = {11}, pages = {1253--1264}, pmid = {31399846}, issn = {2092-9293}, mesh = {Akkermansia ; *Genome, Bacterial ; *Phylogeny ; *Polymorphism, Single Nucleotide ; Verrucomicrobia/classification/*genetics ; }, abstract = {BACKGROUND: Akkermansia muciniphila is an important bacterium that resides on the mucus layer of the intestinal tract. Akkermansia muciniphila has a high abundance in human feces and plays an important role in human health.

OBJECTIVE: In this article, 23 whole genome sequences of the Akkermansia genus were comparatively studied.

METHODS: Phylogenetic trees were constructed with three methods: All amino acid sequences of each strain were used to construct the first phylogenetic tree using the web server of Composition Vector Tree Version 3. The matrix of Genome-to-Genome Distances which were obtained from GGDC 2.0 was used to construct the second phylogenetic tree using FastME. The concatenated single-copy core gene-based phylogenetic tree was generated through MEGA. The single-copy genes were obtained using OrthoMCL. Population structure was assessed by STRUCTURE 2.3.4 using the SNPs in core genes. PROKKA and Roary were used to do pan-genome analyses. The biosynthetic gene clusters were predicted using antiSMASH 4.0. IslandViewer 4 was used to detect the genomic islands.

RESULTS: The results of comparative genomic analysis revealed that: (1) The 23 Akkermansia strains formed 4 clades in phylogenetic trees. The A. muciniphila strains isolated from different geographic regions and ecological niches, formed a closely related clade. (2) The 23 Akkermansia strains were divided into 4 species based on digital DNA-DNA hybridization (dDDH) values. (3) Pan-genome of A. muciniphila is in an open state and increases with addition of new sequenced genomes. (4) SNPs were not evenly distributed throughout the A. muciniphila genomes. The genes in regions with high SNP density are related to metabolism and cell wall/membrane envelope biogenesis. (5) The thermostable outer-membrane protein, Amuc_1100, was conserved in the Akkermansia genus, except for Akkermansia glycaniphila Pyt[T].

CONCLUSION: Overall, applying comparative genomic and pan-genomic analyses, we classified and illuminated the phylogenetic relationship of the 23 Akkermansia strains. Insights of the evolutionary, population structure, gene clusters and genome islands of Akkermansia provided more information about the possible physiological and probiotic mechanisms of the Akkermansia strains, and gave some instructions for the in-depth researches about the use of Akkermansia as a gut probiotic in the future.}, } @article {pmid31392469, year = {2019}, author = {Khan, AMAM and Mendoza, C and Hauk, VJ and Blumer-Schuette, SE}, title = {Genomic and physiological analyses reveal that extremely thermophilic Caldicellulosiruptor changbaiensis deploys uncommon cellulose attachment mechanisms.}, journal = {Journal of industrial microbiology \& biotechnology}, volume = {46}, number = {9-10}, pages = {1251--1263}, pmid = {31392469}, issn = {1476-5535}, mesh = {Bacterial Adhesion ; Biomass ; Cellulose/*metabolism ; Clostridiales/genetics/*metabolism ; Genome, Bacterial ; Genomics ; }, abstract = {The genus Caldicellulosiruptor is comprised of extremely thermophilic, heterotrophic anaerobes that degrade plant biomass using modular, multifunctional enzymes. Prior pangenome analyses determined that this genus is genetically diverse, with the current pangenome remaining open, meaning that new genes are expected with each additional genome sequence added. Given the high biodiversity observed among the genus Caldicellulosiruptor, we have sequenced and added a 14th species, Caldicellulosiruptor changbaiensis, to the pangenome. The pangenome now includes 3791 ortholog clusters, 120 of which are unique to C. changbaiensis and may be involved in plant biomass degradation. Comparisons between C. 
changbaiensis and Caldicellulosiruptor bescii on the basis of growth kinetics, cellulose solubilization and cell attachment to polysaccharides highlighted physiological differences between the two species which are supported by their respective gene inventories. Most significantly, these comparisons indicated that C. changbaiensis possesses uncommon cellulose attachment mechanisms not observed among the other strongly cellulolytic members of the genus Caldicellulosiruptor.}, } @article {pmid31375706, year = {2019}, author = {Chapeton-Montes, D and Plourde, L and Bouchier, C and Ma, L and Diancourt, L and Criscuolo, A and Popoff, MR and Brüggemann, H}, title = {The population structure of Clostridium tetani deduced from its pan-genome.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {11220}, pmid = {31375706}, issn = {2045-2322}, mesh = {Clostridium tetani/*genetics/pathogenicity ; Collagenases/genetics ; Conserved Sequence ; Genome, Bacterial/*genetics ; Neurotoxins/genetics ; Phylogeny ; Species Specificity ; Tetanus Toxin/genetics ; Virulence Factors/genetics ; }, abstract = {Clostridium tetani produces a potent neurotoxin, the tetanus neurotoxin (TeNT) that is responsible for the worldwide neurological disease tetanus, but which can be efficiently prevented by vaccination with tetanus toxoid. Until now only one type of TeNT has been characterized and very little information exists about the heterogeneity among C. tetani strains. We report here the genome sequences of 26 C. tetani strains, isolated between 1949 and 2017 and obtained from different locations. Genome analyses revealed that the C. tetani population is distributed in two phylogenetic clades, a major and a minor one, with no evidence for clade separation based on geographical origin or time of isolation. The chromosome of C. tetani is highly conserved; in contrast, the TeNT-encoding plasmid shows substantial heterogeneity. 
TeNT itself is highly conserved among all strains; the most relevant difference is an insertion of four amino acids in the C-terminal receptor-binding domain in four strains that might impact on receptor-binding properties. Other putative virulence factors, including tetanolysin and collagenase, are encoded in all genomes. This study highlights the population structure of C. tetani and suggests that tetanus-causing strains did not undergo extensive evolutionary diversification, as judged from the high conservation of its main virulence factors.}, } @article {pmid31375067, year = {2019}, author = {Faoro, H and Oliveira, WK and Weiss, VA and Tadra-Sfeir, MZ and Cardoso, RL and Balsanelli, E and Brusamarello-Santos, LCC and Camilios-Neto, D and Cruz, LM and Raittz, RT and Marques, ACQ and LiPuma, J and Fadel-Picheth, CMT and Souza, EM and Pedrosa, FO}, title = {Genome comparison between clinical and environmental strains of Herbaspirillum seropedicae reveals a potential new emerging bacterium adapted to human hosts.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {630}, pmid = {31375067}, issn = {1471-2164}, support = {573828/2008-3//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; }, mesh = {Adaptation, Physiological/*genetics ; *Environment ; Evolution, Molecular ; Genome, Bacterial/genetics ; Genomic Islands/genetics ; *Genomics ; Herbaspirillum/*genetics/metabolism/*physiology ; Host-Pathogen Interactions/*genetics ; Humans ; Lipopolysaccharides/biosynthesis ; Phylogeny ; Siderophores/biosynthesis ; Species Specificity ; }, abstract = {BACKGROUND: Herbaspirillum seropedicae is an environmental β-proteobacterium that is capable of promoting the growth of economically relevant plants through biological nitrogen fixation and phytohormone production. However, strains of H. seropedicae have been isolated from immunocompromised patients and associated with human infections and deaths. 
In this work, we sequenced the genomes of two clinical strains of H. seropedicae, AU14040 and AU13965, and compared them with the genomes of strains described as having an environmental origin.

RESULTS: Both genomes were closed, indicating a single circular chromosome; however, strain AU13965 also carried a plasmid of 42,977 bp, the first described in the genus Herbaspirillum. Genome comparison revealed that the clinical strains lost the gene sets related to biological nitrogen fixation (nif) and the type 3 secretion system (T3SS), which has been described to be essential for interactions with plants. Comparison of the pan-genomes of clinical and environmental strains revealed different sets of accessorial genes. However, antimicrobial resistance genes were found in the same proportion in all analyzed genomes. The clinical strains also acquired new genes and genomic islands that may be related to host interactions. Among the acquired islands was a cluster of genes related to lipopolysaccharide (LPS) biosynthesis. Although highly conserved in environmental strains, the LPS biosynthesis genes in the two clinical strains presented unique and non-orthologous genes within the genus Herbaspirillum. Furthermore, the AU14040 strain cluster contained the neuABC genes, which are responsible for sialic acid (Neu5Ac) biosynthesis, indicating that this bacterium could add it to its lipopolysaccharide. The Neu5Ac-linked LPS could increase the bacterial resilience in the host aiding in the evasion of the immune system.

CONCLUSIONS: Our findings suggest that the lifestyle transition from environment to opportunist led to the loss and acquisition of specific genes allowing adaptations to colonize and survive in new hosts. It is possible that these substitutions may be the starting point for interactions with new hosts.}, } @article {pmid31371776, year = {2019}, author = {Saad, J and Phelippeau, M and Khoder, M and Lévy, M and Musso, D and Drancourt, M}, title = {"Mycobacterium mephinesia", a Mycobacterium terrae complex species of clinical interest isolated in French Polynesia.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {11169}, pmid = {31371776}, issn = {2045-2322}, mesh = {Bronchoalveolar Lavage ; Genome, Bacterial ; Humans ; Lung/microbiology ; Male ; Middle Aged ; Mycobacterium/genetics/*isolation & purification ; Nontuberculous Mycobacteria/*isolation & purification ; Polynesia ; RNA, Ribosomal, 16S ; Sequence Analysis, DNA ; }, abstract = {A 59-year-old tobacco smoker male with chronic bronchitis living in Taravao, French Polynesia, Pacific, presented with a two-year growing nodule in the middle lobe of the right lung. A guided bronchoalveolar lavage inoculated onto Löwenstein-Jensen medium yielded colonies of a rapidly-growing non-chromogenic mycobacterium designed as isolate P7213. The isolate could not be identified using routine matrix-assisted laser desorption ionization-time of flight-mass spectrometry and phenotypic and probe-hybridization techniques and yielded 100% and 97% sequence similarity with the respective 16S rRNA and rpoB gene sequences of Mycobacterium virginiense in the Mycobacterium terrae complex. Electron microscopy showed a 1.15 µm long and 0.38 µm large bacillus which was in vitro susceptible to rifampicin, rifabutin, ethambutol, isoniazid, doxycycline and kanamycin. Its 4,511,948-bp draft genome exhibited a 67.6% G + C content with 4,153 coding-protein genes and 87 predicted RNA genes. 
Genome sequence-derived DNA-DNA hybridization, OrthoANI and pangenome analysis confirmed isolate P7213 was representative of a new species in the M. terrae complex. We named this species "Mycobacterium mephinesia".}, } @article {pmid31371382, year = {2019}, author = {O'Connor, E and McGowan, J and McCarthy, CGP and Amini, A and Grogan, H and Fitzpatrick, DA}, title = {Whole Genome Sequence of the Commercially Relevant Mushroom Strain Agaricus bisporus var. bisporus ARP23.}, journal = {G3 (Bethesda, Md.)}, volume = {9}, number = {10}, pages = {3057--3066}, pmid = {31371382}, issn = {2160-1836}, mesh = {Agaricales/*classification/*genetics ; Agaricus/*classification/*genetics ; Computational Biology/methods ; *Genome, Fungal ; Molecular Sequence Annotation ; *Molecular Typing ; Phylogeny ; Quantitative Trait Loci ; *Whole Genome Sequencing ; }, abstract = {Agaricus bisporus is an extensively cultivated edible mushroom. Demand for cultivation is continuously growing and difficulties associated with breeding programs now means strains are effectively considered monoculture. While commercial growing practices are highly efficient and tightly controlled, the over-use of a single strain has led to a variety of disease outbreaks from a range of pathogens including bacteria, fungi and viruses. To address this, the Agaricus Resource Program (ARP) was set up to collect wild isolates from diverse geographical locations through a bounty-driven scheme to create a repository of wild Agaricus germplasm. One of the strains collected, Agaricus bisporus var. bisporus ARP23, has been crossed extensively with white commercial varieties leading to the generation of a novel hybrid with a dark brown pileus commonly referred to as 'Heirloom'. Heirloom has been successfully implemented into commercial mushroom cultivation. In this study the whole genome of Agaricus bisporus var. bisporus ARP23 was sequenced and assembled with Illumina and PacBio sequencing technology. 
The final genome was found to be 33.49 Mb in length and have significant levels of synteny to other sequenced Agaricus bisporus strains. Overall, 13,030 putative protein coding genes were located and annotated. Relative to the other A. bisporus genomes that are currently available, Agaricus bisporus var. bisporus ARP23 is the largest A. bisporus strain in terms of gene number and genetic content sequenced to date. Comparative genomic analysis shows that the A. bisporus mating loci are unifactorial and unsurprisingly highly conserved between strains. The lignocellulolytic gene content of all A. bisporus strains compared is also very similar. Our results show that the pangenome structure of A. bisporus is quite diverse with between 60-70% of the total protein coding genes per strain considered as being orthologous and syntenically conserved. These analyses and the genome sequence described herein are the starting point for more detailed molecular analyses into the growth and phenotypical responses of Agaricus bisporus var. bisporus ARP23 when challenged with economically important mycoviruses.}, } @article {pmid31366358, year = {2019}, author = {Duan, Z and Qiao, Y and Lu, J and Lu, H and Zhang, W and Yan, F and Sun, C and Hu, Z and Zhang, Z and Li, G and Chen, H and Xiang, Z and Zhu, Z and Zhao, H and Yu, Y and Wei, C}, title = {HUPAN: a pan-genome analysis pipeline for human genomes.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {149}, pmid = {31366358}, issn = {1474-760X}, mesh = {Asian People/genetics ; Black People/genetics ; *Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; Proteins/genetics ; Sequence Analysis, DNA ; *Software ; }, abstract = {The human reference genome is still incomplete, especially for those population-specific or individual-specific regions, which may have important functions. Here, we developed a HUman Pan-genome ANalysis (HUPAN) system to build the human pan-genome. 
We applied it to 185 deep sequencing and 90 assembled Han Chinese genomes and detected 29.5 Mb novel genomic sequences and at least 188 novel protein-coding genes missing in the human reference genome (GRCh38). It can be an important resource for the human genome-related biomedical studies, such as cancer genome analysis. HUPAN is freely available at http://cgm.sjtu.edu.cn/hupan/ and https://github.com/SJTU-CGM/HUPAN .}, } @article {pmid31361894, year = {2019}, author = {Pensar, J and Puranen, S and Arnold, B and MacAlasdair, N and Kuronen, J and Tonkin-Hill, G and Pesonen, M and Xu, Y and Sipola, A and Sánchez-Busó, L and Lees, JA and Chewapreecha, C and Bentley, SD and Harris, SR and Parkhill, J and Croucher, NJ and Corander, J}, title = {Genome-wide epistasis and co-selection study using mutual information.}, journal = {Nucleic acids research}, volume = {47}, number = {18}, pages = {e112}, pmid = {31361894}, issn = {1362-4962}, support = {098051//Wellcome Trust/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; 107378/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; 098051/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Computational Biology/*methods ; Drug Resistance, Microbial/genetics ; *Epistasis, Genetic ; Genome, Bacterial/*genetics ; *Genomics ; Humans ; Metagenomics/methods ; Neisseria meningitidis/genetics/pathogenicity ; Streptococcus pneumoniae/genetics ; Virulence/genetics ; }, abstract = {Covariance-based discovery of polymorphisms under co-selective pressure or epistasis has received considerable recent attention in population genomics. Both statistical modeling of the population level covariation of alleles across the chromosome and model-free testing of dependencies between pairs of polymorphisms have been shown to successfully uncover patterns of selection in bacterial populations. Here we introduce a model-free method, SpydrPick, whose computational efficiency enables analysis at the scale of pan-genomes of many bacteria. 
SpydrPick incorporates an efficient correction for population structure, which adjusts for the phylogenetic signal in the data without requiring an explicit phylogenetic tree. We also introduce a new type of visualization of the results similar to the Manhattan plots used in genome-wide association studies, which enables rapid exploration of the identified signals of co-evolution. Simulations demonstrate the usefulness of our method and give some insight to when this type of analysis is most likely to be successful. Application of the method to large population genomic datasets of two major human pathogens, Streptococcus pneumoniae and Neisseria meningitidis, revealed both previously identified and novel putative targets of co-selection related to virulence and antibiotic resistance, highlighting the potential of this approach to drive molecular discoveries, even in the absence of phenotypic data.}, } @article {pmid31357515, year = {2019}, author = {Pujol, P and De La Motte Rouge, T and Penault-Llorca, F}, title = {From Targeting Somatic Mutations to Finding Inherited Cancer Predispositions: The Other Side of the Coin.}, journal = {Diagnostics (Basel, Switzerland)}, volume = {9}, number = {3}, pages = {}, pmid = {31357515}, issn = {2075-4418}, abstract = {The expanding use of tumor genome analysis by next generation sequencing to drive target therapies has led to increased germline findings in genes predisposing to hereditary cancer. These putative germline findings obtained from theranostic analyses, such as BRCA1/2 gene testing, large panels, whole-exome, or whole-genome sequencing, need to be managed carefully and in an anticipated way with the patient. Before the genetic analysis of a tumor, specific information should be given to patients, who should be aware that the results may have extra-therapeutic medical issues for themselves and relatives. 
We previously published a list of 36 actionable genes predisposing to cancer for which informing the patient is recommended prior to pangenomic germline analysis because of available screening or preventive strategies. Here, we report clinical practice considerations and schemes for managing germline findings in tumor analyses, including written informed consent and a multidisciplinary approach involving an oncologist, molecular biologist/pathologist, and geneticist in case of germline findings. A somatic result showing a deleterious mutation in a known predisposing gene in a patient who has consented to this purpose should result in referral to a geneticist who is part of the multidisciplinary team. At any time of the somatic analysis process, the patient may have access to a geneticist consultation if additional information is required. This framework will optimally manage both personalized theranostic issues and specific preventive strategies for individuals and relatives; it will also simplify and accelerate the process of genetic testing.}, } @article {pmid31350563, year = {2019}, author = {Richards, VP and Velsko, IM and Alam, MT and Zadoks, RN and Manning, SD and Pavinski Bitar, PD and Hassler, HB and Crestani, C and Springer, GH and Probert, BM and Town, CD and Stanhope, MJ}, title = {Population Gene Introgression and High Genome Plasticity for the Zoonotic Pathogen Streptococcus agalactiae.}, journal = {Molecular biology and evolution}, volume = {36}, number = {11}, pages = {2572--2590}, pmid = {31350563}, issn = {1537-1719}, abstract = {The influence that bacterial adaptation (or niche partitioning) within species has on gene spillover and transmission among bacterial populations occupying different niches is not well understood. Streptococcus agalactiae is an important bacterial pathogen that has a taxonomically diverse host range making it an excellent model system to study these processes. 
Here, we analyze a global set of 901 genome sequences from nine diverse host species to advance our understanding of these processes. Bayesian clustering analysis delineated 12 major populations that closely aligned with niches. Comparative genomics revealed extensive gene gain/loss among populations and a large pan genome of 9,527 genes, which remained open and was strongly partitioned among niches. As a result, the biochemical characteristics of 11 populations were highly distinctive (significantly enriched). Positive selection was detected and biochemical characteristics of the dispensable genes under selection were enriched in ten populations. Despite the strong gene partitioning, phylogenomics detected gene spillover. In particular, tetracycline resistance (which likely evolved in the human-associated population) from humans to bovine, canines, seals, and fish, demonstrating how a gene selected in one host can ultimately be transmitted into another, and biased transmission from humans to bovines was confirmed with a Bayesian migration analysis. Our findings show high bacterial genome plasticity acting in balance with selection pressure from distinct functional requirements of niches that is associated with an extensive and highly partitioned dispensable genome, likely facilitating continued and expansive adaptation.}, } @article {pmid31333599, year = {2019}, author = {Naidenov, B and Lim, A and Willyerd, K and Torres, NJ and Johnson, WL and Hwang, HJ and Hoyt, P and Gustafson, JE and Chen, C}, title = {Pan-Genomic and Polymorphic Driven Prediction of Antibiotic Resistance in Elizabethkingia.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1446}, pmid = {31333599}, issn = {1664-302X}, abstract = {The Elizabethkingia are a genetically diverse genus of emerging pathogens that exhibit multidrug resistance to a range of common antibiotics. Two representative species, Elizabethkingia bruuniana and E. 
meningoseptica, were phenotypically tested to determine minimum inhibitory concentrations (MICs) for five antibiotics. Ultra-long read sequencing with Oxford Nanopore Technologies (ONT) and subsequent de novo assembly produced complete, gapless circular genomes for each strain. Alignment based annotation with Prokka identified 5,480 features in E. bruuniana and 5,203 features in E. meningoseptica, where none of these identified genes or gene combinations corresponded to observed phenotypic resistance values. Pan-genomic analysis, performed with an additional 19 Elizabethkingia strains, identified a core-genome size of 2,658,537 bp, 32 uniquely identifiable intrinsic chromosomal antibiotic resistance core-genes and 77 antibiotic resistance pan-genes. Using core-SNPs and pan-genes in combination with six machine learning (ML) algorithms, binary classification of clindamycin and vancomycin resistance achieved f1 scores of 0.94 and 0.84, respectively. Performance on the more challenging multiclass problem for fusidic acid, rifampin and ciprofloxacin resulted in f1 scores of 0.70, 0.75, and 0.54, respectively. 
By producing two sets of quality biological predictors, pan-genome genes and core-genome SNPs, from long-read sequence data and applying an ensemble of ML techniques, our results demonstrated that accurate phenotypic inference, at multiple AMR resolutions, can be achieved.}, } @article {pmid31329231, year = {2019}, author = {Fenske, GJ and Thachil, A and McDonough, PL and Glaser, A and Scaria, J}, title = {Geography Shapes the Population Genomics of Salmonella enterica Dublin.}, journal = {Genome biology and evolution}, volume = {11}, number = {8}, pages = {2220-2231}, pmid = {31329231}, issn = {1759-6653}, mesh = {Animals ; Bacterial Proteins/*genetics ; Cattle ; *Evolution, Molecular ; Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; *Metagenomics ; *Phylogeography ; Salmonella enterica/classification/*genetics ; Serogroup ; Transcriptome ; Virulence ; Virulence Factors/*genetics ; }, abstract = {Salmonella enterica serotype Dublin (S. Dublin) is a bovine-adapted serotype that can cause serious systemic infections in humans. Despite the increasing prevalence of human infections and the negative impact on agricultural processes, little is known about the population structure of the serotype. To this end, we compiled a manually curated data set comprising of 880 S. Dublin genomes. Core genome phylogeny and ancestral state reconstruction revealed that region-specific clades dominate the global population structure of S. Dublin. Strains of S. Dublin in the UK are genomically distinct from US, Brazilian, and African strains. The geographical partitioning impacts the composition of the core genome as well as the ancillary genome. Antibiotic resistance genes are almost exclusively found in US genomes and are mediated by an IncA/C2 plasmid. Phage content and the S. Dublin virulence plasmid were strongly conserved in the serotype. Comparison of S. Dublin to a closely related serotype, S. enterica serotype Enteritidis, revealed that S. 
Dublin contains 82 serotype specific genes that are not found in S. Enteritidis. Said genes encode metabolic functions involved in the uptake and catabolism of carbohydrates and virulence genes associated with type VI secretion systems and fimbria assembly respectively.}, } @article {pmid31319017, year = {2019}, author = {Passarelli-Araujo, H and Palmeiro, JK and Moharana, KC and Pedrosa-Silva, F and Dalla-Costa, LM and Venancio, TM}, title = {Genomic analysis unveils important aspects of population structure, virulence, and antimicrobial resistance in Klebsiella aerogenes.}, journal = {The FEBS journal}, volume = {286}, number = {19}, pages = {3797-3810}, doi = {10.1111/febs.15005}, pmid = {31319017}, issn = {1742-4658}, support = {//Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro/International ; //Conselho Nacional de Desenvolvimento Científico e Tecnológico/International ; //Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/International ; //UENF/International ; }, mesh = {Anti-Bacterial Agents/*pharmacology ; Bacteriophages/isolation & purification ; Drug Resistance, Bacterial/*genetics ; Enterobacter aerogenes/drug effects/*genetics/*pathogenicity ; *Genome, Bacterial ; Plasmids ; Virulence/*genetics ; }, abstract = {Klebsiella aerogenes is an important pathogen in healthcare-associated infections. Nevertheless, in comparison to other clinically important pathogens, K. aerogenes population structure, genetic diversity, and pathogenicity remain poorly understood. Here, we elucidate K. aerogenes clonal complexes (CCs) and genomic features associated with resistance and virulence. We present a detailed description of the population structure of K. aerogenes based on 97 publicly available genomes by using both multilocus sequence typing and single-nucleotide polymorphisms extracted from the core genome. 
We also assessed virulence and resistance profiles using Virulence Finder Database and Comprehensive Antibiotic Resistance Database, respectively. We show that K. aerogenes has an open pangenome and a large effective population size, which account for its high genomic diversity and support that negative selection prevents fixation of most deleterious alleles. The population is structured in at least 10 CCs, including two novel ones identified here, CC9 and CC10. The repertoires of resistance genes comprise a high number of antibiotic efflux proteins as well as narrow- and extended-spectrum β-lactamases. Regarding the population structure, we identified two clusters based on virulence profiles because of the presence of the toxin-encoding clb operon and the siderophore production genes, irp and ybt. Notably, CC3 comprises the majority of K. aerogenes isolates associated with hospital outbreaks, emphasizing the importance of constant monitoring of this pathogen. Collectively, our results may provide a foundation for the development of new therapeutic and surveillance strategies worldwide.}, } @article {pmid31316488, year = {2019}, author = {Chen, SL}, title = {Genomic Insights Into the Distribution and Evolution of Group B Streptococcus.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1447}, pmid = {31316488}, issn = {1664-302X}, abstract = {Streptococcus agalactiae, also known as Group B Streptococcus (GBS), is a bacteria with truly protean biology. It infects a variety of hosts, among which the most commonly studied are humans, cattle, and fish. GBS holds a singular position in the history of bacterial genomics, as it was the substrate used to describe one of the first major conceptual advances of comparative genomics, the idea of the pan-genome. 
In this review, I describe a brief history of GBS and the major contributions of genomics to understanding its genome plasticity and evolution as well as its molecular epidemiology, focusing on the three hosts mentioned above. I also discuss one of the major recent paradigm shifts in our understanding of GBS evolution and disease burden: foodborne GBS can cause invasive infections in humans.}, } @article {pmid31316072, year = {2019}, author = {Xia, Q and Pan, L and Zhang, R and Ni, X and Wang, Y and Dong, X and Gao, Y and Zhang, Z and Kui, L and Li, Y and Wang, W and Yang, H and Chen, C and Miao, J and Chen, W and Dong, Y}, title = {The genome assembly of asparagus bean, Vigna unguiculata ssp. sesquipedialis.}, journal = {Scientific data}, volume = {6}, number = {1}, pages = {124}, pmid = {31316072}, issn = {2052-4463}, support = {NO.31501369//National Natural Science Foundation of China (National Science Foundation of China)/International ; }, mesh = {Chromosome Mapping ; DNA Transposable Elements ; Genetic Linkage ; *Genome, Plant ; Genomics ; Molecular Sequence Annotation ; Vigna/*genetics ; Whole Genome Sequencing ; }, abstract = {Asparagus bean (Vigna unguiculata ssp. sesquipedialis), known for its very long and tender green pods, is an important vegetable crop broadly grown in the developing Asian countries. In this study, we reported a 632.8 Mb assembly (549.81 Mb non-N size) of asparagus bean based on the whole genome shotgun sequencing strategy. We also generated a linkage map for asparagus bean, which helped anchor 94.42% of the scaffolds into 11 pseudo-chromosomes. A total of 42,609 protein-coding genes and 3,579 non-protein-coding genes were predicted from the assembly. Taken together, these genomic resources of asparagus bean will help develop a pan-genome of V. 
unguiculata and facilitate the investigation of economically valuable traits in this species, so that the cultivation of this plant would help combat the protein and energy malnutrition in the developing world.}, } @article {pmid31310202, year = {2019}, author = {Yahara, K and Lehours, P and Vale, FF}, title = {Analysis of genetic recombination and the pan-genome of a highly recombinogenic bacteriophage species.}, journal = {Microbial genomics}, volume = {5}, number = {8}, pages = {}, pmid = {31310202}, issn = {2057-5858}, mesh = {Bacteriophages/genetics ; Genetic Variation/genetics ; Genome, Bacterial/genetics ; Genome, Viral/genetics ; Helicobacter pylori/*genetics/virology ; Phylogeny ; Prophages/*genetics ; Recombination, Genetic/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {Bacteriophages are the most prevalent biological entities impacting on the ecosystem and are characterized by their extensive diversity. However, there are two aspects of phages that have remained largely unexplored: genetic flux by recombination between phage populations and characterization of specific phages in terms of the pan-genome. Here, we examined the recombination and pan-genome in Helicobacter pylori prophages at both the genome and gene level. In the genome-level analysis, we applied, for the first time, chromosome painting and fineSTRUCTURE algorithms to a phage species, and showed novel trends in inter-population genetic flux. Notably, hpEastAsia is a phage population that imported a higher proportion of DNA fragments from other phages, whereas the hpSWEurope phages showed weaker signatures of inter-population recombination, suggesting genetic isolation. The gene-level analysis showed that, after parameter tuning of the prokaryote pan-genome analysis program, H. pylori phages have a pan-genome consisting of 75 genes and a soft-core genome of 10 genes, which includes genes involved in the lytic and lysogenic life cycles. 
Quantitative analysis of recombination events of the soft-core genes showed no substantial variation in the intensity of recombination across the genes, but rather equally frequent recombination among housekeeping genes that were previously reported to be less prone to recombination. The signature of frequent recombination appears to reflect the host-phage evolutionary arms race, either by contributing to escape from bacterial immunity or by protecting the host by producing defective phages.}, } @article {pmid31302711, year = {2019}, author = {Paterson, ML and Ranasinghe, D and Blom, J and Dover, LG and Sutcliffe, IC and Lopes, B and Sangal, V}, title = {Genomic analysis of a novel Rhodococcus (Prescottella) equi isolate from a bovine host.}, journal = {Archives of microbiology}, volume = {201}, number = {9}, pages = {1317-1321}, pmid = {31302711}, issn = {1432-072X}, mesh = {Actinomycetales Infections/microbiology/*veterinary ; Amino Acid Sequence/genetics ; Animals ; Bacterial Proteins/genetics ; Cattle ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Genomics ; Horses ; Humans ; Multigene Family/genetics ; Plasmids/genetics ; Pneumonia, Bacterial/microbiology/*veterinary ; Rhodococcus equi/*genetics/isolation & purification ; Vesicular Transport Proteins/genetics ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Rhodococcus (Prescottella) equi causes pneumonia-like infections in foals with high mortality rates and can also infect a number of other animals. R. equi is also emerging as an opportunistic human pathogen. In this study, we have sequenced the genome of a novel R. equi isolate, B0269, isolated from the faeces of a bovine host. Comparative genomic analyses with seven other published R. equi genomes, including those from equine or human sources, revealed a pangenome comprising of 6876 genes with 4141 genes in the core genome. 
Two hundred and seventy-five genes were specific to the bovine isolate, mostly encoding hypothetical proteins of unknown function. However, these genes include four copies of terA and five copies of terD genes that may be involved in responding to chemical stress. Virulence characteristics in R. equi are associated with the presence of large plasmids carrying a pathogenicity island, including genes from the vap multigene family. A BLAST search of the protein sequences from known virulence-associated plasmids (pVAPA, pVAPB and pVAPN) revealed a similar plasmid backbone on two contigs in bovine isolate B0269; however, no homologues of the main virulence-associated genes, vapA, vapB or vapN, were identified. In summary, this study confirms that R. equi genomes are highly conserved and reports the presence of an apparently novel plasmid in the bovine isolate B0269 that needs further characterisation to understand its potential involvement in virulence properties.}, } @article {pmid31300285, year = {2019}, author = {Kingstad-Bakke, BA and Chandrasekar, SS and Phanse, Y and Ross, KA and Hatta, M and Suresh, M and Kawaoka, Y and Osorio, JE and Narasimhan, B and Talaat, AM}, title = {Effective mosaic-based nanovaccines against avian influenza in poultry.}, journal = {Vaccine}, volume = {37}, number = {35}, pages = {5051-5058}, doi = {10.1016/j.vaccine.2019.06.077}, pmid = {31300285}, issn = {1873-2518}, mesh = {Animals ; Antibodies, Viral/*blood ; Chickens/immunology ; Hemagglutinin Glycoproteins, Influenza Virus/chemistry/immunology ; Immunity, Cellular ; Immunity, Humoral ; Influenza A Virus, H1N1 Subtype ; Influenza A Virus, H5N1 Subtype ; Influenza A Virus, H5N2 Subtype ; Influenza Vaccines/administration & dosage/*immunology ; Influenza in Birds/*prevention & control ; Nanoparticles/*administration & dosage/chemistry ; Vaccination/*veterinary ; }, abstract = {Avian influenza virus (AIV) is an extraordinarily diverse pathogen that causes significant morbidity in domesticated 
poultry populations and threatens human life with looming pandemic potential. Controlling avian influenza in susceptible populations requires highly effective, economical and broadly reactive vaccines. Several AIV vaccines have proven insufficient despite their wide use, and better technologies are needed to improve their immunogenicity and broaden effectiveness. Previously, we developed a "mosaic" H5 subtype hemagglutinin (HA) AIV vaccine and demonstrated its broad protection against diverse highly pathogenic H5N1 and seasonal H1N1 virus strains in mouse and non-human primate models. There is a significant interest in developing effective and safe vaccines against AIV that cannot contribute to the emergence of new strains of the virus once circulating in poultry. Here, we report on the development of an H5 mosaic (H5M) vaccine antigen formulated with polyanhydride nanoparticles (PAN) that provide sustained release of encapsulated antigens. H5M vaccine constructs were immunogenic whether delivered by the modified virus Ankara (MVA) strain or encapsulated within PAN. Both humoral and cellular immune responses were generated in both specific-pathogen free (SPF) and commercial chicks. Importantly, chicks vaccinated by H5M constructs were protected in terms of viral shedding from divergent challenge with a low pathogenicity avian influenza (LPAI) strain at 8 weeks post-vaccination. In addition, protective levels of humoral immunity were generated against highly pathogenic avian influenza (HPAI) of the similar H5N1 and genetically dissimilar H5N2 viruses. Overall, the developed platform technologies (MVA vector and PAN encapsulation) were safe and provided high levels of sustained protection against AIV in chickens. 
Such approaches could be used to design more efficacious vaccines against other important poultry infections.}, } @article {pmid31295964, year = {2019}, author = {McCarthy, CGP and Fitzpatrick, DA}, title = {Pangloss: A Tool for Pan-Genome Analysis of Microbial Eukaryotes.}, journal = {Genes}, volume = {10}, number = {7}, pages = {}, pmid = {31295964}, issn = {2073-4425}, mesh = {Aspergillus fumigatus/*genetics ; *Genome, Fungal ; Genomics ; *Software ; Yarrowia/*genetics ; }, abstract = {Although the pan-genome concept originated in prokaryote genomics, an increasing number of eukaryote species pan-genomes have also been analysed. However, there is a relative lack of software intended for eukaryote pan-genome analysis compared to that available for prokaryotes. In a previous study, we analysed the pan-genomes of four model fungi with a computational pipeline that constructed pan-genomes using the synteny-dependent Pan-genome Ortholog Clustering Tool (PanOCT) approach. Here, we present a modified and improved version of that pipeline which we have called Pangloss. Pangloss can perform gene prediction for a set of genomes from a given species that the user provides, constructs and optionally refines a species pan-genome from that set using PanOCT, and can perform various functional characterisation and visualisation analyses of species pan-genome data. To demonstrate Pangloss's capabilities, we constructed and analysed a species pan-genome for the oleaginous yeast Yarrowia lipolytica and also reconstructed a previously-published species pan-genome for the opportunistic respiratory pathogen Aspergillus fumigatus. 
Pangloss is implemented in Python, Perl and R and is freely available under an open source GPLv3 licence via GitHub.}, } @article {pmid31294750, year = {2019}, author = {Assié, G and Jouinot, A and Fassnacht, M and Libé, R and Garinet, S and Jacob, L and Hamzaoui, N and Neou, M and Sakat, J and de La Villéon, B and Perlemoine, K and Ragazzon, B and Sibony, M and Tissier, F and Gaujoux, S and Dousset, B and Sbiera, S and Ronchi, CL and Kroiss, M and Korpershoek, E and De Krijger, R and Waldmann, J and Quinkler, M and Haissaguerre, M and Tabarin, A and Chabre, O and Luconi, M and Mannelli, M and Groussin, L and Bertagna, X and Baudin, E and Amar, L and Coste, J and Beuschlein, F and Bertherat, J}, title = {Value of Molecular Classification for Prognostic Assessment of Adrenocortical Carcinoma.}, journal = {JAMA oncology}, volume = {5}, number = {10}, pages = {1440-1447}, pmid = {31294750}, issn = {2374-2445}, abstract = {IMPORTANCE: The risk stratification of adrenocortical carcinoma (ACC) based on tumor proliferation index and stage is limited. Adjuvant therapy after surgery is recommended for most patients. Pan-genomic studies have identified distinct molecular groups closely associated with outcome.

OBJECTIVE: To compare the molecular classification for prognostic assessment of ACC with other known prognostic factors.

DESIGN, SETTING, AND PARTICIPANTS: In this retrospective biomarker analysis, ACC tumor samples from 368 patients who had undergone surgical tumor removal were collected from March 1, 2005, to September 30, 2015 (144 in the training cohort and 224 in the validation cohort) at 21 referral centers with a median follow-up of 35 months (interquartile range, 18-74 months). Data were analyzed from March 2016 to March 2018.

EXPOSURES: Meta-analysis of pan-genomic studies (transcriptome, methylome, chromosome alteration, and mutational profiles) was performed on the training cohort. Targeted biomarker analysis, including targeted gene expression (BUB1B and PINK1), targeted methylation (PAX5, GSTP1, PYCARD, and PAX6), and targeted next-generation sequencing, was performed on the training and validation cohorts.

MAIN OUTCOMES AND MEASURES: Disease-free survival. Cox proportional hazards regression and C indexes were used to assess the prognostic value of each model.

RESULTS: Of the 368 patients (mean [SD] age, 49 [16] years), 144 were in the training cohort (100 [69.4%] female) and 224 were in the validation cohort (142 [63.4%] female). In the training cohort, pan-genomic measures classified ACC into 3 molecular groups (A1, A2, and A3-B), with 5-year survival of 9% for group A1, 45% for group A2, and 82% for group A3-B (log-rank P < .001). Molecular class was an independent prognostic factor of recurrence in stage I to III ACC after complete surgery (hazard ratio, 55.91; 95% CI, 8.55-365.40; P < .001). The combination of European Network for the Study of Adrenal Tumors (ENSAT) stage, tumor proliferation index, and molecular class provided the most discriminant prognostic model (C index, 0.88). In the validation cohort, the molecular classification, determined by targeted biomarker measures, was confirmed as an independent prognostic factor of recurrence (hazard ratio, 5.96 [95% CI, 1.81-19.58], P = .003 for the targeted classifier combining expression, methylation, and chromosome alterations; and 2.61 [95% CI, 1.31-5.19], P = .006 for the targeted classifier combining methylation, chromosome alterations, and mutational profile). The prognostic value of the molecular markers was limited for patients with stage IV ACC.

CONCLUSIONS AND RELEVANCE: The findings suggest that in localized ACC, targeted classifiers may be used as independent markers of recurrence. The determination of molecular class may improve individual prognostic assessment and thus may spare unnecessary adjuvant treatment.}, } @article {pmid31293547, year = {2019}, author = {Passera, A and Compant, S and Casati, P and Maturo, MG and Battelli, G and Quaglino, F and Antonielli, L and Salerno, D and Brasca, M and Toffolatti, SL and Mantegazza, F and Delledonne, M and Mitter, B}, title = {Not Just a Pathogen? Description of a Plant-Beneficial Pseudomonas syringae Strain.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1409}, pmid = {31293547}, issn = {1664-302X}, abstract = {Plants develop in a microbe-rich environment and must interact with a plethora of microorganisms, both pathogenic and beneficial. Indeed, such is the case of Pseudomonas, and its model organisms P. fluorescens and P. syringae, a bacterial genus that has received particular attention because of its beneficial effect on plants and its pathogenic strains. The present study aims to compare plant-beneficial and pathogenic strains belonging to the P. syringae species to get new insights into the distinction between the two types of plant-microbe interactions. In assays carried out under greenhouse conditions, P. syringae pv. syringae strain 260-02 was shown to promote plant-growth and to exert biocontrol of P. syringae pv. tomato strain DC3000, against the Botrytis cinerea fungus and the Cymbidium Ringspot Virus. This P. syringae strain also had a distinct volatile emission profile, as well as a different plant-colonization pattern, visualized by confocal microscopy and gfp labeled strains, compared to strain DC3000. Despite the different behavior, the P. syringae strain 260-02 showed great similarity to pathogenic strains at a genomic level. 
However, genome analyses highlighted a few differences that form the basis for the following hypotheses regarding strain 260-02. P. syringae strain 260-02: (i) possesses non-functional virulence genes, like the mangotoxin-producing operon Mbo; (ii) has different regulation pathways, suggested by the difference in the autoinducer system and the lack of a virulence activator gene; (iii) has genes encoding DNA methylases different from those found in other P. syringae strains, suggested by the presence of horizontal-gene-transfer-obtained methylases that could affect gene expression.}, } @article {pmid31293536, year = {2019}, author = {Fontana, A and Falasconi, I and Molinari, P and Treu, L and Basile, A and Vezzi, A and Campanaro, S and Morelli, L}, title = {Genomic Comparison of Lactobacillus helveticus Strains Highlights Probiotic Potential.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1380}, pmid = {31293536}, issn = {1664-302X}, abstract = {Lactobacillus helveticus belongs to the large group of lactic acid bacteria (LAB), which are the major players in the fermentation of a wide range of foods. LAB are also present in the human gut, which has often been exploited as a reservoir of potential novel probiotic strains, but several parameters need to be assessed before establishing their safety and potential use for human consumption. In the present study, six L. helveticus strains isolated from natural whey cultures were analyzed for their phenotype and genotype in exopolysaccharide (EPS) production, low pH and bile salt tolerance, bile salt hydrolase (BSH) activity, and antibiotic resistance profile. In addition, a comparative genomic investigation was performed between the six newly sequenced strains and the 51 publicly available genomes of L. helveticus to define the pangenome structure. 
The results indicate that the newly sequenced strain UC1267 and the deposited strain DSM 20075 can be considered good candidates for gut-adapted strains due to their ability to survive in the presence of 0.2% glycocholic acid (GCA) and 1% taurocholic and taurodeoxycholic acid (TDCA). Moreover, these strains had the highest bile salt deconjugation activity among the tested L. helveticus strains. Considering the safety profile, none of these strains presented antibiotic resistance phenotypically and/or at the genome level. The pangenome analysis revealed genes specific to the new isolates, such as enzymes related to folate biosynthesis in strains UC1266 and UC1267 and an integrated phage in strain UC1035. Finally, the presence of maltose-degrading enzymes and multiple copies of 6-phospho-β-glucosidase genes in our strains indicates the capability to metabolize sugars other than lactose, which is related solely to dairy niches.}, } @article {pmid31290097, year = {2020}, author = {Tian, X and Li, R and Fu, W and Li, Y and Wang, X and Li, M and Du, D and Tang, Q and Cai, Y and Long, Y and Zhao, Y and Li, M and Jiang, Y}, title = {Building a sequence map of the pig pan-genome from multiple de novo assemblies and Hi-C data.}, journal = {Science China. Life sciences}, volume = {63}, number = {5}, pages = {750-763}, doi = {10.1007/s11427-019-9551-7}, pmid = {31290097}, issn = {1869-1889}, mesh = {Animals ; Base Sequence ; Chromatin/*genetics ; Chromosome Mapping ; Female ; Genome/*genetics ; High-Throughput Nucleotide Sequencing ; Liver ; Mutation ; Sequence Alignment ; *Sequence Analysis, DNA ; Swine/*genetics ; }, abstract = {Pigs were domesticated independently in the Near East and China, indicating that a single reference genome from one individual is unable to represent the full spectrum of divergent sequences in pigs worldwide. Therefore, 12 de novo pig assemblies from Eurasia were compared in this study to identify the missing sequences from the reference genome. 
As a result, 72.5 Mb of non-redundant sequences (∼3% of the genome) were found to be absent from the reference genome (Sscrofa11.1) and were defined as pan-sequences. Of the pan-sequences, 9.0 Mb were dominant in Chinese pigs, in contrast with their low frequency in European pigs. One sequence dominant in Chinese pigs contained the complete genic region of the tazarotene-induced gene 3 (TIG3) gene which is involved in fatty acid metabolism. Using flanking sequences and Hi-C based methods, 27.7% of the sequences could be anchored to the reference genome. The supplementation of these sequences could contribute to the accurate interpretation of the 3D chromatin structure. A web-based pan-genome database was further provided to serve as a primary resource for exploration of genetic diversity and promote pig breeding and biomedical research.}, } @article {pmid31284652, year = {2019}, author = {Piligrimova, EG and Kazantseva, OA and Nikulin, NA and Shadrin, AM}, title = {Bacillus Phage vB_BtS_B83 Previously Designated as a Plasmid May Represent a New Siphoviridae Genus.}, journal = {Viruses}, volume = {11}, number = {7}, pages = {}, pmid = {31284652}, issn = {1999-4915}, mesh = {Bacillus Phages/*classification/*genetics/isolation & purification/ultrastructure ; Bacillus thuringiensis/virology ; Base Sequence ; DNA, Viral/genetics ; Genes, Viral/genetics ; Genome, Viral ; Genomics ; Host Specificity ; Microscopy, Electron, Transmission ; *Phylogeny ; *Plasmids ; Prophages/genetics ; Sequence Analysis, DNA ; Siphoviridae/*classification/*genetics/ultrastructure ; }, abstract = {The Bacillus cereus group of bacteria includes, inter alia, the species known to be associated with human diseases and food poisoning. Here, we describe the Bacillus phage vB_BtS_B83 (abbreviated as B83) infecting the species of this group. Transmission electron microscopy (TEM) micrographs indicate that B83 belongs to the Siphoviridae family. 
B83 is a temperate phage using an arbitrium system for the regulation of the lysis-lysogeny switch, and is probably capable of forming a circular plasmid prophage. Comparative analysis shows that it has been previously sequenced, but was mistaken for a plasmid. B83 shares common genome organization and >46% of proteins with the other Bacillus phage, BMBtp14. Phylograms constructed using large terminase subunits and a pan-genome presence-absence matrix show that these phages form a clade distinct from the closest viruses. Based on the above, we propose the creation of a new genus named Bembunaquatrovirus that includes B83 and BMBtp14.}, } @article {pmid31281302, year = {2019}, author = {Machado, KCT and Fortuin, S and Tomazella, GG and Fonseca, AF and Warren, RM and Wiker, HG and de Souza, SJ and de Souza, GA}, title = {On the Impact of the Pangenome and Annotation Discrepancies While Building Protein Sequence Databases for Bacteria Proteogenomics.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1410}, pmid = {31281302}, issn = {1664-302X}, abstract = {In proteomics, peptide information within mass spectrometry (MS) data from a specific organism sample is routinely matched against a protein sequence database that best represent such organism. However, if the species/strain in the sample is unknown or genetically poorly characterized, it becomes challenging to determine a database which can represent such sample. Building customized protein sequence databases merging multiple strains for a given species has become a strategy to overcome such restrictions. However, as more genetic information is publicly available and interesting genetic features such as the existence of pan- and core genes within a species are revealed, we questioned how efficient such merging strategies are to report relevant information. To test this assumption, we constructed databases containing conserved and unique sequences for 10 different species. 
Features that are relevant for probabilistic-based protein identification by proteomics were then monitored. As expected, increase in database complexity correlates with pangenomic complexity. However, Mycobacterium tuberculosis and Bordetella pertussis generated very complex databases even having low pangenomic complexity. We further tested database performance by using MS data from eight clinical strains from M. tuberculosis, and from two published datasets from Staphylococcus aureus. We show that by using an approach where database size is controlled by removing repeated identical tryptic sequences across strains/species, computational time can be reduced drastically as database complexity increases.}, } @article {pmid31279974, year = {2019}, author = {Nielsen, MR and Wollenberg, RD and Westphal, KR and Sondergaard, TE and Wimmer, R and Gardiner, DM and Sørensen, JL}, title = {Heterologous expression of intact biosynthetic gene clusters in Fusarium graminearum.}, journal = {Fungal genetics and biology : FG & B}, volume = {132}, number = {}, pages = {103248}, doi = {10.1016/j.fgb.2019.103248}, pmid = {31279974}, issn = {1096-0937}, mesh = {Biosynthetic Pathways/*genetics ; Fungal Proteins/genetics ; Fusarium/enzymology/*genetics ; *Gene Expression Regulation, Fungal ; Genome, Fungal ; *Multigene Family ; Peptide Synthases/genetics ; Polyketide Synthases/genetics ; Recombination, Genetic ; }, abstract = {Filamentous fungi such as species from the genus Fusarium are capable of producing a wide palette of interesting metabolites relevant to health, agriculture and biotechnology. Secondary metabolites are formed from large synthase/synthetase enzymes often encoded in gene clusters containing additional enzymes cooperating in the metabolite's biosynthesis. The true potential of fungal metabolomes remain untapped as the majority of secondary metabolite gene clusters are silent under standard laboratory growth conditions. 
One way to achieve expression of biosynthetic pathways is to clone the responsible genes and express them in a well-suited heterologous host, which poses a challenge since Fusarium polyketide synthase and non-ribosomal peptide synthetase gene clusters can be large (e.g. as large as 80 kb) and comprise several genes necessary for product formation. The major challenge associated with heterologous expression of fungal biosynthesis pathways is thus handling and cloning large DNA sequences. In this paper we present the successful workflow for cloning, reconstruction and heterologous production of two previously characterized Fusarium pseudograminearum natural product pathways in Fusarium graminearum. In vivo yeast recombination enabled rapid assembly of the W493 (NRPS32-PKS40) and the Fusarium Cytokinin gene clusters. F. graminearum transformants were obtained through protoplast-mediated and Agrobacterium tumefaciens-mediated transformation. Whole genome sequencing revealed isolation of transformants carrying intact copies of the gene clusters was possible. Known Fusarium cytokinin metabolites; fusatin, 8-oxo-fusatin, 8-oxo-isopentenyladenine, fusatinic acid together with cis- and trans-zeatin were detected by liquid chromatography and mass spectrometry, which confirmed gene functionality in F. graminearum. In addition the non-ribosomal lipopeptide products W493 A and B were heterologously produced in similar amounts to that observed in the F. pseudograminearum donor. The Fusarium pan-genome comprises more than 60 uncharacterized putative secondary metabolite gene clusters. We nominate the well-characterized F. graminearum as a heterologous expression platform for Fusarium secondary metabolite gene clusters, and present our experience cloning and introducing gene clusters into this species. 
We expect the presented methods will inspire future endeavours in heterologous production of Fusarium metabolites and potentially aid the production and characterization of novel natural products.}, } @article {pmid31279858, year = {2020}, author = {Matteoli, FP and Passarelli-Araujo, H and Pedrosa-Silva, F and Olivares, FL and Venancio, TM}, title = {Population structure and pangenome analysis of Enterobacter bugandensis uncover the presence of blaCTX-M-55, blaNDM-5 and blaIMI-1, along with sophisticated iron acquisition strategies.}, journal = {Genomics}, volume = {112}, number = {2}, pages = {1182-1191}, doi = {10.1016/j.ygeno.2019.07.003}, pmid = {31279858}, issn = {1089-8646}, mesh = {Bacterial Proteins/*genetics/metabolism ; Enterobacter/*genetics/metabolism ; Enterobactin/analogs & derivatives/genetics/metabolism ; *Genome, Bacterial ; Hydroxamic Acids/metabolism ; Iron/*metabolism ; Operon ; beta-Lactamases/*genetics/metabolism ; }, abstract = {Enterobacter bugandensis is a recently described species that has been largely associated with nosocomial infections. We report the genome of a non-clinical E. bugandensis strain, which was integrated with publicly available genomes to study the pangenome and general population structure of E. bugandensis. Core- and whole-genome multilocus sequence typing allowed the detection of five E. bugandensis phylogroups (PG-A to E), which contain important antimicrobial resistance and virulence determinants. We uncovered several extended-spectrum β-lactamases, including blaCTX-M-55 and blaNDM-5, present in an IncX replicon type plasmid, described here for the first time in E. bugandensis. Genetic context analysis of blaNDM-5 revealed the resemblance of this plasmid with other IncX plasmids from other bacteria from the same country. Three distinctive siderophore producing operons were found in E. bugandensis: enterobactin (ent), aerobactin (iuc/iut), and salmochelin (iro). 
Our findings provide novel insights on the lifestyle, physiology, antimicrobial, and virulence profiles of E. bugandensis.}, } @article {pmid31276593, year = {2019}, author = {de Witt, RN and Kroukamp, H and Van Zyl, WH and Paulsen, IT and Volschenk, H}, title = {QTL analysis of natural Saccharomyces cerevisiae isolates reveals unique alleles involved in lignocellulosic inhibitor tolerance.}, journal = {FEMS yeast research}, volume = {19}, number = {5}, pages = {}, doi = {10.1093/femsyr/foz047}, pmid = {31276593}, issn = {1567-1364}, mesh = {Alleles ; Genetic Engineering ; Genetic Variation ; High-Throughput Nucleotide Sequencing ; Lignin/*antagonists & inhibitors ; Multifactorial Inheritance ; Phenotype ; *Quantitative Trait Loci ; Saccharomyces cerevisiae/*genetics ; Saccharomyces cerevisiae Proteins/*genetics ; }, abstract = {Decoding the genetic basis of lignocellulosic inhibitor tolerance in Saccharomyces cerevisiae is crucial for rational engineering of bioethanol strains with enhanced robustness. The genetic diversity of natural strains present an invaluable resource for the exploration of complex traits of industrial importance from a pan-genomic perspective to complement the limited range of specialised, tolerant industrial strains. Natural S. cerevisiae isolates have lately garnered interest as a promising toolbox for engineering novel, genetically encoded tolerance phenotypes into commercial strains. To this end, we investigated the genetic basis for lignocellulosic inhibitor tolerance of natural S. cerevisiae isolates. A total of 12 quantitative trait loci underpinning tolerance were identified by next-generation sequencing linked bulk-segregant analysis of superior interbred pools. Our findings corroborate the current perspective of lignocellulosic inhibitor tolerance as a multigenic, complex trait. Apart from a core set of genetic variants required for inhibitor tolerance, an additional genetic background-specific response was observed. 
Functional analyses of the identified genetic loci revealed the uncharacterised ORF, YGL176C and the bud-site selection XRN1/BUD13 as potentially beneficial alleles contributing to tolerance to a complex lignocellulosic inhibitor mixture. We present evidence for the consideration of both regulatory and coding sequence variants for strain improvement.}, } @article {pmid31273387, year = {2019}, author = {Kopejtka, K and Lin, Y and Jakubovičová, M and Koblížek, M and Tomasch, J}, title = {Clustered Core- and Pan-Genome Content on Rhodobacteraceae Chromosomes.}, journal = {Genome biology and evolution}, volume = {11}, number = {8}, pages = {2208-2217}, pmid = {31273387}, issn = {1759-6653}, mesh = {Bacterial Proteins/*genetics ; Chromosomes, Bacterial/*genetics ; DNA Replication ; Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; *Phylogeny ; Rhodobacteraceae/*genetics ; }, abstract = {In Bacteria, chromosome replication starts at a single origin of replication and proceeds on both replichores. Due to its asymmetric nature, replication influences chromosome structure and gene organization, mutation rate, and expression. To date, little is known about the distribution of highly conserved genes over the bacterial chromosome. Here, we used a set of 101 fully sequenced Rhodobacteraceae representatives to analyze the relationship between conservation of genes within this family and their distance from the origin of replication. Twenty-two of the analyzed species had core genes clustered significantly closer to the origin of replication with representatives of the genus Celeribacter being the most apparent example. Interestingly, there were also eight species with the opposite organization. In particular, Rhodobaca barguzinensis and Loktanella vestfoldensis showed a significant increase of core genes with distance from the origin of replication. 
The uneven distribution of low-conserved regions is in particular pronounced for genomes in which the halves of one replichore differ in their conserved gene content. Phage integration and horizontal gene transfer partially explain the scattered nature of Rhodobacteraceae genomes. Our findings lay the foundation for a better understanding of bacterial genome evolution and the role of replication therein.}, } @article {pmid31272871, year = {2020}, author = {Pant, A and Das, B and Bhadra, RK}, title = {CTX phage of Vibrio cholerae: Genomics and applications.}, journal = {Vaccine}, volume = {38 Suppl 1}, number = {}, pages = {A7-A12}, doi = {10.1016/j.vaccine.2019.06.034}, pmid = {31272871}, issn = {1873-2518}, mesh = {Bacteriophages/*genetics/physiology ; Cholera ; Cholera Toxin/genetics ; Chromosomes, Bacterial ; Genome, Viral ; Genomics ; Humans ; Vibrio cholerae/genetics/*virology ; *Virus Integration ; }, abstract = {The bipartite genome of Vibrio cholerae is divided into two circular non-homologous chromosomes, which harbor several genetic elements like phages, plasmids, transposons, integrative conjugative elements, and pathogenic islands that encode functions responsible for disease development, antimicrobial resistance, and subsistence in hostile environments. These elements are highly heterogeneous, mobile in nature, and encode their own mobility functions or exploit host-encoded enzymes for intra- and inter-cellular movements. The key toxin of V. cholerae responsible for the life-threatening diarrheal disease cholera, the cholera toxin, is coded by part of the genome of a filamentous phage, CTXϕ. The replicative genome of CTXϕ is divided into two distinct modular structures and has adopted a unique strategy for its irreversible integration into the V. cholerae chromosomes. CTXϕ exploits two host-encoded tyrosine recombinases, XerC and XerD, for its integration in the highly conserved dimer resolution site (dif) of V. cholerae chromosomes. 
CTXϕ can replicate only in a limited number of Vibrio species. In contrast, the phage integration into the bacterial chromosome does not rely on its replication and could integrate to the dif site of large numbers of gram-negative bacteria. Recent pangenomic analysis revealed that like CTXϕ, the bacterial dif site is the integration spot for several other mobile genetic elements such as plasmids and genomic islands. In this review we discuss current molecular insights into CTXϕ genomics and its replication and integration mechanisms into hosts. Particular emphasis has been given on the exploitation of CTXϕ genomics knowledge in developing genetic tools and designing environmentally safe recombinant live oral cholera vaccine strains.}, } @article {pmid31263885, year = {2019}, author = {Palevich, N and Maclean, PH and Baten, A and Scott, RW and Leathwick, DM}, title = {The Genome Sequence of the Anthelmintic-Susceptible New Zealand Haemonchus contortus.}, journal = {Genome biology and evolution}, volume = {11}, number = {7}, pages = {1965-1970}, pmid = {31263885}, issn = {1759-6653}, mesh = {Animals ; Anthelmintics/*pharmacology ; Base Sequence ; Genome, Helminth/*genetics ; Genomics ; Haemonchus/*drug effects/*genetics ; New Zealand ; }, abstract = {Internal parasitic nematodes are a global animal health issue causing drastic losses in livestock. Here, we report a H. contortus representative draft genome to serve as a genetic resource to the scientific community and support future experimental research of molecular mechanisms in related parasites. A de novo hybrid assembly was generated from PCR-free whole genome sequence data, resulting in a chromosome-level assembly that is 465 Mb in size encoding 22,341 genes. The genome sequence presented here is consistent with the genome architecture of the existing Haemonchus species and is a valuable resource for future studies regarding population genetic structures of parasitic nematodes. 
Additionally, comparative pan-genomics with other species of economically important parasitic nematodes have revealed highly open genomes and strong collinearities within the phylum Nematoda.}, } @article {pmid31253105, year = {2019}, author = {Lima, NCB and Tanmoy, AM and Westeel, E and de Almeida, LGP and Rajoharison, A and Islam, M and Endtz, HP and Saha, SK and de Vasconcelos, ATR and Komurian-Pradel, F}, title = {Analysis of isolates from Bangladesh highlights multiple ways to carry resistance genes in Salmonella Typhi.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {530}, pmid = {31253105}, issn = {1471-2164}, support = {23038.010041/2013-13//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; Finance code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; 643476//European COMPARE/ ; Allocations de Recherche pour une Thèse au Sud (ARTS) scholarship//Institut de Recherche pour le Développement/ ; //Wellcome Trust/United Kingdom ; }, mesh = {Bangladesh ; Chromosomes, Bacterial/genetics ; Drug Resistance, Bacterial/*genetics ; Genes, Bacterial/*genetics ; Genomic Islands/genetics ; *Genomics ; Genotype ; Humans ; Molecular Sequence Annotation ; Phenotype ; Plasmids/genetics ; Salmonella typhi/drug effects/*genetics/isolation & purification ; }, abstract = {BACKGROUND: Typhoid fever, caused by Salmonella Typhi, follows a fecal-oral transmission route and is a major global public health concern, especially in developing countries like Bangladesh. Increasing emergence of antimicrobial resistance (AMR) is a serious issue; the list of treatments for typhoid fever is ever-decreasing. In addition to IncHI1-type plasmids, Salmonella genomic island (SGI) 11 has been reported to carry AMR genes. Although reports suggest a recent reduction in multidrug resistance (MDR) in the Indian subcontinent, the corresponding genomic changes in the background are unknown.

RESULTS: Here, we assembled and annotated complete closed chromosomes and plasmids for 73 S. Typhi isolates using short-length Illumina reads. S. Typhi had an open pan-genome, and the core genome was smaller than previously reported. Considering AMR genes, we identified five variants of SGI11, including the previously reported reference sequence. Five plasmids were identified, including the new plasmids pK91 and pK43; pK43 and pHCM2 were not related to AMR. The pHCM1, pPRJEB21992 and pK91 plasmids carried AMR genes and, along with the SGI11 variants, were responsible for resistance phenotypes. pK91 also contained qnr genes, conferred high ciprofloxacin resistance and was related to the H58-sublineage Bdq, which shows the same phenotype. The presence of plasmids (pHCM1 and pK91) and SGI11 were linked to two H58-lineages, Ia and Bd. Loss of plasmids and integration of resistance genes in genomic islands could contribute to the fitness advantage of lineage Ia isolates.

CONCLUSIONS: Such events may explain why lineage Ia is globally widespread, while the Bd lineage is locally restricted. Further studies are required to understand how these S. Typhi AMR elements spread and generate new variants. Preventive measures such as vaccination programs should also be considered in endemic countries; such initiatives could potentially reduce the spread of AMR.}, } @article {pmid31247102, year = {2019}, author = {Muskens, IS and Zhang, C and de Smith, AJ and Biegel, JA and Walsh, KM and Wiemels, JL}, title = {Germline genetic landscape of pediatric central nervous system tumors.}, journal = {Neuro-oncology}, volume = {21}, number = {11}, pages = {1376-1388}, pmid = {31247102}, issn = {1523-5866}, support = {P30 CA014236/CA/NCI NIH HHS/United States ; R01 CA194189/CA/NCI NIH HHS/United States ; }, mesh = {Biomarkers, Tumor/*genetics ; Central Nervous System Neoplasms/*genetics/*pathology ; Child ; *Genetic Predisposition to Disease ; Genomics/*methods ; *Germ-Line Mutation ; High-Throughput Nucleotide Sequencing ; Humans ; }, abstract = {Central nervous system (CNS) tumors are the second most common type of cancer among children. Depending on histopathology, anatomic location, and genomic factors, specific subgroups of brain tumors have some of the highest cancer-related mortality rates or result in considerable lifelong morbidity. Pediatric CNS tumors often occur in patients with genetic predisposition, at times revealing underlying cancer predisposition syndromes. Advances in next-generation sequencing (NGS) have resulted in the identification of an increasing number of cancer predisposition genes. In this review, the literature on genetic predisposition to pediatric CNS tumors is evaluated with a discussion of potential future targets for NGS and clinical implications. 
Furthermore, we explore potential strategies for enhancing the understanding of genetic predisposition of pediatric CNS tumors, including evaluation of non-European populations, pan-genomic approaches, and large collaborative studies.}, } @article {pmid31244798, year = {2019}, author = {Levesque, S and de Melo, AG and Labrie, SJ and Moineau, S}, title = {Mobilome of Brevibacterium aurantiacum Sheds Light on Its Genetic Diversity and Its Adaptation to Smear-Ripened Cheeses.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1270}, pmid = {31244798}, issn = {1664-302X}, abstract = {Brevibacterium aurantiacum is an actinobacterium that confers key organoleptic properties to washed-rind cheeses during the ripening process. Although this industrially relevant species has been gaining an increasing attention in the past years, its genome plasticity is still understudied due to the unavailability of complete genomic sequences. To add insights on the mobilome of this group, we sequenced the complete genomes of five dairy Brevibacterium strains and one non-dairy strain using PacBio RSII. We performed phylogenetic and pan-genome analyses, including comparisons with other publicly available Brevibacterium genomic sequences. Our phylogenetic analysis revealed that these five dairy strains, previously identified as Brevibacterium linens, belong instead to the B. aurantiacum species. A high number of transposases and integrases were observed in the Brevibacterium spp. strains. In addition, we identified 14 and 12 new insertion sequences (IS) in B. aurantiacum and B. linens genomes, respectively. Several stretches of homologous DNA sequences were also found between B. aurantiacum and other cheese rind actinobacteria, suggesting horizontal gene transfer (HGT). A HGT region from an iRon Uptake/Siderophore Transport Island (RUSTI) and an iron uptake composite transposon were found in five B. aurantiacum genomes. 
These findings suggest that low iron availability in milk is a driving force in the adaptation of this bacterial species to this niche. Moreover, the exchange of iron uptake systems suggests cooperative evolution between cheese rind actinobacteria. We also demonstrated that the integrative and conjugative element BreLI (Brevibacterium Lanthipeptide Island) can excise from B. aurantiacum SMQ-1417 chromosome. Our comparative genomic analysis suggests that mobile genetic elements played an important role into the adaptation of B. aurantiacum to cheese ecosystems.}, } @article {pmid31240253, year = {2019}, author = {Zhang, B and Zhu, W and Diao, S and Wu, X and Lu, J and Ding, C and Su, X}, title = {The poplar pangenome provides insights into the evolutionary history of the genus.}, journal = {Communications biology}, volume = {2}, number = {}, pages = {215}, pmid = {31240253}, issn = {2399-3642}, mesh = {*Evolution, Molecular ; Gene Ontology ; *Genome, Plant ; Phylogeny ; *Polymorphism, Single Nucleotide ; Populus/classification/*genetics ; }, abstract = {The genus Populus comprises a complex amalgam of ancient and modern species that has become a prime model for evolutionary and taxonomic studies. Here we sequenced the genomes of 10 species from five sections of the genus Populus, identified 71 million genomic variations, and observed new correlations between the single-nucleotide polymorphism-structural variation (SNP-SV) density and indel-SV density to complement the SNP-indel density correlation reported in mammals. Disease resistance genes (R genes) with heterozygous loss-of-function (LOF) were significantly enriched in the 10 species, which increased the diversity of poplar R genes during evolution. Heterozygous LOF mutations in the self-incompatibility genes were closely related to the self-fertilization of poplar, suggestive of genomic control of self-fertilization in dioecious plants. 
The phylogenetic genome-wide SNPs tree also showed possible ancient hybridization among species in sections Tacamahaca, Aigeiros, and Leucoides. The pangenome resource also provided information for poplar genetics and breeding.}, } @article {pmid31238973, year = {2019}, author = {Zhang, AN and Mao, Y and Wang, Y and Zhang, T}, title = {Mining traits for the enrichment and isolation of not-yet-cultured populations.}, journal = {Microbiome}, volume = {7}, number = {1}, pages = {96}, pmid = {31238973}, issn = {2049-2618}, mesh = {Betaproteobacteria/*genetics/*isolation & purification/metabolism ; Gene Expression Profiling ; *Genome, Bacterial ; Genomics/*methods ; Metabolic Networks and Pathways/genetics ; Phylogeny ; }, abstract = {BACKGROUND: The lack of pure cultures limits our understanding into 99% of bacteria. Proper interpretation of the genetic and the transcriptional datasets can reveal clues for the enrichment and even isolation of the not-yet-cultured populations. Unraveling such information requires a proper mining method.

RESULTS: Here, we present a method to infer the hidden traits for the enrichment of not-yet-cultured populations. We demonstrate this method using Candidatus Accumulibacter. Our method constructs a whole picture of the carbon, electron, and energy flows in the not-yet-cultured populations from the genomic datasets. Then, it decodes the coordination across three flows from the transcriptional datasets. Based on it, our method diagnoses the status of the not-yet-cultured populations and provides strategy to optimize the enrichment systems.

CONCLUSION: Our method could shed light to the exploration into the bacterial dark matter in the environments.}, } @article {pmid31231937, year = {2019}, author = {Québatte, M and Dehio, C}, title = {Bartonella gene transfer agent: Evolution, function, and proposed role in host adaptation.}, journal = {Cellular microbiology}, volume = {21}, number = {11}, pages = {e13068}, pmid = {31231937}, issn = {1462-5822}, support = {31003A_173119/SNSF_/Swiss National Science Foundation/Switzerland ; }, mesh = {Adaptation, Physiological/genetics ; Animals ; Bacterial Proteins/genetics ; Bartonella/*genetics/growth & development/metabolism/*pathogenicity ; Evolution, Molecular ; Gene Transfer, Horizontal/*genetics/physiology ; Host Microbial Interactions ; Mutation ; Recombination, Genetic/genetics ; Replication Origin/genetics ; Type IV Secretion Systems/genetics/metabolism ; }, abstract = {The processes underlying host adaptation by bacterial pathogens remain a fundamental question with relevant clinical, ecological, and evolutionary implications. Zoonotic pathogens of the genus Bartonella constitute an exceptional model to study these aspects. Bartonellae have undergone a spectacular diversification into multiple species resulting from adaptive radiation. Specific adaptations of a complex facultative intracellular lifestyle have enabled the colonisation of distinct mammalian reservoir hosts. This remarkable host adaptability has a multifactorial basis and is thought to be driven by horizontal gene transfer (HGT) and recombination among a limited genus-specific pan genome. Recent functional and evolutionary studies revealed that the conserved Bartonella gene transfer agent (BaGTA) mediates highly efficient HGT and could thus drive this evolution. Here, we review the recent progress made towards understanding BaGTA evolution, function, and its role in the evolution and pathogenesis of Bartonella spp. 
We notably discuss how BaGTA could have contributed to genome diversification through recombination of beneficial traits that underlie host adaptability. We further address how BaGTA may counter the accumulation of deleterious mutations in clonal populations (Muller's ratchet), which are expected to occur through the recurrent transmission bottlenecks during the complex infection cycle of these pathogens in their mammalian reservoir hosts and arthropod vectors.}, } @article {pmid31231616, year = {2019}, author = {Minnullina, L and Pudova, D and Shagimardanova, E and Shigapova, L and Sharipova, M and Mardanova, A}, title = {Comparative Genome Analysis of Uropathogenic Morganella morganii Strains.}, journal = {Frontiers in cellular and infection microbiology}, volume = {9}, number = {}, pages = {167}, pmid = {31231616}, issn = {2235-2988}, mesh = {Adult ; Bacterial Proteins/biosynthesis/genetics ; Bacterial Toxins/genetics ; Base Composition ; Carcinoma ; Cell Line, Tumor ; Child, Preschool ; Female ; Genes, Bacterial/*genetics ; Genome Size ; *Genome, Bacterial ; Genomic Islands ; *Genomics ; Hemolysin Proteins/genetics ; Humans ; Male ; Middle Aged ; Molecular Sequence Annotation ; Morganella morganii/*genetics/isolation & purification ; Multigene Family ; Prophages/genetics ; Russia ; Salmonella Phages/genetics ; Urease/genetics/metabolism ; Urinary Bladder Neoplasms ; Urinary Tract Infections/*microbiology ; Virulence/genetics ; }, abstract = {Morganella morganii is an opportunistic bacterial pathogen shown to cause a wide range of clinical and community-acquired infections. This study was aimed at sequencing and comparing the genomes of three M. morganii strains isolated from the urine samples of patients with community-acquired urinary tract infections. Draft genome sequencing was conducted using the Illumina HiSeq platform. The genomes of MM 1, MM 4, and MM 190 strains have a size of 3.82-3.97 Mb and a GC content of 50.9-51%. 
Protein-coding sequences (CDS) represent 96.1% of the genomes, RNAs are encoded by 2.7% of genes and pseudogenes account for 1.2% of the genomes. The pan-genome contains 4,038 CDS, of which 3,279 represent core genes. Six to ten prophages and 21-33 genomic islands were identified in the genomes of MM 1, MM 4, and MM 190. More than 30 genes encode capsular biosynthesis proteins, an average of 60 genes encode motility and chemotaxis proteins, and about 70 genes are associated with fimbrial biogenesis and adhesion. We determined that all strains contained urease gene cluster ureABCEFGD and had a urease activity. Both MM 4 and MM 190 strains are capable of hemolysis and their activity correlates well with a cytotoxicity level on T-24 bladder carcinoma cells. These activities were associated with expression of RTX toxin gene hlyA, which was introduced into the genomes by a phage similar to Salmonella phage 118970_sal4.}, } @article {pmid31222169, year = {2019}, author = {Weiss, E and Spicher, C and Haas, R and Fischer, W}, title = {Excision and transfer of an integrating and conjugative element in a bacterial species with high recombination efficiency.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {8915}, pmid = {31222169}, issn = {2045-2322}, mesh = {Bacteria/*genetics ; Chromosomes, Bacterial ; *Conjugation, Genetic ; Gene Transfer, Horizontal ; *Homologous Recombination ; }, abstract = {Horizontal transfer of mobile genetic elements, such as integrating and conjugative elements (ICEs), plays an important role in generating diversity and maintaining comprehensive pan-genomes in bacterial populations. The human gastric pathogen Helicobacter pylori, which is known for its extreme genetic diversity, possesses highly efficient transformation and recombination systems to achieve this diversity, but it is unclear to what extent these systems influence ICE physiology. 
In this study, we have examined the excision/integration and horizontal transfer characteristics of an ICE (termed ICEHptfs4) in these bacteria. We show that transfer of ICEHptfs4 DNA during mating between donor and recipient strains is independent of its conjugation genes, and that homologous recombination is much more efficient than site-specific integration into the recipient chromosome. Nevertheless, ICEHptfs4 excision by site-specific recombination occurs permanently in a subpopulation of cells and involves relocation of a circularization-dependent promoter. Selection experiments for excision indicate that the circular form of ICEHptfs4 is not replicative, but readily reintegrates by site-specific recombination. Thus, although ICEHptfs4 harbours all essential transfer genes, and typical ICE functions such as site-specific integration are active in H. pylori, canonical ICE transfer is subordinate to the more efficient general DNA uptake and homologous recombination machineries in these bacteria.}, } @article {pmid31210272, year = {2019}, author = {Blake, VC and Woodhouse, MR and Lazo, GR and Odell, SG and Wight, CP and Tinker, NA and Wang, Y and Gu, YQ and Birkett, CL and Jannink, JL and Matthews, DE and Hane, DL and Michel, SL and Yao, E and Sen, TZ}, title = {GrainGenes: centralized small grain resources and digital platform for geneticists and breeders.}, journal = {Database : the journal of biological databases and curation}, volume = {2019}, number = {}, pages = {}, pmid = {31210272}, issn = {1758-0463}, mesh = {*Databases, Genetic ; Edible Grain/*genetics ; *Genome, Plant ; *Plant Breeding ; Poaceae/*genetics ; *Quantitative Trait Loci ; }, abstract = {GrainGenes (https://wheat.pw.usda.gov or https://graingenes.org) is an international centralized repository for curated, peer-reviewed datasets useful to researchers working on wheat, barley, rye and oat. 
GrainGenes manages genomic, genetic, germplasm and phenotypic datasets through a dynamically generated web interface for facilitated data discovery. Since 1992, GrainGenes has served geneticists and breeders in both the public and private sectors on six continents. Recently, several new datasets were curated into the database along with new tools for analysis. The GrainGenes homepage was enhanced by making it more visually intuitive and by adding links to commonly used pages. Several genome assemblies and genomic tracks are displayed through the genome browsers at GrainGenes, including the Triticum aestivum (bread wheat) cv. 'Chinese Spring' IWGSC RefSeq v1.0 genome assembly, the Aegilops tauschii (D genome progenitor) Aet v4.0 genome assembly, the Triticum turgidum ssp. dicoccoides (wild emmer wheat) cv. 'Zavitan' WEWSeq v.1.0 genome assembly, a T. aestivum (bread wheat) pangenome, the Hordeum vulgare (barley) cv. 'Morex' IBSC genome assembly, the Secale cereale (rye) select 'Lo7' assembly, a partial hexaploid Avena sativa (oat) assembly and the Triticum durum cv. 'Svevo' (durum wheat) RefSeq Release 1.0 assembly. New genetic maps and markers were added and can be displayed through CMAP. Quantitative trait loci, genetic maps and genes from the Wheat Gene Catalogue are indexed and linked through the Wheat Information System (WheatIS) portal. Training videos were created to help users query and reach the data they need. GSP (Genome Specific Primers) and PIECE2 (Plant Intron Exon Comparison and Evolution) tools were implemented and are available to use. 
As more small grains reference sequences become available, GrainGenes will play an increasingly vital role in helping researchers improve crops.}, } @article {pmid31202417, year = {2019}, author = {Chun, BH and Han, DM and Kim, KH and Jeong, SE and Park, D and Jeon, CO}, title = {Genomic and metabolic features of Tetragenococcus halophilus as revealed by pan-genome and transcriptome analyses.}, journal = {Food microbiology}, volume = {83}, number = {}, pages = {36-47}, doi = {10.1016/j.fm.2019.04.009}, pmid = {31202417}, issn = {1095-9998}, mesh = {Biogenic Amines/metabolism ; Enterococcaceae/*genetics/*metabolism ; *Gene Expression Profiling ; *Genome, Bacterial ; Genomics ; *Metabolic Networks and Pathways ; Osmotic Pressure ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Salinity ; }, abstract = {The genomic and metabolic diversity and features of Tetragenococcus halophilus, a moderately halophilic lactic acid bacterium, were investigated by pan-genome, transcriptome, and metabolite analyses. Phylogenetic analyses based on the 16S rRNA gene and genome sequences of 15 T. halophilus strains revealed their phylogenetic distinctness from other Tetragenococcus species. Pan-genome analysis of the T. halophilus strains showed that their carbohydrate metabolic capabilities were diverse and strain dependent. Aside from one histidine decarboxylase gene in one strain, no decarboxylase gene associated with biogenic amine production was identified from the genomes. However, T. halophilus DSM 20339[T] produced tyramine without a biogenic amine-producing decarboxylase gene, suggesting the presence of an unidentified tyramine-producing gene. Our reconstruction of the metabolic pathways of these strains showed that T. halophilus harbors a facultative lactic acid fermentation pathway to produce l-lactate, ethanol, acetate, and CO2 from various carbohydrates. The transcriptomic analysis of strain DSM 20339[T] suggested that T. 
halophilus may produce more acetate via the heterolactic pathway (including d-ribose metabolism) at high salt conditions. Although genes associated with the metabolism of glycine betaine, proline, glutamate, glutamine, choline, and citrulline were identified from the T. halophilus genomes, the transcriptome and metabolite analyses suggested that glycine betaine was the main compatible solute responding to high salt concentration and that citrulline may play an important role in the coping mechanism against high salinity-induced osmotic stresses. Our results will provide a better understanding of the genome and metabolic features of T. halophilus, which has implications for the food fermentation industry.}, } @article {pmid31191477, year = {2019}, author = {Kröber, E and Schäfer, H}, title = {Identification of Proteins and Genes Expressed by Methylophaga thiooxydans During Growth on Dimethylsulfide and Their Presence in Other Members of the Genus.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1132}, pmid = {31191477}, issn = {1664-302X}, abstract = {Dimethylsulfide is a volatile organic sulfur compound that provides the largest input of biogenic sulfur from the oceans to the atmosphere, and thence back to land, constituting an important link in the global sulfur cycle. Microorganisms degrading DMS affect fluxes of DMS in the environment, but the underlying metabolic pathways are still poorly understood. Methylophaga thiooxydans is a marine methylotrophic bacterium capable of growth on DMS as sole source of carbon and energy. Using proteomics and transcriptomics we identified genes expressed during growth on dimethylsulfide and methanol to refine our knowledge of the metabolic pathways that are involved in DMS and methanol degradation in this strain. Amongst the most highly expressed genes on DMS were the two methanethiol oxidases driving the oxidation of this reactive and toxic intermediate of DMS metabolism. 
Growth on DMS also increased expression of the enzymes of the tetrahydrofolate linked pathway of formaldehyde oxidation, in addition to the tetrahydromethanopterin linked pathway. Key enzymes of the inorganic sulfur oxidation pathway included flavocytochrome c sulfide dehydrogenase, sulfide quinone oxidoreductase, and persulfide dioxygenases. A sulP permease was also expressed during growth on DMS. Proteomics and transcriptomics also identified a number of highly expressed proteins and gene products whose function is currently not understood. As the identity of some enzymes of organic and inorganic sulfur metabolism previously detected in Methylophaga has not been characterized at the genetic level yet, highly expressed uncharacterized genes provide new targets for further biochemical and genetic analysis. A pan-genome analysis of six available Methylophaga genomes showed that only two of the six investigated strains, M. thiooxydans and M. sulfidovorans have the gene encoding methanethiol oxidase, suggesting that growth on methylated sulfur compounds of M. aminisulfidivorans is likely to involve different enzymes and metabolic intermediates. 
Hence, the pathways of DMS-utilization and subsequent C1 and sulfur oxidation are not conserved across Methylophaga isolates that degrade methylated sulfur compounds.}, } @article {pmid31188829, year = {2019}, author = {Guyeux, C and Charr, JC and Tran, HTM and Furtado, A and Henry, RJ and Crouzillat, D and Guyot, R and Hamon, P}, title = {Evaluation of chloroplast genome annotation tools and application to analysis of the evolution of coffee species.}, journal = {PloS one}, volume = {14}, number = {6}, pages = {e0216347}, pmid = {31188829}, issn = {1932-6203}, mesh = {Coffee/*genetics ; Evolution, Molecular ; Genes, Plant ; Genome, Chloroplast/*genetics ; Molecular Sequence Annotation/*methods/standards ; *Phylogeny ; Sequence Analysis, DNA ; }, abstract = {Chloroplast sequences are widely used for phylogenetic analysis due to their high degree of conservation in plants. Whole chloroplast genomes can now be readily obtained for plant species using new sequencing methods, giving invaluable data for plant evolution. However, new annotation methods are required for the efficient analysis of this data to deliver high quality phylogenetic analyses. In this study, the two main tools for chloroplast genome annotation were compared. More consistent detection and annotation of genes were produced with GeSeq when compared to the currently used Dogma. This suggests that the annotation of most of the previously annotated chloroplast genomes should now be updated. GeSeq was applied to species related to coffee, including 16 species of the Coffea and Psilanthus genera to reconstruct the ancestral chloroplast genomes and to evaluate their phylogenetic relationships. Eight genes in the plant chloroplast pan genome (consisting of 92 genes) were always absent in the coffee species analyzed. Notably, the two main cultivated coffee species (i.e. Arabica and Robusta) did not group into the same clade and differ in their pattern of gene evolution. 
While Arabica coffee (Coffea arabica) belongs to the Coffea genus, Robusta coffee (Coffea canephora) is associated with the Psilanthus genus. A more extensive survey of related species is required to determine if this is a unique attribute of Robusta coffee or a more widespread feature of coffee tree species.}, } @article {pmid31169073, year = {2019}, author = {Hsu, T and Gemmell, MR and Franzosa, EA and Berry, S and Mukhopadhya, I and Hansen, R and Michaud, M and Nielsen, H and Miller, WG and Nielsen, H and Bajaj-Elliott, M and Huttenhower, C and Garrett, WS and Hold, GL}, title = {Comparative genomics and genome biology of Campylobacter showae.}, journal = {Emerging microbes & infections}, volume = {8}, number = {1}, pages = {827-840}, pmid = {31169073}, issn = {2222-1751}, support = {R24 DK110499/DK/NIDDK NIH HHS/United States ; }, mesh = {Bacterial Proteins/genetics ; Campylobacter/classification/*genetics/isolation & purification/pathogenicity ; Campylobacter Infections/*microbiology ; Crohn Disease/microbiology ; Gastroenteritis/microbiology ; *Genome, Bacterial ; Genomics ; Humans ; Phenotype ; Phylogeny ; Virulence ; Virulence Factors/genetics ; }, abstract = {Campylobacter showae, a bacterium historically linked to gingivitis and periodontitis, has recently been associated with inflammatory bowel disease and colorectal cancer. Our aim was to generate genome sequences for new clinical C. showae strains and identify functional properties explaining their pathogenic potential. Eight C. showae genomes were assessed, four strains isolated from inflamed gut tissues from paediatric Crohn's disease patients, three strains from colonic adenomas, and one from a gastroenteritis patient stool. Genome assemblies were analyzed alongside the only 3 deposited C. showae genomes. 
The pangenome from these 11 strains consisted of 4686 unique protein families, and the core genome size was estimated at 1050 ± 15 genes with each new genome contributing an additional 206 ± 16 genes. Functional assays indicated that colonic strains segregated into 2 groups: adherent/invasive vs. non-adherent/non-invasive strains. The former possessed Type IV secretion machinery and S-layer proteins, while the latter contained Cas genes and other CRISPR associated proteins. Comparison of gene profiles with strains in Human Microbiome Project metagenomes showed that gut-derived isolates share genes specific to tongue dorsum and supragingival plaque counterparts. Our findings indicate that C. showae strains are phenotypically and genetically diverse and suggest that secretion systems may play an important role in virulence potential.}, } @article {pmid31164106, year = {2019}, author = {Hemsley, CM and O'Neill, PA and Essex-Lopresti, A and Norville, IH and Atkins, TP and Titball, RW}, title = {Extensive genome analysis of Coxiella burnetii reveals limited evolution within genomic groups.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {441}, pmid = {31164106}, issn = {1471-2164}, support = {//Wellcome Trust/United Kingdom ; DSTLX-1000068994//Defence Science and Technology Laboratory/ ; }, mesh = {Animals ; Cattle ; Coxiella burnetii/classification/*genetics/isolation & purification ; Evolution, Molecular ; *Genome, Bacterial ; Genome-Wide Association Study ; Genomics ; Genotyping Techniques ; Phylogeny ; United Kingdom ; }, abstract = {BACKGROUND: Coxiella burnetii is a zoonotic pathogen that resides in wild and domesticated animals across the globe and causes a febrile illness, Q fever, in humans. An improved understanding of the genetic diversity of C. burnetii is essential for the development of diagnostics, vaccines and therapeutics, but genotyping data is lacking from many parts of the world. 
Sporadic outbreaks of Q fever have occurred in the United Kingdom, but the local genetic make-up of C. burnetii has not been studied in detail.

RESULTS: Here, we report whole genome data for nine C. burnetii sequences obtained in the UK. All four genomes of C. burnetii from cattle, as well as one sheep sample, belonged to Multi-spacer sequence type (MST) 20, whereas the goat samples were MST33 (three genomes) and MST32 (one genome), two genotypes that have not been described to be present in the UK to date. We established the phylogenetic relationship between the UK genomes and 67 publicly available genomes based on single nucleotide polymorphisms (SNPs) in the core genome, which confirmed tight clustering of strains within genomic groups, but also indicated that sub-groups exist within those groups. Variation is mainly achieved through SNPs, many of which are non-synonymous, thereby confirming that evolution of C. burnetii is based on modification of existing genes. Finally, we discovered genomic-group specific genome content, which supports a model of clonal expansion of previously established genotypes, with large scale dissemination of some of these genotypes across continents being observed.

CONCLUSIONS: The genetic make-up of C. burnetii in the UK is similar to the one in neighboring European countries. As a species, C. burnetii has been considered a clonal pathogen with low genetic diversity at the nucleotide level. Here, we present evidence for significant variation at the protein level between isolates of different genomic groups, which mainly affects secreted and membrane-associated proteins. Our results thereby increase our understanding of the global genetic diversity of C. burnetii and provide new insights into the evolution of this emerging zoonotic pathogen.}, } @article {pmid31162871, year = {2019}, author = {León-Sampedro, R and Del Campo, R and Rodriguez-Baños, M and Lanza, VF and Pozuelo, MJ and Francés-Cuesta, C and Tedim, AP and Freitas, AR and Novais, C and Peixe, L and Willems, RJL and Corander, J and González Candelas, F and Baquero, F and Coque, TM}, title = {Phylogenomics of Enterococcus faecalis from wild birds: new insights into host-associated differences in core and accessory genomes of the species.}, journal = {Environmental microbiology}, volume = {21}, number = {8}, pages = {3046-3062}, doi = {10.1111/1462-2920.14702}, pmid = {31162871}, issn = {1462-2920}, support = {//Instituto de Salud Carlos III of Spain/Ministry of Economy and Competitiveness/International ; //European Development Regional Fund 'A way to achieve Europe' (ERDF)/International ; PI15-0512//Spanish R&D National Plan Estatal de I + D + i 2013-2016/International ; JPIAMR2016-AC16/00039//Joint Programming Initiative in Antimicrobial Resistance (JPIAMR)/International ; //Sociedad Española de Enfermedades Infecciosas y Microbiología Clínica (SEIMC)/International ; }, mesh = {Animals ; Animals, Wild ; Birds/*microbiology ; Enterococcus faecalis/*genetics ; Gene Expression Regulation, Bacterial ; Gene Transfer, Horizontal ; Genome, Bacterial ; Host Specificity ; *Phylogeny ; }, abstract = {Wild birds have been suggested to be reservoirs of antimicrobial resistant 
and/or pathogenic Enterococcus faecalis (Efs) strains, but the scarcity of studies and available sequences limit our understanding of the population structure of the species in these hosts. Here, we analysed the clonal and plasmid diversity of 97 Efs isolates from wild migratory birds. We found a high diversity, with most sequence types (STs) being firstly described here, while others were found in other hosts including some predominant in poultry. We found that pheromone-responsive plasmids predominate in wild bird Efs while 35% of the isolates entirely lack plasmids. Then, to better understand the ecology of the species, the whole genome of five strains with known STs (ST82, ST170, ST16 and ST55) were sequenced and compared with all the Efs genomes available in public databases. Using several methods to analyse core and accessory genomes (AccNET, PLACNET, hierBAPS and PANINI), we detected differences in the accessory genome of some lineages (e.g. ST82) demonstrating specific associations with birds. Conversely, the genomes of other Efs lineages exhibited divergence in core and accessory genomes, reflecting different adaptive trajectories in various hosts. 
This pangenome divergence, horizontal gene transfer events and occasional epidemic peaks could explain the population structure of the species.}, } @article {pmid31150357, year = {2019}, author = {Miclea, D and Al-Khzouz, C and Osan, S and Bucerzan, S and Cret, V and Popp, RA and Puiu, M and Chirita-Emandi, A and Zimbru, C and Ghervan, C}, title = {Genomic study via chromosomal microarray analysis in a group of Romanian patients with obesity and developmental disability/intellectual disability.}, journal = {Journal of pediatric endocrinology & metabolism : JPEM}, volume = {32}, number = {7}, pages = {667-674}, doi = {10.1515/jpem-2018-0439}, pmid = {31150357}, issn = {2191-0251}, mesh = {Adolescent ; Biomarkers/*analysis ; Child ; Child, Preschool ; *Chromosome Aberrations ; DNA Copy Number Variations ; Developmental Disabilities/complications/epidemiology/*genetics/pathology ; Female ; Follow-Up Studies ; Genetic Testing ; Genomics/*methods ; Humans ; Infant ; Intellectual Disability/complications/epidemiology/*genetics/pathology ; Male ; Microarray Analysis ; Pediatric Obesity/complications/epidemiology/*genetics/pathology ; *Polymorphism, Single Nucleotide ; Prognosis ; Romania/epidemiology ; }, abstract = {Background: Obesity with developmental disability/intellectual disability (DD/ID) is the most common association in syndromic obesity. Genomic analysis studies have allowed the decipherment of disease aetiology, both in cases of syndromic obesity as well as in cases of isolated or syndromic DD/ID. However, more data are needed to further elucidate the link between the two. The aim of this pangenomic study was to use single nucleotide polymorphism (SNP) array technology to determine the copy number variant (CNV) type and frequency associated with both obesity and DD/ID. Methods: Thirty-six patients were recruited from the Clinical Emergency Hospital for Children, in Cluj-Napoca, Romania during the period 2015-2017. 
The main inclusion criterion was a diagnosis that included both obesity and DD/ID. Genomic analysis via SNP array technology was performed. Results: Out of the 36 patients, 12 (33%) presented CNVs with a higher degree of pathogenicity (A group) and 24 (66%) presented benign CNVs (B group). The SNP array results for the A group were as follows: pathogenic CNVs in 8/12 patients (67%); variants of unknown significance (VOUS) in 2/12 patients (16%); and uniparental disomy (UPD) in 2/12 patients (16%). Conclusions: Some of these CNVs have already been observed in patients with both obesity and DD/ID, but the others were noticed only in DD/ID patients and have not been described until now in association with obesity.}, } @article {pmid31149898, year = {2019}, author = {Rossoni, AW and Price, DC and Seger, M and Lyska, D and Lammers, P and Bhattacharya, D and Weber, AP}, title = {The genomes of polyextremophilic cyanidiales contain 1% horizontally transferred genes with diverse adaptive functions.}, journal = {eLife}, volume = {8}, number = {}, pages = {}, pmid = {31149898}, issn = {2050-084X}, support = {EXC 1028//Deutsche Forschungsgemeinschaft/International ; WE 2231/21-1//Deutsche Forschungsgemeinschaft/International ; }, mesh = {*Adaptation, Biological ; Algal Proteins/genetics ; DNA, Algal/genetics ; *Evolution, Molecular ; *Gene Transfer, Horizontal ; Rhodophyta/*genetics ; }, abstract = {The role and extent of horizontal gene transfer (HGT) in eukaryotes are hotly disputed topics that impact our understanding of the origin of metabolic processes and the role of organelles in cellular evolution. We addressed this issue by analyzing 10 novel Cyanidiales genomes and determined that 1% of their gene inventory is HGT-derived. Numerous HGT candidates share a close phylogenetic relationship with prokaryotes that live in similar habitats as the Cyanidiales and encode functions related to polyextremophily. 
HGT candidates differ from native genes in GC-content, number of splice sites, and gene expression. HGT candidates are more prone to loss, which may explain the absence of a eukaryotic pan-genome. Therefore, the lack of a pan-genome and cumulative effects fail to provide substantive arguments against our hypothesis of recurring HGT followed by differential loss in eukaryotes. The maintenance of 1% HGTs, even under selection for genome reduction, underlines the importance of non-endosymbiosis related foreign gene acquisition.}, } @article {pmid31134015, year = {2019}, author = {Singh, PK and Mahato, AK and Jain, P and Rathour, R and Sharma, V and Sharma, TR}, title = {Comparative Genomics Reveals the High Copy Number Variation of a Retro Transposon in Different Magnaporthe Isolates.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {966}, pmid = {31134015}, issn = {1664-302X}, abstract = {Magnaporthe oryzae is one of the fungal pathogens of rice which results in heavy yield losses worldwide. Understanding the genomic structure of M. oryzae is essential for appropriate deployment of the blast resistance in rice crop improvement programs. In this study we sequenced two M. oryzae isolates, RML-29 (avirulent) and RP-2421 (highly virulent) and performed comparative study along with three publicly available genomes of 70-15, P131, and Y34. We identified several candidate effectors (>600) and isolate specific sequences from RML-29 and RP-2421, while a core set of 10013 single copy orthologs were found among the isolates. Pan-genome analysis showed extensive presence and absence variations (PAVs). We identified isolate-specific genes across 12 isolates using the pan-genome information. Repeat analysis was separately performed for each of the 15 isolates. This analysis revealed ∼25 times higher copy number of short interspersed nuclear elements (SINE) in virulent than avirulent isolate. 
We conclude that the extensive PAVs and occurrence of SINE throughout the genome could be one of the major mechanisms by which pathogenic variability is emerging in M. oryzae isolates. The knowledge gained in this comparative genome study can provide understandings about the fungal genome variations in different hosts and environmental conditions, and it will provide resources to effectively manage this important disease of rice.}, } @article {pmid31131017, year = {2019}, author = {Norri, T and Cazaux, B and Kosolobov, D and Mäkinen, V}, title = {Linear time minimum segmentation enables scalable founder reconstruction.}, journal = {Algorithms for molecular biology : AMB}, volume = {14}, number = {}, pages = {12}, pmid = {31131017}, issn = {1748-7188}, abstract = {BACKGROUND: We study a preprocessing routine relevant in pan-genomic analyses: consider a set of aligned haplotype sequences of complete human chromosomes. Due to the enormous size of such data, one would like to represent this input set with a few founder sequences that retain as well as possible the contiguities of the original sequences. Such a smaller set gives a scalable way to exploit pan-genomic information in further analyses (e.g. read alignment and variant calling). Optimizing the founder set is an NP-hard problem, but there is a segmentation formulation that can be solved in polynomial time, defined as follows. Given a threshold $L$ and a set $R = \{R_1, \ldots, R_m\}$ of $m$ strings (haplotype sequences), each having length $n$, the minimum segmentation problem for founder reconstruction is to partition $[1, n]$ into set $P$ of disjoint segments such that each segment $[a, b] \in P$ has length at least $L$ and the number $d(a, b) = |\{R_i[a, b] : 1 \le i \le m\}|$ of distinct substrings at segment $[a, b]$ is minimized over $[a, b] \in P$. 
The distinct substrings in the segments represent founder blocks that can be concatenated to form $\max\{d(a, b) : [a, b] \in P\}$ founder sequences representing the original $R$ such that crossovers happen only at segment boundaries.

RESULTS:  We give an $O(mn)$ time (i.e. linear time in the input size) algorithm to solve the minimum segmentation problem for founder reconstruction, improving over an earlier $O(mn^2)$.

CONCLUSIONS:  Our improvement makes it possible to apply the formulation to an input of thousands of complete human chromosomes. We implemented the new algorithm and give experimental evidence on its practicality. The implementation is available at https://github.com/tsnorri/founder-sequences.}, } @article {pmid31126314, year = {2019}, author = {Yang, X and Lee, WP and Ye, K and Lee, C}, title = {One reference genome is not enough.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {104}, pmid = {31126314}, issn = {1474-760X}, support = {U41HG007497/NH/NIH HHS/United States ; }, mesh = {*Genome, Human ; Genomic Structural Variation ; Genomics/standards ; Humans ; Reference Standards ; }, abstract = {A recent study on human structural variation indicates insufficiencies and errors in the human reference genome, GRCh38, and argues for the construction of a human pan-genome.}, } @article {pmid31122208, year = {2019}, author = {Feyereisen, M and Mahony, J and Kelleher, P and Roberts, RJ and O'Sullivan, T and Geertman, JA and van Sinderen, D}, title = {Comparative genome analysis of the Lactobacillus brevis species.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {416}, pmid = {31122208}, issn = {1471-2164}, support = {EPSPG/2015/7//Irish Research Council/ ; 450 13/IA/1953//Science Foundation Ireland/Ireland ; 15/SIRG/3430//Science Foundation Ireland/Ireland ; }, mesh = {Beer/microbiology ; Evolution, Molecular ; Genes, Bacterial ; *Genome, Bacterial ; Levilactobacillus brevis/classification/*genetics/isolation & purification ; Phylogeny ; Plasmids ; }, abstract = {BACKGROUND: Lactobacillus brevis is a member of the lactic acid bacteria (LAB), and strains of L. brevis have been isolated from silage, as well as from fermented cabbage and other fermented foods. However, this bacterium is also commonly associated with bacterial spoilage of beer.

RESULTS: In the current study, complete genome sequences of six isolated L. brevis strains were determined. Five of these L. brevis strains were isolated from beer (three isolates) or the brewing environment (two isolates), and were characterized as beer-spoilers or non-beer spoilers, respectively, while the sixth isolate had previously been isolated from silage. The genomic features of 19 L. brevis strains, encompassing the six L. brevis strains described in this study and thirteen L. brevis strains for which complete genome sequences were available in public databases, were analyzed with particular attention to evolutionary aspects and adaptation to beer.

CONCLUSIONS: Comparative genomic analysis highlighted evolution of the taxon allowing niche colonization, notably adaptation to the beer environment, with approximately 50 chromosomal genes acquired by L. brevis beer-spoiler strains representing approximately 2% of their total chromosomal genetic content. These genes primarily encode proteins that are putatively involved in oxidation-reduction reactions, transcription regulation or membrane transport, functions that may be crucial to survive the harsh conditions associated with beer. The study emphasized the role of plasmids in beer spoilage with a number of unique genes identified among L. brevis beer-spoiler strains.}, } @article {pmid31121984, year = {2019}, author = {Jiang, W and Ren, L and Guo, M and Mantri, N and Zhao, S and Pang, X}, title = {Detecting Schisandrae Chinensis Fructus and Its Chinese Patent Medicines with a Nucleotide Signature.}, journal = {Genes}, volume = {10}, number = {5}, pages = {}, pmid = {31121984}, issn = {2073-4425}, mesh = {Chromatography, High Pressure Liquid ; *DNA Barcoding, Taxonomic ; DNA, Intergenic/*genetics ; Drug Contamination ; Drugs, Chinese Herbal/chemistry/therapeutic use ; Fruit/chemistry ; Humans ; *Medicine, Chinese Traditional ; Nonprescription Drugs ; Nucleotide Motifs/genetics ; Schisandra/chemistry/*genetics ; }, abstract = {Schisandrae Chinensis Fructus (Wuweizi) is often adulterated with Schisandrae Sphenantherae Fructus (Nanwuweizi) in the herbal market. This adulteration is a threat to clinical treatment and safety. In this study, we aimed to develop a nucleotide signature for the identification of Wuweizi and its Chinese patent medicines based on the mini-DNA barcoding technique. We collected 49 samples to obtain internal transcribed spacer 2 (ITS2) sequences and developed a 26-bp nucleotide signature (5'-CGCTTTGCGACGCTCCCCTCCCTCCC-3') on the basis of a single nucleotide polymorphism (SNP) site within the ITS2 region that is unique to Wuweizi. 
Then, using the nucleotide signature, we investigated 27 batches of commercial crude drug samples labeled as Wuweizi and eight batches of Chinese patent medicines containing Wuweizi. Results showed that eight commercial crude drug samples were adulterants and one of the Chinese patent medicines contained adulterants. The nucleotide signature can serve as an effective tool for identifying Wuweizi and its Chinese patent medicines and can thus be used to ensure clinical drug safety.}, } @article {pmid31120895, year = {2019}, author = {Vincent, AT and Schiettekatte, O and Goarant, C and Neela, VK and Bernet, E and Thibeaux, R and Ismail, N and Mohd Khalid, MKN and Amran, F and Masuzawa, T and Nakao, R and Amara Korba, A and Bourhy, P and Veyrier, FJ and Picardeau, M}, title = {Revisiting the taxonomy and evolution of pathogenicity of the genus Leptospira through the prism of genomics.}, journal = {PLoS neglected tropical diseases}, volume = {13}, number = {5}, pages = {e0007270}, pmid = {31120895}, issn = {1935-2735}, mesh = {Animals ; Asia ; *Evolution, Molecular ; *Genome, Bacterial ; Genomics ; Humans ; Leptospira/*classification/genetics/isolation & purification/*pathogenicity ; Leptospirosis/*microbiology ; Phylogeny ; Virulence ; Zoonoses/microbiology ; }, abstract = {The causative agents of leptospirosis are responsible for an emerging zoonotic disease worldwide. One of the major routes of transmission for leptospirosis is the natural environment contaminated with the urine of a wide range of reservoir animals. Soils and surface waters also host a high diversity of non-pathogenic Leptospira and species for which the virulence status is not clearly established. The genus Leptospira is currently divided into 35 species classified into three phylogenetic clusters, which supposedly correlate with the virulence of the bacteria. 
In this study, a total of 90 Leptospira strains isolated from different environments worldwide including Japan, Malaysia, New Caledonia, Algeria, mainland France, and the island of Mayotte in the Indian Ocean were sequenced. A comparison of average nucleotide identity (ANI) values of genomes of the 90 isolates and representative genomes of known species revealed 30 new Leptospira species. These data also supported the existence of two clades and 4 subclades. To avoid classification that strongly implies assumption on the virulence status of the lineages, we called them P1, P2, S1, S2. One of these subclades has not yet been described and is composed of Leptospira idonii and 4 novel species that are phylogenetically related to the saprophytes. We then investigated genome diversity and evolutionary relationships among members of the genus Leptospira by studying the pangenome and core gene sets. Our data enable the identification of genome features, genes and domains that are important for each subclade, thereby laying the foundation for refining the classification of this complex bacterial genus. We also shed light on atypical genomic features of a group of species that includes the species often associated with human infection, suggesting a specific and ongoing evolution of this group of species that will require more attention. In conclusion, we have uncovered a massive species diversity and revealed a novel subclade in environmental samples collected worldwide and we have redefined the classification of species in the genus. 
The implication of several new potentially infectious Leptospira species for human and animal health remains to be determined but our data also provide new insights into the emergence of virulence in the pathogenic species.}, } @article {pmid31114559, year = {2019}, author = {González, V and Santamaría, RI and Bustos, P and Pérez-Carrascal, OM and Vinuesa, P and Juárez, S and Martínez-Flores, I and Cevallos, MÁ and Brom, S and Martínez-Romero, E and Romero, D}, title = {Phylogenomic Rhizobium Species Are Structured by a Continuum of Diversity and Genomic Clusters.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {910}, pmid = {31114559}, issn = {1664-302X}, abstract = {The bacterial genus Rhizobium comprises diverse symbiotic nitrogen-fixing species associated with the roots of plants in the Leguminosae family. Multiple genomic clusters defined by whole genome comparisons occur within Rhizobium, but their equivalence to species is controversial. In this study we investigated such genomic clusters to ascertain their significance in a species phylogeny context. Phylogenomic inferences based on complete sets of ribosomal proteins and stringent core genome markers revealed the main lineages of Rhizobium. The clades corresponding to R. etli and R. leguminosarum species show several genomic clusters with average genomic nucleotide identities (ANI > 95%), and a continuum of divergent strains, respectively. They were found to be inversely correlated with the genetic distance estimated from concatenated ribosomal proteins. We uncovered evidence of a Rhizobium pangenome that was greatly expanded, both in its chromosomes and plasmids. Despite the variability of extra-chromosomal elements, our genomic comparisons revealed only a few chromid and plasmid families. The presence/absence profile of genes in the complete Rhizobium genomes agreed with the phylogenomic pattern of species divergence. 
Symbiotic genes were distributed according to the principal phylogenomic Rhizobium clades but did not resolve genome clusters within the clades. We distinguished some types of symbiotic plasmids within Rhizobium that displayed different rates of synonymous nucleotide substitutions in comparison to chromosomal genes. Symbiotic plasmids may have been repeatedly transferred horizontally between strains and species, in the process displacing and substituting pre-existing symbiotic plasmids. In summary, the results indicate that Rhizobium genomic clusters, as defined by whole genomic identities, might be part of a continuous process of evolutionary divergence that includes the core and the extrachromosomal elements leading to species formation.}, } @article {pmid31112551, year = {2019}, author = {Pucker, B and Holtgräwe, D and Stadermann, KB and Frey, K and Huettel, B and Reinhardt, R and Weisshaar, B}, title = {A chromosome-level sequence assembly reveals the structure of the Arabidopsis thaliana Nd-1 genome and its gene set.}, journal = {PloS one}, volume = {14}, number = {5}, pages = {e0216233}, pmid = {31112551}, issn = {1932-6203}, mesh = {Arabidopsis/genetics ; Chromosome Aberrations ; Chromosomes/genetics ; Genome, Plant/*genetics ; Genotype ; Sequence Analysis, DNA ; }, abstract = {In addition to the BAC-based reference sequence of the accession Columbia-0 from the year 2000, several short read assemblies of THE plant model organism Arabidopsis thaliana were published during the last years. Also, a SMRT-based assembly of Landsberg erecta has been generated that identified translocation and inversion polymorphisms between two genotypes of the species. Here we provide a chromosome-arm level assembly of the A. thaliana accession Niederzenz-1 (AthNd-1_v2c) based on SMRT sequencing data. The best assembly comprises 69 nucleome sequences and displays a contig length of up to 16 Mbp. 
Compared to an earlier Illumina short read-based NGS assembly (AthNd-1_v1), a 75 fold increase in contiguity was observed for AthNd-1_v2c. To assign contig locations independent from the Col-0 gold standard reference sequence, we used genetic anchoring to generate a de novo assembly. In addition, we assembled the chondrome and plastome sequences. Detailed analyses of AthNd-1_v2c allowed reliable identification of large genomic rearrangements between A. thaliana accessions contributing to differences in the gene sets that distinguish the genotypes. One of the differences detected identified a gene that is lacking from the Col-0 gold standard sequence. This de novo assembly extends the known proportion of the A. thaliana pan-genome.}, } @article {pmid31100356, year = {2019}, author = {Galata, V and Laczny, CC and Backes, C and Hemmrich-Stanisak, G and Schmolke, S and Franke, A and Meese, E and Herrmann, M and von Müller, L and Plum, A and Müller, R and Stähler, C and Posch, AE and Keller, A}, title = {Integrating Culture-based Antibiotic Resistance Profiles with Whole-genome Sequencing Data for 11,087 Clinical Isolates.}, journal = {Genomics, proteomics & bioinformatics}, volume = {17}, number = {2}, pages = {169-182}, pmid = {31100356}, issn = {2210-3244}, mesh = {Acinetobacter baumannii/genetics/isolation & purification ; Bacteria/*genetics/*isolation & purification ; Cell Culture Techniques/*methods ; Drug Resistance, Microbial/*genetics ; Escherichia coli/genetics/isolation & purification ; Genome, Bacterial ; Genotype ; Humans ; Internet ; Microbial Sensitivity Tests ; Phenotype ; *Whole Genome Sequencing ; }, abstract = {Emerging antibiotic resistance is a major global health threat. The analysis of nucleic acid sequences linked to susceptibility phenotypes facilitates the study of genetic antibiotic resistance determinants to inform molecular diagnostics and drug development. 
We collected genetic data (11,087 newly-sequenced whole genomes) and culture-based resistance profiles (10,991 out of the 11,087 isolates comprehensively tested against 22 antibiotics in total) of clinical isolates including 18 main species spanning a time period of 30 years. Species and drug specific resistance patterns were observed including increased resistance rates for Acinetobacter baumannii to carbapenems and for Escherichia coli to fluoroquinolones. Species-level pan-genomes were constructed to reflect the genetic repertoire of the respective species, including conserved essential genes and known resistance factors. Integrating phenotypes and genotypes through species-level pan-genomes allowed to infer gene-drug resistance associations using statistical testing. The isolate collection and the analysis results have been integrated into GEAR-base, a resource available for academic research use free of charge at https://gear-base.com.}, } @article {pmid31086351, year = {2019}, author = {Gao, L and Gonda, I and Sun, H and Ma, Q and Bao, K and Tieman, DM and Burzynski-Chang, EA and Fish, TL and Stromberg, KA and Sacks, GL and Thannhauser, TW and Foolad, MR and Diez, MJ and Blanca, J and Canizares, J and Xu, Y and van der Knaap, E and Huang, S and Klee, HJ and Giovannoni, JJ and Fei, Z}, title = {The tomato pan-genome uncovers new genes and a rare allele regulating fruit flavor.}, journal = {Nature genetics}, volume = {51}, number = {6}, pages = {1044-1051}, doi = {10.1038/s41588-019-0410-2}, pmid = {31086351}, issn = {1546-1718}, mesh = {*Alleles ; Computational Biology/methods ; Domestication ; Fruit/*genetics ; *Genetic Association Studies ; *Genome, Plant ; *Genomics/methods ; Humans ; Solanum lycopersicum/*genetics ; Open Reading Frames ; Plant Breeding ; Promoter Regions, Genetic ; *Quantitative Trait, Heritable ; Selection, Genetic ; }, abstract = {Modern tomatoes have narrow genetic diversity limiting their improvement potential. 
We present a tomato pan-genome constructed using genome sequences of 725 phylogenetically and geographically representative accessions, revealing 4,873 genes absent from the reference genome. Presence/absence variation analyses reveal substantial gene loss and intense negative selection of genes and promoters during tomato domestication and improvement. Lost or negatively selected genes are enriched for important traits, especially disease resistance. We identify a rare allele in the TomLoxC promoter selected against during domestication. Quantitative trait locus mapping and analysis of transgenic plants reveal a role for TomLoxC in apocarotenoid production, which contributes to desirable tomato flavor. In orange-stage fruit, accessions harboring both the rare and common TomLoxC alleles (heterozygotes) have higher TomLoxC expression than those homozygous for either and are resurgent in modern tomatoes. The tomato pan-genome adds depth and completeness to the reference genome, and is useful for future biological discovery and breeding.}, } @article {pmid31077301, year = {2019}, author = {Swanson, DM and Lien, T and Bergholtz, H and Sørlie, T and Frigessi, A}, title = {A Bayesian two-way latent structure model for genomic data integration reveals few pan-genomic cluster subtypes in a breast cancer cohort.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {23}, pages = {4886-4897}, doi = {10.1093/bioinformatics/btz381}, pmid = {31077301}, issn = {1367-4811}, mesh = {Algorithms ; Bayes Theorem ; *Breast Neoplasms ; Cluster Analysis ; Cohort Studies ; Genomics ; Humans ; }, abstract = {MOTIVATION: Unsupervised clustering is important in disease subtyping, among having other genomic applications. As genomic data has become more multifaceted, how to cluster across data sources for more precise subtyping is an ever more important area of research. 
Many of the methods proposed so far, including iCluster and Cluster of Cluster Assignments (COCAs), make an unreasonable assumption of a common clustering across all data sources, and those that do not are fewer and tend to be computationally intensive.

RESULTS: We propose a Bayesian parametric model for integrative, unsupervised clustering across data sources. In our two-way latent structure model, samples are clustered in relation to each specific data source, distinguishing it from methods like COCAs and iCluster, but cluster labels have across-dataset meaning, allowing cluster information to be shared between data sources. A common scaling across data sources is not required, and inference is obtained by a Gibbs Sampler, which we improve with a warm start strategy and modified density functions to robustify and speed convergence. Posterior interpretation allows for inference on common clusterings occurring among subsets of data sources. An interesting statistical formulation of the model results in sampling from closed-form posteriors despite incorporation of a complex latent structure. We fit the model with Gaussian and more general densities, which influences the degree of across-dataset cluster label sharing. Uniquely among integrative clustering models, our formulation makes no nestedness assumptions of samples across data sources so that a sample missing data from one genomic source can be clustered according to its existing data sources. We apply our model to a Norwegian breast cancer cohort of ductal carcinoma in situ and invasive tumors, comprised of somatic copy-number alteration, methylation and expression datasets. We find enrichment in the Her2 subtype and ductal carcinoma among those observations exhibiting greater cluster correspondence across expression and CNA data. In general, there are few pan-genomic clusterings, suggesting that models assuming a common clustering across genomic data sources might yield misleading results.

AVAILABILITY AND IMPLEMENTATION: The model is implemented in an R package called twl ('two-way latent'), available on CRAN. Data for analysis are available within the R package.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid31076745, year = {2019}, author = {Cruz-Morales, P and Orellana, CA and Moutafis, G and Moonen, G and Rincon, G and Nielsen, LK and Marcellin, E}, title = {Revisiting the Evolution and Taxonomy of Clostridia, a Phylogenomic Update.}, journal = {Genome biology and evolution}, volume = {11}, number = {7}, pages = {2035-2044}, pmid = {31076745}, issn = {1759-6653}, mesh = {Clostridium/*genetics ; DNA, Bacterial/*genetics ; Phylogeny ; }, abstract = {Clostridium is a large genus of obligate anaerobes belonging to the Firmicutes phylum of bacteria, most of which have a Gram-positive cell wall structure. The genus includes significant human and animal pathogens, causative of potentially deadly diseases such as tetanus and botulism. Despite their relevance and many studies suggesting that they are not a monophyletic group, the taxonomy of the group has largely been neglected. Currently, species belonging to the genus are placed in the unnatural order defined as Clostridiales, which includes the class Clostridia. Here, we used genomic data from 779 strains to study the taxonomy and evolution of the group. This analysis allowed us to 1) confirm that the group is composed of more than one genus, 2) detect major differences between pathogens classified as a single species within the group of authentic Clostridium spp. (sensu stricto), 3) identify inconsistencies between taxonomy and toxin evolution that reflect on the pervasive misclassification of strains, and 4) identify differential traits within central metabolism of members of what has been defined earlier and confirmed by us as cluster I. 
Our analysis shows that the current taxonomic classification of Clostridium species hinders the prediction of functions and traits, suggests a new classification for this fascinating class of bacteria, and highlights the importance of phylogenomics for taxonomic studies.}, } @article {pmid31068915, year = {2019}, author = {Park, SC and Lee, K and Kim, YO and Won, S and Chun, J}, title = {Large-Scale Genomics Reveals the Genetic Characteristics of Seven Species and Importance of Phylogenetic Distance for Estimating Pan-Genome Size.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {834}, pmid = {31068915}, issn = {1664-302X}, abstract = {For more than a decade, pan-genome analysis has been applied as an effective method for explaining the genetic contents variation of prokaryotic species. However, genomic characteristics and detailed structures of gene pools have not been fully clarified, because most studies have used a small number of genomes. Here, we constructed pan-genomes of seven species in order to elucidate variations in the genetic contents of >27,000 genomes belonging to Streptococcus pneumoniae, Staphylococcus aureus subsp. aureus, Salmonella enterica subsp. enterica, Escherichia coli and Shigella spp., Mycobacterium tuberculosis complex, Pseudomonas aeruginosa, and Acinetobacter baumannii. This work showed the pan-genomes of all seven species has open property. Additionally, systematic evaluation of the characteristics of their pan-genome revealed that phylogenetic distance provided valuable information for estimating the parameters for pan-genome size among several models including Heaps' law. 
Our results provide a better understanding of the species and a solution to minimize sampling biases associated with genome-sequencing preferences for pathogenic strains.}, } @article {pmid31053324, year = {2019}, author = {van der Nest, MA and Steenkamp, ET and Roodt, D and Soal, NC and Palmer, M and Chan, WY and Wilken, PM and Duong, TA and Naidoo, K and Santana, QC and Trollip, C and De Vos, L and van Wyk, S and McTaggart, AR and Wingfield, MJ and Wingfield, BD}, title = {Genomic analysis of the aggressive tree pathogen Ceratocystis albifundus.}, journal = {Fungal biology}, volume = {123}, number = {5}, pages = {351-363}, doi = {10.1016/j.funbio.2019.02.002}, pmid = {31053324}, issn = {1878-6146}, mesh = {Africa ; Ascomycota/*genetics ; Computational Biology ; Evolution, Molecular ; Gene Order ; Genetic Variation ; *Genome, Fungal ; Genomics ; High-Throughput Nucleotide Sequencing ; Interspersed Repetitive Sequences ; Plant Diseases/*microbiology ; Synteny ; Trees/*microbiology ; }, abstract = {The overall goal of this study was to determine whether the genome of an important plant pathogen in Africa, Ceratocystis albifundus, is structured into subgenomic compartments, and if so, to establish how these compartments are distributed across the genome. For this purpose, the publicly available genome of C. albifundus was complemented with the genome sequences for four additional isolates using the Illumina HiSeq platform. In addition, a reference genome for one of the individuals was assembled using both PacBio and Illumina HiSeq technologies. Our results showed a high degree of synteny between the five genomes, although several regions lacked detectable long-range synteny. These regions were associated with the presence of accessory genes, lower genetic similarity, variation in read-map depth, as well as transposable elements and genes associated with host-pathogen interactions (e.g. effectors and CAZymes). 
Such patterns are regarded as hallmarks of accelerated evolution, particularly of accessory subgenomic compartments in fungal pathogens. Our findings thus showed that the genome of C. albifundus is made-up of core and accessory subgenomic compartments, which is an important step towards characterizing its pangenome. This study also highlights the value of comparative genomics for understanding mechanisms that may underlie and influence the biology and evolution of pathogens.}, } @article {pmid31046679, year = {2019}, author = {Lorentzen, MP and Campbell-Sills, H and Jorgensen, TS and Nielsen, TK and Coton, M and Coton, E and Hansen, L and Lucas, PM}, title = {Expanding the biodiversity of Oenococcus oeni through comparative genomics of apple cider and kombucha strains.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {330}, pmid = {31046679}, issn = {1471-2164}, support = {643063//H2020 Marie Skłodowska-Curie Actions, ITN MICROWINE/ ; 8960//Villum Fonden (DK), Project AMPHICOP/ ; }, mesh = {*Biodiversity ; *Genome, Bacterial ; *Kombucha Tea ; Malus/*chemistry ; Oenococcus/*classification/*genetics ; Phylogeny ; Whole Genome Sequencing ; *Wine ; }, abstract = {BACKGROUND: Oenococcus oeni is a lactic acid bacteria species adapted to the low pH, ethanol-rich environments of wine and cider fermentation, where it performs the crucial role of malolactic fermentation. It has a small genome and has lost the mutS-mutL DNA mismatch repair genes, making it a hypermutable and highly specialized species. Two main lineages of strains, named groups A and B, have been described to date, as well as other subgroups correlated to different types of wines or regions. A third group "C" has also been hypothesized based on sequence analysis, but it remains controversial. In this study we have elucidated the species population structure by sequencing 14 genomes of new strains isolated from cider and kombucha and performing comparative genomics analyses.

RESULTS: Sequence-based phylogenetic trees confirmed a population structure of 4 clades: The previously identified A and B, a third group "C" consisting of the new cider strains and a small subgroup of wine strains previously attributed to group B, and a fourth group "D" exclusively represented by kombucha strains. A pair of complete genomes from group C and D were compared to the circularized O. oeni PSU-1 strain reference genome and no genomic rearrangements were found. Phylogenetic trees, K-means clustering and pangenome gene clusters evidenced the existence of smaller, specialized subgroups of strains. Using the pangenome, genomic differences in stress resistance and biosynthetic pathways were found to uniquely distinguish the C and D clades.

CONCLUSIONS: The obtained results, including the additional cider and kombucha strains, firmly established the O. oeni population structure. Group C does not appear as fully domesticated as group A to wine, but showed several unique patterns which may be due to ongoing specialization to the cider environment. Group D was shown to be the most divergent member of O. oeni to date, appearing as the closest to a pre-domestication state of the species.}, } @article {pmid31030454, year = {2019}, author = {Wang, J and Xing, J and Lu, J and Sun, Y and Zhao, J and Miao, S and Xiong, Q and Zhang, Y and Zhang, G}, title = {Complete Genome Sequencing of Bacillus velezensis WRN014, and Comparison with Genome Sequences of other Bacillus velezensis Strains.}, journal = {Journal of microbiology and biotechnology}, volume = {29}, number = {5}, pages = {794-808}, doi = {10.4014/jmb.1901.01040}, pmid = {31030454}, issn = {1738-8872}, mesh = {Bacillus/classification/*genetics/isolation & purification ; Base Sequence ; China ; Chromosome Mapping ; DNA, Bacterial/analysis/genetics ; Genes, Bacterial/*genetics ; Genetic Variation ; Genome, Bacterial ; Multigene Family ; Musa/microbiology ; Mutation ; Phylogeny ; Plant Development ; Plant Diseases/microbiology ; Polymorphism, Single Nucleotide ; Secondary Metabolism/genetics ; Sequence Analysis, DNA ; Whole Genome Sequencing/*methods ; }, abstract = {Bacillus velezensis strain WRN014 was isolated from banana fields in Hainan, China. Bacillus velezensis is an important member of the plant growth-promoting rhizobacteria (PGPR) which can enhance plant growth and control soil-borne disease. The complete genome of Bacillus velezensis WRN014 was sequenced by combining Illumina Hiseq 2500 system and Pacific Biosciences SMRT high-throughput sequencing technologies. Then, the genome of Bacillus velezensis WRN014, together with 45 other completed genome sequences of the Bacillus velezensis strains, were comparatively studied. 
The genome of Bacillus velezensis WRN014 was 4,063,541bp in length and contained 4,062 coding sequences, 9 genomic islands and 13 gene clusters. The results of comparative genomic analysis provide evidence that (i) The 46 Bacillus velezensis strains formed 2 obviously closely related clades in phylogenetic trees. (ii) The pangenome in this study is open and is increasing with the addition of new sequenced genomes. (iii) Analysis of single nucleotide polymorphisms (SNPs) revealed local diversification of the 46 Bacillus velezensis genomes. Surprisingly, SNPs were not evenly distributed throughout the whole genome. (iv) Analysis of gene clusters revealed that rich gene clusters spread over Bacillus velezensis strains and some gene clusters are conserved in different strains. This study reveals that the strain WRN014 and other Bacillus velezensis strains have potential to be used as PGPR and biopesticide.}, } @article {pmid31028022, year = {2019}, author = {Thompson, LR and Haroon, MF and Shibl, AA and Cahill, MJ and Ngugi, DK and Williams, GJ and Morton, JT and Knight, R and Goodwin, KD and Stingl, U}, title = {Red Sea SAR11 and Prochlorococcus Single-Cell Genomes Reflect Globally Distributed Pangenomes.}, journal = {Applied and environmental microbiology}, volume = {85}, number = {13}, pages = {}, pmid = {31028022}, issn = {1098-5336}, mesh = {Alphaproteobacteria/*genetics ; *Genome, Bacterial ; Indian Ocean ; *Metagenome ; Phylogeny ; Prochlorococcus/*genetics ; Seawater/*microbiology ; }, abstract = {Evidence suggests many marine bacteria are cosmopolitan, with widespread but sparse strains poised to seed abundant populations under conducive growth conditions. However, studies supporting this "microbial seed bank" hypothesis have analyzed taxonomic marker genes rather than whole genomes/metagenomes, leaving open the possibility that disparate ocean regions harbor endemic gene content. 
The Red Sea is isolated geographically from the rest of the ocean and has a combination of high irradiance, high temperature, and high salinity that is unique among the oceans; we therefore asked whether it harbors endemic gene content. We sequenced and assembled single-cell genomes of 21 SAR11 (subclades Ia, Ib, Id, and II) and 5 Prochlorococcus (ecotype HLII) samples from the Red Sea and combined them with globally sourced reference genomes to cluster genes into ortholog groups (OGs). Ordination of OG composition could distinguish clades, including phylogenetically cryptic Prochlorococcus ecotypes LLII and LLIII. Compared with reference genomes, 1% of Prochlorococcus and 17% of SAR11 OGs were unique to the Red Sea genomes (RS-OGs). Most (83%) RS-OGs had no annotated function, but 65% of RS-OGs were expressed in diel Red Sea metatranscriptomes, suggesting they are functional. Searching Tara Oceans metagenomes, RS-OGs were as likely to be found as non-RS-OGs; nevertheless, Red Sea and other warm samples could be distinguished from cooler samples using the relative abundances of OGs. The results suggest that the prevalence of OGs in these surface ocean bacteria is largely cosmopolitan, with differences in population metagenomes manifested by differences in relative abundance rather than complete presence/absence of OGs. IMPORTANCE: Studies have shown that as we sequence seawater from a selected environment deeper and deeper, we approach finding every bacterial taxon known for the ocean as a whole. However, such studies have focused on taxonomic marker genes rather than on whole genomes, raising the possibility that the lack of endemism results from the method of investigation. We took a geographically isolated water body, the Red Sea, and sequenced single cells from it. We compared those single-cell genomes to available genomes from around the ocean and to ocean-spanning metagenomes. 
We showed that gene ortholog groups found in Red Sea genomes but not in other genomes are nevertheless common across global ocean metagenomes. These results suggest that Baas Becking's hypothesis "everything is everywhere, but the environment selects" also applies to gene ortholog groups. This widely dispersed functional diversity may give oceanic microbial communities the functional capacity to respond rapidly to changing conditions.}, } @article {pmid31024592, year = {2019}, author = {Dillon, MM and Almeida, RND and Laflamme, B and Martel, A and Weir, BS and Desveaux, D and Guttman, DS}, title = {Molecular Evolution of Pseudomonas syringae Type III Secreted Effector Proteins.}, journal = {Frontiers in plant science}, volume = {10}, number = {}, pages = {418}, pmid = {31024592}, issn = {1664-462X}, abstract = {Diverse Gram-negative pathogens like Pseudomonas syringae employ type III secreted effector (T3SE) proteins as primary virulence factors that combat host immunity and promote disease. T3SEs can also be recognized by plant hosts and activate an effector triggered immune (ETI) response that shifts the interaction back toward plant immunity. Consequently, T3SEs are pivotal in determining the virulence potential of individual P. syringae strains, and ultimately help to restrict P. syringae pathogens to a subset of potential hosts that are unable to recognize their repertoires of T3SEs. While a number of effector families are known to be present in the P. syringae species complex, one of the most persistent challenges has been documenting the complex variation in T3SE contents across a diverse collection of strains. Using the entire pan-genome of 494 P. syringae strains isolated from more than 100 hosts, we conducted a global analysis of all known and putative T3SEs. We identified a total of 14,613 putative T3SEs, 4,636 of which were unique at the amino acid level, and show that T3SE repertoires of different P. 
syringae strains vary dramatically, even among strains isolated from the same hosts. We also find substantial diversification within many T3SE families, and in many cases find strong signatures of positive selection. Furthermore, we identify multiple gene gain and loss events for several families, demonstrating an important role of horizontal gene transfer (HGT) in the evolution of P. syringae T3SEs. These analyses provide insight into the evolutionary history of P. syringae T3SEs as they co-evolve with the host immune system, and dramatically expand the database of P. syringae T3SEs alleles.}, } @article {pmid31014247, year = {2019}, author = {Roach, R and Mann, R and Gambley, CG and Chapman, T and Shivas, RG and Rodoni, B}, title = {Genomic sequence analysis reveals diversity of Australian Xanthomonas species associated with bacterial leaf spot of tomato, capsicum and chilli.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {310}, pmid = {31014247}, issn = {1471-2164}, mesh = {*Biodiversity ; Capsicum/*microbiology ; Genome, Bacterial/genetics ; *Genomics ; Solanum lycopersicum/*microbiology ; Phylogeny ; Plant Diseases/*microbiology ; Plasmids/genetics ; Polymorphism, Single Nucleotide ; Xanthomonas/classification/*genetics/*physiology ; }, abstract = {BACKGROUND: The genetic diversity in Australian populations of Xanthomonas species associated with bacterial leaf spot in tomato, capsicum and chilli were compared to worldwide bacterial populations. The aim of this study was to confirm the identities of these Australian Xanthomonas species and classify them in comparison to overseas isolates. Analysis of whole genome sequence allows for the investigation of bacterial population structure, pathogenicity and gene exchange, resulting in better management strategies and biosecurity.

RESULTS: Phylogenetic analysis of the core genome alignments and SNP data grouped strains in distinct clades. Patterns observed in average nucleotide identity, pan genome structure, effector and carbohydrate active enzyme profiles reflected the whole genome phylogeny and highlight taxonomic issues in X. perforans and X. euvesicatoria. Circular sequences with similarity to previously characterised plasmids were identified, and plasmids of similar sizes were isolated. Potential false positive and false negative plasmid assemblies were discussed. Effector patterns that may influence virulence on host plant species were analysed in pathogenic and non-pathogenic xanthomonads.

CONCLUSIONS: The phylogeny presented here confirmed X. vesicatoria, X. arboricola, X. euvesicatoria and X. perforans and a clade of an uncharacterised Xanthomonas species shown to be genetically distinct from all other strains of this study. The taxonomic status of X. perforans and X. euvesicatoria as one species is discussed in relation to whole genome phylogeny and phenotypic traits. The patterns evident in enzyme and plasmid profiles indicate worldwide exchange of genetic material with the potential to introduce new virulence elements into local bacterial populations.}, } @article {pmid31009331, year = {2019}, author = {Rao, RT and Sivakumar, N and Jayakumar, K}, title = {Analyses of Livestock-Associated Staphylococcus aureus Pan-Genomes Suggest Virulence Is Not Primary Interest in Evolution of Its Genome.}, journal = {Omics : a journal of integrative biology}, volume = {23}, number = {4}, pages = {224-236}, doi = {10.1089/omi.2019.0005}, pmid = {31009331}, issn = {1557-8100}, mesh = {Computational Biology ; Drug Discovery ; Genome, Bacterial/genetics ; Genomics ; Staphylococcus aureus/*genetics/*pathogenicity ; Virulence/genetics ; }, abstract = {Staphylococcus aureus is not only part of normal flora but also an opportunistic pathogen relevant to microbial genomics, public health, and veterinary medicine. In addition to being a well-known human pathogen, S. aureus causes various infections in economically important livestock animals such as cows, sheep, goats, and pigs. There are very few studies that have examined the pan-genome of S. aureus or the host-specific strains' pan-genomes. We report on livestock-associated S. aureus' (LA-SA) pan-genome and suggest that virulence is not the primary interest in evolution of its genome. LA-SA' complete genomes were retrieved from the NCBI and pan-genome was constructed by high-speed Roary pipeline. The pan-genome size was 4637 clusters, whereas 42.46% of the pan-genome was associated with the core genome. 
We found 1268 genes were associated with the strain-unique genome, and the remaining 1432 cluster with the accessory genome. COG (clusters of orthologous group of proteins) analysis of the core genes revealed 34% of clusters related to metabolism responsible for amino acid and inorganic ion transport (COG categories E and P), followed by carbohydrate metabolism (category G). Virulent gene analysis revealed the core genes responsible for antiphagocytosis and iron uptake. The fluidity of pan-genome was calculated as 0.082 ± 0.025. Importantly, the positive selection analysis suggested a slower rate of evolution among the LA-SA genomes. We call for comparative microbial and pan-genome research between human and LA-SA that can help further understand the evolution of virulence and thus inform future microbial diagnostics and drug discovery.}, } @article {pmid31006539, year = {2019}, author = {Morice-Picard, F}, title = {[Genetics and dermatology].}, journal = {Annales de dermatologie et de venereologie}, volume = {146}, number = {4}, pages = {326-339}, doi = {10.1016/j.annder.2019.02.009}, pmid = {31006539}, issn = {0151-9638}, mesh = {DNA Mutational Analysis ; Early Diagnosis ; Early Medical Intervention ; Genetic Counseling ; Genetic Predisposition to Disease/genetics ; Genetic Testing ; Humans ; Mosaicism ; Neurodegenerative Diseases/diagnosis/genetics/therapy ; Risk Factors ; *Sequence Analysis, DNA ; Skin Diseases, Genetic/diagnosis/*genetics/therapy ; Skin Neoplasms/diagnosis/genetics/therapy ; }, abstract = {Many types of genodermatosis exist, with numerous modes of transmission. The development of molecular genetic methods, in particular the most recent sequencing techniques, can be used to identify an increasing number of genes involved in these forms of genodermatosis while providing confirmation or more details regarding clinical diagnosis. Thanks to this approach, it is possible to determine risk of recurrence and to formulate an antenatal strategy. 
These technologies have led to improved molecular definition and to a better understanding of the physiopathological mechanisms involved in different genodermatoses such as bullous epidermolysis, keratinisation disorders, pigmentation disorders, potentially tumoral conditions, and epidermal and pilar dysplasia. The large amount of information provided by high-throughput sequencing makes it possible to study modifying genes as well as genotype-phenotype correlations. However, this genetic information in its turn poses problems of interpretation and of control of the resulting data. The use of genetics in dermatology for the purposes of diagnosis or research requires a consultation to provide patients with information regarding the genetic tests involved and the potential consequences thereof for them and their families. Furthermore, with pangenomic approaches there is a higher probability of fortuitous discovery of abnormalities such as variants associated with risks predisposing to cancer or neurodegenerative disease. Collaboration between dermatologists and geneticists enables optimisation of patient management in terms of diagnosis and genetic counselling in the event of such rare diseases. Therapeutic applications are beginning to be developed. 
The scope of therapeutic application includes gene therapy, replacement therapy (enzyme therapy) and targeted therapy.}, } @article {pmid31004458, year = {2019}, author = {Liu, J and Zeng, Q and Wang, M and Cheng, A and Liu, M and Zhu, D and Chen, S and Jia, R and Zhao, XX and Wu, Y and Yang, Q and Zhang, S and Liu, Y and Yu, Y and Zhang, L and Chen, X}, title = {Comparative genome-scale modelling of the pathogenic Flavobacteriaceae species Riemerella anatipestifer in China.}, journal = {Environmental microbiology}, volume = {21}, number = {8}, pages = {2836-2851}, doi = {10.1111/1462-2920.14635}, pmid = {31004458}, issn = {1462-2920}, support = {CARS-42-17//China Agricultural Research System/International ; 2017YFD0500800//National Key Research and Development Program of China/International ; CARS-SVDIP//Sichuan Veterinary Medicine and Drug Innovation Group of China Agricultural Research System/International ; 2016JPT0004//Special Fund for Key Laboratory of Animal Disease and Human Health of Sichuan Province/International ; }, mesh = {Animals ; China ; Genetic Variation ; Genome, Bacterial ; Genomics ; Models, Genetic ; Multilocus Sequence Typing ; Riemerella/*genetics/pathogenicity ; Virulence ; }, abstract = {Riemerella anatipestifer (RA) is a gram-negative bacterium that has a high potential to infect waterfowl. Although more and more genomes of RA have been generated, comparative genomic analysis of RA still remains at the level of individual species. In this study, we analysed the pan-genome of 27 RA virulent isolates to reveal the intraspecies genomic diversity from various aspects. The multi-locus sequence typing (MLST) analysis suggests that the geographic origin of R. anatipestifer is Guangdong province, China. Results of pan-genome analysis revealed an open pan-genome for all 27 species with the sizes of 2967 genes. We identified 387 genes among 555 unique genes originated by horizontal gene transfer. 
Further studies showed 204 strain-specific HGT genes were predicted as virulent proteins. Screening the 1113 core genes in RA through subtractive genomic approach, 70 putative vaccine targets out of 125 non-cytoplasmic proteins have been predicted. Further analysis of these non A. platyrhynchos homologous proteins predicted that 56 essential proteins as drug target with more interaction partners were involved in unique metabolic pathways of RA. In conclusion, the present study indicated the essence and the diversity of RA and also provides useful information for identification of vaccine and drugs candidates in future.}, } @article {pmid30992351, year = {2019}, author = {Knight, DR and Kullin, B and Androga, GO and Barbut, F and Eckert, C and Johnson, S and Spigaglia, P and Tateda, K and Tsai, PJ and Riley, TV}, title = {Evolutionary and Genomic Insights into Clostridioides difficile Sequence Type 11: a Diverse Zoonotic and Antimicrobial-Resistant Lineage of Global One Health Importance.}, journal = {mBio}, volume = {10}, number = {2}, pages = {}, pmid = {30992351}, issn = {2150-7511}, support = {I01 BX002449/BX/BLRD VA/United States ; }, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Asia ; Australia ; Clostridioides difficile/drug effects/*genetics ; Clostridium Infections/transmission ; Drug Resistance, Bacterial/*genetics ; Europe ; *Evolution, Molecular ; *Genome, Bacterial ; Global Health ; Humans ; Myoviridae/genetics ; North America ; *One Health ; Phylogeny ; Prophages/genetics ; Ribotyping ; Siphoviridae/genetics ; Whole Genome Sequencing ; Zoonoses/microbiology ; }, abstract = {Clostridioides difficile (Clostridium difficile) sequence type 11 (ST11) is well established in production animal populations worldwide and contributes considerably to the global burden of C. difficile infection (CDI) in humans. 
Increasing evidence of shared ancestry and genetic overlap of PCR ribotype 078 (RT078), the most common ST11 sublineage, between human and animal populations suggests that CDI may be a zoonosis. We performed whole-genome sequencing (WGS) on a collection of 207 ST11 and closely related ST258 isolates of human and veterinary/environmental origin, comprising 16 RTs collected from Australia, Asia, Europe, and North America. Core genome single nucleotide variant (SNV) analysis identified multiple intraspecies and interspecies clonal groups (isolates separated by ≤2 core genome SNVs) in all the major RT sublineages: 078, 126, 127, 033, and 288. Clonal groups comprised isolates spread across different states, countries, and continents, indicative of reciprocal long-range dissemination and possible zoonotic/anthroponotic transmission. Antimicrobial resistance genotypes and phenotypes varied across host species, geographic regions, and RTs and included macrolide/lincosamide resistance (Tn6194 [ermB]), tetracycline resistance (Tn6190 [tetM] and Tn6164 [tet44]), and fluoroquinolone resistance (gyrA/B mutations), as well as numerous aminoglycoside resistance cassettes. The population was defined by a large "open" pan-genome (10,378 genes), a remarkably small core genome of 2,058 genes (only 19.8% of the gene pool), and an accessory genome containing a large and diverse collection of important prophages of the Siphoviridae and Myoviridae. This study provides novel insights into strain relatedness and genetic variability of C. difficile ST11, a lineage of global One Health importance. IMPORTANCE: Historically, Clostridioides difficile (Clostridium difficile) has been associated with life-threatening diarrhea in hospitalized patients. Increasing rates of C. difficile infection (CDI) in the community suggest exposure to C. difficile reservoirs outside the hospital, including animals, the environment, or food. C. 
difficile sequence type 11 (ST11) is known to infect/colonize livestock worldwide and comprises multiple ribotypes, many of which cause disease in humans, suggesting CDI may be a zoonosis. Using high-resolution genomics, we investigated the evolution and zoonotic potential of ST11 and a new closely related ST258 lineage sourced from diverse origins. We found multiple intra- and interspecies clonal transmission events in all ribotype sublineages. Clones were spread across multiple continents, often without any health care association, indicative of zoonotic/anthroponotic long-range dissemination in the community. ST11 possesses a massive pan-genome and numerous clinically important antimicrobial resistance elements and prophages, which likely contribute to the success of this globally disseminated lineage of One Health importance.}, } @article {pmid30986243, year = {2019}, author = {Wyres, KL and Wick, RR and Judd, LM and Froumine, R and Tokolyi, A and Gorrie, CL and Lam, MMC and Duchêne, S and Jenney, A and Holt, KE}, title = {Distinct evolutionary dynamics of horizontal gene transfer in drug resistant and virulent clones of Klebsiella pneumoniae.}, journal = {PLoS genetics}, volume = {15}, number = {4}, pages = {e1008114}, pmid = {30986243}, issn = {1553-7404}, mesh = {Bacterial Capsules/genetics/metabolism ; Bacteriophages/genetics ; Cross Infection/drug therapy/microbiology ; Drug Resistance, Bacterial/*genetics ; Drug Resistance, Multiple, Bacterial/genetics ; *Evolution, Molecular ; *Gene Transfer, Horizontal ; Genetic Variation ; Genome, Bacterial ; Humans ; Klebsiella Infections/drug therapy/microbiology ; Klebsiella pneumoniae/drug effects/*genetics/pathogenicity ; Lipopolysaccharides/biosynthesis/genetics ; Models, Genetic ; Plasmids/genetics ; Virulence/*genetics ; }, abstract = {Klebsiella pneumoniae has emerged as an important cause of two distinct public health threats: multi-drug resistant (MDR) healthcare-associated infections and drug susceptible 
community-acquired invasive infections. These pathotypes are generally associated with two distinct subsets of K. pneumoniae lineages or 'clones' that are distinguished by the presence of acquired resistance genes and several key virulence loci. Genomic evolutionary analyses of the most notorious MDR and invasive community-associated ('hypervirulent') clones indicate differences in terms of chromosomal recombination dynamics and capsule polysaccharide diversity, but it remains unclear if these differences represent generalised trends. Here we leverage a collection of >2200 K. pneumoniae genomes to identify 28 common clones (n ≥ 10 genomes each), and perform the first genomic evolutionary comparison. Eight MDR and 6 hypervirulent clones were identified on the basis of acquired resistance and virulence gene prevalence. Chromosomal recombination, surface polysaccharide locus diversity, pan-genome, plasmid and phage dynamics were characterised and compared. The data showed that MDR clones were highly diverse, with frequent chromosomal recombination generating extensive surface polysaccharide locus diversity. Additional pan-genome diversity was driven by frequent acquisition/loss of both plasmids and phage. In contrast, chromosomal recombination was rare in the hypervirulent clones, which also showed a significant reduction in pan-genome diversity, largely driven by a reduction in plasmid diversity. Hence the data indicate that hypervirulent clones may be subject to some sort of constraint for horizontal gene transfer that does not apply to the MDR clones. Our findings are relevant for understanding the risk of emergence of individual K. pneumoniae strains carrying both virulence and acquired resistance genes, which have been increasingly reported and cause highly virulent infections that are extremely difficult to treat. 
Specifically, our data indicate that MDR clones pose the greatest risk, because they are more likely to acquire virulence genes than hypervirulent clones are to acquire resistance genes.}, } @article {pmid30975079, year = {2019}, author = {Du, Y and Ma, J and Yin, Z and Liu, K and Yao, G and Xu, W and Fan, L and Du, B and Ding, Y and Wang, C}, title = {Comparative genomic analysis of Bacillus paralicheniformis MDJK30 with its closely related species reveals an evolutionary relationship between B. paralicheniformis and B. licheniformis.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {283}, pmid = {30975079}, issn = {1471-2164}, support = {2017YFD0200804//the National Key Research and Development Program of China/ ; 31700094//National Natural Science Foundation of China/ ; 31770115//National Natural Science Foundation of China/ ; 31600090//National Natural Science Foundation of China/ ; 2014BAD16B02//the National Science and Technology Pillar Program of China/ ; SYL2017XTTD03//the funds of Shandong "Double Tops" Program/ ; }, mesh = {Adaptation, Physiological/genetics ; Bacillus/*genetics/metabolism/physiology ; *Evolution, Molecular ; *Genomics ; Multigene Family/genetics ; Phylogeny ; }, abstract = {BACKGROUND: Members of the genus Bacillus are important plant growth-promoting rhizobacteria that serve as biocontrol agents. Bacillus paralicheniformis MDJK30 is a PGPR isolated from the peony rhizosphere and can suppress plant-pathogenic bacteria and fungi. To further uncover the genetic mechanism of the plant growth-promoting traits of MDJK30 and its closely related strains, we used comparative genomics to provide insights into the genetic diversity and evolutionary relationship between B. paralicheniformis and B. licheniformis.

RESULTS: A comparative genomics analysis based on B. paralicheniformis MDJK30 and 55 other previously reported Bacillus strains was performed. The evolutionary position of MDJK30 and the evolutionary relationship between B. paralicheniformis and B. licheniformis were evaluated by studying the phylogeny of the core genomes, a population structure analysis and ANI results. Comparative genomic analysis revealed various features of B. paralicheniformis that contribute to its commensal lifestyle in the rhizosphere, including an open pan-genome, a diversity of transport and the metabolism of the carbohydrates and amino acids. There are notable differences in the numbers and locations of the insertion sequences, prophages, genomic islands and secondary metabolic synthase operons between B. paralicheniformis and B. licheniformis. In particular, we found most gene clusters of Fengycin, Bacitracin and Lantipeptide were only present in B. paralicheniformis and were obtained by horizontal gene transfer (HGT), and these clusters may be used as genetic markers for distinguishing B. paralicheniformis and B. licheniformis.

CONCLUSIONS: This study reveals that MDJK30 and the other strains of lineage paralicheniformis present plant growth-promoting traits at the genetic level and can be developed and commercially formulated in agriculture as PGPR. Core genome phylogenies and population structure analysis have proven to be powerful tools for differentiating B. paralicheniformis and B. licheniformis. Comparative genomic analyses illustrate the genetic differences between the paralicheniformis-licheniformis group with respect to rhizosphere adaptation.}, } @article {pmid30963617, year = {2019}, author = {Vakirlis, N and Monerawela, C and McManus, G and Ribeiro, O and McLysaght, A and James, T and Bond, U}, title = {Evolutionary journey and characterisation of a novel pan-gene associated with beer strains of Saccharomyces cerevisiae.}, journal = {Yeast (Chichester, England)}, volume = {36}, number = {7}, pages = {425-437}, doi = {10.1002/yea.3391}, pmid = {30963617}, issn = {1097-0061}, support = {764364//European Commission, Marie Skłodowska-Curie Innovative Training Network award/International ; 1592 award//Trinity College Dublin/International ; }, mesh = {Beer/*microbiology ; Cell Membrane/metabolism ; Chromosomes, Fungal/genetics ; Evolution, Molecular ; Fungal Proteins/*genetics/metabolism ; Gene Deletion ; Gene Expression ; Gene Transfer, Horizontal ; Genome, Fungal/genetics ; Open Reading Frames ; Saccharomyces/classification/genetics/growth & development/isolation & purification ; Saccharomyces cerevisiae/classification/*genetics/growth & development/*isolation & purification ; }, abstract = {The sequencing of over a thousand Saccharomyces cerevisiae genomes revealed a complex pangenome. Over one third of the discovered genes are not present in the S. cerevisiae core genome but instead are often restricted to a subset of yeast isolates and thus may be important for adaptation to specific environmental niches. 
We refer to these genes as "pan-genes," being part of the pangenome but not the core genome. Here, we describe the evolutionary journey and characterisation of a novel pan-gene, originally named hypothetical (HYPO) open-reading frame. Phylogenetic analysis reveals that HYPO has been predominantly retained in S. cerevisiae strains associated with brewing but has been repeatedly lost in most other fungal species during evolution. There is also evidence that HYPO was horizontally transferred at least once, from S. cerevisiae to Saccharomyces paradoxus. The phylogenetic analysis of HYPO exemplifies the complexity and intricacy of evolutionary trajectories of genes within the S. cerevisiae pangenome. To examine possible functions for Hypo, we overexpressed a HYPO-GFP fusion protein in both S. cerevisiae and Saccharomyces pastorianus. The protein localised to the plasma membrane where it accumulated initially in distinct foci. Time-lapse fluorescent imaging revealed that when cells are grown in wort, Hypo-gfp fluorescence spreads throughout the membrane during cell growth. The overexpression of Hypo-gfp in S. cerevisiae or S. 
pastorianus strains did not significantly alter cell growth in medium-containing glucose, maltose, maltotriose, or wort at different concentrations.}, } @article {pmid30957837, year = {2019}, author = {Quijada, NM and Rodríguez-Lázaro, D and Eiros, JM and Hernández, M}, title = {TORMES: an automated pipeline for whole bacterial genome analysis.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {21}, pages = {4207-4212}, doi = {10.1093/bioinformatics/btz220}, pmid = {30957837}, issn = {1367-4811}, mesh = {*Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Multilocus Sequence Typing ; *Software ; Whole Genome Sequencing ; }, abstract = {MOTIVATION: The progress of High Throughput Sequencing (HTS) technologies and the reduction in the sequencing costs are such that Whole Genome Sequencing (WGS) could replace many traditional laboratory assays and procedures. Exploiting the volume of data produced by HTS platforms requires substantial computing skills and this is the main bottleneck in the implementation of WGS as a routine laboratory technique. The way in which the vast amount of results are presented to researchers and clinicians with no specialist knowledge of genome sequencing is also a significant issue.

RESULTS: Here we present TORMES, a user-friendly pipeline for WGS analysis of bacteria from any origin generated by HTS on Illumina platforms. TORMES is designed for non-bioinformatician users, and automates the steps required for WGS analysis directly from the raw sequence data: sequence quality filtering, de novo assembly, draft genome ordering against a reference, genome annotation, multi-locus sequence typing (MLST), searching for antibiotic resistance and virulence genes, and pangenome comparisons. Once the analysis is finished, TORMES generates an interactive web-like report that can be opened in any web browser and shared and revised by researchers in a simple manner. TORMES can be run by using very simple commands and represents a quick and easy way to perform WGS analysis.

TORMES is freely available at https://github.com/nmquijada/tormes.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30953542, year = {2019}, author = {Raymond, F and Boissinot, M and Ouameur, AA and Déraspe, M and Plante, PL and Kpanou, SR and Bérubé, È and Huletsky, A and Roy, PH and Ouellette, M and Bergeron, MG and Corbeil, J}, title = {Culture-enriched human gut microbiomes reveal core and accessory resistance genes.}, journal = {Microbiome}, volume = {7}, number = {1}, pages = {56}, pmid = {30953542}, issn = {2049-2618}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacteria/*classification/drug effects/genetics/growth & development ; Bacterial Proteins/genetics ; Bacteriological Techniques/*methods ; *Drug Resistance, Microbial ; Escherichia coli/genetics/growth & development/isolation & purification ; Feces/cytology/microbiology ; Gastrointestinal Microbiome ; Gene Transfer, Horizontal ; Humans ; Metagenomics ; Phylogeny ; Sequence Analysis, DNA/*methods ; }, abstract = {BACKGROUND: Low-abundance microorganisms of the gut microbiome are often referred to as a reservoir for antibiotic resistance genes. Unfortunately, these less-abundant bacteria can be overlooked by deep shotgun sequencing. In addition, it is a challenge to associate the presence of resistance genes with their risk of acquisition by pathogens. In this study, we used liquid culture enrichment of stools to assemble the genome of lower-abundance bacteria from fecal samples. We then investigated the gene content recovered from these culture-enriched and culture-independent metagenomes in relation with their taxonomic origin, specifically antibiotic resistance genes. We finally used a pangenome approach to associate resistance genes with the core or accessory genome of Enterobacteriaceae and inferred their propensity to horizontal gene transfer.

RESULTS: Using culture-enrichment approaches with stools allowed assembly of 187 bacterial species with an assembly size greater than 1 million nucleotides. Of these, 67 were found only in culture-enriched conditions, and 22 only in culture-independent microbiomes. These assembled metagenomes allowed the evaluation of the gene content of specific subcommunities of the gut microbiome. We observed that differentially distributed metabolic enzymes were associated with specific culture conditions and, for the most part, with specific taxa. Gene content differences between microbiomes, for example, antibiotic resistance, were for the most part not associated with metabolic enzymes, but with other functions. We used a pangenome approach to determine if the resistance genes found in Enterobacteriaceae, specifically E. cloacae or E. coli, were part of the core genome or of the accessory genome of this species. In our healthy volunteer cohort, we found that E. cloacae contigs harbored resistance genes that were part of the core genome of the species, while E. coli had a large accessory resistome proximal to mobile elements.

CONCLUSION: Liquid culture of stools contributed to an improved functional and comparative genomics study of less-abundant gut bacteria, specifically those associated with antibiotic resistance. Defining whether a gene is part of the core genome of a species helped in interpreting the genomes recovered from culture-independent or culture-enriched microbiomes.}, } @article {pmid30949149, year = {2019}, author = {Park, CJ and Andam, CP}, title = {Within-Species Genomic Variation and Variable Patterns of Recombination in the Tetracycline Producer Streptomyces rimosus.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {552}, pmid = {30949149}, issn = {1664-302X}, abstract = {Streptomyces rimosus is best known as the primary source of the tetracycline class of antibiotics, most notably oxytetracycline, which have been widely used against many gram-positive and gram-negative pathogens and protozoan parasites. However, despite the medical and agricultural importance of S. rimosus, little is known of its evolutionary history and genome dynamics. In this study, we aim to elucidate the pan-genome characteristics and phylogenetic relationships of 32 S. rimosus genomes. The S. rimosus pan-genome contains more than 22,000 orthologous gene clusters, and approximately 8.8% of these genes constitutes the core genome. A large part of the accessory genome is composed of 9,646 strain-specific genes. S. rimosus exhibits an open pan-genome (decay parameter α = 0.83) and high gene diversity between strains (genomic fluidity φ = 0.12). We also observed strain-level variation in the distribution and abundance of biosynthetic gene clusters (BGCs) and that each individual S. rimosus genome has a unique repertoire of BGCs. 
Lastly, we observed variation in recombination, with some strains donating or receiving DNA more often than others, strains that tend to frequently recombine with specific partners, genes that often experience recombination more than others, and variable sizes of recombined DNA sequences. We conclude that the high levels of inter-strain genomic variation in S. rimosus is partly explained by differences in recombination among strains. These results have important implications on current efforts for natural drug discovery, the ecological role of strain-level variation in microbial populations, and addressing the fundamental question of why microbes have pan-genomes.}, } @article {pmid30930881, year = {2019}, author = {Cho, H and Song, ES and Heu, S and Baek, J and Lee, YK and Lee, S and Lee, SW and Park, DS and Lee, TH and Kim, JG and Hwang, I}, title = {Prediction of Host-Specific Genes by Pan-Genome Analyses of the Korean Ralstonia solanacearum Species Complex.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {506}, pmid = {30930881}, issn = {1664-302X}, abstract = {The soil-borne pathogenic Ralstonia solanacearum species complex (RSSC) is a group of plant pathogens that is economically destructive worldwide and has a broad host range, including various solanaceae plants, banana, ginger, sesame, and clove. Previously, Korean RSSC strains isolated from samples of potato bacterial wilt were grouped into four pathotypes based on virulence tests against potato, tomato, eggplant, and pepper. In this study, we sequenced the genomes of 25 Korean RSSC strains selected based on these pathotypes. The newly sequenced genomes were analyzed to determine the phylogenetic relationships between the strains with average nucleotide identity values, and structurally compared via multiple genome alignment using Mauve software. 
To identify candidate genes responsible for the host specificity of the pathotypes, functional genome comparisons were conducted by analyzing pan-genome orthologous group (POG) and type III secretion system effectors (T3es). POG analyses revealed that a total of 128 genes were shared only in tomato-non-pathogenic strains, 8 genes in tomato-pathogenic strains, 5 genes in eggplant-non-pathogenic strains, 7 genes in eggplant-pathogenic strains, 1 gene in pepper-non-pathogenic strains, and 34 genes in pepper-pathogenic strains. When we analyzed T3es, three host-specific effectors were predicted: RipS3 (SKWP3) and RipH3 (HLK3) were found only in tomato-pathogenic strains, and RipAC (PopC) were found only in eggplant-pathogenic strains. Overall, we identified host-specific genes and effectors that may be responsible for virulence functions in RSSC in silico. The expected characters of those genes suggest that the host range of RSSC is determined by the comprehensive actions of various virulence factors, including effectors, secretion systems, and metabolic enzymes.}, } @article {pmid30929798, year = {2019}, author = {Bedoya-Correa, CM and Rincón Rodríguez, RJ and Parada-Sanchez, MT}, title = {Genomic and phenotypic diversity of Streptococcus mutans.}, journal = {Journal of oral biosciences}, volume = {61}, number = {1}, pages = {22-31}, doi = {10.1016/j.job.2018.11.001}, pmid = {30929798}, issn = {1880-3865}, mesh = {Biofilms ; *Dental Caries ; Genomics ; Humans ; *Streptococcus mutans ; Virulence Factors ; }, abstract = {BACKGROUND: Streptococcus mutans (S. mutans) is a commensal microorganism found in the human oral cavity. However, due to environmental changes, selective pressures, and the presence of a variable genome, it adapts and may acquire new physiological and metabolic properties that alter dental biofilm homeostasis, promoting the development of dental caries. Although the plasticity and heterogeneity of S. 
mutans is widely recognized, very little is known about the mechanisms for the expression of pathogenic properties in specific genotypes.

HIGHLIGHT: The implementation of molecular biology techniques in the study of S. mutans has provided information on the genomic diversity of this species. This variability is generated by genome rearrangements, natural genetic transformation, and horizontal gene transfer, and continues to grow due to an open pan-genome. The main virulence factors associated with the cariogenic potential of S. mutans include adhesion, acid production (acidogenicity), and acid tolerance (aciduricity), and also show variability. These factors coordinate the modification of the physicochemical properties of the biofilm, which results in the accumulation of S. mutans and other acidogenic and aciduric species in the oral cavity.

CONCLUSION: We review the current literature on the main processes that generate S. mutans genomic diversity, as well as the phenotypic variability of its main virulence factors. S. mutans achieves its pathogenesis by sensing the intra- and extracellular environments and regulating gene transcription according to perceived environmental modifications. Consequently, this regulation gives rise to differential synthesis of proteins, allowing this species to potentially express virulence factors.}, } @article {pmid30929020, year = {2019}, author = {Pinholt, M and Bayliss, SC and Gumpert, H and Worning, P and Jensen, VVS and Pedersen, M and Feil, EJ and Westh, H}, title = {WGS of 1058 Enterococcus faecium from Copenhagen, Denmark, reveals rapid clonal expansion of vancomycin-resistant clone ST80 combined with widespread dissemination of a vanA-containing plasmid and acquisition of a heterogeneous accessory genome.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {74}, number = {7}, pages = {1776-1785}, doi = {10.1093/jac/dkz118}, pmid = {30929020}, issn = {1460-2091}, mesh = {Bacterial Proteins/*genetics ; Carbon-Oxygen Ligases/*genetics ; Cross Infection/epidemiology/microbiology/transmission ; Denmark/epidemiology ; Disease Transmission, Infectious ; Enterococcus faecium/classification/genetics/*isolation & purification ; Genome, Bacterial ; *Genotype ; Gram-Positive Bacterial Infections/*epidemiology/microbiology/transmission ; Hospitals ; Humans ; Molecular Epidemiology ; Molecular Typing ; Phylogeny ; Plasmids/*analysis ; Vancomycin-Resistant Enterococci/classification/genetics/*isolation & purification ; *Whole Genome Sequencing ; }, abstract = {OBJECTIVES: From 2012 to 2015, a sudden significant increase in vancomycin-resistant (vanA) Enterococcus faecium (VREfm) was observed in the Capital Region of Denmark. Clonal relatedness of VREfm and vancomycin-susceptible E. 
faecium (VSEfm) was investigated, transmission events between hospitals were identified and the pan-genome and plasmids from the largest VREfm clonal group were characterized.

METHODS: WGS of 1058 E. faecium isolates was carried out on the Illumina platform to perform SNP analysis and to identify the pan-genome. One isolate was also sequenced on the PacBio platform to close the genome. Epidemiological data were collected from laboratory information systems.

RESULTS: Phylogeny of 892 VREfm and 166 VSEfm revealed a polyclonal structure, with a single clonal group (ST80) accounting for 40% of the VREfm isolates. VREfm and VSEfm co-occurred within many clonal groups; however, no VSEfm were related to the dominant VREfm group. A similar vanA plasmid was identified in ≥99% of isolates belonging to the dominant group and 69% of the remaining VREfm. Ten plasmids were identified in the completed genome, and ∼29% of this genome consisted of dispensable accessory genes. The size of the pan-genome among isolates in the dominant group was 5905 genes.

CONCLUSIONS: Most probably, VREfm emerged owing to importation of a successful VREfm clone which rapidly transmitted to the majority of hospitals in the region whilst simultaneously disseminating a vanA plasmid to pre-existing VSEfm. Acquisition of a heterogeneous accessory genome may account for the success of this clone by facilitating adaptation to new environmental challenges.}, } @article {pmid30928361, year = {2019}, author = {Araújo, CL and Alves, J and Nogueira, W and Pereira, LC and Gomide, AC and Ramos, R and Azevedo, V and Silva, A and Folador, A}, title = {Prediction of new vaccine targets in the core genome of Corynebacterium pseudotuberculosis through omics approaches and reverse vaccinology.}, journal = {Gene}, volume = {702}, number = {}, pages = {36-45}, doi = {10.1016/j.gene.2019.03.049}, pmid = {30928361}, issn = {1879-0038}, mesh = {Antigens, Bacterial/genetics ; Bacterial Vaccines/*genetics ; Computer Simulation ; Corynebacterium/genetics ; Corynebacterium pseudotuberculosis/*genetics/immunology/metabolism ; Gene Expression Profiling ; Genes, Bacterial ; Genome, Bacterial ; Protein Interaction Mapping ; Sequence Analysis, RNA ; Vaccinology ; }, abstract = {Corynebacterium pseudotuberculosis is the etiologic agent of veterinary relevance diseases, such as caseous lymphadenitis, affecting different animal species causing damage to the global agribusiness. So far, there are no completely effective treatment methods to overcome the impacts caused by this pathogen. Several genomes of the species are deposited on public databases, allowing the execution of studies related to the pan-genomic approach. In this study, we used an integrated in silico workflow to prospect novel putative targets using the core genome, a set of shared genes among 65 C. pseudotuberculosis strains. 
Subsequently, through RNA-Seq data of the same abiotic stresses in two strains, we selected only induced genes to compose the reverse vaccinology workflow based in two different strategies. Our results predicted six probable antigens in both analysis, which indicates that they have a strong potential to be used in further studies as vaccine targets against this bacterium.}, } @article {pmid30918968, year = {2019}, author = {Smith, BA and Leligdon, C and Baltrus, DA}, title = {Just the Two of Us? A Family of Pseudomonas Megaplasmids Offers a Rare Glimpse into the Evolution of Large Mobile Elements.}, journal = {Genome biology and evolution}, volume = {11}, number = {4}, pages = {1192-1206}, pmid = {30918968}, issn = {1759-6653}, mesh = {*Evolution, Molecular ; Genes, Essential ; Multigene Family ; Plasmids/*genetics ; Pseudomonas putida/*genetics ; Pseudomonas syringae/*genetics ; RNA, Transfer/genetics ; }, abstract = {Pseudomonads are ubiquitous group of environmental proteobacteria, well known for their roles in biogeochemical cycling, in the breakdown of xenobiotic materials, as plant growth promoters, and as pathogens of a variety of host organisms. We have previously identified a large megaplasmid present within one isolate of the plant pathogen Pseudomonas syringae, and here we report that a second member of this megaplasmid family is found within an environmental Pseudomonad isolate most closely related to Pseudomonas putida. Many of the shared genes are involved in critical cellular processes like replication, transcription, translation, and DNA repair. We argue that presence of these shared pathways sheds new light on discussions about the types of genes that undergo horizontal gene transfer (i.e., the complexity hypothesis) as well as the evolution of pangenomes. Furthermore, although both megaplasmids display a high level of synteny, genes that are shared differ by over 50% on average at the amino acid level. 
This combination of conservation in gene order despite divergence in gene sequence suggests that this Pseudomonad megaplasmid family is relatively old, that gene order is under strong selection within this family, and that there are likely many more members of this megaplasmid family waiting to be found in nature.}, } @article {pmid30917781, year = {2019}, author = {Shelyakin, PV and Bochkareva, OO and Karan, AA and Gelfand, MS}, title = {Micro-evolution of three Streptococcus species: selection, antigenic variation, and horizontal gene inflow.}, journal = {BMC evolutionary biology}, volume = {19}, number = {1}, pages = {83}, pmid = {30917781}, issn = {1471-2148}, mesh = {Animals ; Antigenic Variation/*genetics ; *Biological Evolution ; Conserved Sequence/genetics ; DNA, Intergenic ; Gene Flow ; Gene Ontology ; Gene Rearrangement/genetics ; *Gene Transfer, Horizontal ; Genes, Bacterial ; Genome Size ; Humans ; Hydrolases/metabolism ; Nucleotides/genetics ; Phylogeny ; *Selection, Genetic ; Sequence Deletion ; Species Specificity ; Streptococcus/*genetics ; Streptococcus pneumoniae/genetics ; Virulence/genetics ; }, abstract = {BACKGROUND: The genus Streptococcus comprises pathogens that strongly influence the health of humans and animals. Genome sequencing of multiple Streptococcus strains demonstrated high variability in gene content and order even in closely related strains of the same species and created a newly emerged object for genomic analysis, the pan-genome. Here we analysed the genome evolution of 25 strains of Streptococcus suis, 50 strains of Streptococcus pyogenes and 28 strains of Streptococcus pneumoniae.

RESULTS: Fractions of the pan-genome, unique, periphery, and universal genes differ in size, functional composition, the level of nucleotide substitutions, and predisposition to horizontal gene transfer and genomic rearrangements. The density of substitutions in intergenic regions appears to be correlated with selection acting on adjacent genes, implying that more conserved genes tend to have more conserved regulatory regions. The total pan-genome of the genus is open, but only due to strain-specific genes, whereas other pan-genome fractions reach saturation. We have identified the set of genes with phylogenies inconsistent with species and non-conserved location in the chromosome; these genes are rare in at least one species and have likely experienced recent horizontal transfer between species. The strain-specific fraction is enriched with mobile elements and hypothetical proteins, but also contains a number of candidate virulence-related genes, so it may have a strong impact on adaptability and pathogenicity. Mapping the rearrangements to the phylogenetic tree revealed large parallel inversions in all species. A parallel inversion of length 15 kB with breakpoints formed by genes encoding surface antigen proteins PhtD and PhtB in S. pneumoniae leads to replacement of gene fragments that likely indicates the action of an antigen variation mechanism.

CONCLUSIONS: Members of genus Streptococcus have a highly dynamic, open pan-genome, that potentially confers them with the ability to adapt to changing environmental conditions, i.e. antibiotic resistance or transmission between different hosts. Hence, integrated analysis of all aspects of genome evolution is important for the identification of potential pathogens and design of drugs and vaccines.}, } @article {pmid30911788, year = {2019}, author = {Adeniji, AA and Loots, DT and Babalola, OO}, title = {Bacillus velezensis: phylogeny, useful applications, and avenues for exploitation.}, journal = {Applied microbiology and biotechnology}, volume = {103}, number = {9}, pages = {3669-3682}, doi = {10.1007/s00253-019-09710-5}, pmid = {30911788}, issn = {1432-0614}, mesh = {Bacillus/*classification/genetics/isolation & purification/*metabolism ; Food Microbiology ; Genome, Bacterial ; Genomics ; Industrial Microbiology ; Metabolome ; *Phylogeny ; }, abstract = {Some members of the Bacillus velezensis (Bv) group (e.g., Bv FZB42T and AS3.43) were previously assigned grouping with B. subtilis and B. amyloliquefaciens, based on the fact that they shared a 99% DNA-DNA percentage phylogenetic similarity. However, hinging on current assessments of the pan-genomic reassignments, the differing phylogenomic characteristics of Bv from B. subtilis and B. amyloliquefaciens are now better understood. Within this re-grouping/reassignment, the various strains within the Bv share a close phylogenomic resemblance, and a number of these strains have received a lot of attention in recent years, due to their genomic robustness, and the growing evidence for their possible utilization in the agricultural industry for managing plant diseases. Only a few applications for their use medicinally/pharmaceutically, environmentally, and in the food industry have been reported, and this may be due to the fact that the majority of those strains investigated are those typically occurring in soil. 
Although the intracellular unique biomolecules of Bv strains have been revealed via in silico genome modeling and investigated using transcriptomics and proteomics, a further inquisition into the Bv metabolome using newer technologies such as metabolomics could elucidate additional applications of this economically relevant Bacillus species, beyond that of primarily the agricultural sector.}, } @article {pmid30906288, year = {2019}, author = {Legendre, M and Alempic, JM and Philippe, N and Lartigue, A and Jeudy, S and Poirot, O and Ta, NT and Nin, S and Couté, Y and Abergel, C and Claverie, JM}, title = {Pandoravirus Celtis Illustrates the Microevolution Processes at Work in the Giant Pandoraviridae Genomes.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {430}, pmid = {30906288}, issn = {1664-302X}, abstract = {With genomes of up to 2.7 Mb propagated in μm-long oblong particles and initially predicted to encode more than 2000 proteins, members of the Pandoraviridae family display the most extreme features of the known viral world. The mere existence of such giant viruses raises fundamental questions about their origin and the processes governing their evolution. A previous analysis of six newly available isolates, independently confirmed by a study including three others, established that the Pandoraviridae pan-genome is open, meaning that each new strain exhibits protein-coding genes not previously identified in other family members. With an average increment of about 60 proteins, the gene repertoire shows no sign of reaching a limit and remains largely coding for proteins without recognizable homologs in other viruses or cells (ORFans). To explain these results, we proposed that most new protein-coding genes were created de novo, from pre-existing non-coding regions of the G+C rich pandoravirus genomes. 
The comparison of the gene content of a new isolate, pandoravirus celtis, closely related (96% identical genome) to the previously described p. quercus is now used to test this hypothesis by studying genomic changes in a microevolution range. Our results confirm that the differences between these two similar gene contents mostly consist of protein-coding genes without known homologs, with statistical signatures close to that of intergenic regions. These newborn proteins are under slight negative selection, perhaps to maintain stable folds and prevent protein aggregation pending the eventual emergence of fitness-increasing functions. Our study also unraveled several insertion events mediated by a transposase of the hAT family, 3 copies of which are found in p. celtis and are presumably active. Members of the Pandoraviridae are presently the first viruses known to encode this type of transposase.}, } @article {pmid30902757, year = {2020}, author = {Banerjee, R and Shine, O and Rajachandran, V and Krishnadas, G and Minnick, MF and Paul, S and Chattopadhyay, S}, title = {Gene duplication and deletion, not horizontal transfer, drove intra-species mosaicism of Bartonella henselae.}, journal = {Genomics}, volume = {112}, number = {1}, pages = {467-471}, doi = {10.1016/j.ygeno.2019.03.009}, pmid = {30902757}, issn = {1089-8646}, mesh = {Bartonella henselae/*genetics ; *Evolution, Molecular ; *Gene Deletion ; *Gene Duplication ; Gene Transfer, Horizontal ; Genes, Bacterial ; Genome, Bacterial ; *Mosaicism ; }, abstract = {Bartonella henselae is a facultative intracellular pathogen that occurs worldwide and is responsible primarily for cat-scratch disease in young people and bacillary angiomatosis in immunocompromised patients. The principal source of genome-level diversity that contributes to B. henselae's host-adaptive features is thought to be horizontal gene transfer events. However, our analyses did not reveal the acquisition of horizontally-transferred islands in B. 
henselae after its divergence from other Bartonella. Rather, diversity in gene content and genome size was apparently acquired through two alternative mechanisms, including deletion and, more predominantly, duplication of genes. Interestingly, a majority of these events occurred in regions that were horizontally transferred long before B. henselae's divergence from other Bartonella species. Our study indicates the possibility that gene duplication, in response to positive selection pressures in specific clones of B. henselae, might be linked to the pathogen's adaptation to arthropod vectors, the cat reservoir, or humans as incidental host-species.}, } @article {pmid30900970, year = {2019}, author = {de Carvalho, SP and de Almeida, JB and de Freitas, LM and Guimarães, AMS and do Nascimento, NC and Dos Santos, AP and Campos, GB and Messick, JB and Timenetsky, J and Marques, LM}, title = {Genomic profile of Brazilian methicillin-resistant Staphylococcus aureus resembles clones dispersed worldwide.}, journal = {Journal of medical microbiology}, volume = {68}, number = {5}, pages = {693-702}, doi = {10.1099/jmm.0.000956}, pmid = {30900970}, issn = {1473-5644}, mesh = {Anti-Bacterial Agents/pharmacology ; Brazil ; Enterotoxins/genetics ; Fluoroquinolones/pharmacology ; *Genome, Bacterial ; Genomics ; Humans ; Macrolides/pharmacology ; Methicillin-Resistant Staphylococcus aureus/*genetics/*virology ; *Phylogeny ; Virulence Factors/*genetics ; }, abstract = {PURPOSE: Comparative genomic analysis of strains may help us to better understand the wide diversity of their genetic profiles. The aim of this study was to analyse the genomic features of the resistome and virulome of Brazilian first methicillin-resistant Staphylococcus aureus (MRSA) isolates and their relationship to other Brazilian and international MRSA strains.

METHODOLOGY: The whole genomes of three MRSA strains previously isolated in Vitória da Conquista were sequenced, assembled, annotated and compared with other MRSA genomes. A phylogenetic tree was constructed and the pan-genome and accessory and core genomes were constructed. The resistomes and virulomes of all strains were identified. Results/Key findings. Phylogenetic analysis of all 49 strains indicated different clones showing high similarity. The pan-genome of the analysed strains consisted of 4484 genes, with 31 % comprising the gene portion of the core genome, 47 % comprising the accessory genome and 22 % being singletons. Most strains showed at least one gene related to virulence factors associated with immune system evasion, followed by enterotoxins. The strains showed multiresistance, with the most recurrent genes conferring resistance to beta-lactams, fluoroquinolones, aminoglycosides and macrolides.

CONCLUSIONS: Our comparative genomic analysis showed that there is no pattern of virulence gene distribution among the clones analysed in the different regions. The Brazilian strains showed similarity with clones from several continents.}, } @article {pmid30893420, year = {2019}, author = {Correia, K and Yu, SM and Mahadevan, R}, title = {AYbRAH: a curated ortholog database for yeasts and fungi spanning 600 million years of evolution.}, journal = {Database : the journal of biological databases and curation}, volume = {2019}, number = {}, pages = {}, pmid = {30893420}, issn = {1758-0463}, mesh = {Data Curation ; *Databases, Protein ; Genome, Fungal ; Genomics ; Phylogeny ; Saccharomyces cerevisiae/genetics ; Sequence Homology, Amino Acid ; Time Factors ; Yeasts/genetics/*metabolism ; }, abstract = {Budding yeasts inhabit a range of environments by exploiting various metabolic traits. The genetic bases for these traits are mostly unknown, preventing their addition or removal in a chassis organism for metabolic engineering. Insight into the evolution of orthologs, paralogs and xenologs in the yeast pan-genome can help bridge these genotypes; however, existing phylogenomic databases do not span diverse yeasts, and sometimes cannot distinguish between these homologs. To help understand the molecular evolution of these traits in yeasts, we created Analyzing Yeasts by Reconstructing Ancestry of Homologs (AYbRAH), an open-source database of predicted and manually curated ortholog groups for 33 diverse fungi and yeasts in Dikarya, spanning 600 million years of evolution. OrthoMCL and OrthoDB were used to cluster protein sequence into ortholog and homolog groups, respectively; MAFFT and PhyML reconstructed the phylogeny of all homolog groups. Ortholog assignments for enzymes and small metabolite transporters were compared to their phylogenetic reconstruction, and curated to resolve any discrepancies. 
Information on homolog and ortholog groups can be viewed in the AYbRAH web portal (https://lmse.github.io/aybrah/), including functional annotations, predictions for mitochondrial localization and transmembrane domains, literature references and phylogenetic reconstructions. Ortholog assignments in AYbRAH were compared to HOGENOM, KEGG Orthology, OMA, eggNOG and PANTHER. PANTHER and OMA had the most congruent ortholog groups with AYbRAH, while the other phylogenomic databases had greater amounts of under-clustering, over-clustering or no ortholog annotations for proteins. Future plans are discussed for AYbRAH, and recommendations are made for other research communities seeking to create curated ortholog databases.}, } @article {pmid30871454, year = {2019}, author = {Naz, K and Naz, A and Ashraf, ST and Rizwan, M and Ahmad, J and Baumbach, J and Ali, A}, title = {PanRV: Pangenome-reverse vaccinology approach for identifications of potential vaccine candidates in microbial pangenome.}, journal = {BMC bioinformatics}, volume = {20}, number = {1}, pages = {123}, pmid = {30871454}, issn = {1471-2105}, mesh = {Bacterial Vaccines/pharmacology/*therapeutic use ; Genomics/*methods ; Humans ; Proteomics/*methods ; Vaccinology/*methods ; }, abstract = {BACKGROUND: A revolutionary diversion from classical vaccinology to reverse vaccinology approach has been observed in the last decade. The ever-increasing genomic and proteomic data has greatly facilitated the vaccine designing and development process. Reverse vaccinology is considered as a cost-effective and proficient approach to screen the entire pathogen genome. To look for broad-spectrum immunogenic targets and analysis of closely-related bacterial species, the assimilation of pangenome concept into reverse vaccinology approach is essential. The categories of species pangenome such as core, accessory, and unique genes sets can be analyzed for the identification of vaccine candidates through reverse vaccinology.

RESULTS: We have designed an integrative computational pipeline termed "PanRV" that employs both the pangenome and reverse vaccinology approaches. PanRV comprises four functional modules including i) Pangenome Estimation Module (PGM) ii) Reverse Vaccinology Module (RVM) iii) Functional Annotation Module (FAM) and iv) Antibiotic Resistance Association Module (ARM). The pipeline is tested by using genomic data from 301 genomes of Staphylococcus aureus and the results are verified by experimentally known antigenic data.

CONCLUSION: The proposed pipeline has proved to be the first comprehensive automated pipeline that can precisely identify putative vaccine candidates exploiting the microbial pangenome. PanRV is a Linux based package developed in JAVA language. An executable installer is provided for ease of installation along with a user manual at https://sourceforge.net/projects/panrv2/ .}, } @article {pmid30870618, year = {2019}, author = {Metwaly, A and Haller, D}, title = {Strain-Level Diversity in the Gut: The P. copri Case.}, journal = {Cell host & microbe}, volume = {25}, number = {3}, pages = {349-350}, doi = {10.1016/j.chom.2019.02.006}, pmid = {30870618}, issn = {1934-6069}, mesh = {Gluconeogenesis ; Glucose ; Homeostasis ; *Microbiota ; *Succinic Acid ; }, abstract = {In this issue of Cell Host & Microbe, De Filippis et al. (2019) report that Prevotella copri strain-level diversity in the gut microbiome can be shaped by host diet. Individual signatures were analyzed by marker gene profiling in assembled pangenomes, providing a strong rationale for the functional adaptation of individual microbial ecosystems in response to diet.}, } @article {pmid30863859, year = {2019}, author = {Li, H and Ding, X and Chen, C and Zheng, X and Han, H and Li, C and Gong, J and Xu, T and Li, QX and Ding, GC and Li, J}, title = {Enrichment of phosphate solubilizing bacteria during late developmental stages of eggplant (Solanum melongena L.).}, journal = {FEMS microbiology ecology}, volume = {95}, number = {3}, pages = {}, doi = {10.1093/femsec/fiz023}, pmid = {30863859}, issn = {1574-6941}, mesh = {Agriculture/*methods ; Bacteria/classification/genetics/growth & development/*metabolism ; Enterobacter/growth & development/metabolism ; Microbiota/genetics ; Phosphates/*metabolism ; RNA, Ribosomal, 16S/genetics ; Rhizosphere ; Soil Microbiology ; Solanum melongena/*growth & development/*microbiology ; }, abstract = {Understanding the ecology of phosphate solubilizing bacteria (PSBs) is 
critical for developing better strategies to increase crop productivity. In this study, the diversity of PSBs and of the total bacteria in the rhizosphere of eggplant (Solanum melongena L.) cultivated in organic, integrated and conventional farming systems was compared at four developmental stages of its lifecycle. Both selective culture and high-throughput sequencing analysis of 16S rRNA amplicons indicated that Enterobacter with strong or very strong in vivo phosphate solubilization activities was enriched in the rhizosphere during the fruiting stage. The high-throughput sequencing analysis results demonstrated that farming systems explained 23% of total bacterial community variation. Plant development and farming systems synergistically shaped the rhizospheric bacterial community, in which the degree of variation influenced by farming systems decreased over the plant development phase from 56% to 26.3% to 16.3%, and finally to no significant effect as the plant reached at fruiting stage. Pangenome analysis indicated that two-component and transporter systems varied between the rhizosphere and soil PSBs. This study elucidated the complex interactions among farming systems, plant development and rhizosphere microbiomes.}, } @article {pmid30859492, year = {2019}, author = {Burgueño-Roman, A and Castañeda-Ruelas, GM and Pacheco-Arjona, R and Jimenez-Edeza, M}, title = {Pathogenic potential of non-typhoidal Salmonella serovars isolated from aquatic environments in Mexico.}, journal = {Genes & genomics}, volume = {41}, number = {7}, pages = {767-779}, pmid = {30859492}, issn = {2092-9293}, mesh = {*Genome, Bacterial ; Mexico ; Molecular Sequence Annotation ; Phylogeny ; Rivers/*microbiology ; Salmonella/classification/*genetics/isolation & purification/pathogenicity ; Virulence/genetics ; }, abstract = {BACKGROUND: River water has been implicated as a source of non-typhoidal Salmonella (NTS) serovars in Mexico.

OBJECTIVE: To dissect the molecular pathogenesis and defense strategies of seven NTS strains isolated from river water in Mexico.

METHODS: The genomes of Salmonella serovars Give, Pomona, Kedougou, Stanley, Oranienburg, Sandiego, and Muenchen were sequenced using the whole-genome shotgun methodology in the Illumina Miseq platform. The genome annotation and evolutionary analyses were conducted in the RAST and FigTree servers, respectively. The MLST was performed using the SRST2 tool and the comparisons between strains were clustered and visualized using the Gview server. Experimental virulence assay was included to evaluate the pathogenic potential of strains.

RESULTS: We report seven high-quality draft genomes, ranging from ~ 4.61 to ~ 5.12 Mb, with a median G + C value, coding DNA sequence, and protein values of 52.1%, 4697 bp, and 4,589 bp, respectively. The NTS serovars presented with an open pan-genome, offering novel genetic content. Each NTS serovar had an indistinguishable virulotype with a core genome (352 virulence genes) closely associated with Salmonella pathogenicity; 13 genes were characterized as serotype specific, which could explain differences in pathogenicity. All strains maintained highly conserved genetic content regarding the Salmonella pathogenicity islands (1-5) (86.9-100%), fimbriae (84.6%), and hypermutation (100%) genes. Adherence and invasion capacity were confirmed among NTS strains in Caco-2 cells.

CONCLUSION: Our results demonstrated the arsenal of virulence and defense molecular factors harbored on NTS serovars and highlight that environmental NTS strains are waterborne pathogens worthy of attention.}, } @article {pmid30858837, year = {2019}, author = {van Tonder, AJ and Bray, JE and Jolley, KA and Jansen van Rensburg, M and Quirk, SJ and Haraldsson, G and Maiden, MCJ and Bentley, SD and Haraldsson, Á and Erlendsdóttir, H and Kristinsson, KG and Brueggemann, AB}, title = {Genomic Analyses of >3,100 Nasopharyngeal Pneumococci Revealed Significant Differences Between Pneumococci Recovered in Four Different Geographical Regions.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {317}, pmid = {30858837}, issn = {1664-302X}, support = {/WT_/Wellcome Trust/United Kingdom ; 098051/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Understanding the structure of a bacterial population is essential in order to understand bacterial evolution. Estimating the core genome (those genes common to all, or nearly all, strains of a species) is a key component of such analyses. The size and composition of the core genome varies by dataset, but we hypothesized that the variation between different collections of the same bacterial species would be minimal. To investigate this, we analyzed the genome sequences of 3,118 pneumococci recovered from healthy individuals in Reykjavik (Iceland), Southampton (United Kingdom), Boston (United States), and Maela (Thailand). The analyses revealed a "supercore" genome (genes shared by all 3,118 pneumococci) of 558 genes, although an additional 354 core genes were shared by pneumococci from Reykjavik, Southampton, and Boston. Overall, the size and composition of the core and pan-genomes among pneumococci recovered in Reykjavik, Southampton, and Boston were similar. Maela pneumococci were distinctly different in that they had a smaller core genome and larger pan-genome. 
The pan-genome of Maela pneumococci contained several >25 Kb sequence regions (flanked by pneumococcal genes) that were homologous to genomic regions found in other bacterial species. Overall, our work revealed that some subsets of the global pneumococcal population are highly heterogeneous, and our hypothesis was rejected. This is an important finding in terms of understanding genetic variation among pneumococci and is also an essential point of consideration before generalizing the findings from a single dataset to the wider pneumococcal population.}, } @article {pmid30858831, year = {2019}, author = {Anand, S and Deighton, M and Livanos, G and Morrison, PD and Pang, ECK and Mantri, N}, title = {Antimicrobial Activity of Agastache Honey and Characterization of Its Bioactive Compounds in Comparison With Important Commercial Honeys.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {263}, pmid = {30858831}, issn = {1664-302X}, abstract = {There is an urgent need for new effective antimicrobial agents since acquired resistance of bacteria to currently available agents is increasing. The antimicrobial activity of Mono-floral Agastache honey produced from Australian grown Agastache rugosa was compared with the activity of commercially available honeys derived from Leptospermum species and with Jarrah honey for activity against clinical and non-clinical strains of Staphylococcus aureus (methicillin-susceptible and methicillin-resistant strains), Pseudomonas aeruginosa, and Escherichia coli. The minimum inhibitory concentration (MIC) for Agastache honey was in the range of 6-25% (w/v) for all species examined. The MICs for Leptospermum honeys were generally similar to those of Agastache honey, but MICs were higher for Super manuka and Jarrah honeys and lower for Tea tree honey. Staphylococci were more susceptible to all honeys than Pseudomonas aeruginosa and Escherichia coli. 
Pretreatment of honey with catalase increased the bacterial growth at MIC of Tea tree honey (35%), Super Manuka (15%), Jarrah honeys (12%), and Agastache honey (10%), indicating variable contributions of hydrogen peroxide to antimicrobial activity. Manuka and Jelly bush honeys retained their antimicrobial activity in the presence of catalase, indicating the presence of other antimicrobial compounds in the honey. An LC-MS/MS method was developed and used to identify possible antimicrobial phenolic compounds in Agastache honey and flowers, and five commercial honeys. The chemical markers characteristic of Agastache honey and honeys of Leptospermum origin were phenyllactic acid and methyl syringate. Overall, the bioactive compounds with antimicrobial and antioxidant activity in Agastache honey suggested a possible use for topical application and in wound care.}, } @article {pmid30858412, year = {2019}, author = {Obolski, U and Gori, A and Lourenço, J and Thompson, C and Thompson, R and French, N and Heyderman, RS and Gupta, S}, title = {Identifying genes associated with invasive disease in S. pneumoniae by applying a machine learning approach to whole genome sequence typing data.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {4049}, pmid = {30858412}, issn = {2045-2322}, support = {//Wellcome Trust/United Kingdom ; MR/N023129/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Humans ; Meningitis/cerebrospinal fluid/genetics/microbiology ; Pneumonia/cerebrospinal fluid/*genetics/microbiology ; Sepsis/cerebrospinal fluid/genetics/microbiology ; Streptococcus pneumoniae/*genetics/pathogenicity ; *Whole Genome Sequencing ; }, abstract = {Streptococcus pneumoniae, a normal commensal of the upper respiratory tract, is a major public health concern, responsible for substantial global morbidity and mortality due to pneumonia, meningitis and sepsis. 
Why some pneumococci invade the bloodstream or CSF (so-called invasive pneumococcal disease; IPD) is uncertain. In this study we identify genes associated with IPD. We transform whole genome sequence (WGS) data into a sequence typing scheme, while avoiding the caveat of using an arbitrary genome as a reference by substituting it with a constructed pangenome. We then employ a random forest machine-learning algorithm on the transformed data, and find 43 genes consistently associated with IPD across three geographically distinct WGS data sets of pneumococcal carriage isolates. Of the genes we identified as associated with IPD, we find 23 genes previously shown to be directly relevant to IPD, as well as 18 uncharacterized genes. We suggest that these uncharacterized genes identified by us are also likely to be relevant for IPD.}, } @article {pmid30853944, year = {2019}, author = {Khaleque, HN and González, C and Shafique, R and Kaksonen, AH and Holmes, DS and Watkin, ELJ}, title = {Uncovering the Mechanisms of Halotolerance in the Extremely Acidophilic Members of the Acidihalobacter Genus Through Comparative Genome Analysis.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {155}, pmid = {30853944}, issn = {1664-302X}, abstract = {There are few naturally occurring environments where both acid and salinity stress exist together, consequently, there has been little evolutionary pressure for microorganisms to develop systems that enable them to deal with both stresses simultaneously. Members of the genus Acidihalobacter are iron- and sulfur-oxidizing, halotolerant acidophiles that have developed the ability to tolerate acid and saline stress and, therefore, have the potential to bioleach ores with brackish or saline process waters under acidic conditions. The genus consists of four members, A. prosperus DSM 5130[T], A. prosperus DSM 14174, A. prosperus F5 and "A. ferrooxidans" DSM 14175. 
An in depth genome comparison was undertaken in order to provide a more comprehensive description of the mechanisms of halotolerance used by the different members of this genus. Pangenome analysis identified 29, 3 and 9 protein families related to halotolerance in the core, dispensable and unique genomes, respectively. The genes for halotolerance showed Ka/Ks ratios between 0 and 0.2, confirming that they are conserved and stabilized. All the Acidihalobacter genomes contained similar genes for the synthesis and transport of ectoine, which was recently found to be the dominant osmoprotectant in A. prosperus DSM 14174 and A. prosperus DSM 5130[T]. Similarities also existed in genes encoding low affinity potassium pumps, however, A. prosperus DSM 14174 was also found to contain genes encoding high affinity potassium pumps. Furthermore, only A. prosperus DSM 5130[T] and "A. ferrooxidans" DSM 14175 contained genes allowing the uptake of taurine as an osmoprotectant. Variations were also seen in genes encoding proteins involved in the synthesis and/or transport of periplasmic glucans, sucrose, proline, and glycine betaine. This suggests that versatility exists in the Acidihalobacter genus in terms of the mechanisms they can use for halotolerance. This information is useful for developing hypotheses for the search for life on exoplanets and moons.}, } @article {pmid30851098, year = {2019}, author = {Tahir Ul Qamar, M and Zhu, X and Xing, F and Chen, LL}, title = {ppsPCP: a plant presence/absence variants scanner and pan-genome construction pipeline.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {20}, pages = {4156-4158}, doi = {10.1093/bioinformatics/btz168}, pmid = {30851098}, issn = {1367-4811}, mesh = {Eukaryota ; *Genome, Plant ; Genomics ; Prokaryotic Cells ; *Software ; }, abstract = {SUMMARY: Since the idea of pan-genomics emerged several tools and pipelines have been introduced for prokaryotic pan-genomics. 
However, not a single comprehensive pipeline has been reported which could overcome multiple challenges associated with eukaryotic pan-genomics. To aid the eukaryotic pan-genomic studies, here we present ppsPCP pipeline which is designed for eukaryotes especially for plants. It is capable of scanning presence/absence variants (PAVs) and constructing a fully annotated pan-genome. We believe with these unique features of PAV scanning and building a pan-genome together with its annotation, ppsPCP will be useful for plant pan-genomic studies and aid researchers to study genetic/phenotypic variations and genomic diversity.

AVAILABILITY AND IMPLEMENTATION: The ppsPCP is freely available at github DOI: https://doi.org/10.5281/zenodo.2567390 and webpage http://cbi.hzau.edu.cn/ppsPCP/.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30851095, year = {2019}, author = {Rautiainen, M and Mäkinen, V and Marschall, T}, title = {Bit-parallel sequence-to-graph alignment.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {19}, pages = {3599-3607}, pmid = {30851095}, issn = {1367-4811}, mesh = {*Algorithms ; *Genome ; Sequence Alignment ; Sequence Analysis, DNA ; }, abstract = {MOTIVATION: Graphs are commonly used to represent sets of sequences. Either edges or nodes can be labeled by sequences, so that each path in the graph spells a concatenated sequence. Examples include graphs to represent genome assemblies, such as string graphs and de Bruijn graphs, and graphs to represent a pan-genome and hence the genetic variation present in a population. Being able to align sequencing reads to such graphs is a key step for many analyses and its applications include genome assembly, read error correction and variant calling with respect to a variation graph.

RESULTS: We generalize two linear sequence-to-sequence algorithms to graphs: the Shift-And algorithm for exact matching and Myers' bitvector algorithm for semi-global alignment. These linear algorithms are both based on processing w sequence characters with a constant number of operations, where w is the word size of the machine (commonly 64), and achieve a speedup of up to w over naive algorithms. For a graph with |V| nodes and |E| edges and a sequence of length m, our bitvector-based graph alignment algorithm reaches a worst case runtime of O(|V|+⌈m/w⌉|E| log w) for acyclic graphs and O(|V|+m|E| log w) for arbitrary cyclic graphs. We apply it to five different types of graphs and observe a speedup between 3-fold and 20-fold compared with a previous (asymptotically optimal) alignment algorithm.

AVAILABILITY AND IMPLEMENTATION: https://github.com/maickrau/GraphAligner.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30847473, year = {2019}, author = {Velsko, IM and Perez, MS and Richards, VP}, title = {Resolving Phylogenetic Relationships for Streptococcus mitis and Streptococcus oralis through Core- and Pan-Genome Analyses.}, journal = {Genome biology and evolution}, volume = {11}, number = {4}, pages = {1077-1087}, pmid = {30847473}, issn = {1759-6653}, mesh = {Genome, Bacterial ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Streptococcus mitis/*genetics ; Streptococcus oralis/*genetics ; }, abstract = {Taxonomic and phylogenetic relationships of Streptococcus mitis and Streptococcus oralis have been difficult to establish biochemically and genetically. We used core-genome analyses of S. mitis and S. oralis, as well as the closely related species Streptococcus pneumoniae and Streptococcus parasanguinis, to clarify the phylogenetic relationships between S. mitis and S. oralis, as well as within subclades of S. oralis. All S. mitis (n = 67), S. oralis (n = 89), S. parasanguinis (n = 27), and 27 S. pneumoniae genome assemblies were downloaded from NCBI and reannotated. All genes were delineated into homologous clusters and maximum-likelihood phylogenies built from putatively nonrecombinant core gene sets. Population structure was determined using Bayesian genome clustering, and patristic distance was calculated between populations. Population-specific gene content was assessed using a phylogenetic-based genome-wide association approach. Streptococcus mitis and S. oralis formed distinct clades, but species mixing suggests taxonomic misassignment. Patristic distance between populations suggests that S. oralis subsp. dentisani is a distinct species, whereas S. oralis subsp. tigurinus and subsp. oralis are supported as subspecies, and that S. mitis comprises two subspecies. None of the genes within the pan-genomes of S. mitis and S. 
oralis could be statistically correlated with either, and the dispensable genomes showed extensive variation among isolates. These are likely important factors contributing to established overlap in biochemical characteristics for these taxa. Based on core-genome analysis, the substructure of S. oralis and S. mitis should be redefined, and species assignments within S. oralis and S. mitis should be made based on whole-genome analysis to be robust to misassignment.}, } @article {pmid30825936, year = {2019}, author = {Eisenbach, L and Geissler, AJ and Ehrmann, MA and Vogel, RF}, title = {Comparative genomics of Lactobacillus sakei supports the development of starter strain combinations.}, journal = {Microbiological research}, volume = {221}, number = {}, pages = {1-9}, doi = {10.1016/j.micres.2019.01.001}, pmid = {30825936}, issn = {1618-0623}, mesh = {Bioreactors/*microbiology ; Carbohydrate Metabolism/*genetics ; Fermentation/*genetics ; Fermented Foods/*microbiology ; Food Microbiology ; Genome, Bacterial/*genetics ; Genomics ; Latilactobacillus sakei/*genetics/metabolism ; Meat/microbiology ; Sequence Analysis, DNA ; }, abstract = {Strains of Lactobacillus sakei can be isolated from a variety of sources including meat, fermented sausages, sake, sourdough, sauerkraut or kimchi. Selected strains are widely used as starter cultures for sausage fermentation. Recently we have demonstrated that control about the lactic microbiota in fermenting sausages is achieved rather by pairs or strain sets than by single strains. In this work we characterized the pan genome of L. sakei to enable exploitation of the genomic diversity of L. sakei for the establishment of assertive starter strain sets. We have established the full genome sequences of nine L. sakei strains from different sources of isolation and included in the analysis the genome of L. sakei 23K. 
Comparative genomics revealed an accessory genome comprising about 50% of the pan genome and different lineages of strains with no relation to their source of isolation. Group and strain specific differences could be found, which namely referred to agmatine and citrate metabolism. The presence of genes encoding metabolic pathways for fructose, sucrose and trehalose as well as gluconate in all strains suggests a general adaptation to plant/sugary environments and a life in communities with other genera. Analysis of the plasmidome did not reveal any specific mechanisms of adaptation to a habitat. The predicted differences of metabolic settings enable prediction of partner strains, which can occupy the meat environment to a large extent and establish competitive exclusion of autochthonous microbiota. This may assist the development of a new generation of meat starter cultures containing L. sakei strains.}, } @article {pmid30814494, year = {2019}, author = {Peixoto, P and Etcheverry, A and Aubry, M and Missey, A and Lachat, C and Perrard, J and Hendrick, E and Delage-Mourroux, R and Mosser, J and Borg, C and Feugeas, JP and Herfs, M and Boyer-Guittaut, M and Hervouet, E}, title = {EMT is associated with an epigenetic signature of ECM remodeling genes.}, journal = {Cell death & disease}, volume = {10}, number = {3}, pages = {205}, pmid = {30814494}, issn = {2041-4889}, mesh = {A549 Cells ; Epidermal Growth Factor/pharmacology ; Epigenesis, Genetic ; Epithelial-Mesenchymal Transition/*genetics ; Gene Expression Profiling ; Gene Expression Regulation, Neoplastic ; Humans ; Neoplasms/*genetics/pathology ; Retrospective Studies ; Tumor Necrosis Factor-alpha/pharmacology ; }, abstract = {Type III epithelial-mesenchymal transition (EMT) has been previously associated with increased cell migration, invasion, metastasis, and therefore cancer aggressiveness. This reversible process is associated with an important gene expression reprogramming mainly due to epigenetic plasticity. 
Nevertheless, most of the studies describing the central role of epigenetic modifications during EMT were performed in a single-cell model and using only one mode of EMT induction. In our study, we studied the overall modulations of gene expression and epigenetic modifications in four different EMT-induced cell models issued from different tissues and using different inducers of EMT. Pangenomic analysis (transcriptome and ChIP-sequencing) validated our hypothesis that gene expression reprogramming during EMT is largely regulated by epigenetic modifications of a wide range of genes. Indeed, our results confirmed that each EMT model is unique and can be associated with a specific transcriptome profile and epigenetic program. However, we could select some genes or pathways that are similarly regulated in the different models and that could therefore be used as a common signature of all EMT models and become new biomarkers of the EMT phenotype. As an example, we can cite the regulation of gene-coding proteins involved in the degradation of the extracellular matrix (ECM), which are highly induced in all EMT models. Based on our investigations and results, we identified ADAM19 as a new biomarker of in vitro and in vivo EMT and we validated this biological new marker in a cohort of non-small lung carcinomas.}, } @article {pmid30814318, year = {2019}, author = {Raphael, BH and Huynh, T and Brown, E and Smith, JC and Ruberto, I and Getsinger, L and White, S and Winchell, JM}, title = {Culture of Clinical Specimens Reveals Extensive Diversity of Legionella pneumophila Strains in Arizona.}, journal = {mSphere}, volume = {4}, number = {1}, pages = {}, pmid = {30814318}, issn = {2379-5042}, mesh = {Arizona/epidemiology ; Bacterial Typing Techniques ; Centers for Disease Control and Prevention, U.S. 
; *Genetic Variation ; *Genome, Bacterial ; Genotype ; Humans ; Legionella pneumophila/*classification/isolation & purification ; Legionnaires' Disease/epidemiology/*microbiology ; Multilocus Sequence Typing ; Serogroup ; United States ; Whole Genome Sequencing ; }, abstract = {Between 2000 and 2017, a total of 236 Legionella species isolates from Arizona were submitted to the CDC for reference testing. Most of these isolates were recovered from bronchoalveolar lavage specimens. Although the incidence of legionellosis in Arizona is less than the overall U.S. incidence, Arizona submits the largest number of isolates to the CDC for testing compared to those from other states. In addition to a higher proportion of culture confirmation of legionellosis cases in Arizona than in other states, all Legionella pneumophila isolates are forwarded to the CDC for confirmatory testing. Compared to that from other states, a higher proportion of isolates from Arizona were identified as belonging to L. pneumophila serogroups 6 (28.2%) and 8 (8.9%). Genome sequencing was conducted on 113 L. pneumophila clinical isolates not known to be associated with outbreaks in order to understand the genomic diversity of strains causing legionellosis in Arizona. Whole-genome multilocus sequence typing (wgMLST) revealed 17 clusters of isolates sharing at least 99% identical allele content. Only two of these clusters contained isolates from more than one individual with exposure at the same facility. Additionally, wgMLST analysis revealed a group of 31 isolates predominantly belonging to serogroup 6 and containing isolates from three separate clusters. Single nucleotide polymorphism (SNP) and pangenome analysis were used to further resolve genome sequences belonging to a subset of isolates. This study demonstrates that culture of clinical specimens for Legionella spp. 
reveals a highly diverse population of strains causing legionellosis in Arizona which could be underappreciated using other diagnostic approaches. IMPORTANCE Culture of clinical specimens from patients with Legionnaires' disease is rarely performed, restricting our understanding of the diversity and ecology of Legionella. Culture of Legionella from patient specimens in Arizona revealed a greater proportion of non-serogroup 1 Legionella pneumophila isolates than in other U.S. isolates examined. Disease caused by such isolates may go undetected using other diagnostic methods. Moreover, genome sequence analysis revealed that these isolates were genetically diverse, and understanding these populations may help in future environmental source attribution studies.}, } @article {pmid30811910, year = {2019}, author = {Gabbett, MT and Laporte, J and Sekar, R and Nandini, A and McGrath, P and Sapkota, Y and Jiang, P and Zhang, H and Burgess, T and Montgomery, GW and Chiu, R and Fisk, NM}, title = {Molecular Support for Heterogonesis Resulting in Sesquizygotic Twinning.}, journal = {The New England journal of medicine}, volume = {380}, number = {9}, pages = {842-849}, doi = {10.1056/NEJMoa1701313}, pmid = {30811910}, issn = {1533-4406}, support = {T12-403/15-N//Research Grants Council of the Hong Kong/International ; }, mesh = {Adult ; Alleles ; *Chimera ; Embolism, Paradoxical/complications ; Female ; *Fertilization ; Genotype ; Humans ; Male ; Polymorphism, Single Nucleotide ; Pregnancy ; Pregnancy, Twin ; Thromboembolism/etiology ; Twins, Monozygotic/*genetics ; Ultrasonography, Prenatal ; Vena Cava, Inferior ; }, abstract = {Sesquizygotic multiple pregnancy is an exceptional intermediate between monozygotic and dizygotic twinning. We report a monochorionic twin pregnancy with fetal sex discordance. 
Genotyping of amniotic fluid from each sac showed that the twins were maternally identical but chimerically shared 78% of their paternal genome, which makes them genetically in between monozygotic and dizygotic; they are sesquizygotic. We observed no evidence of sesquizygosis in 968 dizygotic twin pairs whom we screened by means of pangenome single-nucleotide polymorphism genotyping. Data from published repositories also show that sesquizygosis is a rare event. Detailed genotyping implicates chimerism arising at the juncture of zygotic division, termed heterogonesis, as the likely initial step in the causation of sesquizygosis.}, } @article {pmid30810961, year = {2019}, author = {Ranz, J and Clifton, B}, title = {Characterization and evolutionary dynamics of complex regions in eukaryotic genomes.}, journal = {Science China. Life sciences}, volume = {62}, number = {4}, pages = {467-488}, doi = {10.1007/s11427-018-9458-0}, pmid = {30810961}, issn = {1869-1889}, mesh = {Eukaryota/*genetics ; *Evolution, Molecular ; Gene Duplication ; Genome/*genetics ; Genomic Structural Variation ; Genomics/standards ; Multigene Family ; Sequence Analysis, DNA/standards ; }, abstract = {Complex regions in eukaryotic genomes are typically characterized by duplications of chromosomal stretches that often include one or more genes repeated in a tandem array or in relatively close proximity. Nevertheless, the repetitive nature of these regions, together with the often high sequence identity among repeats, have made complex regions particularly recalcitrant to proper molecular characterization, often being misassembled or completely absent in genome assemblies. This limitation has prevented accurate functional and evolutionary analyses of these regions. This is becoming increasingly relevant as evidence continues to support a central role for complex genomic regions in explaining human disease, developmental innovations, and ecological adaptations across phyla. 
With the advent of long-read sequencing technologies and suitable assemblers, the development of algorithms that can accommodate sample heterozygosity, and the adoption of a pangenomic-like view of these regions, accurate reconstructions of complex regions are now within reach. These reconstructions will finally allow for accurate functional and evolutionary studies of complex genomic regions, underlying the generation of genotype-phenotype maps of unprecedented resolution.}, } @article {pmid30808378, year = {2019}, author = {Caputo, A and Fournier, PE and Raoult, D}, title = {Genome and pan-genome analysis to classify emerging bacteria.}, journal = {Biology direct}, volume = {14}, number = {1}, pages = {5}, pmid = {30808378}, issn = {1745-6150}, mesh = {Bacteria/*classification/genetics ; Genomics/*methods ; Humans ; *Microbiota ; RNA, Bacterial/analysis ; RNA, Ribosomal, 16S/analysis ; Sequence Analysis, RNA/methods ; Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization/methods ; }, abstract = {BACKGROUND: In the recent years, genomic and pan-genomic studies have become increasingly important. Culturomics allows to study human microbiota through the use of different culture conditions, coupled with a method of rapid identification by MALDI-TOF, or 16S rRNA. Bacterial taxonomy is undergoing many changes as a consequence. With the help of pan-genomic analyses, species can be redefined, and new species definitions generated.

RESULTS: Genomics, coupled with culturomics, has led to the discovery of many novel bacterial species or genera, including Akkermansia muciniphila and Microvirga massiliensis. Using the genome to define species has been applied within the genus Klebsiella. A discontinuity or an abrupt break in the core/pan-genome ratio can uncover novel species.

CONCLUSIONS: Applying genomic and pan-genomic analyses to the reclassification of other bacterial species or genera will be important in the future of medical microbiology. The pan-genome is one of many new innovative tools in bacterial taxonomy.

REVIEWERS: This article was reviewed by William Martin, Eric Bapteste and James Mcinerney.

OPEN PEER REVIEW: Reviewed by William Martin, Eric Bapteste and James Mcinerney.}, } @article {pmid30801025, year = {2019}, author = {Entwistle, S and Li, X and Yin, Y}, title = {Orphan Genes Shared by Pathogenic Genomes Are More Associated with Bacterial Pathogenicity.}, journal = {mSystems}, volume = {4}, number = {1}, pages = {}, pmid = {30801025}, issn = {2379-5077}, support = {R15 GM114706/GM/NIGMS NIH HHS/United States ; }, abstract = {Orphan genes (also known as ORFans [i.e., orphan open reading frames]) are new genes that enable an organism to adapt to its specific living environment. Our focus in this study is to compare ORFans between pathogens (P) and nonpathogens (NP) of the same genus. Using the pangenome idea, we have identified 130,169 ORFans in nine bacterial genera (505 genomes) and classified these ORFans into four groups: (i) SS-ORFans (P), which are only found in a single pathogenic genome; (ii) SS-ORFans (NP), which are only found in a single nonpathogenic genome; (iii) PS-ORFans (P), which are found in multiple pathogenic genomes; and (iv) NS-ORFans (NP), which are found in multiple nonpathogenic genomes. Within the same genus, pathogens do not always have more genes, more ORFans, or more pathogenicity-related genes (PRGs)-including prophages, pathogenicity islands (PAIs), virulence factors (VFs), and horizontal gene transfers (HGTs)-than nonpathogens. Interestingly, in pathogens of the nine genera, the percentages of PS-ORFans are consistently higher than those of SS-ORFans, which is not true in nonpathogens. Similarly, in pathogens of the nine genera, the percentages of PS-ORFans matching the four types of PRGs are also always higher than those of SS-ORFans, but this is not true in nonpathogens. All of these findings suggest the greater importance of PS-ORFans for bacterial pathogenicity. 
IMPORTANCE Recent pangenome analyses of numerous bacterial species have suggested that each genome of a single species may have a significant fraction of its gene content unique or shared by a very few genomes (i.e., ORFans). We selected nine bacterial genera, each containing at least five pathogenic and five nonpathogenic genomes, to compare their ORFans in relation to pathogenicity-related genes. Pathogens in these genera are known to cause a number of common and devastating human diseases such as pneumonia, diphtheria, melioidosis, and tuberculosis. Thus, they are worthy of in-depth systems microbiology investigations, including the comparative study of ORFans between pathogens and nonpathogens. We provide direct evidence to suggest that ORFans shared by more pathogens are more associated with pathogenicity-related genes and thus are more important targets for development of new diagnostic markers or therapeutic drugs for bacterial infectious diseases.}, } @article {pmid30799264, year = {2019}, author = {De Filippis, F and Pasolli, E and Tett, A and Tarallo, S and Naccarati, A and De Angelis, M and Neviani, E and Cocolin, L and Gobbetti, M and Segata, N and Ercolini, D}, title = {Distinct Genetic and Functional Traits of Human Intestinal Prevotella copri Strains Are Associated with Different Habitual Diets.}, journal = {Cell host & microbe}, volume = {25}, number = {3}, pages = {444-453.e3}, doi = {10.1016/j.chom.2019.01.004}, pmid = {30799264}, issn = {1934-6069}, mesh = {Diet/*methods ; *Feeding Behavior ; Female ; Gastrointestinal Microbiome ; Genome, Bacterial ; Genotype ; Healthy Volunteers ; Humans ; Italy ; Male ; *Microbiota ; Phenotype ; Prevalence ; Prevotella/*classification/genetics/*isolation & purification/physiology ; }, abstract = {The role of intestinal Prevotella species in human health is controversial, with both positive and negative associations. 
Strain-level diversity may contribute to discrepancies in genus and species associations with health and disease. We dissected the gut metagenomes of Italians with varying dietary habits, investigating the presence of distinct Prevotella copri strains. Fiber-rich diets were linked to P. copri types with enhanced potential for carbohydrate catabolism. P. copri strains associated with an omnivore diet had a higher prevalence of the leuB gene-involved in branched-chain amino acid biosynthesis-a risk factor for glucose intolerance and type 2 diabetes. These P. copri pangenomes were compared to existing cohorts, providing evidence of distinct gene repertoires characterizing different P. copri populations, with drug metabolism and complex carbohydrate degradation significantly associated with Western and non-Western individuals, respectively. Strain-level P. copri diversity in gut microbiomes is affected by diet and should be considered when examining host-microbe associations.}, } @article {pmid30783197, year = {2019}, author = {Lin, JN and Lai, CH and Yang, CH and Huang, YH and Lin, HH}, title = {Genomic Features, Comparative Genomics, and Antimicrobial Susceptibility Patterns of Elizabethkingia bruuniana.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {2267}, pmid = {30783197}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/pharmacology ; Drug Resistance, Multiple, Bacterial/drug effects/*genetics ; Flavobacteriaceae/*genetics/metabolism ; *Genome, Bacterial ; }, abstract = {Elizabethkingia bruuniana is a novel species of the Elizabethkingia genus. There is scant information on this microorganism. Here, we report the whole-genome features and antimicrobial susceptibility patterns of E. bruuniana strain EM798-26. Elizabethkingia strain EM798-26 was initially identified as E. miricola. This isolate contained a circular genome of 4,393,011 bp. 
The whole-genome sequence-based phylogeny revealed that Elizabethkingia strain EM798-26 was in the same group of the type strain E. bruuniana G0146[T]. Both in silico DNA-DNA hybridization and average nucleotide identity analysis clearly demonstrated that Elizabethkingia strain EM798-26 was a species of E. bruuniana. The pan-genome analysis identified 2,875 gene families in the core genome and 5,199 gene families in the pan genome of eight publicly available E. bruuniana genome sequences. The unique genes accounted for 0.2-12.1% of the pan genome in each E. bruuniana. A total of 59 potential virulence factor homologs were predicted in the whole-genome of E. bruuniana strain EM798-26. This isolate was nonsusceptible to multiple antibiotics, but susceptible to aminoglycosides, minocycline, and levofloxacin. The whole-genome sequence analysis of E. bruuniana EM798-26 revealed 29 homologs of antibiotic resistance-related genes. This study presents the genomic features of E. bruuniana. Knowledge of the genomic characteristics provides valuable insights into a novel species.}, } @article {pmid30782660, year = {2019}, author = {López-Pérez, M and Jayakumar, JM and Haro-Moreno, JM and Zaragoza-Solas, A and Reddi, G and Rodriguez-Valera, F and Shapiro, OH and Alam, M and Almagro-Moreno, S}, title = {Evolutionary Model of Cluster Divergence of the Emergent Marine Pathogen Vibrio vulnificus: From Genotype to Ecotype.}, journal = {mBio}, volume = {10}, number = {1}, pages = {}, pmid = {30782660}, issn = {2150-7511}, mesh = {Aquaculture ; Aquatic Organisms/microbiology ; Cluster Analysis ; Computational Biology ; *Ecotype ; Evolution, Molecular ; Gene Flow ; Gene Transfer, Horizontal ; *Genetic Variation ; Genome, Bacterial ; *Genotype ; Phenotype ; Recombination, Genetic ; Vibrio vulnificus/*classification/*genetics/isolation & purification/physiology ; }, abstract = {Vibrio vulnificus, an opportunistic pathogen, is the causative agent of a life-threatening septicemia and a 
rising problem for aquaculture worldwide. The genetic factors that differentiate its clinical and environmental strains remain enigmatic. Furthermore, clinical strains have emerged from every clade of V. vulnificus. In this work, we investigated the underlying genomic properties and population dynamics of the V. vulnificus species from an evolutionary and ecological point of view. Genome comparisons and bioinformatic analyses of 113 V. vulnificus isolates indicate that the population of V. vulnificus is made up of four different clusters. We found evidence that recombination and gene flow between the two largest clusters (cluster 1 [C1] and C2) have drastically decreased to the point where they are diverging independently. Pangenome and phenotypic analyses showed two markedly different lifestyles for these two clusters, indicating commensal (C2) and bloomer (C1) ecotypes, with differences in carbohydrate utilization, defense systems, and chemotaxis, among other characteristics. Nonetheless, we identified frequent intra- and interspecies exchange of mobile genetic elements (e.g., antibiotic resistance plasmids, novel "chromids," or two different and concurrent type VI secretion systems) that provide high levels of genetic diversity in the population. Surprisingly, we identified strains from both clusters in the mucosa of aquaculture species, indicating that manmade niches are bringing strains from the two clusters together. We propose an evolutionary model of V. vulnificus that could be broadly applicable to other pathogenic vibrios and facultative bacterial pathogens to pursue strategies to prevent their infections and emergence. IMPORTANCE Vibrio vulnificus is an emergent marine pathogen and is the cause of a deadly septicemia. However, the genetic factors that differentiate its clinical and environmental strains and its several biotypes remain mostly enigmatic. In this work, we investigated the underlying genomic properties and population dynamics of the V. 
vulnificus species to elucidate the traits that make these strains emerge as a human pathogen. The acquisition of different ecological determinants could have allowed the development of highly divergent clusters with different lifestyles within the same environment. However, we identified strains from both clusters in the mucosa of aquaculture species, indicating that manmade niches are bringing strains from the two clusters together, posing a potential risk of recombination and of emergence of novel variants. We propose a new evolutionary model that provides a perspective that could be broadly applicable to other pathogenic vibrios and facultative bacterial pathogens to pursue strategies to prevent their infections.}, } @article {pmid30781742, year = {2019}, author = {Issa, E and Salloum, T and Panossian, B and Ayoub, D and Abboud, E and Tokajian, S}, title = {Genome Mining and Comparative Analysis of Streptococcus intermedius Causing Brain Abscess in a Child.}, journal = {Pathogens (Basel, Switzerland)}, volume = {8}, number = {1}, pages = {}, pmid = {30781742}, issn = {2076-0817}, abstract = {Streptococcus intermedius (SI) is associated with prolonged hospitalization and low survival rates. The genetic mechanisms involved in brain abscess development and genome evolution in comparison to other members of the Streptococcus anginosus group are understudied. We performed a whole-genome comparative analysis of an SI isolate, LAU_SINT, associated with brain abscess following sinusitis with all SI genomes in addition to S. constellatus and S. anginosus. Selective pressure on virulence factors, phages, pan-genome evolution and single-nucleotide polymorphism analysis were assessed. The structural details of the type seven secretion system (T7SS) was elucidated and compared with different organisms. ily and nanA were both abundant and conserved. Nisin resistance determinants were found in 47% of the isolates. 
Pan-genome and SNPs-based analysis didn't reveal significant geo-patterns. Our results showed that two SC isolates were misidentified as SI. We propose the presence of four T7SS modules (I-IV) located on various genomic islands. We detected a variety of factors linked to metal ions binding on the GIs carrying T7SS. This is the first detailed report characterizing the T7SS and its link to nisin resistance and metal ions binding in SI. These and yet uncharacterized T7SS transmembrane proteins merit further studies and could represent potential therapeutic targets.}, } @article {pmid30779737, year = {2019}, author = {Grytten, I and Rand, KD and Nederbragt, AJ and Storvik, GO and Glad, IK and Sandve, GK}, title = {Graph Peak Caller: Calling ChIP-seq peaks on graph-based reference genomes.}, journal = {PLoS computational biology}, volume = {15}, number = {2}, pages = {e1006731}, pmid = {30779737}, issn = {1553-7358}, mesh = {Algorithms ; Arabidopsis/genetics ; Chromatin Immunoprecipitation/*methods ; Genome/genetics ; Genomics/*methods ; Protein Binding ; Sequence Analysis, DNA/*methods ; Software ; Transcription Factors ; }, abstract = {Graph-based representations are considered to be the future for reference genomes, as they allow integrated representation of the steadily increasing data on individual variation. Currently available tools allow de novo assembly of graph-based reference genomes, alignment of new read sets to the graph representation as well as certain analyses like variant calling and haplotyping. We here present a first method for calling ChIP-Seq peaks on read data aligned to a graph-based reference genome. The method is a graph generalization of the peak caller MACS2, and is implemented in an open source tool, Graph Peak Caller. 
By using the existing tool vg to build a pan-genome of Arabidopsis thaliana, we validate our approach by showing that Graph Peak Caller with a pan-genome reference graph can trace variants within peaks that are not part of the linear reference genome, and find peaks that in general are more motif-enriched than those found by MACS2.}, } @article {pmid30778187, year = {2019}, author = {O'Malley, MA and Leger, MM and Wideman, JG and Ruiz-Trillo, I}, title = {Concepts of the last eukaryotic common ancestor.}, journal = {Nature ecology & evolution}, volume = {3}, number = {3}, pages = {338-344}, doi = {10.1038/s41559-019-0796-3}, pmid = {30778187}, issn = {2397-334X}, mesh = {*Biological Evolution ; *Eukaryota/classification/genetics/physiology ; Evolution, Molecular ; Genome ; Phylogeny ; }, abstract = {Insight into the last eukaryotic common ancestor (LECA) is central to any phylogeny-based reconstruction of early eukaryotic evolution. Increasing amounts of data enable such reconstructions, without necessarily providing further insight into what LECA actually was. We consider four possible concepts of LECA: an abstract phylogenetic state, a single cell, a population, and a consortium of organisms. We argue that the view most realistically underlying work in the field is that of LECA as a population. Drawing on recent findings of genomically heterogeneous populations in eukaryotes ('pangenomes'), we examine the evolutionary implications of a pangenomic LECA population. For instance, how does this concept affect standard expectations about the ecology, geography, fitness, and diversification of LECA? Does it affect evolutionary interpretations of LECA's cellular functions? Finally, we examine whether this novel pangenomic concept of LECA has implications for phylogenetic reconstructions of early eukaryote evolution. 
Our aim is to add to the conceptual toolkit for developing theories of LECA and interpreting genomic datasets.}, } @article {pmid30775379, year = {2019}, author = {Lu, QF and Cao, DM and Su, LL and Li, SB and Ye, GB and Zhu, XY and Wang, JP}, title = {Genus-Wide Comparative Genomics Analysis of Neisseria to Identify New Genes Associated with Pathogenicity and Niche Adaptation of Neisseria Pathogens.}, journal = {International journal of genomics}, volume = {2019}, number = {}, pages = {6015730}, pmid = {30775379}, issn = {2314-436X}, abstract = {N. gonorrhoeae and N. meningitidis, the only two human pathogens of Neisseria, are closely related species. But the niches they survived in and their pathogenic characteristics are distinctly different. However, the genetic basis of these differences has not yet been fully elucidated. In this study, comparative genomics analysis was performed based on 15 N. gonorrhoeae, 75 N. meningitidis, and 7 nonpathogenic Neisseria genomes. Core-pangenome analysis found 1111 conserved gene families among them, and each of these species groups had opening pangenome. We found that 452, 78, and 319 gene families were unique in N. gonorrhoeae, N. meningitidis, and both of them, respectively. Those unique gene families were regarded as candidates that related to their pathogenicity and niche adaptation. The relationships among them have been partly verified by functional annotation analysis. But at least one-third genes for each gene set have not found the certain functional information. Simple sequence repeat (SSR), the basis of gene phase variation, was found abundant in the membrane or related genes of each unique gene set, which may facilitate their adaptation to variable host environments. Protein-protein interaction (PPI) analysis found at least five distinct PPI clusters in N. gonorrhoeae and four in N. 
meningitidis, and 167 and 52 proteins with unknown function were contained within them, respectively.}, } @article {pmid30760727, year = {2019}, author = {Stevens, MJA and Tasara, T and Klumpp, J and Stephan, R and Ehling-Schulz, M and Johler, S}, title = {Whole-genome-based phylogeny of Bacillus cytotoxicus reveals different clades within the species and provides clues on ecology and evolution.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {1984}, pmid = {30760727}, issn = {2045-2322}, mesh = {Bacillus/*classification/*genetics ; Bacterial Proteins/genetics ; Bacterial Toxins/genetics ; Food Microbiology ; Food Safety ; Foodborne Diseases/*microbiology ; Gastrointestinal Diseases/*microbiology ; Genome, Bacterial/*genetics ; Phylogeny ; Plasmids/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Bacillus cytotoxicus is a member of the Bacillus cereus group linked to fatal cases of diarrheal disease. Information on B. cytotoxicus is very limited; in particular comprehensive genomic data is lacking. Thus, we applied a genomic approach to characterize B. cytotoxicus and decipher its population structure. To this end, complete genomes of ten B. cytotoxicus were sequenced and compared to the four publicly available full B. cytotoxicus genomes and genomes of other B. cereus group members. Average nucleotide identity, core genome, and pan genome clustering resulted in clear distinction of B. cytotoxicus strains from other strains of the B. cereus group. Genomic content analyses showed that a hydroxyphenylalanine operon is present in B. cytotoxicus, but absent in all other members of the B. cereus group. It enables degradation of aromatic compounds to succinate and pyruvate and was likely acquired from another Bacillus species. It allows for utilization of tyrosine and might have given a B. cytotoxicus ancestor an evolutionary advantage resulting in species differentiation. Plasmid content showed that B. 
cytotoxicus is flexible in exchanging genes, allowing for quick adaptation to the environment. Genome-based phylogenetic analyses divided the B. cytotoxicus strains into four clades that also differed in virulence gene content.}, } @article {pmid30745056, year = {2019}, author = {Lye, ZN and Purugganan, MD}, title = {Copy Number Variation in Domestication.}, journal = {Trends in plant science}, volume = {24}, number = {4}, pages = {352-365}, doi = {10.1016/j.tplants.2019.01.003}, pmid = {30745056}, issn = {1878-4372}, mesh = {Animals ; *DNA Copy Number Variations ; *Domestication ; Phenotype ; Plants ; }, abstract = {Domesticated plants have long served as excellent models for studying evolution. Many genes and mutations underlying important domestication traits have been identified, and most causal mutations appear to be SNPs. Copy number variation (CNV) is an important source of genetic variation that has been largely neglected in studies of domestication. Ongoing work demonstrates the importance of CNVs as a source of genetic variation during domestication, and during the diversification of domesticated taxa. Here, we review how CNVs contribute to evolutionary processes underlying domestication, and review examples of domestication traits caused by CNVs. We draw from examples in plant species, but also highlight cases in animal systems that could illuminate the roles of CNVs in the domestication process.}, } @article {pmid30730313, year = {2019}, author = {Papathomas, TG and Nosé, V}, title = {New and Emerging Biomarkers in Endocrine Pathology.}, journal = {Advances in anatomic pathology}, volume = {26}, number = {3}, pages = {198-209}, doi = {10.1097/PAP.0000000000000227}, pmid = {30730313}, issn = {1533-4031}, mesh = {*Biomarkers, Tumor ; Endocrinology/*trends ; Humans ; Pathology/*trends ; }, abstract = {Significant advances in genomics and molecular genetics in recent years have reshaped the practice of endocrine pathology. 
Pan-genomic studies, including the pioneering ones on papillary thyroid carcinoma, phaeochromocytoma/paraganglioma, and adrenal cortical carcinoma from the Cancer Genome Atlas (TCGA) project, provided a comprehensive integrated genomic analysis of endocrine tumors into distinct molecularly defined subtypes. Better understanding of the molecular landscape and more accurate definition of biological behavior has been accordingly achieved. Nevertheless, how any of these advances are translated into routine practice still remains a challenge in the era of precision medicine. The challenge for modern pathology is to keep up the pace with scientific discoveries by integrating novel concepts in tumor classification, molecular genetics, prognostication, and theranostics. As an example, pathology plays a role in the identification of hereditary disease, while it offers the tools for complementing molecular genetics, for example, validation of variants of unknown significance deriving from targeted sequencing or whole exome/genome sequencing approach. Immunohistochemistry has arisen as a cost-effective strategy in the evaluation either of somatic mutations in tumors and/or germline mutations in patients with familial cancer syndromes. 
Herein, a comprehensive review focusing on novel and emerging biomarkers is presented in order pathologists and other endocrine-related specialists to remain updated and become aware of potential pitfalls and limitations in the field of endocrine pathology.}, } @article {pmid30718880, year = {2019}, author = {Arora, S and Steuernagel, B and Gaurav, K and Chandramohan, S and Long, Y and Matny, O and Johnson, R and Enk, J and Periyannan, S and Singh, N and Asyraf Md Hatta, M and Athiyannan, N and Cheema, J and Yu, G and Kangara, N and Ghosh, S and Szabo, LJ and Poland, J and Bariana, H and Jones, JDG and Bentley, AR and Ayliffe, M and Olson, E and Xu, SS and Steffenson, BJ and Lagudah, E and Wulff, BBH}, title = {Resistance gene cloning from a wild crop relative by sequence capture and association genetics.}, journal = {Nature biotechnology}, volume = {37}, number = {2}, pages = {139-143}, doi = {10.1038/s41587-018-0007-9}, pmid = {30718880}, issn = {1546-1696}, support = {BB/E006868/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/H019820/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/L009293/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P021646/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Chromosome Mapping ; *Cloning, Molecular ; Crops, Agricultural/*genetics ; Disease Resistance/*genetics ; *Genes, Plant ; Genetic Association Studies ; Genetic Variation ; Genomics ; Genotype ; Models, Genetic ; Phenotype ; Phylogeny ; Plant Diseases/*genetics ; Polymorphism, Single Nucleotide ; Seedlings ; Triticum/genetics ; }, abstract = {Disease resistance (R) genes from wild relatives could be used to engineer broad-spectrum resistance in domesticated crops. 
We combined association genetics with R gene enrichment sequencing (AgRenSeq) to exploit pan-genome variation in wild diploid wheat and rapidly clone four stem rust resistance genes. AgRenSeq enables R gene cloning in any crop that has a diverse germplasm panel.}, } @article {pmid30718868, year = {2019}, author = {Zou, Y and Xue, W and Luo, G and Deng, Z and Qin, P and Guo, R and Sun, H and Xia, Y and Liang, S and Dai, Y and Wan, D and Jiang, R and Su, L and Feng, Q and Jie, Z and Guo, T and Xia, Z and Liu, C and Yu, J and Lin, Y and Tang, S and Huo, G and Xu, X and Hou, Y and Liu, X and Wang, J and Yang, H and Kristiansen, K and Li, J and Jia, H and Xiao, L}, title = {1,520 reference genomes from cultivated human gut bacteria enable functional microbiome analyses.}, journal = {Nature biotechnology}, volume = {37}, number = {2}, pages = {179-185}, pmid = {30718868}, issn = {1546-1696}, mesh = {Bacteria/classification ; Cluster Analysis ; Computational Biology/*methods ; Conserved Sequence ; Feces ; *Gastrointestinal Microbiome ; Genome, Bacterial ; Genomics ; Humans ; *Metagenome ; Metagenomics ; Phylogeny ; Polymorphism, Single Nucleotide ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Reference genomes are essential for metagenomic analyses and functional characterization of the human gut microbiota. We present the Culturable Genome Reference (CGR), a collection of 1,520 nonredundant, high-quality draft genomes generated from >6,000 bacteria cultivated from fecal samples of healthy humans. Of the 1,520 genomes, which were chosen to cover all major bacterial phyla and genera in the human gut, 264 are not represented in existing reference genome catalogs. We show that this increase in the number of reference bacterial genomes improves the rate of mapping metagenomic sequencing reads from 50% to >70%, enabling higher-resolution descriptions of the human gut microbiome. 
We use the CGR genomes to annotate functions of 338 bacterial species, showing the utility of this resource for functional studies. We also carry out a pan-genome analysis of 38 important human gut species, which reveals the diversity and specificity of functional enrichment between their core and dispensable genomes.}, } @article {pmid30714895, year = {2019}, author = {McCarthy, CGP and Fitzpatrick, DA}, title = {Pan-genome analyses of model fungal species.}, journal = {Microbial genomics}, volume = {5}, number = {2}, pages = {}, pmid = {30714895}, issn = {2057-5858}, mesh = {Evolution, Molecular ; Fungi/*classification/genetics ; *Genome, Fungal ; Genome-Wide Association Study ; Genomics ; Phylogeny ; }, abstract = {The concept of the species 'pan-genome', the union of 'core' conserved genes and all 'accessory' non-conserved genes across all strains of a species, was first proposed in prokaryotes to account for intraspecific variability. Species pan-genomes have been extensively studied in prokaryotes, but evidence of species pan-genomes has also been demonstrated in eukaryotes such as plants and fungi. Using a previously published methodology based on sequence homology and conserved microsynteny, in addition to bespoke pipelines, we have investigated the pan-genomes of four model fungal species: Saccharomyces cerevisiae, Candida albicans, Cryptococcus neoformans var. grubii and Aspergillus fumigatus. Between 80 and 90 % of gene models per strain in each of these species are core genes that are highly conserved across all strains of that species, many of which are involved in housekeeping and conserved survival processes. In many of these species, the remaining 'accessory' gene models are clustered within subterminal regions and may be involved in pathogenesis and antimicrobial resistance. 
Analysis of the ancestry of species core and accessory genomes suggests that fungal pan-genomes evolve by strain-level innovations such as gene duplication as opposed to wide-scale horizontal gene transfer. Our findings lend further supporting evidence to the existence of species pan-genomes in eukaryote taxa.}, } @article {pmid30709821, year = {2019}, author = {Lugli, GA and Mancino, W and Milani, C and Duranti, S and Mancabelli, L and Napoli, S and Mangifesta, M and Viappiani, A and Anzalone, R and Longhi, G and van Sinderen, D and Ventura, M and Turroni, F}, title = {Dissecting the Evolutionary Development of the Species Bifidobacterium animalis through Comparative Genomics Analyses.}, journal = {Applied and environmental microbiology}, volume = {85}, number = {7}, pages = {}, pmid = {30709821}, issn = {1098-5336}, mesh = {Animals ; Bifidobacterium/genetics ; Bifidobacterium animalis/enzymology/*genetics/metabolism ; Birds ; Carbohydrate Metabolism ; Carbohydrates ; *Comparative Genomic Hybridization ; *Evolution, Molecular ; Feces/microbiology ; Gastrointestinal Microbiome ; Gastrointestinal Tract/microbiology ; Genes, Bacterial/*genetics ; Genetic Variation ; Genome, Bacterial/genetics ; Humans ; Mammals ; *Phylogeny ; Polysaccharides ; Species Specificity ; }, abstract = {Bifidobacteria are members of the gut microbiota of animals, including mammals, birds, and social insects. In this study, we analyzed and determined the pangenome of Bifidobacterium animalis species, encompassing B. animalis subsp. animalis and the B. animalis subsp. lactis taxon, which is one of the most intensely exploited probiotic bifidobacterial species. In order to reveal differences within the B. animalis species, detailed comparative genomics and phylogenomics analyses were performed, indicating that these two subspecies recently arose through divergent evolutionary events. A subspecies-specific core genome was identified for both B. 
animalis subspecies, revealing the existence of subspecies-defining genes involved in carbohydrate metabolism. Notably, these in silico analyses coupled with carbohydrate profiling assays suggest genetic adaptations toward a distinct glycan milieu for each member of the B. animalis subspecies, resulting in a divergent evolutionary development of the two subspecies. IMPORTANCE: The majority of characterized B. animalis strains have been isolated from human fecal samples. In order to explore genome variability within this species, we isolated 15 novel strains from the gastrointestinal tracts of different animals, including mammals and birds. The present study allowed us to reconstruct the pangenome of this taxon, including the genome contents of 56 B. animalis strains. Through careful assessment of subspecies-specific core genes of the B. animalis subsp. animalis/lactis taxon, we identified genes encoding enzymes involved in carbohydrate transport and metabolism, while unveiling specific gene acquisition and loss events that caused the evolutionary emergence of these two subspecies.}, } @article {pmid30704472, year = {2019}, author = {Nono, AD and Chen, K and Liu, X}, title = {Comparison of different functional prediction scores using a gene-based permutation model for identifying cancer driver genes.}, journal = {BMC medical genomics}, volume = {12}, number = {Suppl 1}, pages = {22}, pmid = {30704472}, issn = {1755-8794}, support = {P30 CA016672/CA/NCI NIH HHS/United States ; }, mesh = {Genes, Neoplasm/*genetics ; Genomics/*methods ; Humans ; *Models, Genetic ; }, abstract = {BACKGROUND: Identifying cancer driver genes (CDG) is a crucial step in cancer genomic toward the advancement of precision medicine. However, driver gene discovery is a very challenging task because we are not only dealing with huge amount of data; but we are also faced with the complexity of the disease including the heterogeneity of background somatic mutation rate in each cancer patient. 
It is generally accepted that CDG harbor variants conferring growth advantage in the malignant cell and they are positively selected, which are critical to cancer development; whereas, non-driver genes harbor random mutations with no functional consequence on cancer. Based on this fact, function prediction based approaches for identifying CDG have been proposed to interrogate the distribution of functional predictions among mutations in cancer genomes (eLS 1-16, 2016). Assuming most of the observed mutations are passenger mutations and given the quantitative predictions for the functional impact of the mutations, genes enriched of functional or deleterious mutations are more likely to be drivers. The promises of these methods have been continually refined and can therefore be applied to increase accuracy in detecting new candidate CDGs. However, current function prediction based approaches only focus on coding mutations and lack a systematic way to pick the best mutation deleteriousness prediction algorithms for usage.

RESULTS: In this study, we propose a new function prediction based approach to discover CDGs through a gene-based permutation approach. Our method not only covers both coding and non-coding regions of the genes; but it also accounts for the heterogeneous mutational context in cohort of cancer patients. The permutation model was implemented independently using seven popular deleteriousness prediction scores covering splicing regions (SPIDEX), coding regions (MetaLR, and VEST3) and pan-genome (CADD, DANN, Fathmm-MKL coding and Fathmm-MKL noncoding). We applied this new approach to somatic single nucleotide variants (SNVs) from whole-genome sequences of 119 breast and 24 lung cancer patients and compared the seven deleteriousness prediction scores for their performance in this study.

CONCLUSION: The new function prediction based approach not only predicted known cancer genes listed in the Cancer Gene Census (CGC), but also new candidate CDGs that are worth further investigation. The results showed the advantage of utilizing pan-genome deleteriousness prediction scores in function prediction based methods. Although VEST3 score, a deleteriousness prediction score for missense mutations, has the best performance in breast cancer, it was topped by CADD and Fathmm-MKL coding, two pan-genome deleteriousness prediction scores, in lung cancer.}, } @article {pmid30701191, year = {2019}, author = {Leviatan, S and Segal, E}, title = {A Significant Expansion of Our Understanding of the Composition of the Human Microbiome.}, journal = {mSystems}, volume = {4}, number = {1}, pages = {}, pmid = {30701191}, issn = {2379-5077}, abstract = {Shotgun sequencing of samples taken from the human microbiome often reveals only partial mapping of the sequenced metagenomic reads to existing reference genomes. Such partial mappability indicates that many genomes are missing in our reference genome set. This is particularly true for non-Western populations and for samples that do not originate from the gut. Pasolli et al. (E. Pasolli, F. Asnicar, S. Manara, M. 
Zolfo, et al., Cell, 2019, https://doi.org/10.1016/j.cell.2019.01.001) perform a grand effort to expand the reference set, and to better classify its members, revealing a wider pangenome of existing species as well as identifying new species of previously unknown taxonomic branches.}, } @article {pmid30687297, year = {2018}, author = {Sánchez-Osuna, M and Cortés, P and Barbé, J and Erill, I}, title = {Origin of the Mobile Di-Hydro-Pteroate Synthase Gene Determining Sulfonamide Resistance in Clinical Isolates.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3332}, pmid = {30687297}, issn = {1664-302X}, abstract = {Sulfonamides are synthetic chemotherapeutic agents that work as competitive inhibitors of the di-hydro-pteroate synthase (DHPS) enzyme, encoded by the folP gene. Resistance to sulfonamides is widespread in the clinical setting and predominantly mediated by plasmid- and integron-borne sul1-3 genes encoding mutant DHPS enzymes that do not bind sulfonamides. In spite of their clinical importance, the genetic origin of sul1-3 genes remains unknown. Here we analyze sul genes and their genetic neighborhoods to uncover sul signature elements that enable the elucidation of their genetic origin. We identify a protein sequence Sul motif associated with sul-encoded proteins, as well as consistent association of a phosphoglucosamine mutase gene (glmM) with the sul2 gene. We identify chromosomal folP genes bearing these genetic markers in two bacterial families: the Rhodobiaceae and the Leptospiraceae. Bayesian phylogenetic inference of FolP/Sul and GlmM protein sequences clearly establishes that sul1-2 and sul3 genes originated as a mobilization of folP genes present in, respectively, the Rhodobiaceae and the Leptospiraceae, and indicate that the Rhodobiaceae folP gene was transferred from the Leptospiraceae. 
Analysis of %GC content in folP/sul gene sequences supports the phylogenetic inference results and indicates that the emergence of the Sul motif in chromosomally encoded FolP proteins is ancient and considerably predates the clinical introduction of sulfonamides. In vitro assays reveal that both the Rhodobiaceae and the Leptospiraceae, but not other related chromosomally encoded FolP proteins confer resistance in a sulfonamide-sensitive Escherichia coli background, indicating that the Sul motif is associated with sulfonamide resistance. Given the absence of any known natural sulfonamides targeting DHPS, these results provide a novel perspective on the emergence of resistance to synthetic chemotherapeutic agents, whereby preexisting resistant variants in the vast bacterial pangenome may be rapidly selected for and disseminated upon the clinical introduction of novel chemotherapeuticals.}, } @article {pmid30687252, year = {2018}, author = {Slama, HB and Cherif-Silini, H and Chenari Bouket, A and Qader, M and Silini, A and Yahiaoui, B and Alenezi, FN and Luptakova, L and Triki, MA and Vallat, A and Oszako, T and Rateb, ME and Belbahri, L}, title = {Screening for Fusarium Antagonistic Bacteria From Contrasting Niches Designated the Endophyte Bacillus halotolerans as Plant Warden Against Fusarium.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3236}, pmid = {30687252}, issn = {1664-302X}, abstract = {Date palm (Phoenix dactylifera L.) plantations in North Africa are nowadays threatened with the spread of the Bayoud disease caused by Fusarium oxysporum f. sp. albedinis, already responsible for destroying date production in other infected areas, mainly in Morocco. Biological control holds great promise for sustainable and environmental-friendly management of the disease. In this study, the additional benefits to agricultural ecosystems of using plant growth promoting rhizobacteria (PGPR) or endophytes are addressed. 
First, PGPR or endophytes can offer an interesting bio-fertilization, meaning that it can add another layer to the sustainability of the approach. Additionally, screening of contrasting niches can yield bacterial actors that could represent wardens against whole genera or groups of plant pathogenic agents thriving in semi-arid to arid ecosystems. Using this strategy, we recovered four bacterial isolates, designated BFOA1, BFOA2, BFOA3 and BFOA4, that proved very active against F. oxysporum f. sp. albedinis. BFOA1-BFOA4 proved also active against 16 Fusarium isolates belonging to four species: F. oxysporum (with strains phytopathogenic of Olea europaea and tomato), F. solani (with different strains attacking O. europaea and potato), F. acuminatum (pathogenic on O. europaea) and F. chlamydosporum (phytopathogenic of O. europaea). BFOA1-BFOA4 bacterial isolates exhibited strong activities against another four major phytopathogens: Botrytis cinerea, Alternaria alternata, Phytophthora infestans, and Rhizoctonia bataticola. Isolates BFOA1-BFOA4 had the ability to grow at temperatures up to 35°C, pH range of 5-10, and tolerate high concentrations of NaCl and up to 30% PEG. The isolates also showed relevant direct and indirect PGP features, including growth on nitrogen-free medium, phosphate solubilization and auxin biosynthesis, as well as resistance to metal and xenobiotic stress. Phylogenomic analysis of BFOA1-BFOA4 isolates indicated that they all belong to Bacillus halotolerans, which could therefore considered as a warden against Fusarium infection in plants. Comparative genomics allowed us to functionally describe the open pan genome of B. halotolerans and LC-HRMS and GCMS analyses, enabling the description of diverse secondary metabolites including pulegone, 2-undecanone, and germacrene D, with important antimicrobial and insecticidal properties. In conclusion, B. 
halotolerans could be used as an efficient bio-fertilizer and bio-control agent in semi-arid and arid ecosystems.}, } @article {pmid30679463, year = {2019}, author = {Arabaghian, H and Salloum, T and Alousi, S and Panossian, B and Araj, GF and Tokajian, S}, title = {Molecular Characterization of Carbapenem Resistant Klebsiella pneumoniae and Klebsiella quasipneumoniae Isolated from Lebanon.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {531}, pmid = {30679463}, issn = {2045-2322}, mesh = {Carbapenems/*pharmacology ; Genome, Bacterial ; Humans ; Klebsiella/drug effects/*genetics ; Klebsiella Infections/drug therapy/epidemiology/*microbiology ; Klebsiella pneumoniae/drug effects/*genetics ; Lebanon/epidemiology ; Multilocus Sequence Typing ; *beta-Lactam Resistance ; }, abstract = {Klebsiella pneumoniae is a Gram-negative organism and a major public health threat. In this study, we used whole-genome sequences to characterize 32 carbapenem-resistant K. pneumoniae (CRKP) and two carbapenem-resistant K. quasipneumoniae (CRKQ). Antimicrobial resistance was assessed using disk diffusion and E-test, while virulence was assessed in silico. The capsule type was determined by sequencing the wzi gene. The plasmid diversity was assessed by PCR-based replicon typing to detect the plasmid incompatibility (Inc) groups. The genetic relatedness was determined by multilocus sequence typing, pan-genome, and recombination analysis. All of the isolates were resistant to ertapenem together with imipenem and/or meropenem. Phenotypic resistance was due to blaOXA-48, blaNDM-1, blaNDM-7, or the coupling of ESBLs and outer membrane porin modifications. This is the first comprehensive study reporting on the WGS of CRKP and the first detection of CRKQ in the region. 
The presence and dissemination of CRKP and CRKQ, with some additionally having characteristics of hypervirulent clones such as the hypermucoviscous phenotype and the capsular type K2, are particularly concerning. Additionally, mining the completely sequenced K. pneumoniae genomes revealed the key roles of mobile genetic elements in the spread of antibiotic resistance and in understanding the epidemiology of these clinically significant pathogens.}, } @article {pmid30661755, year = {2019}, author = {Pasolli, E and Asnicar, F and Manara, S and Zolfo, M and Karcher, N and Armanini, F and Beghini, F and Manghi, P and Tett, A and Ghensi, P and Collado, MC and Rice, BL and DuLong, C and Morgan, XC and Golden, CD and Quince, C and Huttenhower, C and Segata, N}, title = {Extensive Unexplored Human Microbiome Diversity Revealed by Over 150,000 Genomes from Metagenomes Spanning Age, Geography, and Lifestyle.}, journal = {Cell}, volume = {176}, number = {3}, pages = {649-662.e20}, pmid = {30661755}, issn = {1097-4172}, support = {BB/L027801/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; MR/M50161X/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Big Data ; Genetic Variation/genetics ; Geography ; Humans ; Life Style ; Metagenome/*genetics ; Metagenomics/*methods ; Microbiota/*genetics ; Phylogeny ; Sequence Analysis, DNA/methods ; }, abstract = {The body-wide human microbiome plays a role in health, but its full diversity remains uncharacterized, particularly outside of the gut and in international populations. We leveraged 9,428 metagenomes to reconstruct 154,723 microbial genomes (45% of high quality) spanning body sites, ages, countries, and lifestyles. We recapitulated 4,930 species-level genome bins (SGBs), 77% without genomes in public repositories (unknown SGBs [uSGBs]). 
uSGBs are prevalent (in 93% of well-assembled samples), expand underrepresented phyla, and are enriched in non-Westernized populations (40% of the total SGBs). We annotated 2.85 M genes in SGBs, many associated with conditions including infant development (94,000) or Westernization (106,000). SGBs and uSGBs permit deeper microbiome analyses and increase the average mappability of metagenomic reads from 67.76% to 87.51% in the gut (median 94.26%) and 65.14% to 82.34% in the mouth. We thus identify thousands of microbial genomes from yet-to-be-named species, expand the pangenomes of human-associated microbes, and allow better exploitation of metagenomic technologies.}, } @article {pmid30658579, year = {2019}, author = {Zhu, D and He, J and Yang, Z and Wang, M and Jia, R and Chen, S and Liu, M and Zhao, X and Yang, Q and Wu, Y and Zhang, S and Liu, Y and Zhang, L and Yu, Y and You, Y and Chen, X and Cheng, A}, title = {Comparative analysis reveals the Genomic Islands in Pasteurella multocida population genetics: on Symbiosis and adaptability.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {63}, pmid = {30658579}, issn = {1471-2164}, support = {No. 2017YFD050080//the National Key Research and Development Program of China/ ; No. CARS-42-17//China Agricultural Research System/ ; No. 2016JPT0004//Special Fund for Key Laboratory of Animal Disease and Human Health of Sichuan Province/ ; CARS-SVDIP//Sichuan Veterinary Medicine and Drug Innovation Group of China Agricultural Research System/ ; }, mesh = {Animals ; Genes, Bacterial/genetics ; Genetics, Population ; Genome, Bacterial/*genetics ; Genomic Islands/*genetics ; Genomics/methods ; Humans ; Pasteurella Infections/microbiology ; Pasteurella multocida/classification/*genetics/physiology ; Phylogeny ; Species Specificity ; Symbiosis/*genetics ; }, abstract = {BACKGROUND: Pasteurella multocida (P. multocida) is a widespread opportunistic pathogen that infects human and various animals. 
Genomic Islands (GIs) are one of the most important mobile components that quickly help bacteria acquire large fragments of foreign genes. However, the effects of GIs on P. multocida are unknown in the evolution of bacterial populations.

RESULTS: Ten avian-sourced P. multocida obtained through high-throughput sequencing together with 104 publicly available P. multocida genomes were used to analyse their population genetics, thus constructed a pan-genome containing 3948 protein-coding genes. Through the pan-genome, the open evolutionary pattern of P. multocida was revealed, and the functional components of 944 core genes, 2439 accessory genes and 565 unique genes were analysed. In addition, a total of 280 GIs were predicted in all strains. Combined with the pan-genome of P. multocida, the GIs accounted for 5.8% of the core genes in the pan-genome, mainly related to functional metabolic activities; the accessory genes accounted for 42.3%, mainly for the enrichment of adaptive genes; and the unique genes accounted for 35.4%, containing some defence mechanism-related genes.

CONCLUSIONS: The effects of GIs on the population genetics of P. multocida evolution and adaptation to the environment are reflected by the proportion and function of the pan-genome acquired from GIs, and the large quantities of GI data will aid in additional population genetics studies.}, } @article {pmid30657445, year = {2019}, author = {Wilson, K and Ely, B}, title = {Analyses of four new Caulobacter Phicbkviruses indicate independent lineages.}, journal = {The Journal of general virology}, volume = {100}, number = {2}, pages = {321-331}, pmid = {30657445}, issn = {1465-2099}, support = {R25 GM076277/GM/NIGMS NIH HHS/United States ; }, mesh = {Bacteriophages/*classification/genetics/*isolation & purification ; Caulobacter/*virology ; Evolution, Molecular ; Gene Order ; Gene Transfer, Horizontal ; Genes, Viral ; *Genome, Viral ; Genomics ; Phylogeny ; Sequence Homology, Nucleic Acid ; }, abstract = {Bacteriophages with genomes larger than 200 kbp are considered giant phages, and the giant Phicbkviruses are the most frequently isolated Caulobacter crescentus phages. In this study, we compare six bacteriophage genomes that differ from the genomes of the majority of Phicbkviruses. Four of these genomes are much larger than those of the rest of the Phicbkviruses, with genome sizes that are more than 250 kbp. A comparison of 16 Phicbkvirus genomes identified a 'core genome' of 69 genes that is present in all of these Phicbkvirus genomes, as well as shared accessory genes and genes that are unique for each phage. Most of the core genes are clustered into the regions coding for structural proteins or those involved in DNA replication. A phylogenetic analysis indicated that these 16 Caulobacter Phicbkvirus genomes are related, but they represent four distinct branches of the Phicbkvirus genomic tree with distantly related branches sharing little nucleotide homology. 
In contrast, pairwise comparisons within each branch of the phylogenetic tree showed that more than 80 % of the entire genome is shared among phages within a group. This conservation of the genomes within each branch indicates that horizontal gene transfer events between the groups are rare. Therefore, the Phicbkvirus genus consists of at least four different phylogenetic branches that are evolving independently from one another. One of these branches contains a 27-gene inversion relative to the other three branches. Also, an analysis of the tRNA genes showed that they are relatively mobile within the Phicbkvirus genus.}, } @article {pmid30656069, year = {2019}, author = {Barajas, HR and Romero, MF and Martínez-Sánchez, S and Alcaraz, LD}, title = {Global genomic similarity and core genome sequence diversity of the Streptococcus genus as a toolkit to identify closely related bacterial species in complex environments.}, journal = {PeerJ}, volume = {6}, number = {}, pages = {e6233}, pmid = {30656069}, issn = {2167-8359}, abstract = {BACKGROUND: The Streptococcus genus is relevant to both public health and food safety because of its ability to cause pathogenic infections. It is well-represented (>100 genomes) in publicly available databases. Streptococci are ubiquitous, with multiple sources of isolation, from human pathogens to dairy products. The Streptococcus genus has traditionally been classified by morphology, serum types, the 16S ribosomal RNA (rRNA) gene, and multi-locus sequence types subject to in-depth comparative genomic analysis.

METHODS: Core and pan-genomes described the genomic diversity of 108 strains belonging to 16 Streptococcus species. The core genome nucleotide diversity was calculated and compared to phylogenomic distances within the genus Streptococcus. The core genome was also used as a resource to recruit metagenomic fragment reads from streptococci dominated environments. A conventional 16S rRNA gene phylogeny reconstruction was used as a reference to compare the resulting dendrograms of average nucleotide identity (ANI) and genome similarity score (GSS) dendrograms.

RESULTS: The core genome, in this work, consists of 404 proteins that are shared by all 108 Streptococcus. The average identity of the pairwise compared core proteins decreases proportionally to GSS lower scores, across species. The GSS dendrogram recovers most of the clades in the 16S rRNA gene phylogeny while distinguishing between 16S polytomies (unresolved nodes). The GSS is a distance metric that can reflect evolutionary history comparing orthologous proteins. Additionally, GSS resulted in the most useful metric for genus and species comparisons, where ANI metrics failed due to false positives when comparing different species.

DISCUSSION: Understanding of genomic variability and species relatedness is the goal of tools like GSS, which makes use of the maximum pairwise shared orthologous sequences for its calculation. It allows for long evolutionary distances (above species) to be included because of the use of amino acid alignment scores, rather than nucleotides, and normalizing by positive matches. Newly sequenced species and strains could be easily placed into GSS dendrograms to infer overall genomic relatedness. The GSS is not restricted to ubiquitous conservancy of gene features; thus, it reflects the mosaic-structure and dynamism of gene acquisition and loss in bacterial genomes.}, } @article {pmid30647471, year = {2019}, author = {Sherman, RM and Forman, J and Antonescu, V and Puiu, D and Daya, M and Rafaels, N and Boorgula, MP and Chavan, S and Vergara, C and Ortega, VE and Levin, AM and Eng, C and Yazdanbakhsh, M and Wilson, JG and Marrugo, J and Lange, LA and Williams, LK and Watson, H and Ware, LB and Olopade, CO and Olopade, O and Oliveira, RR and Ober, C and Nicolae, DL and Meyers, DA and Mayorga, A and Knight-Madden, J and Hartert, T and Hansel, NN and Foreman, MG and Ford, JG and Faruque, MU and Dunston, GM and Caraballo, L and Burchard, EG and Bleecker, ER and Araujo, MI and Herrera-Paz, EF and Campbell, M and Foster, C and Taub, MA and Beaty, TH and Ruczinski, I and Mathias, RA and Barnes, KC and Salzberg, SL}, title = {Author Correction: Assembly of a pan-genome from deep sequencing of 910 humans of African descent.}, journal = {Nature genetics}, volume = {51}, number = {2}, pages = {364}, doi = {10.1038/s41588-018-0335-1}, pmid = {30647471}, issn = {1546-1718}, abstract = {In the version of this article initially published, the statement "there are no pan-genomes for any other animal or plant species" was incorrect. The statement has been corrected to "there are no reported pan-genomes for any other animal species, to our knowledge." 
We thank David Edwards for bringing this error to our attention. The error has been corrected in the HTML and PDF versions of the article.}, } @article {pmid30646548, year = {2019}, author = {Nguyen, HTL and Panyoyai, N and Kasapis, S and Pang, E and Mantri, N}, title = {Honey and Its Role in Relieving Multiple Facets of Atherosclerosis.}, journal = {Nutrients}, volume = {11}, number = {1}, pages = {}, pmid = {30646548}, issn = {2072-6643}, support = {N/A - RMIT VIED PhD Scholarship//RMIT University/ ; }, mesh = {Antioxidants/analysis ; Atherosclerosis/*prevention & control ; Diabetes Mellitus/prevention & control ; Dietary Sugars/analysis ; Fructose/analysis ; Glucose/analysis ; Honey/adverse effects/*analysis ; Humans ; Hypertension/prevention & control ; Oxidative Stress ; Risk Factors ; Trace Elements/analysis ; Vitamins/analysis ; }, abstract = {Honey, a natural sweetener has been used universally as a complete food and in complementary medicine since early antiquity. Honey contains over 180 substances, including sugars mainly fructose and glucose, water and a plethora of minor constituents such as vitamins, minerals and phytochemicals. The chemical composition of honey varies depending on floral origin, environment and geographical conditions. The sugar components dominate honey composition and they are accountable for sensory and physicochemical properties in food industry. Although present in small quantities, non-sugar components are the major contributors to the health benefits of honey. 
Our review summarizes and discusses composition of honey, its protective effects and possible action modes on risk factors of atherosclerosis.}, } @article {pmid30637341, year = {2019}, author = {Blaustein, RA and McFarland, AG and Ben Maamar, S and Lopez, A and Castro-Wallace, S and Hartmann, EM}, title = {Pangenomic Approach To Understanding Microbial Adaptations within a Model Built Environment, the International Space Station, Relative to Human Hosts and Soil.}, journal = {mSystems}, volume = {4}, number = {1}, pages = {}, pmid = {30637341}, issn = {2379-5077}, support = {R25 GM079300/GM/NIGMS NIH HHS/United States ; T32 AI007476/AI/NIAID NIH HHS/United States ; }, abstract = {Understanding underlying mechanisms involved in microbial persistence in the built environment (BE) is essential for strategically mitigating potential health risks. To test the hypothesis that BEs impose selective pressures resulting in characteristic adaptive responses, we performed a pangenomics meta-analysis leveraging 189 genomes (accessed from GenBank) of two epidemiologically important taxa, Bacillus cereus and Staphylococcus aureus, isolated from various origins: the International Space Station (ISS; a model BE), Earth-based BEs, soil, and humans. Our objectives were to (i) identify differences in the pangenomic composition of generalist and host-associated organisms, (ii) characterize genes and functions involved in BE-associated selection, and (iii) identify genomic signatures of ISS-derived strains of potential relevance for astronaut health. The pangenome of B. cereus was more expansive than that of S. aureus, which had a dominant core component. Genomic contents of both taxa significantly correlated with isolate origin, demonstrating an importance for biogeography and potential niche adaptations. ISS/BE-enriched functions were often involved in biosynthesis, catabolism, materials transport, metabolism, and stress response. 
Multiple origin-enriched functions also overlapped across taxa, suggesting conserved adaptive processes. We further characterized two mobile genetic elements with local neighborhood genes encoding biosynthesis and stress response functions that distinctively associated with B. cereus from the ISS. Although antibiotic resistance genes were present in ISS/BE isolates, they were also common in counterparts elsewhere. Overall, despite differences in microbial lifestyle, some functions appear common to remaining viable in the BE, and those functions are not typically associated with direct impacts on human health. IMPORTANCE The built environment contains a variety of microorganisms, some of which pose critical human health risks (e.g., hospital-acquired infection, antibiotic resistance dissemination). We uncovered a combination of complex biological functions that may play a role in bacterial survival under the presumed selective pressures in a model built environment---the International Space Station---by using an approach to compare pangenomes of bacterial strains from two clinically relevant species (B. cereus and S. aureus) isolated from both built environments and humans. 
Our findings suggest that the most crucial bacterial functions involved in this potential adaptive response are specific to bacterial lifestyle and do not appear to have direct impacts on human health.}, } @article {pmid30631083, year = {2019}, author = {Abreo, E and Altier, N}, title = {Pangenome of Serratia marcescens strains from nosocomial and environmental origins reveals different populations and the links between them.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {46}, pmid = {30631083}, issn = {2045-2322}, mesh = {Computational Biology ; Cross Infection/*microbiology ; *Environmental Microbiology ; *Genetic Variation ; Genome, Bacterial ; *Genotype ; Humans ; Multilocus Sequence Typing ; Serratia Infections/*microbiology ; Serratia marcescens/*genetics/isolation & purification ; }, abstract = {Serratia marcescens is a Gram-negative bacterial species that can be found in a wide range of environments like soil, water and plant surfaces, while it is also known as an opportunistic human pathogen in hospitals and as a plant growth promoting bacteria (PGPR) in crops. We have used a pangenome-based approach, based on publicly available genomes, to apply whole genome multilocus sequence type schemes to assess whether there is an association between source and genotype, aiming at differentiating between isolates from nosocomial sources and the environment, and between strains reported as PGPR from other environmental strains. Most genomes from a nosocomial setting and environmental origin could be assigned to the proposed nosocomial or environmental MLSTs, which is indicative of an association between source and genotype. The fact that a few genomes from a nosocomial source showed an environmental MLST suggests that a minority of nosocomial strains have recently derived from the environment. 
PGPR strains were assigned to different environmental types and clades but only one clade comprised strains accumulating a low number of known virulence and antibiotic resistance determinants and was exclusively from environmental sources. This clade is envisaged as a group of promissory MLSTs for selecting prospective PGPR strains.}, } @article {pmid30625413, year = {2019}, author = {Melenotte, C and Caputo, A and Bechah, Y and Lepidi, H and Terras, J and Kowalczewska, M and Di Pinto, F and Nappez, C and Raoult, D and Brégeon, F}, title = {The hypervirulent Coxiella burnetii Guiana strain compared in silico, in vitro and in vivo to the Nine Mile and the German strain.}, journal = {Clinical microbiology and infection : the official publication of the European Society of Clinical Microbiology and Infectious Diseases}, volume = {25}, number = {9}, pages = {1155.e1-1155.e8}, doi = {10.1016/j.cmi.2018.12.039}, pmid = {30625413}, issn = {1469-0691}, mesh = {Animals ; Antibodies, Bacterial/blood ; Coxiella burnetii/classification/genetics/growth & development/*pathogenicity ; DNA, Bacterial/analysis ; Disease Models, Animal ; *Disease Outbreaks ; French Guiana/epidemiology ; Genetic Variation ; Genome, Bacterial/genetics ; Mice, Inbred BALB C ; Mice, SCID ; Netherlands/epidemiology ; Q Fever/blood/*epidemiology/*microbiology/pathology ; Survival Analysis ; Virulence ; }, abstract = {OBJECTIVE: Q fever epidemic outbreaks have been reported in French Guiana and in The Netherlands. To determine whether the C. burnetii strains involved in these epidemics had a peculiar virulence pattern, we compared the pathogenicity of the Guiana and the German strain (a clone of The Netherlands strain), in silico, in vitro, and in vivo versus the Nine Mile strain.

METHOD: The pan-genomes of the Guiana (Cb175), German (Z3055), and the referent Nine Mile (RSA 493) C. burnetii strains were compared. In vitro, the growth rate and the morphological presentation were compared. In vivo (SCID and Balb/c mice), weight loss, histological lesions, C. burnetii bacterial load in deep organs, and serological response were reported according to each C. burnetii strain studied.

RESULTS: The Guiana strain had 77 times more missing genes and 12 times more unique genes than the German strain. The Guiana strain presented as large cell variants (LCVs) and led to the most pronounced fatality rate in SCID mice (100% at 4 weeks). The German strain presented as small cell variants (SCVs), and had an intermediate fatality rate (75% at 4 weeks). Both the Guiana and the German strains led to a significant higher serological response at 2 and 4 weeks post infection (p < 0.05).

CONCLUSION: The Guiana strain was the most virulent strain, followed by the German strain and the referent Nine Mile strain. Unique and missing genes could be implicated but further investigations are necessary to specify their role.}, } @article {pmid30622973, year = {2018}, author = {Hisham, Y and Ashhab, Y}, title = {Identification of Cross-Protective Potential Antigens against Pathogenic Brucella spp. through Combining Pan-Genome Analysis with Reverse Vaccinology.}, journal = {Journal of immunology research}, volume = {2018}, number = {}, pages = {1474517}, pmid = {30622973}, issn = {2314-7156}, mesh = {Animals ; Antigens, Bacterial/genetics/*immunology ; Bacterial Vaccines/*immunology ; Brucella/pathogenicity/*physiology ; Brucellosis/*immunology ; Cattle ; Computational Biology ; Conserved Sequence/genetics ; Cross Reactions ; Epitopes/immunology ; Genome ; Humans ; Proteome ; Vaccines, Attenuated ; Vaccinology ; *Virulence Factors ; Zoonoses ; }, abstract = {Brucellosis is a zoonotic infectious disease caused by bacteria of the genus Brucella. Brucella melitensis, Brucella abortus, and Brucella suis are the most pathogenic species of this genus causing the majority of human and domestic animal brucellosis. There is a need to develop a safe and potent subunit vaccine to overcome the serious drawbacks of the live attenuated Brucella vaccines. The aim of this work was to discover antigen candidates conserved among the three pathogenic species. In this study, we employed a reverse vaccinology strategy to compute the core proteome of 90 completed genomes: 55 B. melitensis, 17 B. abortus, and 18 B. suis. The core proteome was analyzed by a metasubcellular localization prediction pipeline to identify surface-associated proteins. The identified proteins were thoroughly analyzed using various in silico tools to obtain the most potential protective antigens. The number of core proteins obtained from analyzing the 90 proteomes was 1939 proteins. 
The surface-associated proteins were 177. The number of potential antigens was 87; those with adhesion score ≥ 0.5 were considered antigen with "high potential," while those with a score of 0.4-0.5 were considered antigens with "intermediate potential." According to a cumulative score derived from protein antigenicity, density of MHC-I and MHC-II epitopes, MHC allele coverage, and B-cell epitope density scores, a final list of 34 potential antigens was obtained. Remarkably, most of the 34 proteins are associated with bacterial adhesion, invasion, evasion, and adaptation to the hostile intracellular environment of macrophages which is adjusted to deprive Brucella of required nutrients. Our results provide a manageable list of potential protective antigens for developing a potent vaccine against brucellosis. Moreover, our elaborated analysis can provide further insights into novel Brucella virulence factors. Our next step is to test some of these antigens using an appropriate antigen delivery system.}, } @article {pmid33204407, year = {2019}, author = {Bandoy, DD}, title = {Large scale enterohemorrhagic E coli population genomic analysis using whole genome typing reveals recombination clusters and potential drug target.}, journal = {F1000Research}, volume = {8}, number = {}, pages = {33}, pmid = {33204407}, issn = {2046-1402}, abstract = {Enterohemorrhagic Escherichia coli continues to be a significant public health risk. With the onset of next generation sequencing, whole genome sequences require a new paradigm of analysis relevant for epidemiology and drug discovery. A large-scale bacterial population genomic analysis was applied to 702 isolates of serotypes associated with EHEC resulting in five pangenome clusters. Serotype incongruence with pangenome types suggests recombination clusters. Core genome analysis was performed to determine the population wide distribution of sdiA as potential drug target. 
Protein modelling revealed nonsynonymous variants are notably absent in the ligand binding site for quorum sensing, indicating that population wide conservation of the sdiA ligand site can be targeted for potential prophylactic purposes. Applying pathotype-wide pangenomics as a guide for determining evolution of pharmacophore sites is a potential approach in drug discovery.}, } @article {pmid30619233, year = {2018}, author = {Livingstone, PG and Morphew, RM and Whitworth, DE}, title = {Genome Sequencing and Pan-Genome Analysis of 23 Corallococcus spp. Strains Reveal Unexpected Diversity, With Particular Plasticity of Predatory Gene Sets.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3187}, pmid = {30619233}, issn = {1664-302X}, abstract = {Corallococcus is an abundant genus of predatory soil myxobacteria, containing two species, C. coralloides (for which a genome sequence is available) and C. exiguus. To investigate the genomic basis of predation, we genome-sequenced 23 Corallococcus strains. Genomic similarity metrics grouped the sequenced strains into at least nine distinct genomospecies, divided between two major sub-divisions of the genus, encompassing previously described diversity. The Corallococcus pan-genome was found to be open, with strains exhibiting highly individual gene sets. On average, only 30.5% of each strain's gene set belonged to the core pan-genome, while more than 75% of the accessory pan-genome genes were present in less than four of the 24 genomes. The Corallococcus accessory pan-proteome was enriched for the COG functional category "Secondary metabolism," with each genome containing on average 55 biosynthetic gene clusters (BGCs), of which only 20 belonged to the core pan-genome. Predatory activity was assayed against ten prey microbes and found to be mostly incongruent with phylogeny or BGC complement. 
Thus, predation seems multifactorial, depending partially on BGC complement, but also on the accessory pan-genome - genes most likely acquired horizontally. These observations encourage further exploration of Corallococcus as a source for novel bioactive secondary metabolites and predatory proteins.}, } @article {pmid30619175, year = {2018}, author = {Wu, Y and Zaiden, N and Cao, B}, title = {The Core- and Pan-Genomic Analyses of the Genus Comamonas: From Environmental Adaptation to Potential Virulence.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3096}, pmid = {30619175}, issn = {1664-302X}, abstract = {Comamonas is often reported to be one of the major members of microbial communities in various natural and engineered environments. Versatile catabolic capabilities of Comamonas have been studied extensively in the last decade. In contrast, little is known about the ecological roles and adaptation of Comamonas to different environments as well as the virulence of potentially pathogenic Comamonas strains. In this study, we provide genomic insights into the potential ecological roles and virulence of Comamonas by analysing the entire gene set (pangenome) and the genes present in all genomes (core genome) using 34 genomes of 11 different Comamonas species. The analyses revealed that the metabolic pathways enabling Comamonas to acquire energy from various nutrient sources are well conserved. Genes for denitrification and ammonification are abundant in Comamonas, suggesting that Comamonas plays an important role in the nitrogen biogeochemical cycle. They also encode sophisticated redox sensory systems and diverse c-di-GMP controlling systems, allowing them to be able to effectively adjust their biofilm lifestyle to changing environments. The virulence factors in Comamonas were found to be highly species-specific. 
The conserved strategies used by potentially pathogenic Comamonas for surface adherence, motility control, nutrient acquisition and stress tolerance were also revealed.}, } @article {pmid30618776, year = {2018}, author = {Kayani, MR and Zheng, YC and Xie, FC and Kang, K and Li, HY and Zhao, HT}, title = {Genome Sequences and Comparative Analysis of Two Extended-Spectrum Extensively-Drug Resistant Mycobacterium tuberculosis Strains.}, journal = {Frontiers in pharmacology}, volume = {9}, number = {}, pages = {1492}, pmid = {30618776}, issn = {1663-9812}, } @article {pmid30610074, year = {2019}, author = {Yang, Y and Higgins, CH and Rehman, I and Galvao, KN and Brito, IL and Bicalho, ML and Song, J and Wang, H and Bicalho, RC}, title = {Genomic Diversity, Virulence, and Antimicrobial Resistance of Klebsiella pneumoniae Strains from Cows and Humans.}, journal = {Applied and environmental microbiology}, volume = {85}, number = {6}, pages = {}, pmid = {30610074}, issn = {1098-5336}, mesh = {Animals ; Anti-Bacterial Agents/*pharmacology ; Bacterial Proteins/genetics/metabolism ; Cattle ; Cattle Diseases/*microbiology ; *Drug Resistance, Bacterial ; Genetic Variation ; Genome, Bacterial ; Genomics ; Humans ; Klebsiella Infections/*microbiology/*veterinary ; Klebsiella pneumoniae/classification/drug effects/*genetics/pathogenicity ; Phylogeny ; Plasmids/genetics/metabolism ; Virulence ; }, abstract = {Klebsiella pneumoniae is a leading cause of severe infections in humans and dairy cows, and these infections are rapidly becoming untreatable due to the emergence of multidrug-resistant (MDR) strains. However, little is known about the relationship between bovine and human K. pneumoniae isolates at the genome population level. Here, we investigated the genomic structures, pangenomic profiles, virulence determinants, and resistomes of 308 K. pneumoniae isolates from humans and dairy cows, including 96 newly sequenced cow isolates. 
We identified 177 functional protein families that were significantly different across human and bovine isolates; genes expressing proteins related to metal ion (iron, zinc, and calcium) metabolism were significantly more prevalent among the bovine isolates. Siderophore systems were found to be prevalent in both the bovine and the human isolates. In addition, we found that the Klebsiella ferric uptake operon kfuABC was significantly more prevalent in clinical mastitis cases than in healthy cows. Furthermore, on two dairy farms, we identified a unique IncN-type plasmid, pC5, coharboring blaCTX-M-1 and mph(A) genes, which confer resistance to cephalosporins and macrolides, respectively. We provide here the complete annotated sequence of this plasmid. IMPORTANCE We demonstrate here the genetic diversity of K. pneumoniae isolates from dairy cows and the mixed phylogenetic lineages between bovine and human isolates. The ferric uptake operon kfuABC genes were more prevalent in strains from clinical mastitis cows. Furthermore, we report the emergence of an IncN-type plasmid carrying the blaCTX-M-1 and mph(A) genes among dairy farms in the United States. Our study evaluated the genomic diversity of the bovine and human isolates, and the findings uncovered different profiles of virulence determinants among bovine and human K. 
pneumoniae isolates at the genome population level.}, } @article {pmid30608550, year = {2019}, author = {diCenzo, GC and Mengoni, A and Perrin, E}, title = {Chromids Aid Genome Expansion and Functional Diversification in the Family Burkholderiaceae.}, journal = {Molecular biology and evolution}, volume = {36}, number = {3}, pages = {562-574}, doi = {10.1093/molbev/msy248}, pmid = {30608550}, issn = {1537-1719}, mesh = {Adaptation, Biological/genetics ; Burkholderiaceae/*genetics ; Gene Transfer, Horizontal ; Genome Size ; *Genome, Bacterial ; *Replicon ; Selection, Genetic ; }, abstract = {Multipartite genomes, containing at least two large replicons, are found in diverse bacteria; however, the advantage of this genome structure remains incompletely understood. Here, we perform comparative genomics of hundreds of finished β-proteobacterial genomes to gain insights into the role and emergence of multipartite genomes. Almost all essential secondary replicons (chromids) of the β-proteobacteria are found in the family Burkholderiaceae. These replicons arose from just two plasmid acquisition events, and they were likely stabilized early in their evolution by the presence of core genes. On average, Burkholderiaceae genera with multipartite genomes had a larger total genome size, but smaller chromosome, than genera without secondary replicons. Pangenome-level functional enrichment analyses suggested that interreplicon functional biases are partially driven by the enrichment of secondary replicons in the accessory pangenome fraction. Nevertheless, the small overlap in orthologous groups present in each replicon's pangenome indicated a clear functional separation of the replicons. Chromids appeared biased to environmental adaptation, as the functional categories enriched on chromids were also overrepresented on the chromosomes of the environmental genera (Paraburkholderia and Cupriavidus) compared with the pathogenic genera (Burkholderia and Ralstonia). 
Using ancestral state reconstruction, it was predicted that the rate of accumulation of modern-day genes by chromids was more rapid than the rate of gene accumulation by the chromosomes. Overall, the data are consistent with a model where the primary advantage of secondary replicons is in facilitating increased rates of gene acquisition through horizontal gene transfer, consequently resulting in replicons enriched in genes associated with adaptation to novel environments.}, } @article {pmid30606234, year = {2019}, author = {Dillon, MM and Thakur, S and Almeida, RND and Wang, PW and Weir, BS and Guttman, DS}, title = {Recombination of ecologically and evolutionarily significant loci maintains genetic cohesion in the Pseudomonas syringae species complex.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {3}, pmid = {30606234}, issn = {1474-760X}, mesh = {*Evolution, Molecular ; Genetic Variation ; *Genome, Bacterial ; *Phylogeny ; Pseudomonas syringae/*genetics ; Recombination, Genetic ; Selection, Genetic ; Type II Secretion Systems/genetics ; }, abstract = {BACKGROUND: Pseudomonas syringae is a highly diverse bacterial species complex capable of causing a wide range of serious diseases on numerous agronomically important crops. We examine the evolutionary relationships of 391 agricultural and environmental strains using whole-genome sequencing and evolutionary genomic analyses.

RESULTS: We describe the phylogenetic distribution of all 77,728 orthologous gene families in the pan-genome, reconstruct the core genome phylogeny using the 2410 core genes, hierarchically cluster the accessory genome, identify the diversity and distribution of type III secretion systems and their effectors, predict ecologically and evolutionary relevant loci, and establish the molecular evolutionary processes operating on gene families. Phylogenetic and recombination analyses reveals that the species complex is subdivided into primary and secondary phylogroups, with the former primarily comprised of agricultural isolates, including all of the well-studied P. syringae strains. In contrast, the secondary phylogroups include numerous environmental isolates. These phylogroups also have levels of genetic diversity typically found among distinct species. An analysis of rates of recombination within and between phylogroups revealed a higher rate of recombination within primary phylogroups than between primary and secondary phylogroups. We also find that "ecologically significant" virulence-associated loci and "evolutionarily significant" loci under positive selection are over-represented among loci that undergo inter-phylogroup genetic exchange.

CONCLUSIONS: While inter-phylogroup recombination occurs relatively rarely, it is an important force maintaining the genetic cohesion of the species complex, particularly among primary phylogroup strains. This level of genetic cohesion, and the shared plant-associated niche, argues for considering the primary phylogroups as a single biological species.}, } @article {pmid30598730, year = {2019}, author = {Lindholm, M and Min Aung, K and Nyunt Wai, S and Oscarsson, J}, title = {Role of OmpA1 and OmpA2 in Aggregatibacter actinomycetemcomitans and Aggregatibacter aphrophilus serum resistance.}, journal = {Journal of oral microbiology}, volume = {11}, number = {1}, pages = {1536192}, pmid = {30598730}, issn = {2000-2297}, abstract = {Aggregatibacter actinomycetemcomitans and Aggregatibacter aphrophilus belong to the HACEK group of fastidious Gram-negative organisms, a recognized cause of infective endocarditis. A. actinomycetemcomitans is also implicated in aggressive forms of periodontitis. We demonstrated that A. aphrophilus strains, as A. actinomycetemcomitans are ubiquitously serum resistant. Both species encode two Outer membrane protein A paralogues, here denoted OmpA1 and OmpA2. As their respective pangenomes contain several OmpA1 and OmpA2 alleles, they represent potential genotypic markers. A naturally competent strain of A. actinomycetemcomitans and A. aphrophilus, respectively were used to elucidate if OmpA1 and OmpA2 contribute to serum resistance. Whereas OmpA1 was critical for survival of A. actinomycetemcomitans D7SS in 50% normal human serum (NHS), serum resistant ompA1 mutants were fortuitously obtained, expressing enhanced levels of OmpA2. Similarly, OmpA1 rather than OmpA2 was a major contributor to serum resistance of A. aphrophilus HK83. Far-Western blot revealed that OmpA1[AA], OmpA2[AA], and OmpA1[AP] can bind to C4-binding protein, an inhibitor of classical and mannose-binding lectin (MBL) complement activation. 
Indeed, ompA1 mutants were susceptible to these pathways, but also to alternative complement activation. This may at least partly reflect a compromised outer membrane integrity but is also consistent with alternative mechanisms involved in OmpA-mediated serum resistance.}, } @article {pmid30598532, year = {2019}, author = {Hübner, S and Bercovich, N and Todesco, M and Mandel, JR and Odenheimer, J and Ziegler, E and Lee, JS and Baute, GJ and Owens, GL and Grassa, CJ and Ebert, DP and Ostevik, KL and Moyers, BT and Yakimowski, S and Masalia, RR and Gao, L and Ćalić, I and Bowers, JE and Kane, NC and Swanevelder, DZH and Kubach, T and Muños, S and Langlade, NB and Burke, JM and Rieseberg, LH}, title = {Sunflower pan-genome analysis shows that hybridization altered gene content and disease resistance.}, journal = {Nature plants}, volume = {5}, number = {1}, pages = {54-62}, pmid = {30598532}, issn = {2055-0278}, mesh = {Crops, Agricultural/genetics/microbiology ; Disease Resistance/genetics ; Gene Ontology ; Genes, Plant ; Genetic Variation ; Genome, Plant ; Helianthus/*genetics/*microbiology ; *Hybridization, Genetic ; Plant Diseases/*genetics/microbiology ; Recombination, Genetic ; Selection, Genetic ; }, abstract = {Domesticated plants and animals often display dramatic responses to selection, but the origins of the genetic diversity underlying these responses remain poorly understood. Despite domestication and improvement bottlenecks, the cultivated sunflower remains highly variable genetically, possibly due to hybridization with wild relatives. To characterize genetic diversity in the sunflower and to quantify contributions from wild relatives, we sequenced 287 cultivated lines, 17 Native American landraces and 189 wild accessions representing 11 compatible wild species. Cultivar sequences failing to map to the sunflower reference were assembled de novo for each genotype to determine the gene repertoire, or 'pan-genome', of the cultivated sunflower. 
Assembled genes were then compared to the wild species to estimate origins. Results indicate that the cultivated sunflower pan-genome comprises 61,205 genes, of which 27% vary across genotypes. Approximately 10% of the cultivated sunflower pan-genome is derived through introgression from wild sunflower species, and 1.5% of genes originated solely through introgression. Gene ontology functional analyses further indicate that genes associated with biotic resistance are over-represented among introgressed regions, an observation consistent with breeding records. Analyses of allelic variation associated with downy mildew resistance provide an example in which such introgressions have contributed to resistance to a globally challenging disease.}, } @article {pmid30594655, year = {2019}, author = {Tao, Y and Zhao, X and Mace, E and Henry, R and Jordan, D}, title = {Exploring and Exploiting Pan-genomics for Crop Improvement.}, journal = {Molecular plant}, volume = {12}, number = {2}, pages = {156-169}, doi = {10.1016/j.molp.2018.12.016}, pmid = {30594655}, issn = {1752-9867}, mesh = {Crops, Agricultural/*genetics/growth & development/physiology ; Genes, Plant/genetics ; Genetic Variation ; Genomics/*methods ; }, abstract = {Genetic variation ranging from single-nucleotide polymorphisms to large structural variants (SVs) can cause variation of gene content among individuals within the same species. There is an increasing appreciation that a single reference genome is insufficient to capture the full landscape of genetic diversity of a species. Pan-genome analysis offers a platform to evaluate the genetic diversity of a species via investigation of its entire genome repertoire. Although a recent wave of pan-genomic studies has shed new light on crop diversity and improvement using advanced sequencing technology, the potential applications of crop pan-genomics in crop improvement are yet to be fully exploited. 
In this review, we highlight the progress achieved in understanding crop pan-genomics, discuss biological activities that cause SVs, review important agronomical traits affected by SVs, and present our perspective on the application of pan-genomics in crop improvement.}, } @article {pmid30588394, year = {2018}, author = {Brankovics, B and Kulik, T and Sawicki, J and Bilska, K and Zhang, H and de Hoog, GS and van der Lee, TA and Waalwijk, C and van Diepeningen, AD}, title = {First steps towards mitochondrial pan-genomics: detailed analysis of Fusarium graminearum mitogenomes.}, journal = {PeerJ}, volume = {6}, number = {}, pages = {e5963}, pmid = {30588394}, issn = {2167-8359}, abstract = {There is a gradual shift from representing a species' genome by a single reference genome sequence to a pan-genome representation. Pan-genomes are the abstract representations of the genomes of all the strains that are present in the population or species. In this study, we employed a pan-genomic approach to analyze the intraspecific mitochondrial genome diversity of Fusarium graminearum. We present an improved reference mitochondrial genome for F. graminearum with an intron-exon annotation that was verified using RNA-seq data. Each of the 24 studied isolates had a distinct mitochondrial sequence. Length variation in the F. graminearum mitogenome was found to be largely due to variation of intron regions (99.98%). The "intronless" mitogenome length was found to be quite stable and could be informative when comparing species. The coding regions showed high conservation, while the variability of intergenic regions was highest. However, the most important variable parts are the intron regions, because they contain approximately half of the variable sites, make up more than half of the mitogenome, and show presence/absence variation. Furthermore, our analyses show that the mitogenome of F. graminearum is recombining, as was previously shown in F. 
oxysporum, indicating that mitogenome recombination is a common phenomenon in Fusarium. The majority of mitochondrial introns in F. graminearum belongs to group I introns, which are associated with homing endonuclease genes (HEGs). Mitochondrial introns containing HE genes may spread within populations through homing, where the endonuclease recognizes and cleaves the recognition site in the target gene. After cleavage of the "host" gene, it is replaced by the gene copy containing the intron with HEG. We propose to use introns unique to a population for tracking the spread of the given population, because introns can spread through vertical inheritance, recombination as well as via horizontal transfer. We demonstrate how pooled sequencing of strains can be used for mining mitogenome data. The usage of pooled sequencing offers a scalable solution for population analysis and for species level comparisons studies. This study may serve as a basis for future mitochondrial genome variability studies and representations.}, } @article {pmid30587126, year = {2018}, author = {Bochkareva, OO and Moroz, EV and Davydov, II and Gelfand, MS}, title = {Genome rearrangements and selection in multi-chromosome bacteria Burkholderia spp.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {965}, pmid = {30587126}, issn = {1471-2164}, support = {18-14-00358//Russian Science Foundation/ ; 16-54-21004//Russian Foundation of Basic Research/ ; IZLRZ3\163872//Swiss National Science Foundation/ ; }, mesh = {Burkholderia/classification/*genetics ; *Chromosomes, Bacterial ; Databases, Genetic ; Gene Rearrangement/*genetics ; Phylogeny ; }, abstract = {BACKGROUND: The genus Burkholderia consists of species that occupy remarkably diverse ecological niches. Its best known members are important pathogens, B. mallei and B. pseudomallei, which cause glanders and melioidosis, respectively. 
Burkholderia genomes are unusual due to their multichromosomal organization, generally comprised of 2-3 chromosomes.

RESULTS: We performed integrated genomic analysis of 127 Burkholderia strains. The pan-genome is open with the saturation to be reached between 86,000 and 88,000 genes. The reconstructed rearrangements indicate a strong avoidance of intra-replichore inversions that is likely caused by selection against the transfer of large groups of genes between the leading and the lagging strands. Translocated genes also tend to retain their position in the leading or the lagging strand, and this selection is stronger for large syntenies. Integrated reconstruction of chromosome rearrangements in the context of strains phylogeny reveals parallel rearrangements that may indicate inversion-based phase variation and integration of new genomic islands. In particular, we detected parallel inversions in the second chromosomes of B. pseudomallei with breakpoints formed by genes encoding membrane components of multidrug resistance complex, that may be linked to a phase variation mechanism. Two genomic islands, spreading horizontally between chromosomes, were detected in the B. cepacia group.

CONCLUSIONS: This study demonstrates the power of integrated analysis of pan-genomes, chromosome rearrangements, and selection regimes. Non-random inversion patterns indicate selective pressure, inversions are particularly frequent in a recent pathogen B. mallei, and, together with periods of positive selection at other branches, may indicate adaptation to new niches. One such adaptation could be a possible phase variation mechanism in B. pseudomallei.}, } @article {pmid30587114, year = {2018}, author = {Tyakht, AV and Manolov, AI and Kanygina, AV and Ischenko, DS and Kovarsky, BA and Popenko, AS and Pavlenko, AV and Elizarova, AV and Rakitina, DV and Baikova, JP and Ladygina, VG and Kostryukova, ES and Karpova, IY and Semashko, TA and Larin, AK and Grigoryeva, TV and Sinyagina, MN and Malanin, SY and Shcherbakov, PL and Kharitonova, AY and Khalif, IL and Shapina, MV and Maev, IV and Andreev, DN and Belousova, EA and Buzunova, YM and Alexeev, DG and Govorun, VM}, title = {Genetic diversity of Escherichia coli in gut microbiota of patients with Crohn's disease discovered using metagenomic and genomic analyses.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {968}, pmid = {30587114}, issn = {1471-2164}, support = {16-15-00258//Russian Science Foundation/ ; }, mesh = {Cluster Analysis ; Crohn Disease/microbiology/*pathology ; Escherichia coli/*genetics/isolation & purification ; Feces/microbiology ; *Gastrointestinal Microbiome ; *Genetic Variation ; Genome, Bacterial ; Humans ; Intestinal Mucosa/microbiology ; Metagenomics/*methods ; }, abstract = {BACKGROUND: Crohn's disease is associated with gut dysbiosis. Independent studies have shown an increase in the abundance of certain bacterial species, particularly Escherichia coli with the adherent-invasive pathotype, in the gut. The role of these species in this disease needs to be elucidated.

METHODS: We performed a metagenomic study investigating the gut microbiota of patients with Crohn's disease. A metagenomic reconstruction of the consensus genome content of the species was used to assess the genetic variability.

RESULTS: The abnormal shifts in the microbial community structures in Crohn's disease were heterogeneous among the patients. The metagenomic data suggested the existence of multiple E. coli strains within individual patients. We discovered that the genetic diversity of the species was high and that only a few samples manifested similarity to the adherent-invasive varieties. The other species demonstrated genetic diversity comparable to that observed in the healthy subjects. Our results were supported by a comparison of the sequenced genomes of isolates from the same microbiota samples and a meta-analysis of published gut metagenomes.

CONCLUSIONS: The genomic diversity of Crohn's disease-associated E. coli within and among the patients paves the way towards an understanding of the microbial mechanisms underlying the onset and progression of the Crohn's disease and the development of new strategies for the prevention and treatment of this disease.}, } @article {pmid30586440, year = {2018}, author = {Cheleuitte-Nieves, C and Gulvik, CA and McQuiston, JR and Humrighouse, BW and Bell, ME and Villarma, A and Fischetti, VA and Westblade, LF and Lipman, NS}, title = {Genotypic differences between strains of the opportunistic pathogen Corynebacterium bovis isolated from humans, cows, and rodents.}, journal = {PloS one}, volume = {13}, number = {12}, pages = {e0209231}, pmid = {30586440}, issn = {1932-6203}, support = {P30 CA008748/CA/NCI NIH HHS/United States ; }, mesh = {Animals ; Cattle ; Corynebacterium/*genetics/*isolation & purification/pathogenicity ; Corynebacterium Infections/microbiology/veterinary ; DNA, Circular ; Female ; Genome, Bacterial ; Genotype ; Humans ; Mice, Nude ; Opportunistic Infections/microbiology/veterinary ; Phylogeny ; RNA, Bacterial ; RNA, Ribosomal, 16S ; Rats ; Virulence Factors/genetics ; }, abstract = {Corynebacterium bovis is an opportunistic bacterial pathogen shown to cause eye and prosthetic joint infections as well as abscesses in humans, mastitis in dairy cattle, and skin disease in laboratory mice and rats. Little is known about the genetic characteristics and genomic diversity of C. bovis because only a single draft genome is available for the species. The overall aim of this study was to sequence and compare the genome of C. bovis isolates obtained from different species, locations, and time points. Whole-genome sequencing was conducted on 20 C. bovis isolates (six human, four bovine, nine mouse and one rat) using the Illumina MiSeq platform and submitted to various comparative analysis tools. 
Sequencing generated high-quality contigs (over 2.53 Mbp) that were comparable to the only reported assembly using C. bovis DSM 20582T (97.8 ± 0.36% completeness). The number of protein-coding DNA sequences (2,174 ± 12.4) was similar among all isolates. A Corynebacterium genus neighbor-joining tree was created, which revealed Corynebacterium falsenii as the nearest neighbor to C. bovis (95.87% similarity), although the reciprocal comparison shows Corynebacterium jeikeium as closest neighbor to C. falsenii. Interestingly, the average nucleotide identity demonstrated that the C. bovis isolates clustered by host, with human and bovine isolates clustering together, and the mouse and rat isolates forming a separate group. The average number of genomic islands and putative virulence factors were significantly higher (p<0.001) in the mouse and rat isolates as compared to human/bovine isolates. Corynebacterium bovis' pan-genome contained a total of 3,067 genes of which 1,354 represented core genes. The known core genes of all isolates were primarily related to "metabolism" and "information storage/processing." However, most genes were classified as "function unknown" or "unclassified". 
Surprisingly, no intact prophages were found in any isolate; however, almost all isolates had at least one complete CRISPR-Cas system.}, } @article {pmid30576310, year = {2018}, author = {Rendueles, O and de Sousa, JAM and Bernheim, A and Touchon, M and Rocha, EPC}, title = {Genetic exchanges are more frequent in bacteria encoding capsules.}, journal = {PLoS genetics}, volume = {14}, number = {12}, pages = {e1007862}, pmid = {30576310}, issn = {1553-7404}, mesh = {Bacteria/classification/*genetics ; Bacterial Capsules/*genetics ; DNA Restriction-Modification Enzymes/genetics ; Drug Resistance, Bacterial/genetics ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Homologous Recombination ; Interspersed Repetitive Sequences ; Phylogeny ; Species Specificity ; }, abstract = {Capsules allow bacteria to colonize novel environments, to withstand numerous stresses, and to resist antibiotics. Yet, even though genetic exchanges with other cells should be adaptive under such circumstances, it has been suggested that capsules lower the rates of homologous recombination and horizontal gene transfer. We analysed over one hundred pan-genomes and thousands of bacterial genomes for the evidence of an association between genetic exchanges (or lack thereof) and the presence of a capsule system. We found that bacteria encoding capsules have larger pan-genomes, higher rates of horizontal gene transfer, and higher rates of homologous recombination in their core genomes. Accordingly, genomes encoding capsules have more plasmids, conjugative elements, transposases, prophages, and integrons. Furthermore, capsular loci are frequent in plasmids, and can be found in prophages. These results are valid for Bacteria, independently of their ability to be naturally transformable. Since we have shown previously that capsules are commonly present in nosocomial pathogens, we analysed their co-occurrence with antibiotic resistance genes. 
Genomes encoding capsules have more antibiotic resistance genes, especially those encoding efflux pumps, and they constitute the majority of the most worrisome nosocomial bacteria. We conclude that bacteria with capsule systems are more genetically diverse and have fast-evolving gene repertoires, which may further contribute to their success in colonizing novel niches such as humans under antibiotic therapy.}, } @article {pmid30574560, year = {2018}, author = {Velsko, IM and Chakraborty, B and Nascimento, MM and Burne, RA and Richards, VP}, title = {Species Designations Belie Phenotypic and Genotypic Heterogeneity in Oral Streptococci.}, journal = {mSystems}, volume = {3}, number = {6}, pages = {}, pmid = {30574560}, issn = {2379-5077}, support = {K23 DE023579/DE/NIDCR NIH HHS/United States ; }, abstract = {Health-associated oral Streptococcus species are promising probiotic candidates to protect against dental caries. Ammonia production through the arginine deiminase system (ADS), which can increase the pH of oral biofilms, and direct antagonism of caries-associated bacterial species are desirable properties for oral probiotic strains. ADS and antagonistic activities can vary dramatically among individuals, but the genetic basis for these differences is unknown. We sequenced whole genomes of a diverse set of clinical oral Streptococcus isolates and examined the genetic basis of variability in ADS and antagonistic activities. A total of 113 isolates were included and represented 10 species: Streptococcus australis, A12-like, S. cristatus, S. gordonii, S. intermedius, S. mitis, S. oralis including S. oralis subsp. dentisani, S. parasanguinis, S. salivarius, and S. sanguinis. Mean ADS activity and antagonism on Streptococcus mutans UA159 were measured for each isolate, and each isolate was whole genome shotgun sequenced on an Illumina MiSeq. Phylogenies were built of genes known to be involved in ADS activity and antagonism. 
Several approaches to correlate the pan-genome with phenotypes were performed. Phylogenies of genes previously identified in ADS activity and antagonism grouped isolates by species, but not by phenotype. A genome-wide association study (GWAS) identified additional genes potentially involved in ADS activity or antagonism across all the isolates we sequenced as well as within several species. Phenotypic heterogeneity in oral streptococci is not necessarily reflected by genotype and is not species specific. Probiotic strains must be carefully selected based on characterization of each strain and not based on inclusion within a certain species. IMPORTANCE Representative type strains are commonly used to characterize bacterial species, yet species are phenotypically and genotypically heterogeneous. Conclusions about strain physiology and activity based on a single strain therefore may be inappropriate and misleading. When selecting strains for probiotic use, the assumption that all strains within a species share the same desired probiotic characteristics may result in selection of a strain that lacks the desired traits, and therefore makes a minimally effective or ineffective probiotic. Health-associated oral streptococci are promising candidates for anticaries probiotics, but strains need to be carefully selected based on observed phenotypes. 
We characterized the genotypes and anticaries phenotypes of strains from 10 species of oral streptococci and demonstrate poor correlation between genotype and phenotype across all species.}, } @article {pmid30574558, year = {2018}, author = {Fahimipour, AK and Ben Mamaar, S and McFarland, AG and Blaustein, RA and Chen, J and Glawe, AJ and Kline, J and Green, JL and Halden, RU and Van Den Wymelenberg, K and Huttenhower, C and Hartmann, EM}, title = {Antimicrobial Chemicals Associate with Microbial Function and Antibiotic Resistance Indoors.}, journal = {mSystems}, volume = {3}, number = {6}, pages = {}, pmid = {30574558}, issn = {2379-5077}, support = {U19 AI135964/AI/NIAID NIH HHS/United States ; }, abstract = {Humans purposefully and inadvertently introduce antimicrobial chemicals into buildings, resulting in widespread compounds, including triclosan, triclocarban, and parabens, in indoor dust. Meanwhile, drug-resistant infections continue to increase, raising concerns that buildings function as reservoirs of, or even select for, resistant microorganisms. Support for these hypotheses is limited largely since data describing relationships between antimicrobials and indoor microbial communities are scant. We combined liquid chromatography-isotope dilution tandem mass spectrometry with metagenomic shotgun sequencing of dust collected from athletic facilities to characterize relationships between indoor antimicrobial chemicals and microbial communities. Elevated levels of triclosan and triclocarban, but not parabens, were associated with distinct indoor microbiomes. Dust of high triclosan content contained increased Gram-positive species with diverse drug resistance capabilities, whose pangenomes were enriched for genes encoding osmotic stress responses, efflux pump regulation, lipid metabolism, and material transport across cell membranes; such triclosan-associated functional shifts have been documented in laboratory cultures but not yet from buildings. 
Antibiotic-resistant bacterial isolates were cultured from all but one facility, and resistance often increased in buildings with very high triclosan levels, suggesting links between human encounters with viable drug-resistant bacteria and local biocide conditions. This characterization uncovers complex relationships between antimicrobials and indoor microbiomes: some chemicals elicit effects, whereas others may not, and no single functional or resistance factor explained chemical-microbe associations. These results suggest that anthropogenic chemicals impact microbial systems in or around buildings and their occupants, highlighting an emergent need to identify the most important indoor, outdoor, and host-associated sources of antimicrobial chemical-resistome interactions. IMPORTANCE The ubiquitous use of antimicrobial chemicals may have undesired consequences, particularly on microbes in buildings. This study shows that the taxonomy and function of microbes in indoor dust are strongly associated with antimicrobial chemicals—more so than any other feature of the buildings. Moreover, we identified links between antimicrobial chemical concentrations in dust and culturable bacteria that are cross-resistant to three clinically relevant antibiotics. 
These findings suggest that humans may be influencing the microbial species and genes that are found indoors through the addition and removal of particular antimicrobial chemicals.}, } @article {pmid30563902, year = {2018}, author = {Potter, RF and Lainhart, W and Twentyman, J and Wallace, MA and Wang, B and Burnham, CA and Rosen, DA and Dantas, G}, title = {Population Structure, Antibiotic Resistance, and Uropathogenicity of Klebsiella variicola.}, journal = {mBio}, volume = {9}, number = {6}, pages = {}, pmid = {30563902}, issn = {2150-7511}, support = {R01 GM099538/GM/NIGMS NIH HHS/United States ; K08 AI127714/AI/NIAID NIH HHS/United States ; R01 AI123394/AI/NIAID NIH HHS/United States ; R01 HD092414/HD/NICHD NIH HHS/United States ; U01 AI123394/AI/NIAID NIH HHS/United States ; T32 GM007067/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; Anti-Bacterial Agents/*pharmacology ; Carbapenems/pharmacology ; Ciprofloxacin/pharmacology ; Communicable Diseases, Emerging/microbiology ; *Drug Resistance, Multiple, Bacterial ; Female ; Fimbriae, Bacterial/genetics ; Genome, Bacterial ; Humans ; Klebsiella/*drug effects/genetics/*pathogenicity ; Klebsiella Infections/microbiology ; Mice ; Microbial Sensitivity Tests ; Phylogeny ; Urinary Bladder/microbiology ; Urinary Tract Infections/*microbiology ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Klebsiella variicola is a member of the Klebsiella genus and often misidentified as Klebsiella pneumoniae or Klebsiella quasipneumoniae. The importance of K. pneumoniae human infections has been known; however, a dearth of relative knowledge exists for K. variicola. Despite its growing clinical importance, comprehensive analyses of K. variicola population structure and mechanistic investigations of virulence factors and antibiotic resistance genes have not yet been performed. To address this, we utilized in silico, in vitro, and in vivo methods to study a cohort of K. variicola isolates and genomes. 
We found that the K. variicola population structure has two distant lineages composed of two and 143 genomes, respectively. Ten of 145 K. variicola genomes harbored carbapenem resistance genes, and 6/145 contained complete virulence operons. While the β-lactam blaLEN and quinolone oqxAB antibiotic resistance genes were generally conserved within our institutional cohort, unexpectedly 11 isolates were nonresistant to the β-lactam ampicillin and only one isolate was nonsusceptible to the quinolone ciprofloxacin. K. variicola isolates have variation in ability to cause urinary tract infections in a newly developed murine model, but importantly a strain had statistically significant higher bladder CFU than the model uropathogenic K. pneumoniae strain TOP52. Type 1 pilus and genomic identification of altered fim operon structure were associated with differences in bladder CFU for the tested strains. Nine newly reported types of pilus genes were discovered in the K. variicola pan-genome, including the first identified P-pilus in Klebsiella spp. IMPORTANCE Infections caused by antibiotic-resistant bacterial pathogens are a growing public health threat. Understanding of pathogen relatedness and biology is imperative for tracking outbreaks and developing therapeutics. Here, we detail the phylogenetic structure of 145 K. variicola genomes from different continents. Our results have important clinical ramifications as high-risk antibiotic resistance genes are present in K. variicola genomes from a variety of geographic locations and as we demonstrate that K. variicola clinical isolates can establish higher bladder titers than K. pneumoniae. Differential presence of these pilus genes in K. variicola isolates may indicate adaption for specific environmental niches. Therefore, due to the potential of multidrug resistance and pathogenic efficacy, identification of K. variicola and K. 
pneumoniae to a species level should be performed to optimally improve patient outcomes during infection. This work provides a foundation for our improved understanding of K. variicola biology and pathogenesis.}, } @article {pmid30563853, year = {2019}, author = {Pang, TY and Lercher, MJ}, title = {Each of 3,323 metabolic innovations in the evolution of E. coli arose through the horizontal transfer of a single DNA segment.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {116}, number = {1}, pages = {187-192}, pmid = {30563853}, issn = {1091-6490}, mesh = {Adaptation, Physiological/genetics ; Biological Evolution ; DNA, Bacterial/*genetics ; Escherichia coli/*genetics/metabolism ; Gene Transfer, Horizontal/*genetics/physiology ; Genes, Bacterial/genetics ; Genetic Association Studies ; Phylogeny ; }, abstract = {Even closely related prokaryotes often show an astounding diversity in their ability to grow in different nutritional environments. It has been hypothesized that complex metabolic adaptations—those requiring the independent acquisition of multiple new genes—can evolve via selectively neutral intermediates. However, it is unclear whether this neutral exploration of phenotype space occurs in nature, or what fraction of metabolic adaptations is indeed complex. Here, we reconstruct metabolic models for the ancestors of a phylogeny of 53 Escherichia coli strains, linking genotypes to phenotypes on a genome-wide, macroevolutionary scale. Based on the ancestral and extant metabolic models, we identify 3,323 phenotypic innovations in the history of the E. coli clade that arose through changes in accessory genome content. Of these innovations, 1,998 allow growth in previously inaccessible environments, while 1,325 increase biomass yield. Strikingly, every observed innovation arose through the horizontal acquisition of a single DNA segment less than 30 kb long. 
Although we found no evidence for the contribution of selectively neutral processes, 10.6% of metabolic innovations were facilitated by horizontal gene transfers on earlier phylogenetic branches, consistent with a stepwise adaptation to successive environments. Ninety-eight percent of metabolic phenotypes accessible to the combined E. coli pangenome can be bestowed on any individual strain by transferring a single DNA segment from one of the extant strains. These results demonstrate an amazing ability of the E. coli lineage to adapt to novel environments through single horizontal gene transfers (followed by regulatory adaptations), an ability likely mirrored in other clades of generalist bacteria.}, } @article {pmid30559407, year = {2019}, author = {Vatanen, T and Plichta, DR and Somani, J and Münch, PC and Arthur, TD and Hall, AB and Rudolf, S and Oakeley, EJ and Ke, X and Young, RA and Haiser, HJ and Kolde, R and Yassour, M and Luopajärvi, K and Siljander, H and Virtanen, SM and Ilonen, J and Uibo, R and Tillmann, V and Mokurov, S and Dorshakova, N and Porter, JA and McHardy, AC and Lähdesmäki, H and Vlamakis, H and Huttenhower, C and Knip, M and Xavier, RJ}, title = {Genomic variation and strain-specific functional adaptation in the human gut microbiome during early life.}, journal = {Nature microbiology}, volume = {4}, number = {3}, pages = {470-479}, pmid = {30559407}, issn = {2058-5276}, support = {P30 DK043351/DK/NIDDK NIH HHS/United States ; R24 DK110499/DK/NIDDK NIH HHS/United States ; UM1 AI110498/AI/NIAID NIH HHS/United States ; }, mesh = {*Adaptation, Physiological ; Age Factors ; Bacteriophages/genetics ; Bacteroides/genetics/virology ; Bifidobacterium bifidum/genetics ; Bifidobacterium longum/genetics ; Child Development ; Child, Preschool ; Estonia ; Feces/microbiology ; Female ; Finland ; Gastrointestinal Microbiome/*genetics ; *Genetic Variation ; *Genome, Bacterial ; Humans ; Infant ; Longitudinal Studies ; Male ; Metabolic Networks and Pathways ; 
Metagenomics ; Polymorphism, Single Nucleotide ; Probiotics ; Russia ; }, abstract = {The human gut microbiome matures towards the adult composition during the first years of life and is implicated in early immune development. Here, we investigate the effects of microbial genomic diversity on gut microbiome development using integrated early childhood data sets collected in the DIABIMMUNE study in Finland, Estonia and Russian Karelia. We show that gut microbial diversity is associated with household location and linear growth of children. Single nucleotide polymorphism- and metagenomic assembly-based strain tracking revealed large and highly dynamic microbial pangenomes, especially in the genus Bacteroides, in which we identified evidence of variability deriving from Bacteroides-targeting bacteriophages. Our analyses revealed functional consequences of strain diversity; only 10% of Finnish infants harboured Bifidobacterium longum subsp. infantis, a subspecies specialized in human milk metabolism, whereas Russian infants commonly maintained a probiotic Bifidobacterium bifidum strain in infancy. Groups of bacteria contributing to diverse, characterized metabolic pathways converged to highly subject-specific configurations over the first two years of life. 
This longitudinal study extends the current view of early gut microbial community assembly based on strain-level genomic variation.}, } @article {pmid30550841, year = {2019}, author = {Jatuponwiphat, T and Chumnanpuen, P and Othman, S and E-Kobon, T and Vongsangnak, W}, title = {Iron-associated protein interaction networks reveal the key functional modules related to survival and virulence of Pasteurella multocida.}, journal = {Microbial pathogenesis}, volume = {127}, number = {}, pages = {257--266}, doi = {10.1016/j.micpath.2018.12.013}, pmid = {30550841}, issn = {1096-1208}, mesh = {Bacterial Proteins/*genetics/*metabolism ; Biological Transport ; Iron/*metabolism ; Metabolic Networks and Pathways/genetics ; Pasteurella multocida/*genetics/physiology ; Protein Binding ; *Protein Interaction Maps ; Virulence ; }, abstract = {Pasteurella multocida causes respiratory infectious diseases in a multitude of birds and mammals. A number of virulence-associated genes were reported across different strains of P. multocida, including those involved in the iron transport and metabolism. Comparative iron-associated genes of P. multocida among different animal hosts towards their interaction networks have not been fully revealed. Therefore, this study aimed to identify the iron-associated genes from core- and pan-genomes of fourteen P. multocida strains and to construct iron-associated protein interaction networks using genome-scale network analysis which might be associated with the virulence. Results showed that these fourteen strains had 1587 genes in the core-genome and 3400 genes constituting their pan-genome. Out of these, 2651 genes associated with iron transport and metabolism were selected to construct the protein interaction networks and 361 genes were incorporated into the iron-associated protein interaction network (iPIN) consisting of nine different iron-associated functional modules. 
After comparing with the virulence factor database (VFDB), 21 virulence-associated proteins were determined and 11 of these belonged to the heme biosynthesis module. From this study, the core heme biosynthesis module and the core outer membrane hemoglobin receptor HgbA were proposed as candidate targets to design novel antibiotics and vaccines for preventing pasteurellosis across the serotypes or animal hosts for enhanced precision agriculture to ensure sustainability in food security.}, } @article {pmid30550564, year = {2018}, author = {Moradigaravand, D and Palm, M and Farewell, A and Mustonen, V and Warringer, J and Parts, L}, title = {Prediction of antibiotic resistance in Escherichia coli from large-scale pan-genome data.}, journal = {PLoS computational biology}, volume = {14}, number = {12}, pages = {e1006258}, pmid = {30550564}, issn = {1553-7358}, support = {MR/R004501/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Anti-Bacterial Agents/pharmacology ; DNA, Bacterial/genetics ; Drug Resistance, Bacterial/*genetics ; Drug Resistance, Multiple, Bacterial/drug effects ; Escherichia coli/*genetics ; Escherichia coli Infections ; Forecasting/methods ; Genome/genetics ; Genome, Bacterial ; Humans ; Microbial Sensitivity Tests ; Sequence Analysis, DNA/*methods ; }, abstract = {The emergence of microbial antibiotic resistance is a global health threat. In clinical settings, the key to controlling spread of resistant strains is accurate and rapid detection. As traditional culture-based methods are time consuming, genetic approaches have recently been developed for this task. The detection of antibiotic resistance is typically made by measuring a few known determinants previously identified from genome sequencing, and thus requires the prior knowledge of its biological mechanisms. 
To overcome this limitation, we employed machine learning models to predict resistance to 11 compounds across four classes of antibiotics from existing and novel whole genome sequences of 1936 E. coli strains. We considered a range of methods, and examined population structure, isolation year, gene content, and polymorphism information as predictors. Gradient boosted decision trees consistently outperformed alternative models with an average accuracy of 0.91 on held-out data (range 0.81-0.97). While the best models most frequently employed gene content, an average accuracy score of 0.79 could be obtained using population structure information alone. Single nucleotide variation data were less useful, and significantly improved prediction only for two antibiotics, including ciprofloxacin. These results demonstrate that antibiotic resistance in E. coli can be accurately predicted from whole genome sequences without a priori knowledge of mechanisms, and that both genomic and epidemiological data can be informative. This paves way to integrating machine learning approaches into diagnostic tools in the clinic.}, } @article {pmid30538677, year = {2018}, author = {Colson, P and Levasseur, A and La Scola, B and Sharma, V and Nasir, A and Pontarotti, P and Caetano-Anollés, G and Raoult, D}, title = {Ancestrality and Mosaicism of Giant Viruses Supporting the Definition of the Fourth TRUC of Microbes.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2668}, pmid = {30538677}, issn = {1664-302X}, abstract = {Giant viruses of amoebae were discovered in 2003. Since then, their diversity has greatly expanded. They were suggested to form a fourth branch of life, collectively named 'TRUC' (for "Things Resisting Uncompleted Classifications") alongside Bacteria, Archaea, and Eukarya. Their origin and ancestrality remain controversial. Here, we specify the evolution and definition of giant viruses. 
Phylogenetic and phenetic analyses of informational gene repertoires of giant viruses and selected bacteria, archaea and eukaryota were performed, including structural phylogenomics based on protein structural domains grouped into 289 universal fold superfamilies (FSFs). Hierarchical clustering analysis was performed based on a binary presence/absence matrix constructed using 727 informational COGs from cellular organisms. The presence/absence of 'universal' FSF domains was used to generate an unrooted maximum parsimony phylogenomic tree. Comparison of the gene content of a giant virus with those of a bacterium, an archaeon, and a eukaryote with small genomes was also performed. Overall, both cladistic analyses based on gene sequences of very central and ancient proteins and on highly conserved protein fold structures as well as phenetic analyses were congruent regarding the delineation of a fourth branch of microbes comprised by giant viruses. Giant viruses appeared as a basal group in the tree of all proteomes. A pangenome and core genome determined for Rickettsia bellii (bacteria), Methanomassiliicoccus luminyensis (archaeon), Encephalitozoon intestinalis (eukaryote), and Tupanvirus (giant virus) showed a substantial proportion of Tupanvirus genes that overlap with those of the cellular microbes. In addition, a substantial genome mosaicism was observed, with 51, 11, 8, and 0.2% of Tupanvirus genes best matching with viruses, eukaryota, bacteria, and archaea, respectively. Finally, we found that genes themselves may be subject to lateral sequence transfers. In summary, our data highlight the quantum leap between classical and giant viruses. Phylogenetic and phyletic analyses and the study of protein fold superfamilies confirm previous evidence of the existence of a fourth TRUC of life that includes giant viruses, and highlight its ancestrality and mosaicism. 
They also point out that best evolutionary representations for giant viruses and cellular microorganisms are rhizomes, and that sequence transfers rather than gene transfers have to be considered.}, } @article {pmid30535304, year = {2018}, author = {Lees, JA and Galardini, M and Bentley, SD and Weiser, JN and Corander, J}, title = {pyseer: a comprehensive tool for microbial pangenome-wide association studies.}, journal = {Bioinformatics (Oxford, England)}, volume = {34}, number = {24}, pages = {4310--4312}, pmid = {30535304}, issn = {1367-4811}, support = {R01 AI105168/AI/NIAID NIH HHS/United States ; R37 AI038446/AI/NIAID NIH HHS/United States ; 098051/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Bacteria/*growth & development ; Computational Biology ; *Genetic Association Studies ; Models, Statistical ; *Software ; }, abstract = {SUMMARY: Genome-wide association studies (GWAS) in microbes have different challenges to GWAS in eukaryotes. These have been addressed by a number of different methods. pyseer brings these techniques together in one package tailored to microbial GWAS, allows greater flexibility of the input data used, and adds new methods to interpret the association results.

pyseer is written in python and is freely available at https://github.com/mgalardini/pyseer, or can be installed through pip. Documentation and a tutorial are available at http://pyseer.readthedocs.io.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30530095, year = {2019}, author = {Fritsch, L and Felten, A and Palma, F and Mariet, JF and Radomski, N and Mistou, MY and Augustin, JC and Guillier, L}, title = {Insights from genome-wide approaches to identify variants associated to phenotypes at pan-genome scale: Application to L. monocytogenes' ability to grow in cold conditions.}, journal = {International journal of food microbiology}, volume = {291}, number = {}, pages = {181--188}, doi = {10.1016/j.ijfoodmicro.2018.11.028}, pmid = {30530095}, issn = {1879-3460}, mesh = {Bayes Theorem ; *Cold Temperature ; Food Contamination/analysis ; Food Microbiology ; *Genetic Association Studies ; *Genome, Bacterial ; Listeria monocytogenes/*genetics/*isolation & purification ; Phenotype ; Phylogeny ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; }, abstract = {Intraspecific variability of the behavior of most foodborne pathogens is well described and taken into account in Quantitative Microbial Risk Assessment (QMRA), but factors (strain origin, serotype, …) explaining these differences are scarce or contradictory between studies. Nowadays, Whole Genome Sequencing (WGS) offers new opportunities to explain intraspecific variability of food pathogens, based on various recently published bioinformatics tools. The objective of this study is to get a better insight into different existing bioinformatics approaches to associate bacterial phenotype(s) and genotype(s). Therefore, a dataset of 51 L. monocytogenes strains, isolated from multiple sources (i.e. different food matrices and environments) and belonging to 17 clonal complexes (CC), were selected to represent large population diversity. Furthermore, the phenotypic variability of growth at low temperature was determined (i.e. qualitative phenotype), and the whole genomes of selected strains were sequenced. 
The almost exhaustive gene content, as well as the core genome SNPs based phylogenetic reconstruction, were derived from the whole sequenced genomes. A Bayesian inference method was applied to identify the branches on which the phenotype distribution evolves within sub-lineages. Two different Genome Wide Association Studies (i.e. gene- and SNP-based GWAS) were independently performed in order to link genetic mutations to the phenotype of interest. The genomic analyses presented in this study were successfully applied on the selected dataset. The Bayesian phylogenetic approach emphasized an association with "slow" growth ability at 2 °C of the lineage I, as well as CC9 of the lineage II. Moreover, both gene- and SNP-GWAS approaches displayed significant statistical associations with the tested phenotype. A list of 114 significantly associated genes, including genes already known to be involved in the cold adaption mechanism of L. monocytogenes and genes associated to mobile genetic elements (MGE), resulted from the gene-GWAS. On the other hand, a group of 184 highly associated SNPs were highlighted by SNP-GWAS, including SNPs detected in genes which were already likely involved in cold adaption; hypothetical proteins; and intergenic regions where for example promotors and regulators can be located. The successful application of combined bioinformatics approaches associating WGS-genotypes and specific phenotypes, could contribute to improve prediction of microbial behaviors in food. 
The implementation of this information in hazard identification and exposure assessment processes will open new possibilities to feed QMRA-models.}, } @article {pmid30513905, year = {2018}, author = {Martignoles, JA and Delhommeau, F and Hirsch, P}, title = {Genetic Hierarchy of Acute Myeloid Leukemia: From Clonal Hematopoiesis to Molecular Residual Disease.}, journal = {International journal of molecular sciences}, volume = {19}, number = {12}, pages = {}, pmid = {30513905}, issn = {1422-0067}, mesh = {Clone Cells/*pathology ; Genetic Predisposition to Disease ; Hematopoiesis/*genetics ; Humans ; Leukemia, Myeloid, Acute/classification/*genetics ; Models, Biological ; Neoplasm, Residual/*genetics ; }, abstract = {Recent advances in the field of cancer genome analysis revolutionized the picture we have of acute myeloid leukemia (AML). Pan-genomic studies, using either single nucleotide polymorphism arrays or whole genome/exome next generation sequencing, uncovered alterations in dozens of new genes or pathways, intimately connected with the development of leukemia. From a simple two-hit model in the late nineties, we are now building clonal stories that involve multiple unexpected cellular functions, leading to full-blown AML. In this review, we will address several seminal concepts that result from these new findings. We will describe the genetic landscape of AML, the association and order of events that define multiple sub-entities, both in terms of pathogenesis and in terms of clinical practice. 
Finally, we will discuss the use of this knowledge in the settings of new strategies for the evaluation of measurable residual diseases (MRD), using clone-specific multiple molecular targets.}, } @article {pmid30509172, year = {2018}, author = {Timms, VJ and Nguyen, T and Crighton, T and Yuen, M and Sintchenko, V}, title = {Genome-wide comparison of Corynebacterium diphtheriae isolates from Australia identifies differences in the Pan-genomes between respiratory and cutaneous strains.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {869}, pmid = {30509172}, issn = {1471-2164}, support = {Program Round 4//NSW Ministry of Health/ ; }, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Australia ; Corynebacterium diphtheriae/classification/*genetics/isolation & purification ; DNA, Bacterial/chemistry/isolation & purification/metabolism ; Female ; *Genome, Bacterial ; Genome-Wide Association Study ; Humans ; Lung/*microbiology ; Male ; Middle Aged ; Multilocus Sequence Typing ; Phylogeny ; Polymorphism, Single Nucleotide ; Skin/*microbiology ; Whole Genome Sequencing ; Young Adult ; }, abstract = {BACKGROUND: Corynebacterium diphtheriae is the main etiological agent of diphtheria, a global disease causing life-threatening infections, particularly in infants and children. Vaccination with diphtheria toxoid protects against infection with potent toxin producing strains. However a growing number of apparently non-toxigenic but potentially invasive C. diphtheriae strains are identified in countries with low prevalence of diphtheria, raising key questions about genomic structures and population dynamics of the species. This study examined genomic diversity among 48 C. diphtheriae isolates collected in Australia over a 12-year period using whole genome sequencing. Phylogeny was determined using SNP-based mapping and genome wide analysis.

RESULTS: C. diphtheriae sequence type (ST) 32, a non-toxigenic clone with evidence of enhanced virulence that has been also circulating in Europe, appears to be endemic in Australia. Isolates from temporospatially related patients displayed the same ST and similarity in their core genomes. The genome-wide analysis highlighted a role of pilins, adhesion factors and iron utilization in infections caused by non-toxigenic strains.

CONCLUSIONS: The genomic diversity of toxigenic and non-toxigenic strains of C. diphtheriae in Australia suggests multiple sources of infection and colonisation. Genomic surveillance of co-circulating toxigenic and non-toxigenic C. diphtheriae offer new insights into the evolution and virulence of pathogenic clones and can inform targeted public health actions and policy. The genomes presented in this investigation will contribute to the global surveillance of C. diphtheriae both for the monitoring of antibiotic resistance genes and virulent strains such as those belonging to ST32.}, } @article {pmid30500991, year = {2019}, author = {Bolger, AM and Poorter, H and Dumschott, K and Bolger, ME and Arend, D and Osorio, S and Gundlach, H and Mayer, KFX and Lange, M and Scholz, U and Usadel, B}, title = {Computational aspects underlying genome to phenome analysis in plants.}, journal = {The Plant journal : for cell and molecular biology}, volume = {97}, number = {1}, pages = {182--198}, pmid = {30500991}, issn = {1365-313X}, mesh = {*Genetic Association Studies ; Genome, Plant/*genetics ; *Genomics ; *Machine Learning ; *Phenomics ; Phenotype ; Plants/*genetics ; Quantitative Trait Loci/genetics ; }, abstract = {Recent advances in genomics technologies have greatly accelerated the progress in both fundamental plant science and applied breeding research. Concurrently, high-throughput plant phenotyping is becoming widely adopted in the plant community, promising to alleviate the phenotypic bottleneck. While these technological breakthroughs are significantly accelerating quantitative trait locus (QTL) and causal gene identification, challenges to enable even more sophisticated analyses remain. In particular, care needs to be taken to standardize, describe and conduct experiments robustly while relying on plant physiology expertise. In this article, we review the state of the art regarding genome assembly and the future potential of pangenomics in plant research. 
We also describe the necessity of standardizing and describing phenotypic studies using the Minimum Information About a Plant Phenotyping Experiment (MIAPPE) standard to enable the reuse and integration of phenotypic data. In addition, we show how deep phenotypic data might yield novel trait-trait correlations and review how to link phenotypic data to genomic data. Finally, we provide perspectives on the golden future of machine learning and their potential in linking phenotypes to genomic features.}, } @article {pmid30497358, year = {2018}, author = {Bonnici, V and Giugno, R and Manca, V}, title = {PanDelos: a dictionary-based method for pan-genome content discovery.}, journal = {BMC bioinformatics}, volume = {19}, number = {Suppl 15}, pages = {437}, pmid = {30497358}, issn = {1471-2105}, mesh = {Bacteria/genetics ; Databases, Genetic ; *Dictionaries as Topic ; Gene Duplication ; *Genome, Bacterial ; Phylogeny ; *Software ; Time Factors ; }, abstract = {BACKGROUND: Pan-genome approaches afford the discovery of homology relations in a set of genomes, by determining how some gene families are distributed among a given set of genomes. The retrieval of a complete gene distribution among a class of genomes is an NP-hard problem because computational costs increase with the number of analyzed genomes, in fact, all-against-all gene comparisons are required to completely solve the problem. In presence of phylogenetically distant genomes, due to the variability introduced in gene duplication and transmission, the task of recognizing homologous genes becomes even more difficult. A challenge on this field is that of designing fast and adaptive similarity measures in order to find a suitable pan-genome structure of homology relations.

RESULTS: We present PanDelos, a standalone tool for the discovery of pan-genome contents among phylogenetically distant genomes. The methodology is based on information theory and network analysis. It is parameter-free because thresholds are automatically deduced from the context. PanDelos avoids sequence alignment by introducing a measure based on k-mer multiplicity. The k-mer length is defined according to general arguments rather than empirical considerations. Homology candidate relations are integrated into a global network and groups of homologous genes are extracted by applying a community detection algorithm.

CONCLUSIONS: PanDelos outperforms existing approaches, Roary and EDGAR, in terms of running times and quality content discovery. Tests were run on collections of real genomes, previously used in analogous studies, and in synthetic benchmarks that represent fully trusted golden truth. The software is available at https://github.com/GiugnoLab/PanDelos .}, } @article {pmid30496396, year = {2019}, author = {Freschi, L and Vincent, AT and Jeukens, J and Emond-Rheault, JG and Kukavica-Ibrulj, I and Dupont, MJ and Charette, SJ and Boyle, B and Levesque, RC}, title = {The Pseudomonas aeruginosa Pan-Genome Provides New Insights on Its Population Structure, Horizontal Gene Transfer, and Pathogenicity.}, journal = {Genome biology and evolution}, volume = {11}, number = {1}, pages = {109--120}, pmid = {30496396}, issn = {1759-6653}, support = {P30 DK089507/DK/NIDDK NIH HHS/United States ; }, mesh = {Drug Resistance, Bacterial/genetics ; *Gene Transfer, Horizontal ; *Genetic Variation ; *Phylogeny ; Pseudomonas aeruginosa/*genetics/pathogenicity ; Virulence/genetics ; }, abstract = {The huge increase in the availability of bacterial genomes led us to a point in which we can investigate and query pan-genomes, for example, the full set of genes of a given bacterial species or clade. Here, we used a data set of 1,311 high-quality genomes from the human pathogen Pseudomonas aeruginosa, 619 of which were newly sequenced, to show that a pan-genomic approach can greatly refine the population structure of bacterial species, provide new insights to define species boundaries, and generate hypotheses on the evolution of pathogenicity. The 665-gene P. aeruginosa core genome presented here, which constitutes only 1% of the entire pan-genome, is the first to be in the same order of magnitude as the minimal bacterial genome and represents a conservative estimate of the actual core genome. 
Moreover, the phylogeny based on this core genome provides strong evidence for a five-group population structure that includes two previously undescribed groups of isolates. Comparative genomics focusing on antimicrobial resistance and virulence genes showed that variation among isolates was partly linked to this population structure. Finally, we hypothesized that horizontal gene transfer had an important role in this respect, and found a total of 3,010 putative complete and fragmented plasmids, 5% and 12% of which contained resistance or virulence genes, respectively. This work provides data and strategies to study the evolutionary trajectories of resistance and virulence in P. aeruginosa.}, } @article {pmid30487573, year = {2018}, author = {Méric, G and Mageiros, L and Pensar, J and Laabei, M and Yahara, K and Pascoe, B and Kittiwan, N and Tadee, P and Post, V and Lamble, S and Bowden, R and Bray, JE and Morgenstern, M and Jolley, KA and Maiden, MCJ and Feil, EJ and Didelot, X and Miragaia, M and de Lencastre, H and Moriarty, TF and Rohde, H and Massey, R and Mack, D and Corander, J and Sheppard, SK}, title = {Disease-associated genotypes of the commensal skin bacterium Staphylococcus epidermidis.}, journal = {Nature communications}, volume = {9}, number = {1}, pages = {5034}, pmid = {30487573}, issn = {2041-1723}, support = {G0801929/MRC_/Medical Research Council/United Kingdom ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Genome, Bacterial/genetics ; Genome-Wide Association Study ; Genotype ; Humans ; Interleukin-8/metabolism ; Skin Diseases/*microbiology ; Staphylococcal Infections/*microbiology ; Staphylococcus epidermidis/*genetics/*pathogenicity ; }, abstract = {Some of the most common infectious diseases are caused by bacteria that naturally colonise humans asymptomatically. 
Combating these opportunistic pathogens requires an understanding of the traits that differentiate infecting strains from harmless relatives. Staphylococcus epidermidis is carried asymptomatically on the skin and mucous membranes of virtually all humans but is a major cause of nosocomial infection associated with invasive procedures. Here we address the underlying evolutionary mechanisms of opportunistic pathogenicity by combining pangenome-wide association studies and laboratory microbiology to compare S. epidermidis from bloodstream and wound infections and asymptomatic carriage. We identify 61 genes containing infection-associated genetic elements (k-mers) that correlate with in vitro variation in known pathogenicity traits (biofilm formation, cell toxicity, interleukin-8 production, methicillin resistance). Horizontal gene transfer spreads these elements, allowing divergent clones to cause infection. Finally, Random Forest model prediction of disease status (carriage vs. infection) identifies pathogenicity elements in 415 S. epidermidis isolates with 80% accuracy, demonstrating the potential for identifying risk genotypes pre-operatively.}, } @article {pmid30483518, year = {2018}, author = {Wang, R and Li, L and Huang, T and Huang, W and Lei, A and Chen, M}, title = {Capsular Switching and ICE Transformation Occurred in Human Streptococcus agalactiae ST19 With High Pathogenicity to Fish.}, journal = {Frontiers in veterinary science}, volume = {5}, number = {}, pages = {281}, pmid = {30483518}, issn = {2297-1769}, abstract = {Although Streptococcus agalactiae (GBS) cross-infection between human and fish has been confirmed in experimental and clinical studies, the mechanisms underlying GBS cross-species infection remain largely unclear. We have found different human GBS ST19 strains exhibiting strong or weak pathogenic to fish (sGBS and wGBS). 
In this study, our objective was to identify the genetic elements responsible for GBS cross species infection based on genome sequence data and comparative genomics. The genomes of 11 sGBS strains and 11 wGBS strains were sequenced, and the genomic analysis was performed based on pan-genome, CRISPRs, phylogenetic reconstruction and genome comparison. The results from the pan-genome, CRISPRs analysis and phylogenetic reconstruction indicated that genomes between sGBS were more conservative than that of wGBS. The genomic differences between sGBS and wGBS were primarily in the Cps region (about 111 kb) and its adjacent ICE region (about 106 kb). The Cps region included the entire cps operon, and all sGBS were capsular polysaccharide (CPS) type V, while all wGBS were CPS type III. The ICE region of sGBS contained integrative and conjugative elements (ICE) with IQ element and erm(TR), and was very conserved, whereas the ICE region of wGBS contained ICE with mega elements and the variation was large. 
The capsular switching (III-V) and transformation of ICE adjacent to the Cps region occurred in human GBS ST19 with different pathogenicity to fish, which may be related to the capability of GBS cross-infection.}, } @article {pmid30482178, year = {2018}, author = {Gallo, G and Presta, L and Perrin, E and Gallo, M and Marchetto, D and Puglia, AM and Fani, R and Baldi, F}, title = {Genomic traits of Klebsiella oxytoca DSM 29614, an uncommon metal-nanoparticle producer strain isolated from acid mine drainages.}, journal = {BMC microbiology}, volume = {18}, number = {1}, pages = {198}, pmid = {30482178}, issn = {1471-2180}, mesh = {Anaerobiosis ; Citric Acid/metabolism ; Ferric Compounds/chemistry/*metabolism ; Genome, Bacterial ; Genomics ; Klebsiella oxytoca/classification/*genetics/*isolation & purification/metabolism ; Metal Nanoparticles/*chemistry ; Mining ; Phylogeny ; Wastewater/*microbiology ; }, abstract = {BACKGROUND: Klebsiella oxytoca DSM 29614 - isolated from acid mine drainages - grows anaerobically using Fe(III)-citrate as sole carbon and energy source, unlike other enterobacteria and K. oxytoca clinical isolates. The DSM 29614 strain is multi metal resistant and produces metal nanoparticles that are embedded in its very peculiar capsular exopolysaccharide. These metal nanoparticles were effective as antimicrobial and anticancer compounds, chemical catalysts and nano-fertilizers.

RESULTS: The DSM 29614 strain genome was sequenced and analysed by a combination of in silico procedures. Comparative genomics, performed between 85 K. oxytoca representatives and K. oxytoca DSM 29614, revealed that this bacterial group has an open pangenome, characterized by a very small core genome (1009 genes, about 2%), a high fraction of unique (43,808 genes, about 87%) and accessory genes (5559 genes, about 11%). Proteins belonging to COG categories "Carbohydrate transport and metabolism" (G), "Amino acid transport and metabolism" (E), "Coenzyme transport and metabolism" (H), "Inorganic ion transport and metabolism" (P), and "membrane biogenesis-related proteins" (M) are particularly abundant in the predicted proteome of DSM 29614 strain. The results of a protein functional enrichment analysis - based on a previous proteomic analysis - revealed metabolic optimization during Fe(III)-citrate anaerobic utilization. In this growth condition, the observed high levels of Fe(II) may be due to different flavin metal reductases and siderophores as inferred from genome analysis. The presence of genes responsible for the synthesis of exopolysaccharide and for the tolerance to heavy metals was highlighted too. The inferred genomic insights were confirmed by a set of phenotypic tests showing specific metabolic capability in terms of i) Fe[2+] and exopolysaccharide production and ii) phosphatase activity involved in precipitation of metal ion-phosphate salts.

CONCLUSION: The K. oxytoca DSM 29614 unique capabilities of using Fe(III)-citrate as sole carbon and energy source in anaerobiosis and tolerating diverse metals coincides with the presence at the genomic level of specific genes that can support i) energy metabolism optimization, ii) cell protection by the biosynthesis of a peculiar exopolysaccharide armour entrapping metal ions and iii) general and metal-specific detoxifying activities by different proteins and metabolites.}, } @article {pmid30465642, year = {2019}, author = {Abudahab, K and Prada, JM and Yang, Z and Bentley, SD and Croucher, NJ and Corander, J and Aanensen, DM}, title = {PANINI: Pangenome Neighbour Identification for Bacterial Populations.}, journal = {Microbial genomics}, volume = {5}, number = {4}, pages = {}, pmid = {30465642}, issn = {2057-5858}, support = {//Wellcome Trust/United Kingdom ; MR/N019296/1/MRC_/Medical Research Council/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Bacteria/*classification/*genetics ; Genome, Bacterial/*genetics ; Genomics ; Metadata ; Phylogeny ; *Software ; Unsupervised Machine Learning ; }, abstract = {The standard workhorse for genomic analysis of the evolution of bacterial populations is phylogenetic modelling of mutations in the core genome. However, a notable amount of information about evolutionary and transmission processes in diverse populations can be lost unless the accessory genome is also taken into consideration. Here, we introduce panini (Pangenome Neighbour Identification for Bacterial Populations), a computationally scalable method for identifying the neighbours for each isolate in a data set using unsupervised machine learning with stochastic neighbour embedding based on the t-SNE (t-distributed stochastic neighbour embedding) algorithm. 
panini is browser-based and integrates with the Microreact platform for rapid online visualization and exploration of both core and accessory genome evolutionary signals, together with relevant epidemiological, geographical, temporal and other metadata. Several case studies with single- and multi-clone pneumococcal populations are presented to demonstrate the ability to identify biologically important signals from gene content data. panini is available at http://panini.pathogen.watch and code at http://gitlab.com/cgps/panini.}, } @article {pmid30458797, year = {2018}, author = {Wang, D and Li, J and Wang, L}, title = {Comprehensive study of instable regions in Pseudomonas aeruginosa and Mycobacterium tuberculosis.}, journal = {Biomedical engineering online}, volume = {17}, number = {Suppl 1}, pages = {133}, pmid = {30458797}, issn = {1475-925X}, mesh = {*Computational Biology ; DNA Transposable Elements ; Drug Resistance, Bacterial ; Gene Deletion ; *Genome, Bacterial ; Humans ; Integrases/metabolism ; Mutagenesis, Insertional ; Mycobacterium tuberculosis/*genetics ; Pseudomonas aeruginosa/*genetics ; Recombination, Genetic ; *Sequence Alignment ; *Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Pseudomonas aeruginosa is a common bacterium which is recognized for its association with hospital-acquired infections and its advanced antibiotic resistance mechanisms. Tuberculosis, one of the major causes of mortality, is initiated by the deposition of Mycobacterium tuberculosis. Accessory sequences shared by a subset of strains of a species play an important role in a species' evolution, antibiotic resistance and infectious potential.

RESULTS: Here, with a multiple sequence aligner, we segmented 25 P. aeruginosa genomes and 28 M. tuberculosis genomes into core blocks (include sequences shared by all the input genomes) and dispensable blocks (include sequences shared by a subset of the input genomes), respectively. For each input genome, we then constructed a scaffold consisting of its core and dispensable blocks sorted by blocks' locations on the chromosomes. Consecutive dispensable blocks on these scaffolds formed instable regions. After a comprehensive study of these instable regions, three characteristics of instable regions are summarized: instable regions were short, site specific and varied in different strains. Three DNA elements (directed repeats (DRs), transposons and integrons) were then studied to see whether these DNA elements are associated with the variation of instable regions. A pipeline was developed to search for DR pairs on the flank of every instable sequence. 27 DR pairs in P. aeruginosa strains and 6 pairs in M. tuberculosis strains were found to exist in the instable regions. On the average, 14% and 12% of instable regions in P. aeruginosa strains covered transposase genes and integrase genes, respectively. In M. tuberculosis strains, an average of 43% and 8% of instable regions contain transposase genes and integrase genes, respectively.

CONCLUSIONS: Instable regions were short, site specific and varied in different strains for both P. aeruginosa and M. tuberculosis. Our experimental results showed that DRs, transposons and integrons may be associated with variation of instable regions.}, } @article {pmid30455414, year = {2019}, author = {Sherman, RM and Forman, J and Antonescu, V and Puiu, D and Daya, M and Rafaels, N and Boorgula, MP and Chavan, S and Vergara, C and Ortega, VE and Levin, AM and Eng, C and Yazdanbakhsh, M and Wilson, JG and Marrugo, J and Lange, LA and Williams, LK and Watson, H and Ware, LB and Olopade, CO and Olopade, O and Oliveira, RR and Ober, C and Nicolae, DL and Meyers, DA and Mayorga, A and Knight-Madden, J and Hartert, T and Hansel, NN and Foreman, MG and Ford, JG and Faruque, MU and Dunston, GM and Caraballo, L and Burchard, EG and Bleecker, ER and Araujo, MI and Herrera-Paz, EF and Campbell, M and Foster, C and Taub, MA and Beaty, TH and Ruczinski, I and Mathias, RA and Barnes, KC and Salzberg, SL}, title = {Assembly of a pan-genome from deep sequencing of 910 humans of African descent.}, journal = {Nature genetics}, volume = {51}, number = {1}, pages = {30-35}, pmid = {30455414}, issn = {1546-1718}, support = {R01 HG006677/HG/NHGRI NIH HHS/United States ; R01 HL129239/HL/NHLBI NIH HHS/United States ; U54 GM115428/GM/NIGMS NIH HHS/United States ; R01 HL104608/HL/NHLBI NIH HHS/United States ; R01 AI132476/AI/NIAID NIH HHS/United States ; }, mesh = {Black People/*genetics ; Genome, Human/*genetics ; High-Throughput Nucleotide Sequencing/methods ; Humans ; Sequence Analysis, DNA/methods ; }, abstract = {We used a deeply sequenced dataset of 910 individuals, all of African descent, to construct a set of DNA sequences that is present in these individuals but missing from the reference human genome. 
We aligned 1.19 trillion reads from the 910 individuals to the reference genome (GRCh38), collected all reads that failed to align, and assembled these reads into contiguous sequences (contigs). We then compared all contigs to one another to identify a set of unique sequences representing regions of the African pan-genome missing from the reference genome. Our analysis revealed 296,485,284 bp in 125,715 distinct contigs present in the populations of African descent, demonstrating that the African pan-genome contains ~10% more DNA than the current human reference genome. Although the functional significance of nearly all of this sequence is unknown, 387 of the novel contigs fall within 315 distinct protein-coding genes, and the rest appear to be intergenic.}, } @article {pmid30453700, year = {2018}, author = {Yu, Z and Ding, Y and Yin, J and Yu, D and Zhang, J and Zhang, M and Ding, M and Zhong, W and Qiu, J and Li, J}, title = {Dissemination of Genetic Acquisition/Loss Provides a Variety of Quorum Sensing Regulatory Properties in Pseudoalteromonas.}, journal = {International journal of molecular sciences}, volume = {19}, number = {11}, pages = {}, pmid = {30453700}, issn = {1422-0067}, support = {31670114 and 31670115//National Natural Science Foundation of China/ ; LY16C010002//Natural Science Foundation of Zhejiang Province, China/ ; }, mesh = {Bacterial Proteins/metabolism ; Biological Evolution ; Colony Count, Microbial ; Genomics ; Mutation/genetics ; Phenotype ; Pseudoalteromonas/*genetics/growth & development/isolation & purification ; Quorum Sensing/*genetics ; }, abstract = {Abstract: Quorum sensing (QS) enables single-celled bacteria to communicate with chemical signals in order to synchronize group-level bacterial behavior. Pseudoalteromonas are marine bacteria found in versatile environments, of which QS regulation for their habitat adaptation is extremely fragmentary. 
To distinguish genes required for QS regulation in Pseudoalteromonas, comparative genomics was deployed to define the pan-genomics for twelve isolates and previously-sequenced genomes, of which acyl-homoserine lactone (AHL)-based QS traits were characterized. Additionally, transposon mutagenesis was used to identify the essential QS regulatory genes in the selected Pseudoalteromonas isolate. A remarkable feature showed that AHL-based colorization intensity of biosensors induced by Pseudoalteromonas most likely correlates with QS regulators genetic heterogeneity within the genus. This is supported by the relative expression levels of two of the main QS regulatory genes (luxO and rpoN) analyzed in representative Pseudoalteromonas isolates. Notably, comprehensive QS regulatory schema and the working model proposed in Pseudoalteromonas seem to phylogenetically include the network architectures derived from Escherichia coli, Pseudomonas, and Vibrio. Several associated genes were mapped by transposon mutagenesis. Among them, a right origin-binding protein-encoding gene (robp) was functionally identified as a positive QS regulatory gene. This gene lies on a genomic instable region and exists in the aforementioned bioinformatically recruited QS regulatory schema. 
The obtained data emphasize that the distinctly- and hierarchically-organized mechanisms probably target QS association in Pseudoalteromonas dynamic genomes, thus leading to bacterial ability to accommodate their adaption fitness and survival advantages.}, } @article {pmid30453061, year = {2019}, author = {Kayansamruaj, P and Soontara, C and Unajak, S and Dong, HT and Rodkhum, C and Kondo, H and Hirono, I and Areechon, N}, title = {Comparative genomics inferred two distinct populations of piscine pathogenic Streptococcus agalactiae, serotype Ia ST7 and serotype III ST283, in Thailand and Vietnam.}, journal = {Genomics}, volume = {111}, number = {6}, pages = {1657-1667}, doi = {10.1016/j.ygeno.2018.11.016}, pmid = {30453061}, issn = {1089-8646}, mesh = {Animals ; Fish Diseases/microbiology ; Fishes/microbiology ; Foodborne Diseases/microbiology ; Genomics ; Humans ; *Multilocus Sequence Typing ; *Phylogeny ; *Serogroup ; Streptococcus agalactiae/*genetics/isolation & purification/pathogenicity ; Thailand ; Vietnam ; }, abstract = {The genomes of Streptococcus agalactiae (group B streptococcus; GBS) collected from diseased fish in Thailand and Vietnam over a nine-year period (2008-2016) were sequenced and compared (n = 21). Based on capsular serotype and multilocus sequence typing (MLST), GBS isolates are divided into 2 groups comprised of i) serotype Ia; sequence type (ST)7 and ii) serotype III; ST283. Population structure inferred by core genome (cg)MLST and Bayesian clustering analysis also strongly indicated distribution of two GBS populations in both Thailand and Vietnam. Deep phylogenetic analysis implied by CRISPR array's spacer diversity was able to cluster GBS isolates according to their temporal and geographic origins, though ST7 has varying CRISPR1-spacer profiles when compared to ST283 strains. 
Based on overall genotypic features, Thai ST283 strains were closely related to the Singaporean ST283 strain causing foodborne illness in humans in 2015, thus, signifying zoonotic potential of this GBS population in the country.}, } @article {pmid30448864, year = {2019}, author = {Gabur, I and Chawla, HS and Snowdon, RJ and Parkin, IAP}, title = {Connecting genome structural variation with complex traits in crop plants.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {132}, number = {3}, pages = {733-750}, pmid = {30448864}, issn = {1432-2242}, support = {SN14/17-1//Deutsche Forschungsgemeinschaft/ ; }, mesh = {Crops, Agricultural/*genetics ; Genome, Plant ; *Genomic Structural Variation ; Polymorphism, Single Nucleotide/genetics ; *Quantitative Trait, Heritable ; Sequence Analysis, DNA ; }, abstract = {Structural genome variation is a major determinant of useful trait diversity. We describe how genome analysis methods are enabling discovery of trait-associated structural variants and their potential impact on breeding. As our understanding of complex crop genomes continues to grow, there is growing evidence that structural genome variation plays a major role in determining traits important for breeding and agriculture. Identifying the extent and impact of structural variants in crop genomes is becoming increasingly feasible with ongoing advances in the sophistication of genome sequencing technologies, particularly as it becomes easier to generate accurate long sequence reads on a genome-wide scale. In this article, we discuss the origins of structural genome variation in crops from ancient and recent genome duplication and polyploidization events and review high-throughput methods to assay such variants in crop populations in order to find associations with phenotypic traits. 
There is increasing evidence from such studies that gene presence-absence and copy number variation resulting from segmental chromosome exchanges may be at the heart of adaptive variation of crops to counter abiotic and biotic stress factors. We present examples from major crops that demonstrate the potential of pangenomic diversity as a key resource for future plant breeding for resilience and sustainability.}, } @article {pmid30446793, year = {2019}, author = {Monat, C and Schreiber, M and Stein, N and Mascher, M}, title = {Prospects of pan-genomics in barley.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {132}, number = {3}, pages = {785-796}, pmid = {30446793}, issn = {1432-2242}, support = {031B0190A//Bundesministerium für Bildung und Forschung/ ; SAW-2015-IPK-1//Leibniz-Gemeinschaft/ ; }, mesh = {Brachypodium/genetics ; Costs and Cost Analysis ; Genome, Plant ; Genomics/economics/*methods ; Hordeum/*genetics ; Oryza/genetics ; }, abstract = {The concept of a pan-genome refers to intraspecific diversity in genome content and structure, encompassing both genes and intergenic space. Pan-genomic studies employ a combination of de novo sequence assembly and reference-based alignment to discover and genotype structural variants. The large size and complex structure of Triticeae genomes were for a long time an obstacle for genomic research in barley and its relatives. Now that a reference genome is available, computational pipelines for high-quality sequence assembly are in place, and sequence costs continue to drop, investigations into the structural diversity of the barley genome seem within reach. Here, we review the recent progress on pan-genomics in the model grass Brachypodium distachyon, and the cereal crops rice and maize, and devise a multi-tiered strategy for a pan-genome project in barley. 
Our design involves: (1) the construction of high-quality de novo sequence assemblies for a small core set of representative genotypes, (2) short-read sequencing of a large diversity panel of genebank accessions to medium coverage and (3) the use of complementary methods such as chromosome-conformation capture sequencing and k-mer-based association genetics. The in silico representation of the barley pan-genome may inform about the mechanisms of structural genome evolution in the Triticeae and supplement quantitative genetics models of crop performance for better accuracy and predictive ability.}, } @article {pmid30445215, year = {2019}, author = {Mohapatra, B and Kazy, SK and Sar, P}, title = {Comparative genome analysis of arsenic reducing, hydrocarbon metabolizing groundwater bacterium Achromobacter sp. KAs 3-5[T] explains its competitive edge for survival in aquifer environment.}, journal = {Genomics}, volume = {111}, number = {6}, pages = {1604-1619}, doi = {10.1016/j.ygeno.2018.11.004}, pmid = {30445215}, issn = {1089-8646}, mesh = {*Achromobacter/genetics/metabolism ; Arsenic/*metabolism ; *Genome, Bacterial ; Groundwater/*microbiology ; Hydrocarbons/*metabolism ; Oxidation-Reduction ; *Water Microbiology ; }, abstract = {Whole genome sequence of arsenic (As) reducing, hydrocarbon metabolizing groundwater bacterium Achromobacter sp. KAs 3-5[T] was explored to understand the genomic basis of its As-ecophysiology and niche adaptation in aquifer environment. The genome (5.6 Mbp, 65.5 G + C mol %) encodes 4840 proteins, 1138 enzymes, 53 tRNAs, 11 rRNAs, 608 signal peptides, and 1.13% horizontally transferred genes. Presence of genes encoding cytosolic As[5+]-reduction (arsRCBH, ACR3), aromatics utilization (bph, naph, catABC, boxABCD, genACB), Fe-transformation (tonB, achromobactin, FUR, FeR), and denitrification (nar, nap) processes were observed and validated through proteomics. 
Phylogenomic analysis (< 90% ANI, < 50% DDH) confirmed strain KAs 3-5[T] to be a novel representative of the genus Achromobacter. An asymptotic open pan-genome (20,855 genes) and high correlation between genomic and ecological diversity suggested niche preference ability of this genus. Assemblage of species specific genes affiliated to transcription-regulation, membrane transport, and redox-transformation explained the strain's competitive survival strategies in As-rich oligotrophic groundwater.}, } @article {pmid30429208, year = {2019}, author = {Smythies, JA and Sun, M and Masson, N and Salama, R and Simpson, PD and Murray, E and Neumann, V and Cockman, ME and Choudhry, H and Ratcliffe, PJ and Mole, DR}, title = {Inherent DNA-binding specificities of the HIF-1α and HIF-2α transcription factors in chromatin.}, journal = {EMBO reports}, volume = {20}, number = {1}, pages = {}, pmid = {30429208}, issn = {1469-3178}, support = {078333/Z/05/Z/WT_/Wellcome Trust/United Kingdom ; 203141/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; FC001501/MRC_/Medical Research Council/United Kingdom ; FC001501//Wellcome Trust/United Kingdom ; FC001501/CRUK_/Cancer Research UK/United Kingdom ; 088182/Z/09/Z/WT_/Wellcome Trust/United Kingdom ; A416016/CRUK_/Cancer Research UK/United Kingdom ; WT091857MA/WT_/Wellcome Trust/United Kingdom ; RG/11/1/28684/BHF_/British Heart Foundation/United Kingdom ; RP-2015-06-004/DH_/Department of Health/United Kingdom ; //Wellcome Trust/United Kingdom ; }, mesh = {Basic Helix-Loop-Helix Transcription Factors/*genetics ; Cell Hypoxia/*genetics ; Cell Line ; Chromatin/genetics ; DNA/genetics ; DNA-Binding Proteins/genetics ; Epigenomics ; Gene Expression Regulation/genetics ; Humans ; Hypoxia-Inducible Factor 1, alpha Subunit/*genetics ; Promoter Regions, Genetic ; Protein Isoforms/genetics ; *Transcription, Genetic ; }, abstract = {Hypoxia-inducible factor (HIF) is the major transcriptional regulator of cellular responses to hypoxia. 
The two principal HIF-α isoforms, HIF-1α and HIF-2α, are progressively stabilized in response to hypoxia and form heterodimers with HIF-1β to activate a broad range of transcriptional responses. Here, we report on the pan-genomic distribution of isoform-specific HIF binding in response to hypoxia of varying severity and duration, and in response to genetic ablation of each HIF-α isoform. Our findings reveal that, despite an identical consensus recognition sequence in DNA, each HIF heterodimer loads progressively at a distinct repertoire of cell-type-specific sites across the genome, with little evidence of redistribution under any of the conditions examined. Marked biases towards promoter-proximal binding of HIF-1 and promoter-distant binding of HIF-2 were observed under all conditions and were consistent in multiple cell types. The findings imply that each HIF isoform has an inherent property that determines its binding distribution across the genome, which might be exploited to therapeutically target the specific transcriptional output of each isoform independently.}, } @article {pmid30425707, year = {2018}, author = {Fontana, A and Zacconi, C and Morelli, L}, title = {Genetic Signatures of Dairy Lactobacillus casei Group.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2611}, pmid = {30425707}, issn = {1664-302X}, abstract = {Lactobacillus casei/Lactobacillus paracasei group of species contains strains adapted to a wide range of environments, from dairy products to intestinal tract of animals and fermented vegetables. Understanding the gene acquisitions and losses that induced such different adaptations, implies a comparison between complete genomes, since evolutionary differences spread on the whole sequence. This study compared 12 complete genomes of L. casei/paracasei dairy-niche isolates and 7 genomes of L. casei/paracasei isolated from other habitats (i.e., corn silage, human intestine, sauerkraut, beef, congee). 
Phylogenetic tree construction and average nucleotide identity (ANI) metric showed a clustering of the two dairy L. casei strains ATCC393 and LC5, indicating a lower genetic relatedness in comparison to the other strains. Genomic analysis revealed a core of 313 genes shared by dairy and non-dairy Lactic Acid bacteria (LAB), within a pan-genome of 9,462 genes. Functional category analyses highlighted the evolutionary genes decay of dairy isolates, particularly considering carbohydrates and amino acids metabolisms. Specifically, dairy L. casei/paracasei strains lost the ability to metabolize myo-inositol and taurine (i.e., iol and tau gene clusters). However, gene acquisitions by dairy strains were also highlighted, mostly related to defense mechanisms and host-pathogen interactions (i.e., yueB, esaA, and sle1). This study aimed to be a preliminary investigation on dairy and non-dairy marker genes that could be further characterized for probiotics or food applications.}, } @article {pmid30425695, year = {2018}, author = {Hiller, NL and Sá-Leão, R}, title = {Puzzling Over the Pneumococcal Pangenome.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2580}, pmid = {30425695}, issn = {1664-302X}, abstract = {The Gram positive bacterium Streptococcus pneumoniae (pneumococcus) is a major human pathogen. It is a common colonizer of the human host, and in the nasopharynx, sinus, and middle ear it survives as a biofilm. This mode of growth is optimal for multi-strain colonization and genetic exchange. Over the last decades, the far-reaching use of antibiotics and the widespread implementation of pneumococcal multivalent conjugate vaccines have posed considerable selective pressure on pneumococci. This scenario provides an exceptional opportunity to study the evolution of the pangenome of a clinically important bacterium, and has the potential to serve as a case study for other species. 
The goal of this review is to highlight key findings in the studies of pneumococcal genomic diversity and plasticity.}, } @article {pmid30423098, year = {2018}, author = {Salazar, AN and Abeel, T}, title = {Approximate, simultaneous comparison of microbial genome architectures via syntenic anchoring of quiver representations.}, journal = {Bioinformatics (Oxford, England)}, volume = {34}, number = {17}, pages = {i732-i742}, pmid = {30423098}, issn = {1367-4811}, mesh = {*Genome, Microbial ; Software ; *Synteny ; }, abstract = {MOTIVATION: A long-standing limitation in comparative genomic studies is the dependency on a reference genome, which hinders the spectrum of genetic diversity that can be identified across a population of organisms. This is especially true in the microbial world where genome architectures can significantly vary. There is therefore a need for computational methods that can simultaneously analyze the architectures of multiple genomes without introducing bias from a reference.

RESULTS: In this article, we present Ptolemy: a novel method for studying the diversity of genome architectures—such as structural variation and pan-genomes—across a collection of microbial assemblies without the need of a reference. Ptolemy is a 'top-down' approach to compare whole genome assemblies. Genomes are represented as labeled multi-directed graphs—known as quivers—which are then merged into a single, canonical quiver by identifying 'gene anchors' via synteny analysis. The canonical quiver represents an approximate, structural alignment of all genomes in a given collection encoding structural variation across (sub-) populations within the collection. We highlight various applications of Ptolemy by analyzing structural variation and the pan-genomes of different datasets composed of Mycobacterium, Saccharomyces, Escherichia and Shigella species. Our results show that Ptolemy is flexible and can handle both conserved and highly dynamic genome architectures. Ptolemy is user-friendly—requires only FASTA-formatted assembly along with a corresponding GFF-formatted file—and resource-friendly—can align 24 genomes in ∼10 mins with four CPUs and <2 GB of RAM.

Github: https://github.com/AbeelLab/ptolemy.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30421490, year = {2019}, author = {Biessy, A and Novinscak, A and Blom, J and Léger, G and Thomashow, LS and Cazorla, FM and Josic, D and Filion, M}, title = {Diversity of phytobeneficial traits revealed by whole-genome analysis of worldwide-isolated phenazine-producing Pseudomonas spp.}, journal = {Environmental microbiology}, volume = {21}, number = {1}, pages = {437-455}, doi = {10.1111/1462-2920.14476}, pmid = {30421490}, issn = {1462-2920}, support = {//Natural Sciences and Engineering Research Council of Canada/International ; //New Brunswick Innovation Foundation/International ; }, mesh = {Genome, Bacterial/genetics ; Phenazines/metabolism ; Phenotype ; Phylogeny ; Plant Development/*physiology ; Plants/genetics/*microbiology ; Pseudomonas fluorescens/*genetics/*metabolism ; Rhizosphere ; Siderophores/metabolism ; Symbiosis/genetics/physiology ; Whole Genome Sequencing ; }, abstract = {Plant-beneficial Pseudomonas spp. competitively colonize the rhizosphere and display plant-growth promotion and/or disease-suppression activities. Some strains within the P. fluorescens species complex produce phenazine derivatives, such as phenazine-1-carboxylic acid. These antimicrobial compounds are broadly inhibitory to numerous soil-dwelling plant pathogens and play a role in the ecological competence of phenazine-producing Pseudomonas spp. We assembled a collection encompassing 63 strains representative of the worldwide diversity of plant-beneficial phenazine-producing Pseudomonas spp. In this study, we report the sequencing of 58 complete genomes using PacBio RS II sequencing technology. Distributed among four subgroups within the P. fluorescens species complex, the diversity of our collection is reflected by the large pangenome which accounts for 25 413 protein-coding genes. 
We identified genes and clusters encoding for numerous phytobeneficial traits, including antibiotics, siderophores and cyclic lipopeptides biosynthesis, some of which were previously unknown in these microorganisms. Finally, we gained insight into the evolutionary history of the phenazine biosynthetic operon. Given its diverse genomic context, it is likely that this operon was relocated several times during Pseudomonas evolution. Our findings acknowledge the tremendous diversity of plant-beneficial phenazine-producing Pseudomonas spp., paving the way for comparative analyses to identify new genetic determinants involved in biocontrol, plant-growth promotion and rhizosphere competence.}, } @article {pmid30418478, year = {2019}, author = {Yang, ZK and Luo, H and Zhang, Y and Wang, B and Gao, F}, title = {Pan-genomic analysis provides novel insights into the association of E.coli with human host and its minimal genome.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {12}, pages = {1987-1991}, doi = {10.1093/bioinformatics/bty938}, pmid = {30418478}, issn = {1367-4811}, mesh = {*Escherichia coli ; *Genome, Bacterial ; Genomics ; Humans ; Phylogeny ; Software ; }, abstract = {MOTIVATION: Bacteria can usually acquire certain advantageous genes that enable the bacteria to adapt to rapidly changing niches, thereby leading to a wide range of intraspecific genome content and genetic redundancy. The minimal genome of Escherichia coli, which is the most important bacterial species, and the association between E.coli and its human host are worthy of further exploration.

RESULTS: We used gene prediction and phylogenetic analysis to reveal a rich phylogenetic diversity among 491 E.coli strains and to reveal substantial differences between these strains with respect to gene number and genome length. We used pan-genomic analysis to accurately identify 867 core genes, in which only 243 genes are shared by essential genes. This analysis revealed that core genes mainly provide essential functions to the basic lifestyle of E.coli, and accessory genes are likely to confer selective advantages such as niche adaptation or the ability to colonize specific hosts. By association analysis, we found that E.coli strains in non-human hosts may more easily utilize foreign genetic materials to adapt to their surroundings, but the population in human hosts has higher demands for the control of population density, indicating that highly accurate quorum-sensing behavior is very important for harmony between E.coli and its human host. By considering core genes and previous deletions together, we proposed a potential direction for further reduction of the E.coli genome.

The data, analysis process and detailed information on software tools used in this study are all available in the supplementary material.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30411512, year = {2019}, author = {Nanayakkara, BS and O'Brien, CL and Gordon, DM}, title = {Diversity and distribution of Klebsiella capsules in Escherichia coli.}, journal = {Environmental microbiology reports}, volume = {11}, number = {2}, pages = {107-117}, doi = {10.1111/1758-2229.12710}, pmid = {30411512}, issn = {1758-2229}, mesh = {Australia ; Bacterial Capsules/*genetics ; DNA, Bacterial/genetics ; Escherichia coli/*classification/*cytology/genetics/growth & development ; Fresh Water/microbiology ; Genetic Variation ; Genome, Bacterial/genetics ; Klebsiella/*cytology/genetics ; O Antigens/genetics ; *Phylogeny ; Sequence Alignment ; Serogroup ; }, abstract = {E. coli strains responsible for elevated counts (blooms) in freshwater reservoirs in Australia carry a capsule originating from Klebsiella. The occurrence of Klebsiella capsules in E. coli was about 7% overall and 23 different capsule types were detected. Capsules were observed in strains from phylogroups A, B1 and C, but were absent from phylogroup B2, D, E and F strains. In general, few A, B1 or C lineages were capsule-positive, but when a lineage was encapsulated multiple different capsule types were present. All Klebsiella capsule-positive strains were of serogroups O8, O9 and O89. Regardless of the phylogroup, O9 strains were more likely to be capsule-positive than O8 strains. Given the sequence similarity, it appears that both the capsule region and the O-antigen gene region are transferred to E. coli from Klebsiella as a single block via horizontal gene transfer events. Pan genome analysis indicated that there were only modest differences between encapsulated and non-encapsulated strains belonging to phylogroup A. 
The possession of a Klebsiella capsule, but not the type of capsule, is likely a key determinant of the bloom status of a strain.}, } @article {pmid30407532, year = {2019}, author = {Portwood, JL and Woodhouse, MR and Cannon, EK and Gardiner, JM and Harper, LC and Schaeffer, ML and Walsh, JR and Sen, TZ and Cho, KT and Schott, DA and Braun, BL and Dietze, M and Dunfee, B and Elsik, CG and Manchanda, N and Coe, E and Sachs, M and Stinard, P and Tolbert, J and Zimmerman, S and Andorf, CM}, title = {MaizeGDB 2018: the maize multi-genome genetics and genomics database.}, journal = {Nucleic acids research}, volume = {47}, number = {D1}, pages = {D1146-D1154}, pmid = {30407532}, issn = {1362-4962}, mesh = {Computational Biology/*methods ; *Databases, Genetic ; Gene Expression Regulation, Plant ; Genetic Variation ; Genome, Plant/*genetics ; Genomics/*methods ; Information Storage and Retrieval/methods ; Internet ; Polymorphism, Single Nucleotide ; Proteomics/methods ; User-Computer Interface ; Zea mays/*genetics/metabolism ; }, abstract = {Since its 2015 update, MaizeGDB, the Maize Genetics and Genomics database, has expanded to support the sequenced genomes of many maize inbred lines in addition to the B73 reference genome assembly. Curation and development efforts have targeted high quality datasets and tools to support maize trait analysis, germplasm analysis, genetic studies, and breeding. MaizeGDB hosts a wide range of data including recent support of new data types including genome metadata, RNA-seq, proteomics, synteny, and large-scale diversity. To improve access and visualization of data types several new tools have been implemented to: access large-scale maize diversity data (SNPversity), download and compare gene expression data (qTeller), visualize pedigree data (Pedigree Viewer), link genes with phenotype images (MaizeDIG), and enable flexible user-specified queries to the MaizeGDB database (MaizeMine). 
MaizeGDB also continues to be the community hub for maize research, coordinating activities and providing technical support to the maize research community. Here we report the changes MaizeGDB has made within the last three years to keep pace with recent software and research advances, as well as the pan-genomic landscape that cheaper and better sequencing technologies have made possible. MaizeGDB is accessible online at https://www.maizegdb.org.}, } @article {pmid30405580, year = {2018}, author = {Al-Bassam, MM and Haist, J and Neumann, SA and Lindenberg, S and Tschowri, N}, title = {Expression Patterns, Genomic Conservation and Input Into Developmental Regulation of the GGDEF/EAL/HD-GYP Domain Proteins in Streptomyces.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2524}, pmid = {30405580}, issn = {1664-302X}, abstract = {To proliferate, antibiotic-producing Streptomyces undergo a complex developmental transition from vegetative growth to the production of aerial hyphae and spores. This morphological switch is controlled by the signaling molecule cyclic bis-(3',5') di-guanosine-mono-phosphate (c-di-GMP) that binds to the master developmental regulator, BldD, leading to repression of key sporulation genes during vegetative growth. However, a systematical analysis of all the GGDEF/EAL/HD-GYP proteins that control c-di-GMP levels in Streptomyces is still lacking. Here, we have FLAG-tagged all 10 c-di-GMP turnover proteins in Streptomyces venezuelae and characterized their expression patterns throughout the life cycle, revealing that the diguanylate cyclase (DGC) CdgB and the phosphodiesterase (PDE) RmdB are the most abundant GGDEF/EAL proteins. Moreover, we have deleted all the genes coding for c-di-GMP turnover enzymes individually and analyzed morphogenesis of the mutants in macrocolonies. 
We show that the composite GGDEF-EAL protein CdgC is an active DGC and that deletion of the DGCs cdgB and cdgC enhance sporulation whereas deletion of the PDEs rmdA and rmdB delay development in S. venezuelae. By comparing the pan genome of 93 fully sequenced Streptomyces species we show that the DGCs CdgA, CdgB, and CdgC, and the PDE RmdB represent the most conserved c-di-GMP-signaling proteins in the genus Streptomyces.}, } @article {pmid30401719, year = {2019}, author = {Garinet, S and Pignot, G and Vacher, S and Le Goux, C and Schnitzler, A and Chemlali, W and Sirab, N and Barry Delongchamps, N and Zerbib, M and Sibony, M and Allory, Y and Damotte, D and Bieche, I}, title = {High Prevalence of a Hotspot of Noncoding Somatic Mutations in Intron 6 of GPR126 in Bladder Cancer.}, journal = {Molecular cancer research : MCR}, volume = {17}, number = {2}, pages = {469-475}, doi = {10.1158/1541-7786.MCR-18-0363}, pmid = {30401719}, issn = {1557-3125}, mesh = {Adult ; Aged ; Aged, 80 and over ; Female ; Humans ; Introns ; Male ; Middle Aged ; *Mutation ; Prevalence ; Receptors, G-Protein-Coupled/*genetics/metabolism ; Urinary Bladder Neoplasms/*genetics/metabolism/pathology ; }, abstract = {Numerous pangenomic studies identified protein-coding genes and signaling pathways involved in bladder carcinogenesis. However, noncoding somatic alterations remain unexplored. A recent study revealed a mutational hotspot in intron 6 of GPR126 gene in 2.7% of a large breast cancer series. As GPR126 is highly expressed in bladder tissues, we investigated here the prevalence and the prognostic significance of these mutations in bladder cancer. We analyzed a cohort of 103 bladder cancers including 44 nonmuscle-invasive bladder cancers (NMIBC) and 59 muscle-invasive bladder cancers (MIBC). GPR126 mutations were analyzed by high-resolution melting and Sanger sequencing, and GPR126 expression levels were assessed using real-time quantitative RT-PCR. 
In NMIBC, somatic GPR126 noncoding mutations occurred in 47.7% of samples and were negatively associated with GPR126 mRNA levels. GPR126 mutations had higher frequencies in nonsmoker patients and were associated with a prior history of NMIBC. GPR126 overexpression was detected in 70.5% of samples. GPR126 mutation and overexpression status were not associated with outcome. In MIBC, somatic GPR126 mutations occurred in 44.1% of samples. Mutations were more frequent in females. GPR126 overexpression was detected in 27.1% of the sample. A trend toward significance was observed between GPR126 overexpression and better outcome. We identified the second most frequent mutational hotspot after TERT promoter (∼70%) in bladder cancer, with a mutation rate of approximately 50%. IMPLICATIONS: The GPR126 intronic mutational hotspot could be a promising clinical biomarker candidate to monitor tumor burden using circulating tumor DNA in bladder cancer.}, } @article {pmid30391557, year = {2019}, author = {Pinto, M and González-Díaz, A and Machado, MP and Duarte, S and Vieira, L and Carriço, JA and Marti, S and Bajanca-Lavado, MP and Gomes, JP}, title = {Insights into the population structure and pan-genome of Haemophilus influenzae.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {67}, number = {}, pages = {126-135}, doi = {10.1016/j.meegid.2018.10.025}, pmid = {30391557}, issn = {1567-7257}, mesh = {Computational Biology ; Genetic Variation ; *Genome, Bacterial ; *Genomics/methods ; Haemophilus Infections/*microbiology ; Haemophilus influenzae/*genetics/pathogenicity ; Humans ; Phylogeny ; Polymorphism, Single Nucleotide ; Virulence/genetics ; Whole Genome Sequencing ; }, abstract = {The human-restricted bacterium Haemophilus influenzae is responsible for respiratory infections in both children and adults. 
While colonization begins in the upper airways, it can spread throughout the respiratory tract potentially leading to invasive infections. Although the spread of H. influenzae serotype b (Hib) has been prevented by vaccination, the emergence of infections by other serotypes as well as by non-typeable isolates (NTHi) have been observed, prompting the need for novel prevention strategies. Here, we aimed to study the population structure of H. influenzae and to get some insights into its pan-genome. We studied 305 H. influenzae strains, enrolling 217 publicly available genomes, as well as 88 newly sequenced H. influenzae invasive strains isolated in Portugal, spanning a 24-year period. NTHi isolates presented a core-SNP-based genetic diversity about 10-fold higher than the one observed for Hib. The analysis of key factors involved in pathogenesis, such as lipooligosaccharides, hemagglutinating pili and High Molecular Weight-adhesins, suggests that NTHi shape its virulence repertoire, either by acquisition and loss of genes or by SNP-based diversification, likely towards host immune evasion and persistence. Discrete NTHi subpopulations structures are proposed based on core-genome supported with 17 candidate genetic markers identified in the accessory genome. Additionally, this study provides two bioinformatics tools for in silico rapid identification of H. influenzae serotypes and NTHi clades previously proposed, obviating laboratory-based demanding procedures. The present study constitutes an important genomic framework that could lay way for future studies on the genetic determinants underlying invasiveness and disease and population structure of H. 
influenzae.}, } @article {pmid30386310, year = {2018}, author = {Wüthrich, D and Irmler, S and Berthoud, H and Guggenbühl, B and Eugster, E and Bruggmann, R}, title = {Conversion of Methionine to Cysteine in Lactobacillus paracasei Depends on the Highly Mobile cysK-ctl-cysE Gene Cluster.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2415}, pmid = {30386310}, issn = {1664-302X}, abstract = {Milk and dairy products are rich in nutrients and are therefore habitats for various microbiomes. However, the composition of nutrients can be quite diverse, in particular among the sulfur containing amino acids. In milk, methionine is present in a 25-fold higher abundance than cysteine. Interestingly, a fraction of strains of the species L. paracasei - a flavor-enhancing adjunct culture species - can grow in medium with methionine as the sole sulfur source. In this study, we focus on genomic and evolutionary aspects of sulfur dependence in L. paracasei strains. From 24 selected L. paracasei strains, 16 strains can grow in medium with methionine as sole sulfur source. We sequenced these strains to perform gene-trait matching. We found that one gene cluster - consisting of a cysteine synthase, a cystathionine lyase, and a serine acetyltransferase - is present in all strains that grow in medium with methionine as sole sulfur source. In contrast, strains that depend on other sulfur sources do not have this gene cluster. We expanded the study and searched for this gene cluster in other species and detected it in the genomes of many bacteria species used in the food production. The comparison to these species showed that two different versions of the gene cluster exist in L. paracasei which were likely gained in two distinct events of horizontal gene transfer. Additionally, the comparison of 62 L. 
paracasei genomes and the two versions of the gene cluster revealed that this gene cluster is mobile within the species.}, } @article {pmid30383521, year = {2018}, author = {Fleshman, A and Mullins, K and Sahl, J and Hepp, C and Nieto, N and Wiggins, K and Hornstra, H and Kelly, D and Chan, TC and Phetsouvanh, R and Dittrich, S and Panyanivong, P and Paris, D and Newton, P and Richards, A and Pearson, T}, title = {Corrigendum: Comparative pan-genomic analyses of Orientia tsutsugamushi reveal an exceptional model of bacterial evolution driving genomic diversity.}, journal = {Microbial genomics}, volume = {4}, number = {10}, pages = {}, doi = {10.1099/mgen.0.000230}, pmid = {30383521}, issn = {2057-5858}, } @article {pmid30377376, year = {2018}, author = {Franzosa, EA and McIver, LJ and Rahnavard, G and Thompson, LR and Schirmer, M and Weingart, G and Lipson, KS and Knight, R and Caporaso, JG and Segata, N and Huttenhower, C}, title = {Species-level functional profiling of metagenomes and metatranscriptomes.}, journal = {Nature methods}, volume = {15}, number = {11}, pages = {962-968}, pmid = {30377376}, issn = {1548-7105}, support = {P30 DK043351/DK/NIDDK NIH HHS/United States ; U54 DE023798/DE/NIDCR NIH HHS/United States ; }, mesh = {Bacteria/*classification/*genetics/isolation & purification ; Bacterial Proteins/*genetics/metabolism ; *Gene Expression Profiling ; High-Throughput Nucleotide Sequencing ; Humans ; *Metagenome ; Microbiota ; *Software ; Species Specificity ; *Transcriptome ; }, abstract = {Functional profiles of microbial communities are typically generated using comprehensive metagenomic or metatranscriptomic sequence read searches, which are time-consuming, prone to spurious mapping, and often limited to community-level quantification. We developed HUMAnN2, a tiered search strategy that enables fast, accurate, and species-resolved functional profiling of host-associated and environmental communities. 
HUMAnN2 identifies a community's known species, aligns reads to their pangenomes, performs translated search on unclassified reads, and finally quantifies gene families and pathways. Relative to pure translated search, HUMAnN2 is faster and produces more accurate gene family profiles. We applied HUMAnN2 to study clinal variation in marine metabolism, ecological contribution patterns among human microbiome pathways, variation in species' genomic versus transcriptional contributions, and strain profiling. Further, we introduce 'contributional diversity' to explain patterns of ecological assembly across different microbial community types.}, } @article {pmid30376573, year = {2018}, author = {Nourinejhad Zarghani, S and Hily, JM and Glasa, M and Marais, A and Wetzel, T and Faure, C and Vigne, E and Velt, A and Lemaire, O and Boursiquot, JM and Okic, A and Ruiz-Garcia, AB and Olmos, A and Lacombe, T and Candresse, T}, title = {Grapevine virus T diversity as revealed by full-length genome sequences assembled from high-throughput sequence data.}, journal = {PloS one}, volume = {13}, number = {10}, pages = {e0206010}, pmid = {30376573}, issn = {1932-6203}, mesh = {Base Sequence ; DNA, Viral/genetics ; *Genetic Variation ; *Genome, Viral ; High-Throughput Nucleotide Sequencing/*methods ; Phylogeny ; Plant Viruses/*genetics/isolation & purification ; RNA, Viral/genetics ; Recombination, Genetic/genetics ; Transcriptome/genetics ; Vitis/*virology ; }, abstract = {RNASeq or double-stranded RNA based approaches allowed the reconstruction of a total of 9 full-length or near full-length genomes of the recently discovered grapevine virus T (GVT). In addition, datamining of publicly available grapevine RNASeq transcriptome data allowed the reconstruction of a further 14 GVT genomes from five grapevine sources. Together with four GVT sequences available in Genbank, these novel sequences were used to analyse GVT diversity. 
GVT shows a very limited amount of indels variation but a high level of nucleotide and aminoacid polymorphism. This level is comparable to that shown in the closely related grapevine rupestris stem pitting-associated virus (GRSPaV). Further analyses showed that GVT mostly evolves under conservative selection pressure and that recombination has contributed to its evolutionary history. Phylogenetic analyses allow to identify at least seven clearly separated groups of GVT isolates. Analysis of the only reported PCR GVT-specific detection primer pair indicates that it is likely to fail to amplify some GVT isolates. Taken together these results point at the distinctiveness of GVT but also at the many points it shares with GRSPaV. They constitute the first pan-genomic analysis of the diversity of this novel virus.}, } @article {pmid30372443, year = {2018}, author = {Goyal, A}, title = {Metabolic adaptations underlying genome flexibility in prokaryotes.}, journal = {PLoS genetics}, volume = {14}, number = {10}, pages = {e1007763}, pmid = {30372443}, issn = {1553-7404}, mesh = {Adaptation, Physiological/*genetics ; Biological Evolution ; Computational Biology/methods ; Evolution, Molecular ; Genome, Archaeal/genetics ; Genome, Bacterial/genetics ; Metabolic Networks and Pathways/*genetics ; Phylogeny ; Prokaryotic Cells/*metabolism ; }, abstract = {Even across genomes of the same species, prokaryotes exhibit remarkable flexibility in gene content. We do not know whether this flexible or "accessory" content is mostly neutral or adaptive, largely due to the lack of explicit analyses of accessory gene function. Here, across 96 diverse prokaryotic species, I show that a considerable fraction (~40%) of accessory genomes harbours beneficial metabolic functions. These functions take two forms: (1) they significantly expand the biosynthetic potential of individual strains, and (2) they help reduce strain-specific metabolic auxotrophies via intra-species metabolic exchanges. 
I find that the potential of both these functions increases with increasing genome flexibility. Together, these results are consistent with a significant adaptive role for prokaryotic pangenomes.}, } @article {pmid30371758, year = {2019}, author = {Franz, E and Rotariu, O and Lopes, BS and MacRae, M and Bono, JL and Laing, C and Gannon, V and Söderlund, R and van Hoek, AHAM and Friesema, I and French, NP and George, T and Biggs, PJ and Jaros, P and Rivas, M and Chinen, I and Campos, J and Jernberg, C and Gobius, K and Mellor, GE and Chandry, PS and Perez-Reche, F and Forbes, KJ and Strachan, NJC}, title = {Phylogeographic Analysis Reveals Multiple International Transmission Events Have Driven the Global Emergence of Escherichia coli O157:H7.}, journal = {Clinical infectious diseases : an official publication of the Infectious Diseases Society of America}, volume = {69}, number = {3}, pages = {428-437}, doi = {10.1093/cid/ciy919}, pmid = {30371758}, issn = {1537-6591}, mesh = {Animals ; Australia/epidemiology ; Canada/epidemiology ; Cattle ; Escherichia coli Infections/*epidemiology/*transmission ; Escherichia coli O157/pathogenicity ; Escherichia coli Proteins/genetics ; Europe/epidemiology ; Feces/microbiology ; *Global Health ; Humans ; *Internationality ; Phylogeny ; Phylogeography ; Polymorphism, Single Nucleotide ; Shiga-Toxigenic Escherichia coli/pathogenicity ; United States/epidemiology ; Whole Genome Sequencing ; }, abstract = {BACKGROUND: Shiga toxin-producing Escherichia coli (STEC) O157:H7 is a zoonotic pathogen that causes numerous food and waterborne disease outbreaks. It is globally distributed, but its origin and the temporal sequence of its geographical spread are unknown.

METHODS: We analyzed whole-genome sequencing data of 757 isolates from 4 continents, and performed a pan-genome analysis to identify the core genome and, from this, extracted single-nucleotide polymorphisms. A timed phylogeographic analysis was performed on a subset of the isolates to investigate its worldwide spread.

RESULTS: The common ancestor of this set of isolates occurred around 1890 (1845-1925) and originated from the Netherlands. Phylogeographic analysis identified 34 major transmission events. The earliest were predominantly intercontinental, moving from Europe to Australia around 1937 (1909-1958), to the United States in 1941 (1921-1962), to Canada in 1960 (1943-1979), and from Australia to New Zealand in 1966 (1943-1982). This pre-dates the first reported human case of E. coli O157:H7, which was in 1975 from the United States.

CONCLUSIONS: Inter- and intra-continental transmission events have resulted in the current international distribution of E. coli O157:H7, and it is likely that these events were facilitated by animal movements (eg, Holstein Friesian cattle). These findings will inform policy on action that is crucial to reduce the further spread of E. coli O157:H7 and other (emerging) STEC strains globally.}, } @article {pmid30366996, year = {2019}, author = {De Filippis, F and La Storia, A and Villani, F and Ercolini, D}, title = {Strain-Level Diversity Analysis of Pseudomonas fragi after In Situ Pangenome Reconstruction Shows Distinctive Spoilage-Associated Metabolic Traits Clearly Selected by Different Storage Conditions.}, journal = {Applied and environmental microbiology}, volume = {85}, number = {1}, pages = {}, pmid = {30366996}, issn = {1098-5336}, mesh = {Food Packaging/*methods ; Food Storage/*methods ; *Genes, Bacterial ; Genome, Bacterial ; Metabolic Networks and Pathways ; Metagenome ; Metagenomics ; Pseudomonas fragi/*genetics/metabolism ; Red Meat/*microbiology ; }, abstract = {Microbial spoilage of raw meat causes huge economic losses every year. An understanding of the microbial ecology associated with the spoilage and its dynamics during the refrigerated storage of meat can help in preventing and delaying the spoilage-related activities. The raw meat microbiota is usually complex, but only a few members will develop during storage and cause spoilage upon the pressure from several external factors, such as temperature and oxygen availability. We characterized the metagenome of beef packed aerobically or under vacuum during refrigerated storage to explore how different packaging conditions may influence the microbial composition and potential spoilage-associated activities. Different population dynamics and spoilage-associated genomic repertoires occurred in beef stored aerobically or in vacuum packaging. 
Moreover, the pangenomes of Pseudomonas fragi strains were extracted from metagenomes. We demonstrated the presence of specific, storage-driven strain-level profiles of Pseudomonas fragi, characterized by different gene repertoires and thus potentially able to act differently during meat spoilage. The results provide new knowledge on strain-level microbial ecology associated with meat spoilage and may be of value for future strategies of spoilage prevention and food waste reduction. IMPORTANCE: This work provides insights on the mechanisms involved in raw beef spoilage during refrigerated storage and on the selective pressure exerted by the packaging conditions. We highlighted the presence of different microbial metagenomes during the spoilage of beef packaged aerobically or under vacuum. The packaging condition was able to select specific Pseudomonas fragi strains with distinctive genomic repertoires. This study may help in deciphering the behavior of different biomes directly in situ in food and in understanding the specific contribution of different strains to food spoilage.}, } @article {pmid30359301, year = {2018}, author = {Hii, SYF and Ahmad, N and Hashim, R and Liow, YL and Abd Wahab, MA and Mohd Khalid, MKN}, title = {A SNP-based phylogenetic analysis of Corynebacterium diphtheriae in Malaysia.}, journal = {BMC research notes}, volume = {11}, number = {1}, pages = {760}, pmid = {30359301}, issn = {1756-0500}, support = {NMRR id: 16-1421-32070 (JPP-IMR: 16-056)//Ministry of Health Malaysia/ ; }, mesh = {Corynebacterium diphtheriae/*classification/*genetics ; Diphtheria/*microbiology/*prevention & control ; *Diphtheria Toxoid/pharmacology ; Genome, Bacterial/*genetics ; Humans ; Malaysia ; *Phylogeny ; }, abstract = {OBJECTIVE: There is a lack of study in Corynebacterium diphtheriae isolates in Malaysia. The alarming surge of cases in year 2016 led us to evaluate the local clinical C. diphtheriae strains in Malaysia. 
We conducted single nucleotide polymorphism phylogenetic analysis on the core and pan-genome as well as toxin and diphtheria toxin repressor (DtxR) genes of Malaysian C. diphtheriae isolates from the year 1986-2016.

RESULTS: The comparison between core and pan-genomic comparison showed variation in the distribution of C. diphtheriae. The local isolates portrayed a heterogenous trait and a close relationship between Malaysia's and Belarus's, Africa's and India's strains were observed. A toxigenic C. diphtheriae clone was noted to be circulating in the Malaysian population for nearly 30 years and from our study, the non-toxigenic and toxigenic C. diphtheriae strains can be differentiated significantly into two large clusters, A and B respectively. Analysis against vaccine strain, PW8 portrayed that the amino acid composition of toxin and DtxR in Malaysia's local strains are well-conserved and there was no functional defect noted. Hence, the change in efficacy of the currently used toxoid vaccine is unlikely to occur.}, } @article {pmid30356952, year = {2018}, author = {Cruaud, A and Groussier, G and Genson, G and Sauné, L and Polaszek, A and Rasplus, JY}, title = {Pushing the limits of whole genome amplification: successful sequencing of RADseq library from a single microhymenopteran (Chalcidoidea, Trichogramma).}, journal = {PeerJ}, volume = {6}, number = {}, pages = {e5640}, pmid = {30356952}, issn = {2167-8359}, abstract = {A major obstacle to high-throughput genotyping of microhymenoptera is their small size. As species are difficult to discriminate, and because complexes may exist, the sequencing of a pool of specimens is hazardous. Thus, one should be able to sequence pangenomic markers (e.g., RADtags) from a single specimen. To date, whole genome amplification (WGA) prior to library construction is still a necessity as at most 10 ng of DNA can be obtained from single specimens (sometimes less). However, this amount of DNA is not compatible with manufacturer's requirements for commercial kits. 
Here we test the accuracy of the GenomiPhi kit V2 on Trichogramma wasps by comparing RAD libraries obtained from the WGA of single specimens (F0 and F1 generation, about 1 ng input DNA for the WGA (0.17-2.9 ng)) and a biological amplification of genomic material (the pool of the progeny of the F1 generation). Globally, we found that 99% of the examined loci (up to 48,189 for one of the crosses, 109 bp each) were compatible with the mode of reproduction of the studied model (haplodiploidy) and Mendelian inheritance of alleles. The remaining 1% (0.01% of the analysed nucleotides) could represent WGA bias or other experimental/analytical bias. This study shows that the multiple displacement amplification method on which the GenomiPhi kit relies, could also be of great help for the high-throughput genotyping of microhymenoptera used for biological control, or other organisms from which only a very small amount of DNA can be extracted, such as human disease vectors (e.g., sandflies, fleas, ticks etc.).}, } @article {pmid30353070, year = {2018}, author = {Subedi, D and Vijay, AK and Kohli, GS and Rice, SA and Willcox, M}, title = {Comparative genomics of clinical strains of Pseudomonas aeruginosa strains isolated from different geographic sites.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {15668}, pmid = {30353070}, issn = {2045-2322}, mesh = {Australia/epidemiology ; Drug Resistance, Bacterial ; Genetic Variation ; Genome, Bacterial ; Genomic Islands ; Genomics ; Humans ; India/epidemiology ; Phylogeny ; Phylogeography ; Polymorphism, Single Nucleotide ; Pseudomonas Infections/drug therapy/epidemiology/*microbiology ; Pseudomonas aeruginosa/drug effects/*genetics/isolation & purification ; }, abstract = {The large and complex genome of Pseudomonas aeruginosa, which consists of significant portions (up to 20%) of transferable genetic elements contributes to the rapid development of antibiotic resistance. 
The whole genome sequences of 22 strains isolated from eye and cystic fibrosis patients in Australia and India between 1992 and 2007 were used to compare genomic divergence and phylogenetic relationships as well as genes for antibiotic resistance and virulence factors. Analysis of the pangenome indicated a large variation in the size of accessory genome amongst 22 strains and the size of the accessory genome correlated with number of genomic islands, insertion sequences and prophages. The strains were diverse in terms of sequence type and dissimilar to that of global epidemic P. aeruginosa clones. Of the eye isolates, 62% clustered together within a single lineage. Indian eye isolates possessed genes associated with resistance to aminoglycoside, beta-lactams, sulphonamide, quaternary ammonium compounds, tetracycline, trimethoprims and chloramphenicols. These genes were, however, absent in Australian isolates regardless of source. Overall, our results provide valuable information for understanding the genomic diversity of P. aeruginosa isolated from two different infection types and countries.}, } @article {pmid30349509, year = {2018}, author = {Chaudhari, NM and Gautam, A and Gupta, VK and Kaur, G and Dutta, C and Paul, S}, title = {PanGFR-HM: A Dynamic Web Resource for Pan-Genomic and Functional Profiling of Human Microbiome With Comparative Features.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2322}, pmid = {30349509}, issn = {1664-302X}, abstract = {The conglomerate of microorganisms inhabiting various body-sites of human, known as the human microbiome, is one of the key determinants of human health and disease. Comprehensive pan-genomic and functional analysis approach for human microbiome components can enrich our understanding about impact of microbiome on human health. 
By utilizing this approach we developed PanGFR-HM (http://www.bioinfo.iicb.res.in/pangfr-hm/) - a novel dynamic web-resource that integrates genomic and functional characteristics of 1293 complete microbial genomes available from Human Microbiome Project. The resource allows users to explore genomic/functional diversity and genome-based phylogenetic relationships between human associated microbial genomes, not provided by any other resource. The key features implemented here include pan-genome and functional analysis of organisms based on taxonomy or body-site, and comparative analysis between groups of organisms. The first feature can also identify probable gene-loss events and significantly over/under represented KEGG/COG categories within pan-genome. The unique second feature can perform comparative genomic, functional and pathways analysis between 4 groups of microbes. The dynamic nature of this resource enables users to define parameters for orthologous clustering and to select any set of organisms for analysis. As an application for comparative feature of PanGFR-HM, we performed a comparative analysis with 67 Lactobacillus genomes isolated from human gut, oral cavity and urogenital tract, and therefore characterized the body-site specific genes, enzymes and pathways. 
Altogether, PanGFR-HM, being unique in its content and functionality, is expected to provide a platform for microbiome-based comparative functional and evolutionary genomics.}, } @article {pmid30348668, year = {2019}, author = {Johnson, TJ and Elnekave, E and Miller, EA and Munoz-Aguayo, J and Flores Figueroa, C and Johnston, B and Nielson, DW and Logue, CM and Johnson, JR}, title = {Phylogenomic Analysis of Extraintestinal Pathogenic Escherichia coli Sequence Type 1193, an Emerging Multidrug-Resistant Clonal Group.}, journal = {Antimicrobial agents and chemotherapy}, volume = {63}, number = {1}, pages = {}, pmid = {30348668}, issn = {1098-6596}, mesh = {Alleles ; Anti-Bacterial Agents/pharmacology ; Bacterial Capsules/chemistry/genetics/metabolism ; Biological Evolution ; Clone Cells ; Drug Resistance, Multiple, Bacterial/*genetics ; Escherichia coli Infections/drug therapy/microbiology ; Extraintestinal Pathogenic Escherichia coli/*classification/drug effects/genetics/isolation & purification ; Fluoroquinolones/pharmacology ; Genetic Loci ; *Genome, Bacterial ; Genotype ; Humans ; *Phylogeny ; Plasmids/*chemistry/metabolism ; Whole Genome Sequencing ; }, abstract = {The fluoroquinolone-resistant sequence type 1193 (ST1193) of Escherichia coli, from the ST14 clonal complex (STc14) within phylogenetic group B2, has appeared recently as an important cause of extraintestinal disease in humans. Although this emerging lineage has been characterized to some extent using conventional methods, it has not been studied extensively at the genomic level. Here, we used whole-genome sequence analysis to compare 355 ST1193 isolates with 72 isolates from other STs within STc14. Using core genome phylogeny, the ST1193 isolates formed a tightly clustered clade with many genotypic similarities, unlike ST14 isolates. 
All ST1193 isolates possessed the same set of three chromosomal mutations conferring fluoroquinolone resistance, carried the fimH64 allele, and were lactose non-fermenting. Analysis revealed an evolutionary progression from K1 to K5 capsular types and acquisition of an F-type virulence plasmid, followed by changes in plasmid structure congruent with genome phylogeny. In contrast, the numerous identified antimicrobial resistance genes were distributed incongruently with the underlying phylogeny, suggesting frequent gain or loss of the corresponding resistance gene cassettes despite retention of the presumed carrier plasmids. Pangenome analysis revealed gains and losses of genetic loci occurring during the transition from ST14 to ST1193 and from the K1 to K5 capsular types. Using time-scaled phylogenetic analysis, we estimated that current ST1193 clades first emerged approximately 25 years ago. Overall, ST1193 appears to be a recently emerged clone in which both stepwise and mosaic evolution have contributed to epidemiologic success.}, } @article {pmid30341495, year = {2019}, author = {Bettgenhaeuser, J and Krattinger, SG}, title = {Rapid gene cloning in cereals.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {132}, number = {3}, pages = {699-711}, pmid = {30341495}, issn = {1432-2242}, mesh = {Cloning, Molecular/*methods ; Edible Grain/*genetics ; Genes, Plant ; Mutation/genetics ; Phenotype ; Physical Chromosome Mapping ; }, abstract = {The large and complex genomes of many cereals hindered cloning efforts in the past. Advances in genomics now allow the rapid cloning of genes from humanity's most valuable crops. The past two decades were characterized by a genomics revolution that entailed profound changes to crop research, plant breeding, and agriculture. Today, high-quality reference sequences are available for all major cereal crop species. 
Large resequencing and pan-genome projects start to reveal a more comprehensive picture of the genetic makeup and the diversity among domesticated cereals and their wild relatives. These technological advancements will have a dramatic effect on dissecting genotype-phenotype associations and on gene cloning. In this review, we will highlight the status of the genomic resources available for various cereal crops and we will discuss their implications for gene cloning. A particular focus will be given to the cereal species barley and wheat, which are characterized by very large and complex genomes that have been inaccessible to rapid gene cloning until recently. With the advancements in genomics and the development of several rapid gene-cloning methods, it has now become feasible to tackle the cloning of most agriculturally important genes, even in wheat and barley.}, } @article {pmid30341451, year = {2019}, author = {Fraunhofer, ME and Geißler, AJ and Behr, J and Vogel, RF}, title = {Comparative Genomics of Lactobacillus brevis Reveals a Significant Plasmidome Overlap of Brewery and Insect Isolates.}, journal = {Current microbiology}, volume = {76}, number = {1}, pages = {37-47}, pmid = {30341451}, issn = {1432-0991}, support = {AiF 18194 N//German Ministry of Economics/ ; }, mesh = {Adaptation, Physiological ; Animals ; Beer/*microbiology ; Food Microbiology ; Genetic Variation/genetics ; Genome, Bacterial/*genetics ; Genomics ; Insecta/*microbiology ; Levilactobacillus brevis/classification/*genetics/isolation & purification ; Plasmids/*genetics ; }, abstract = {Lactobacillus (L.) brevis represents a versatile, ubiquitistic species of lactic acid bacteria, occurring in various foods, as well as plants and intestinal tracts. The ability to deal with considerably differing environmental conditions in the respective ecological niches implies a genomic adaptation to the particular requirements to use it as a habitat beyond a transient state. 
Given the isolation source, 24 L. brevis genomes were analyzed via comparative genomics to get a broad view of the genomic complexity and ecological versatility of this species. This analysis showed L. brevis being a genetically diverse species possessing a remarkably large pan genome. As anticipated, it proved difficult to draw a correlation between chromosomal settings and isolation source. However, on plasmidome level, brewery- and insect-derived strains grouped into distinct clusters, referable to a noteworthy gene sharing between both groups. The brewery-specific plasmidome is characterized by several genes, which support a life in the harsh environment beer, but 40% of the brewery plasmidome were found in insect-derived strains as well. This suggests a close interaction between these habitats. Further analysis revealed the presence of a truncated horC cluster version in brewery- and insect-associated strains. This disproves horC, the major contributor to survival in beer, as brewery isolate specific. We conclude that L. brevis does not perform rigorous chromosomal changes to live in different habitats. Rather it appears that the species retains a certain genetic diversity in the plasmidome and meets the requirements of a particular ecological niche with the acquisition of appropriate plasmids.}, } @article {pmid30335848, year = {2018}, author = {Mercante, JW and Caravas, JA and Ishaq, MK and Kozak-Muiznieks, NA and Raphael, BH and Winchell, JM}, title = {Genomic heterogeneity differentiates clinical and environmental subgroups of Legionella pneumophila sequence type 1.}, journal = {PloS one}, volume = {13}, number = {10}, pages = {e0206110}, pmid = {30335848}, issn = {1932-6203}, mesh = {Base Sequence ; Conserved Sequence ; *Disease Outbreaks ; Genetic Heterogeneity ; Genotype ; Humans ; Legionella pneumophila/*genetics ; Legionnaires' Disease/epidemiology/*microbiology ; Molecular Typing/*methods ; Phylogeny ; }, abstract = {Legionella spp. 
are the cause of a severe bacterial pneumonia known as Legionnaires' disease (LD). In some cases, current genetic subtyping methods cannot resolve LD outbreaks caused by common, potentially endemic L. pneumophila (Lp) sequence types (ST), which complicates laboratory investigations and environmental source attribution. In the United States (US), ST1 is the most prevalent clinical and environmental Lp sequence type. In order to characterize the ST1 population, we sequenced 289 outbreak and non-outbreak associated clinical and environmental ST1 and ST1-variant Lp strains from the US and, together with international isolate sequences, explored their genetic and geographic diversity. The ST1 population was highly conserved at the nucleotide level; 98% of core nucleotide positions were invariant and environmental isolates unassociated with human disease (n = 99) contained ~65% more nucleotide diversity compared to clinical-sporadic (n = 139) or outbreak-associated (n = 28) ST1 subgroups. The accessory pangenome of environmental isolates was also ~30-60% larger than other subgroups and was enriched for transposition and conjugative transfer-associated elements. Up to ~10% of US ST1 genetic variation could be explained by geographic origin, but considerable genetic conservation existed among strains isolated from geographically distant states and from different decades. 
These findings provide new insight into the ST1 population structure and establish a foundation for interpreting genetic relationships among ST1 strains; these data may also inform future analyses for improved outbreak investigations.}, } @article {pmid30333483, year = {2018}, author = {Kavvas, ES and Catoiu, E and Mih, N and Yurkovich, JT and Seif, Y and Dillon, N and Heckmann, D and Anand, A and Yang, L and Nizet, V and Monk, JM and Palsson, BO}, title = {Machine learning and structural analysis of Mycobacterium tuberculosis pan-genome identifies genetic signatures of antibiotic resistance.}, journal = {Nature communications}, volume = {9}, number = {1}, pages = {4306}, pmid = {30333483}, issn = {2041-1723}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; 1-U01-AI124316-01//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/International ; U01GM102098//U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences (NIGMS)/International ; }, mesh = {Drug Resistance, Bacterial/*genetics ; Gene Frequency ; *Genome, Bacterial ; *Machine Learning ; Mycobacterium tuberculosis/*genetics ; Selection, Genetic ; }, abstract = {Mycobacterium tuberculosis is a serious human pathogen threat exhibiting complex evolution of antimicrobial resistance (AMR). Accordingly, the many publicly available datasets describing its AMR characteristics demand disparate data-type analyses. Here, we develop a reference strain-agnostic computational platform that uses machine learning approaches, complemented by both genetic interaction analysis and 3D structural mutation-mapping, to identify signatures of AMR evolution to 13 antibiotics. This platform is applied to 1595 sequenced strains to yield four key results. First, a pan-genome analysis shows that M. tuberculosis is highly conserved with sequenced variation concentrated in PE/PPE/PGRS genes. 
Second, the platform corroborates 33 genes known to confer resistance and identifies 24 new genetic signatures of AMR. Third, 97 epistatic interactions across 10 resistance classes are revealed. Fourth, detailed structural analysis of these genes yields mechanistic bases for their selection. The platform can be used to study other human pathogens.}, } @article {pmid30326842, year = {2018}, author = {Zoledowska, S and Motyka-Pomagruk, A and Sledz, W and Mengoni, A and Lojkowska, E}, title = {High genomic variability in the plant pathogenic bacterium Pectobacterium parmentieri deciphered from de novo assembled complete genomes.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {751}, pmid = {30326842}, issn = {1471-2164}, support = {2014/14/M/NZ8/00501//Narodowe Centrum Nauki/ ; }, mesh = {Bacteriophages/physiology ; *Genetic Variation ; Genome, Bacterial/*genetics ; Genomics ; Molecular Sequence Annotation ; Pectobacterium/*genetics/virology ; Phenotype ; Plants/*microbiology ; }, abstract = {BACKGROUND: Pectobacterium parmentieri is a newly established species within the plant pathogenic family Pectobacteriaceae. Bacteria belonging to this species are causative agents of diseases in economically important crops (e.g. potato) in a wide range of different environmental conditions, encountered in Europe, North America, Africa, and New Zealand. Severe disease symptoms result from the activity of P. parmentieri virulence factors, such as plant cell wall degrading enzymes. Interestingly, we observe significant phenotypic differences among P. parmentieri isolates regarding virulence factors production and the abilities to macerate plants. To establish the possible genomic basis of these differences, we sequenced 12 genomes of P. parmentieri strains (10 isolated in Poland, 2 in Belgium) with the combined use of Illumina and PacBio approaches. 
De novo genome assembly was performed with the use of SPAdes software, while annotation was conducted by NCBI Prokaryotic Genome Annotation Pipeline.

RESULTS: The pan-genome study was performed on 15 genomes (12 de novo assembled and three reference strains: P. parmentieri CFBP 8475[T], P. parmentieri SCC3193, P. parmentieri WPP163). The pan-genome includes 3706 core genes, a high number of accessory (1468) genes, and numerous unique (1847) genes. We identified the presence of well-known genes encoding virulence factors in the core genome fraction, but some of them were located in the dispensable genome. A significant fraction of horizontally transferred genes, virulence-related gene duplications, as well as different CRISPR arrays were found, which can explain the observed phenotypic differences. Finally, we found also, for the first time, the presence of a plasmid in one of the tested P. parmentieri strains isolated in Poland.

CONCLUSIONS: We can hypothesize that a large number of the genes in the dispensable genome and significant genomic variation among P. parmentieri strains could be the basis of the potential wide host range and widespread diffusion of P. parmentieri. The obtained data on the structure and gene content of P. parmentieri strains enabled us to speculate on the importance of high genomic plasticity for P. parmentieri adaptation to different environments.}, } @article {pmid30315621, year = {2019}, author = {Yu, J and Golicz, AA and Lu, K and Dossa, K and Zhang, Y and Chen, J and Wang, L and You, J and Fan, D and Edwards, D and Zhang, X}, title = {Insight into the evolution and functional characteristics of the pan-genome assembly from sesame landraces and modern cultivars.}, journal = {Plant biotechnology journal}, volume = {17}, number = {5}, pages = {881-892}, pmid = {30315621}, issn = {1467-7652}, mesh = {Biological Evolution ; Chromosome Mapping ; Chromosomes, Plant/genetics ; Domestication ; Genes, Plant ; Genetic Variation ; Genome, Plant/*genetics ; Multigene Family/genetics ; Plant Breeding ; Sesamum/*genetics ; }, abstract = {Sesame (Sesamum indicum L.) is an important oil crop renowned for its high oil content and quality. Recently, genome assemblies for five sesame varieties including two landraces (S. indicum cv. Baizhima and Mishuozhima) and three modern cultivars (S. indicum var. Zhongzhi13, Yuzhi11 and Swetha), have become available providing a rich resource for comparative genomic analyses and gene discovery. Here, we employed a reference-assisted assembly approach to improve the draft assemblies of four of the sesame varieties. We then constructed a sesame pan-genome of 554.05 Mb. The pan-genome contained 26 472 orthologous gene clusters; 15 409 (58.21%) of them were core (present across all five sesame genomes), whereas the remaining 41.79% (11 063) clusters and the 15 890 variety-specific genes were dispensable. 
Comparisons between varieties suggest that modern cultivars from China and India display significant genomic variation. The gene families unique to the sesame modern cultivars contain genes mainly related to yield and quality, while those unique to the landraces contain genes involved in environmental adaptation. Comparative evolutionary analysis indicates that several genes involved in plant-pathogen interaction and lipid metabolism are under positive selection, which may be associated with sesame environmental adaption and selection for high seed oil content. This study of the sesame pan-genome provides insights into the evolution and genomic characteristics of this important oilseed and constitutes a resource for further sesame crop improvement.}, } @article {pmid30314447, year = {2018}, author = {Bobay, LM and Ochman, H}, title = {Factors driving effective population size and pan-genome evolution in bacteria.}, journal = {BMC evolutionary biology}, volume = {18}, number = {1}, pages = {153}, pmid = {30314447}, issn = {1471-2148}, support = {R35 GM118038/GM/NIGMS NIH HHS/United States ; R35GM118038/NH/NIH HHS/United States ; R01GM108657/NH/NIH HHS/United States ; }, mesh = {Archaea/genetics ; Bacteria/*genetics/growth & development ; *Evolution, Molecular ; Genome Size ; *Genome, Bacterial ; Phylogeny ; Population Density ; Recombination, Genetic/genetics ; }, abstract = {BACKGROUND: Knowledge of population-level processes is essential to understanding the efficacy of selection operating within a species. However, attempts at estimating effective population sizes (Ne) are particularly challenging in bacteria due to their extremely large census populations sizes, varying rates of recombination and arbitrary species boundaries.

RESULTS: In this study, we estimated Ne for 153 species (152 bacteria and one archaeon) defined under a common framework and found that ecological lifestyle and growth rate were major predictors of Ne; and that contrary to theoretical expectations, Ne was unaffected by recombination rate. Additionally, we found that Ne shapes the evolution and diversity of total gene repertoires of prokaryotic species.

CONCLUSION: Together, these results point to a new model of genome architecture evolution in prokaryotes, in which pan-genome sizes, not individual genome sizes, are governed by drift-barrier evolution.}, } @article {pmid30297045, year = {2019}, author = {Chun, BH and Kim, KH and Jeong, SE and Jeon, CO}, title = {Genomic and metabolic features of the Bacillus amyloliquefaciens group- B. amyloliquefaciens, B. velezensis, and B. siamensis- revealed by pan-genome analysis.}, journal = {Food microbiology}, volume = {77}, number = {}, pages = {146-157}, doi = {10.1016/j.fm.2018.09.001}, pmid = {30297045}, issn = {1095-9998}, mesh = {Anti-Infective Agents/metabolism ; Bacillus/*genetics/metabolism ; Bacillus amyloliquefaciens/*genetics/metabolism ; DNA, Bacterial ; *Genome, Bacterial ; *Genomics ; Metabolic Networks and Pathways/*genetics ; Multigene Family ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Secondary Metabolism/genetics ; Sequence Analysis, DNA ; Species Specificity ; Xanthine Oxidase/genetics ; }, abstract = {The genomic and metabolic features of the Bacillus amyloliquefaciens group comprising B. amyloliquefaciens, B. velezensis, and B. siamensis were investigated through a pan-genome analysis combined with an experimental verification of some of the functions identified. All B. amyloliquefaciens group genomes were retrieved from GenBank and their phylogenetic relatedness was subsequently investigated. Genome comparisons of B. amyloliquefaciens, B. siamensis, and B. velezensis showed that their genomic and metabolic features were similar; however species-specific features were also identified. Energy metabolism-related genes are more enriched in B. amyloliquefaciens, whereas secondary metabolite biosynthesis-related genes are enriched in B. velezensis. Compared to B. amyloliquefaciens and B. siamensis, B. 
velezensis harbors more genes in its core-genome which are involved in the biosynthesis of antimicrobial compounds, as well as genes involved in d-galacturonate and d-fructuronate metabolism. B. amyloliquefaciens, B. siamensis, and B. velezensis all harbor a xanthine oxidase gene cluster (xoABCDE) in their core-genomes that is involved in metabolizing xanthine and uric acid to glycine and oxalureate. A reconstruction of B. amyloliquefaciens group metabolic pathways using their individual pan-genomes revealed that the B. amyloliquefaciens group strains have the ability to metabolize diverse carbon sources aerobically, or anaerobically, and can produce various metabolites such as lactate, ethanol, acetate, CO2, xylitol, diacetyl, acetoin, and 2,3-butanediol. This study therefore provides insights into the genomic and metabolic features of the B. amyloliquefaciens group.}, } @article {pmid30291328, year = {2019}, author = {Podell, S and Blanton, JM and Neu, A and Agarwal, V and Biggs, JS and Moore, BS and Allen, EE}, title = {Pangenomic comparison of globally distributed Poribacteria associated with sponge hosts and marine particles.}, journal = {The ISME journal}, volume = {13}, number = {2}, pages = {468-481}, pmid = {30291328}, issn = {1751-7370}, support = {P01 ES021921/ES/NIEHS NIH HHS/United States ; }, mesh = {Animal Distribution ; Animals ; Bacteria/*genetics ; *Phylogeny ; Porifera/*microbiology ; RNA, Ribosomal, 16S/*genetics ; Sequence Analysis, DNA ; }, abstract = {Candidatus Poribacteria is a little-known bacterial phylum, previously characterized by partial genomes from a single sponge host, but never isolated in culture. We have reconstructed multiple genome sequences from four different sponge genera and compared them to recently reported, uncharacterized Poribacteria genomes from the open ocean, discovering shared and unique functional characteristics. 
Two distinct, habitat-linked taxonomic lineages were identified, designated Entoporibacteria (sponge-associated) and Pelagiporibacteria (free-living). These lineages differed in flagellar motility and chemotaxis genes unique to Pelagiporibacteria, and highly expanded families of restriction endonucleases, DNA methylases, transposases, CRISPR repeats, and toxin-antitoxin gene pairs in Entoporibacteria. Both lineages shared pathways for facultative anaerobic metabolism, denitrification, fermentation, organosulfur compound utilization, type IV pili, cellulosomes, and bacterial proteosomes. Unexpectedly, many features characteristic of eukaryotic host association were also shared, including genes encoding the synthesis of eukaryotic-like cell adhesion molecules, extracellular matrix digestive enzymes, phosphoinositol-linked membrane glycolipids, and exopolysaccharide capsules. Complete Poribacteria 16S rRNA gene sequences were found to contain multiple mismatches to "universal" 16S rRNA gene primer sets, substantiating concerns about potential amplification failures in previous studies. 
A newly designed primer set corrects these mismatches, enabling more accurate assessment of Poribacteria abundance in diverse marine habitats where it may have previously been overlooked.}, } @article {pmid30289342, year = {2020}, author = {Džunková, M and Moya, A and Chen, X and Kelly, C and D'Auria, G}, title = {Detection of mixed-strain infections by FACS and ultra-low input genome sequencing.}, journal = {Gut microbes}, volume = {11}, number = {3}, pages = {305-309}, pmid = {30289342}, issn = {1949-0984}, mesh = {Clostridioides difficile/classification/cytology/genetics/isolation & purification ; Clostridium Infections/diagnosis/microbiology ; Coinfection/*diagnosis/microbiology ; DNA, Bacterial/genetics ; Feces/microbiology ; *Flow Cytometry ; Genome, Bacterial/*genetics ; Humans ; Microbiological Techniques/*methods ; Microbiota/genetics ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {The epidemiological tracking of a bacterial outbreak may be jeopardized by the presence of multiple pathogenic strains in one patient. Nevertheless, this fact is not considered in most of the epidemiological studies and only one colony per patient is sequenced. On the other hand, the routine whole genome sequencing of many isolates from each patient would be costly and unnecessary, because the number of strains in a patient is never known a priori. In addition, the result would be biased by microbial culture conditions. Herein we propose an approach for detecting mixed-strain infection, providing C. difficile infection as an example. The cells of the target pathogenic species are collected from the bacterial suspension by the fluorescence activated cell sorting (FACS) and a shallow genome sequencing is performed. A modified sequencing library preparation protocol for low-input DNA samples can be used for low prevalence gut pathogens (< 0.1% of the total microbiome). 
This FACS-seq approach reduces diagnostics time (no culture is needed) and may promote discoveries of novel strains. Methodological details, possible issues and future directions for the sequencing of these natural pan-genomes are herein discussed.}, } @article {pmid30285620, year = {2018}, author = {Wright, ES and Baum, DA}, title = {Exclusivity offers a sound yet practical species criterion for bacteria despite abundant gene flow.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {724}, pmid = {30285620}, issn = {1471-2164}, mesh = {*Gene Flow ; Gene Transfer, Horizontal ; Genes, Bacterial/genetics ; Phylogeny ; Streptomycetaceae/*classification/*genetics ; }, abstract = {BACKGROUND: The question of whether bacterial species objectively exist has long divided microbiologists. A major source of contention stems from the fact that bacteria regularly engage in horizontal gene transfer (HGT), making it difficult to ascertain relatedness and draw boundaries between taxa. A natural way to define taxa is based on exclusivity of relatedness, which applies when members of a taxon are more closely related to each other than they are to any outsider. It is largely unknown whether exclusive bacterial taxa exist when averaging over the genome or are rare due to rampant hybridization.

RESULTS: Here, we analyze a collection of 701 genomes representing a wide variety of environmental isolates from the family Streptomycetaceae, whose members are competent at HGT. We find that the presence/absence of auxiliary genes in the pan-genome displays a hierarchical (tree-like) structure that correlates significantly with the genealogy of the core-genome. Moreover, we identified the existence of many exclusive taxa, although individual genes often contradict these taxa. These conclusions were supported by repeating the analysis on 1,586 genomes belonging to the genus Bacillus. However, despite confirming the existence of exclusive groups (taxa), we were unable to identify an objective threshold at which to assign the rank of species.

CONCLUSIONS: The existence of bacterial taxa is justified by considering average relatedness across the entire genome, as captured by exclusivity, but is rejected if one requires unanimous agreement of all parts of the genome. We propose using exclusivity to delimit taxa and conventional genome similarity thresholds to assign bacterial taxa to the species rank. This approach recognizes species that are phylogenetically meaningful, while also establishing some degree of comparability across species-ranked taxa in different bacterial clades.}, } @article {pmid30277499, year = {2018}, author = {Peng, Y and Tang, S and Wang, D and Zhong, H and Jia, H and Cai, X and Zhang, Z and Xiao, M and Yang, H and Wang, J and Kristiansen, K and Xu, X and Li, J}, title = {MetaPGN: a pipeline for construction and graphical visualization of annotated pangenome networks.}, journal = {GigaScience}, volume = {7}, number = {11}, pages = {}, pmid = {30277499}, issn = {2047-217X}, mesh = {Computational Biology/*methods ; Escherichia coli/genetics ; *Gene Regulatory Networks ; Genome/*genetics ; Genome, Bacterial/genetics ; Genomics/*methods ; Humans ; Metagenome/genetics ; Metagenomics/methods ; Reproducibility of Results ; Software ; }, abstract = {Pangenome analyses facilitate the interpretation of genetic diversity and evolutionary history of a taxon. However, there is an urgent and unmet need to develop new tools for advanced pangenome construction and visualization, especially for metagenomic data. Here, we present an integrated pipeline, named MetaPGN, for construction and graphical visualization of pangenome networks from either microbial genomes or metagenomes. Given either isolated genomes or metagenomic assemblies coupled with a reference genome of the targeted taxon, MetaPGN generates a pangenome in a topological network, consisting of genes (nodes) and gene-gene genomic adjacencies (edges) of which biological information can be easily updated and retrieved. 
MetaPGN also includes a self-developed Cytoscape plugin for layout of and interaction with the resulting pangenome network, providing an intuitive and interactive interface for full exploration of genetic diversity. We demonstrate the utility of MetaPGN by constructing Escherichia coli pangenome networks from five E. coli pathogenic strains and 760 human gut microbiomes, revealing extensive genetic diversity of E. coli within both isolates and gut microbial populations. With the ability to extract and visualize gene contents and gene-gene physical adjacencies of a specific taxon from large-scale metagenomic data, MetaPGN provides advantages in expanding pangenome analysis to uncultured microbial taxa.}, } @article {pmid30275399, year = {2018}, author = {Sharma, V and Mobeen, F and Prakash, T}, title = {Exploration of Survival Traits, Probiotic Determinants, Host Interactions, and Functional Evolution of Bifidobacterial Genomes Using Comparative Genomics.}, journal = {Genes}, volume = {9}, number = {10}, pages = {}, pmid = {30275399}, issn = {2073-4425}, abstract = {Members of the genus Bifidobacterium are found in a wide-range of habitats and are used as important probiotics. Thus, exploration of their functional traits at the genus level is of utmost significance. Besides, this genus has been demonstrated to exhibit an open pan-genome based on the limited number of genomes used in earlier studies. However, the number of genomes is a crucial factor for pan-genome calculations. We have analyzed the pan-genome of a comparatively larger dataset of 215 members of the genus Bifidobacterium belonging to different habitats, which revealed an open nature. The pan-genome for the 56 probiotic and human-gut strains of this genus, was also found to be open. The accessory- and unique-components of this pan-genome were found to be under the operation of Darwinian selection pressure. 
Further, their genome-size variation was predicted to be attributed to the abundance of certain functions carried by genomic islands, which are facilitated by insertion elements and prophages. In silico functional and host-microbe interaction analyses of their core-genome revealed significant genomic factors for niche-specific adaptations and probiotic traits. The core survival traits include stress tolerance, biofilm formation, nutrient transport, and Sec-secretion system, whereas the core probiotic traits are imparted by the factors involved in carbohydrate- and protein-metabolism and host-immunomodulations.}, } @article {pmid30258424, year = {2018}, author = {Marasini, D and Karki, AB and Buchheim, MA and Fakhr, MK}, title = {Phylogenetic Relatedness Among Plasmids Harbored by Campylobacter jejuni and Campylobacter coli Isolated From Retail Meats.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2167}, pmid = {30258424}, issn = {1664-302X}, abstract = {Campylobacter jejuni and Campylobacter coli are two of the major causes of foodborne illness. In this study, 29 plasmids isolated from 20 retail meat isolates of Campylobacter jejuni and Campylobacter coli were fully-sequenced individually or as a part of a whole genome sequencing approach. The fully-sequenced plasmids ranged in size from 3 to 119 kb. Molecular characterization of the sequenced plasmids was based on pangenomic analysis and types of genes present on these plasmids and similar ones from GenBank. The plasmids were categorized into four different groups. These groups include type-1 that consisted mainly of pTet plasmids with the tetO gene, type-2 plasmids commonly found in C. coli strains, type-3 which has pVir plasmids, and type-4 that consisted mainly of smaller plasmids. The type-2 plasmids were unique, common among C. coli strains, and carried several conjugative transfer genes. The type-2 plasmids were most similar to a plasmid from Helicobacter pullorum. 
Maximum parsimony analysis and NeighborNet analysis were used to assess the phylogenetic relatedness among the 29 plasmid sequences presented in this study in addition to the other 104 plasmid sequences of Campylobacter species available in GenBank to date. Results from MP analysis revealed multiple lineages among Campylobacter plasmids which was supported by NeighborNet analysis. Clustering of plasmids did not conform to species-specific clades which suggested an intra-species dissemination of plasmids among Campylobacter species. To our knowledge, this is the first extensive phylogenetic analysis of Campylobacter plasmids sequenced to date.}, } @article {pmid30257645, year = {2018}, author = {Awan, F and Dong, Y and Liu, J and Wang, N and Mushtaq, MH and Lu, C and Liu, Y}, title = {Comparative genome analysis provides deep insights into Aeromonas hydrophila taxonomy and virulence-related factors.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {712}, pmid = {30257645}, issn = {1471-2164}, support = {31372454//National Nature Science Foundation of China/ ; CX(17)2027//Independent Innovation Fund of Agricultural Science and Technology in Jiangsu Province/ ; D2017-3-1//Jiangsu fishery science and technology project/ ; }, mesh = {Aeromonas hydrophila/*classification/*genetics/pathogenicity ; Bacterial Proteins/*genetics ; Computational Biology ; Computer Simulation ; Drug Resistance, Bacterial ; Genome, Bacterial ; Molecular Typing ; Phylogeny ; Sequence Analysis, DNA/*methods ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: Aeromonas hydrophila is a potential zoonotic pathogen and primary fish pathogen. With overlapping characteristics, multiple isolates are often mislabelled and misclassified. Moreover, the potential pathogenic factors among the publicly available genomes in A. hydrophila strains of different origins have not yet been investigated.

RESULTS: To identify the valid strains of A. hydrophila and their pathogenic factors, we performed a pan-genomic study. It revealed that there were 13 mislabelled strains and 49 valid strains that were further verified by Average nucleotide identity (ANI), digital DNA-DNA hybridization (dDDH) and in silico multiple locus strain typing (MLST). Multiple numbers of phages were detected among the strains and among them Aeromonas phi 018 was frequently present. The diversity in type III secretion system (T3SS) and conservation of type II and type VI secretion systems (T2SS and T6SS, respectively) among all the strains are important to study for designing future strategies. The most prevalent antibiotic resistances were found to be beta-lactamase, polymyxin and colistin resistances. The comparative analyses of sequence type (ST) 251 and other ST groups revealed that there were higher numbers of virulence factors in ST-251 than in other STs group.

CONCLUSION: Publicly available genomes have 13 mislabelled organisms, and there are only 49 valid A. hydrophila strains. This valid pan-genome identifies multiple prophages that can be further utilized. Different A. hydrophila strains harbour multiple virulence factors and antibiotic resistance genes. Identification of such factors is important for designing future treatment regimes.}, } @article {pmid30257640, year = {2018}, author = {Sheikhizadeh Anari, S and de Ridder, D and Schranz, ME and Smit, S}, title = {Efficient inference of homologs in large eukaryotic pan-proteomes.}, journal = {BMC bioinformatics}, volume = {19}, number = {1}, pages = {340}, pmid = {30257640}, issn = {1471-2105}, support = {3184519600//Experimental Plant Sciences/ ; }, mesh = {Algorithms ; Brassicaceae/genetics ; Cluster Analysis ; Databases, Protein ; Eukaryota/*metabolism ; Genes, Plant ; Genome ; Genomics ; Humans ; Proteome/*metabolism ; Sequence Homology, Amino Acid ; Software ; }, abstract = {BACKGROUND: Identification of homologous genes is fundamental to comparative genomics, functional genomics and phylogenomics. Extensive public homology databases are of great value for investigating homology but need to be continually updated to incorporate new sequences. As new sequences are rapidly being generated, there is a need for efficient standalone tools to detect homologs in novel data.

RESULTS: To address this, we present a fast method for detecting homology groups across a large number of individuals and/or species. We adopted a k-mer based approach which considerably reduces the number of pairwise protein alignments without sacrificing sensitivity. We demonstrate accuracy, scalability, efficiency and applicability of the presented method for detecting homology in large proteomes of bacteria, fungi, plants and Metazoa.

CONCLUSIONS: We clearly observed the trade-off between recall and precision in our homology inference. Favoring recall or precision strongly depends on the application. The clustering behavior of our program can be optimized for particular applications by altering a few key parameters. The program is available for public use at https://github.com/sheikhizadeh/pantools as an extension to our pan-genomic analysis tool, PanTools.}, } @article {pmid30252023, year = {2019}, author = {Plaza Oñate, F and Le Chatelier, E and Almeida, M and Cervino, ACL and Gauthier, F and Magoulès, F and Ehrlich, SD and Pichaud, M}, title = {MSPminer: abundance-based reconstitution of microbial pan-genomes from shotgun metagenomic data.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {9}, pages = {1544-1552}, pmid = {30252023}, issn = {1367-4811}, mesh = {Genome, Bacterial ; Genome, Microbial ; Humans ; Metagenome ; *Metagenomics ; *Microbiota ; Software ; }, abstract = {MOTIVATION: Analysis toolkits for shotgun metagenomic data achieve strain-level characterization of complex microbial communities by capturing intra-species gene content variation. Yet, these tools are hampered by the extent of reference genomes that are far from covering all microbial variability, as many species are still not sequenced or have only few strains available. Binning co-abundant genes obtained from de novo assembly is a powerful reference-free technique to discover and reconstitute gene repertoire of microbial species. While current methods accurately identify species core parts, they miss many accessory genes or split them into small gene groups that remain unassociated to core clusters.

RESULTS: We introduce MSPminer, a computationally efficient software tool that reconstitutes Metagenomic Species Pan-genomes (MSPs) by binning co-abundant genes across metagenomic samples. MSPminer relies on a new robust measure of proportionality coupled with an empirical classifier to group and distinguish not only species core genes but accessory genes also. Applied to a large scale metagenomic dataset, MSPminer successfully delineates in a few hours the gene repertoires of 1661 microbial species with similar specificity and higher sensitivity than existing tools. The taxonomic annotation of MSPs reveals microorganisms hitherto unknown and brings coherence in the nomenclature of the species of the human gut microbiota. The provided MSPs can be readily used for taxonomic profiling and biomarkers discovery in human gut metagenomic samples. In addition, MSPminer can be applied on gene count tables from other ecosystems to perform similar analyses.

AVAILABILITY AND IMPLEMENTATION: The binary is freely available for non-commercial users at www.enterome.com/downloads.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30247672, year = {2018}, author = {Corel, E and Pathmanathan, JS and Watson, AK and Karkar, S and Lopez, P and Bapteste, E}, title = {MultiTwin: A Software Suite to Analyze Evolution at Multiple Levels of Organization Using Multipartite Graphs.}, journal = {Genome biology and evolution}, volume = {10}, number = {10}, pages = {2777-2784}, pmid = {30247672}, issn = {1759-6653}, mesh = {*Biological Evolution ; *Genetic Techniques ; *Software ; }, abstract = {The inclusion of introgressive processes in evolutionary studies induces a less constrained view of evolution. Network-based methods (like large-scale similarity networks) allow to include in comparative genomics all extrachromosomic carriers (like viruses, the most abundant biological entities on the planet) with their cellular hosts. The integration of several levels of biological organization (genes, genomes, communities, environments) enables more comprehensive analyses of gene sharing and improved sequence-based classifications. However, the algorithmic tools for the analysis of such networks are usually restricted to people with high programming skills. We present an integrated suite of software tools named MultiTwin, aimed at the construction, structuring, and analysis of multipartite graphs for evolutionary biology. Typically, this kind of graph is useful for the comparative analysis of the gene content of genomes in microbial communities from the environment and for exploring patterns of gene sharing, for example between distantly related cellular genomes, pangenomes, or between cellular genomes and their mobile genetic elements. 
We illustrate the use of this tool with an application of the bipartite approach (using gene family-genome graphs) for the analysis of pathogenicity traits in prokaryotes.}, } @article {pmid30233526, year = {2018}, author = {Wang, LYR and Jokinen, CC and Laing, CR and Johnson, RP and Ziebell, K and Gannon, VPJ}, title = {Multi-Year Persistence of Verotoxigenic Escherichia coli (VTEC) in a Closed Canadian Beef Herd: A Cohort Study.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {2040}, pmid = {30233526}, issn = {1664-302X}, abstract = {In this study, fecal samples were collected from a closed beef herd in Alberta, Canada from 2012 to 2015. To limit serotype bias, which was observed in enrichment broth cultures, Verotoxigenic Escherichia coli (VTEC) were isolated directly from samples using a hydrophobic grid-membrane filter verotoxin immunoblot assay. Overall VTEC isolation rates were similar for three different cohorts of yearling heifers on both an annual (68.5 to 71.8%) and seasonal basis (67.3 to 76.0%). Across all three cohorts, O139:H19 (37.1% of VTEC-positive samples), O22:H8 (15.8%) and O?(O108):H8 (15.4%) were among the most prevalent serotypes. However, isolation rates for serotypes O139:H19, O130:H38, O6:H34, O91:H21, and O113:H21 differed significantly between cohort-years, as did isolation rates for some serotypes within a single heifer cohort. There was a high level of VTEC serotype diversity with an average of 4.3 serotypes isolated per heifer and 65.8% of the heifers classified as "persistent shedders" of VTEC based on the criteria of >50% of samples positive and ≥4 consecutive samples positive. Only 26.8% (90/336) of the VTEC isolates from yearling heifers belonged to the human disease-associated seropathotypes A (O157:H7), B (O26:H11, O111:NM), and C (O22:H8, O91:H21, O113:H21, O137:H41, O2:H6). 
Conversely, seropathotypes B (O26:NM, O111:NM) and C (O91:H21, O2:H29) strains were dominant (76.0%, 19/25) among VTEC isolates from month-old calves from this herd. Among VTEC from heifers, carriage rates of vt1, vt2, vt1+vt2, eae, and hlyA were 10.7, 20.8, 68.5, 3.9, and 88.7%, respectively. The adhesin gene saa was present in 82.7% of heifer strains but absent from all of 13 eae+ve strains (from serotypes/intimin types O157:H7/γ1, O26:H11/β1, O111:NM/θ, O84:H2/ζ, and O182:H25/ζ). Phylogenetic relationships inferred from wgMLST and pan genome-derived core SNP analysis showed that strains clustered by phylotype and serotype. Further, VTEC strains of the same serotype usually shared the same suite of antibiotic resistance and virulence genes, suggesting the circulation of dominant clones within this distinct herd. This study provides insight into the diverse and dynamic nature of VTEC populations within groups of cattle and points to a broad spectrum of human health risks associated with these E. coli strains.}, } @article {pmid30233505, year = {2018}, author = {Golanowska, M and Potrykus, M and Motyka-Pomagruk, A and Kabza, M and Bacci, G and Galardini, M and Bazzicalupo, M and Makalowska, I and Smalla, K and Mengoni, A and Hugouvieux-Cotte-Pattat, N and Lojkowska, E}, title = {Comparison of Highly and Weakly Virulent Dickeya solani Strains, With a View on the Pangenome and Panregulon of This Species.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1940}, pmid = {30233505}, issn = {1664-302X}, abstract = {Bacteria belonging to the genera Dickeya and Pectobacterium are responsible for significant economic losses in a wide variety of crops and ornamentals. During last years, increasing losses in potato production have been attributed to the appearance of Dickeya solani. The D. solani strains investigated so far share genetic homogeneity, although different virulence levels were observed among strains of various origins. 
The purpose of this study was to investigate the genetic traits possibly related to the diverse virulence levels by means of comparative genomics. First, we developed a new genome assembly pipeline which allowed us to complete the D. solani genomes. Four de novo sequenced and ten publicly available genomes were used to identify the structure of the D. solani pangenome, in which 74.8 and 25.2% of genes were grouped into the core and dispensable genome, respectively. For D. solani panregulon analysis, we performed a binding site prediction for four transcription factors, namely CRP, KdgR, PecS and Fur, to detect the regulons of these virulence regulators. Most of the D. solani potential virulence factors were predicted to belong to the accessory regulons of CRP, KdgR, and PecS. Thus, some differences in gene expression could exist between D. solani strains. The comparison between a highly and a low virulent strain, IFB0099 and IFB0223, respectively, disclosed only small differences between their genomes but significant differences in the production of virulence factors like pectinases, cellulases and proteases, and in their mobility. The D. solani strains also diverge in the number and size of prophages present in their genomes. Another relevant difference is the disruption of the adhesin gene fhaB2 in the highly virulent strain. Strain IFB0223, which has a complete adhesin gene, is less mobile and less aggressive than IFB0099. This suggests that in this case, mobility rather than adherence is needed in order to trigger disease symptoms. This study highlights the utility of comparative genomics in predicting D. 
solani traits involved in the aggressiveness of this emerging plant pathogen.}, } @article {pmid30230187, year = {2019}, author = {Bayer, PE and Golicz, AA and Tirnaz, S and Chan, CK and Edwards, D and Batley, J}, title = {Variation in abundance of predicted resistance genes in the Brassica oleracea pangenome.}, journal = {Plant biotechnology journal}, volume = {17}, number = {4}, pages = {789-800}, pmid = {30230187}, issn = {1467-7652}, mesh = {Ascomycota/*physiology ; Brassica/*genetics/immunology/microbiology ; Crops, Agricultural ; Disease Resistance/*genetics ; Fusarium/*physiology ; Genome, Plant/*genetics ; Plant Breeding ; Plant Diseases/*immunology/microbiology ; Quantitative Trait Loci/genetics ; }, abstract = {Brassica oleracea is an important agricultural species encompassing many vegetable crops including cabbage, cauliflower, broccoli and kale; however, it can be susceptible to a variety of fungal diseases such as clubroot, blackleg, leaf spot and downy mildew. Resistance to these diseases is mediated by specific disease resistance gene analogs (RGAs) which are differently distributed across B. oleracea lines. The sequenced reference cultivar does not contain all B. oleracea genes due to gene presence/absence variation between individuals, which makes it necessary to search for RGA candidates in the B. oleracea pangenome. Here we present a comparative analysis of RGA candidates in the pangenome of B. oleracea. We show that the presence of RGA candidates differs between lines and suggests that in B. oleracea, SNPs and presence/absence variation drive RGA diversity using separate mechanisms. We identified 59 RGA candidates linked to Sclerotinia, clubroot, and Fusarium wilt resistance QTL, and these findings have implications for crop breeding in B. 
oleracea, which may also be applicable in other crops species.}, } @article {pmid30228235, year = {2018}, author = {Getz, EW and Tithi, SS and Zhang, L and Aylward, FO}, title = {Parallel Evolution of Genome Streamlining and Cellular Bioenergetics across the Marine Radiation of a Bacterial Phylum.}, journal = {mBio}, volume = {9}, number = {5}, pages = {}, pmid = {30228235}, issn = {2150-7511}, mesh = {Adaptation, Biological ; Aquatic Organisms/*genetics ; Bacteria/*genetics ; Ecosystem ; *Energy Metabolism ; *Evolution, Molecular ; *Genome, Bacterial ; Metabolic Networks and Pathways/*genetics ; }, abstract = {Diverse bacterial and archaeal lineages drive biogeochemical cycles in the global ocean, but the evolutionary processes that have shaped their genomic properties and physiological capabilities remain obscure. Here we track the genome evolution of the globally abundant marine bacterial phylum Marinimicrobia across its diversification into modern marine environments and demonstrate that extant lineages are partitioned between epipelagic and mesopelagic habitats. Moreover, we show that these habitat preferences are associated with fundamental differences in genomic organization, cellular bioenergetics, and metabolic modalities. Multiple lineages present in epipelagic niches independently acquired genes necessary for phototrophy and environmental stress mitigation, and their genomes convergently evolved key features associated with genome streamlining. In contrast, lineages residing in mesopelagic waters independently acquired nitrate respiratory machinery and a variety of cytochromes, consistent with the use of alternative terminal electron acceptors in oxygen minimum zones (OMZs). 
Further, while epipelagic clades have retained an ancestral Na[+]-pumping respiratory complex, mesopelagic lineages have largely replaced this complex with canonical H[+]-pumping respiratory complex I, potentially due to the increased efficiency of the latter together with the presence of the more energy-limiting environments deep in the ocean's interior. These parallel evolutionary trends indicate that key features of genomic streamlining and cellular bioenergetics have occurred repeatedly and congruently in disparate clades and underscore the importance of environmental conditions and nutrient dynamics in driving the evolution of diverse bacterioplankton lineages in similar ways throughout the global ocean.

IMPORTANCE: Understanding long-term patterns of microbial evolution is critical to advancing our knowledge of past and present role of microbial life in driving global biogeochemical cycles. Historically, it has been challenging to study the evolution of environmental microbes due to difficulties in obtaining genome sequences from lineages that could not be cultivated, but recent advances in metagenomics and single-cell genomics have begun to obviate many of these hurdles. Here we present an evolutionary genomic analysis of the Marinimicrobia, a diverse bacterial group that is abundant in the global ocean. We demonstrate that distantly related Marinimicrobia species that reside in similar habitats have converged to assume similar genome architectures and cellular bioenergetics, suggesting that common factors shape the evolution of a broad array of marine lineages. 
These findings broaden our understanding of the evolutionary forces that have given rise to microbial life in the contemporary ocean.}, } @article {pmid30223644, year = {2018}, author = {Checcucci, A and diCenzo, GC and Ghini, V and Bazzicalupo, M and Becker, A and Decorosi, F and Döhlemann, J and Fagorzi, C and Finan, TM and Fondi, M and Luchinat, C and Turano, P and Vignolini, T and Viti, C and Mengoni, A}, title = {Creation and Characterization of a Genomically Hybrid Strain in the Nitrogen-Fixing Symbiotic Bacterium Sinorhizobium meliloti.}, journal = {ACS synthetic biology}, volume = {7}, number = {10}, pages = {2365-2378}, doi = {10.1021/acssynbio.8b00158}, pmid = {30223644}, issn = {2161-5063}, mesh = {Escherichia coli/genetics/metabolism ; Genome, Bacterial ; Magnetic Resonance Spectroscopy ; Medicago/microbiology ; Metabolic Engineering/methods ; Nitrogen/*metabolism ; Plant Roots/microbiology ; Plasmids/genetics/metabolism ; Principal Component Analysis ; Sinorhizobium meliloti/genetics/*metabolism ; *Symbiosis ; }, abstract = {Many bacteria, often associated with eukaryotic hosts and of relevance for biotechnological applications, harbor a multipartite genome composed of more than one replicon. Biotechnologically relevant phenotypes are often encoded by genes residing on the secondary replicons. A synthetic biology approach to developing enhanced strains for biotechnological purposes could therefore involve merging pieces or entire replicons from multiple strains into a single genome. Here we report the creation of a genomic hybrid strain in a model multipartite genome species, the plant-symbiotic bacterium Sinorhizobium meliloti. We term this strain as cis-hybrid, since it is produced by genomic material coming from the same species' pangenome. In particular, we moved the secondary replicon pSymA (accounting for nearly 20% of total genome content) from a donor S. meliloti strain to an acceptor strain. 
The cis-hybrid strain was screened for a panel of complex phenotypes (carbon/nitrogen utilization phenotypes, intra- and extracellular metabolomes, symbiosis, and various microbiological tests). Additionally, metabolic network reconstruction and constraint-based modeling were employed for in silico prediction of metabolic flux reorganization. Phenotypes of the cis-hybrid strain were in good agreement with those of both parental strains. Interestingly, the symbiotic phenotype showed a marked cultivar-specific improvement with the cis-hybrid strains compared to both parental strains. These results provide a proof-of-principle for the feasibility of genome-wide replicon-based remodelling of bacterial strains for improved biotechnological applications in precision agriculture.}, } @article {pmid30212910, year = {2018}, author = {Le, KK and Whiteside, MD and Hopkins, JE and Gannon, VPJ and Laing, CR}, title = {Spfy: an integrated graph database for real-time prediction of bacterial phenotypes and downstream comparative analyses.}, journal = {Database : the journal of biological databases and curation}, volume = {2018}, number = {}, pages = {1-10}, pmid = {30212910}, issn = {1758-0463}, mesh = {Computational Biology ; *Databases as Topic ; Escherichia coli/genetics/pathogenicity/*physiology ; Genome, Bacterial ; Internet ; Phenotype ; *Software ; Virulence Factors/genetics ; }, abstract = {Public health laboratories are currently moving to whole-genome sequence (WGS)-based analyses, and require the rapid prediction of standard reference laboratory methods based solely on genomic data. Currently, these predictive genomics tasks rely on workflows that chain together multiple programs for the requisite analyses. While useful, these systems do not store the analyses in a genome-centric way, meaning the same analyses are often re-computed for the same genomes. 
To solve this problem, we created Spfy, a platform that rapidly performs the common reference laboratory tests, uses a graph database to store and retrieve the results from the computational workflows and links data to individual genomes using standardized ontologies. The Spfy platform facilitates rapid phenotype identification, as well as the efficient storage and downstream comparative analysis of tens of thousands of genome sequences. Though generally applicable to bacterial genome sequences, Spfy currently contains 10 243 Escherichia coli genomes, for which in-silico serotype and Shiga-toxin subtype, as well as the presence of known virulence factors and antimicrobial resistance determinants have been computed. Additionally, the presence/absence of the entire E. coli pan-genome was computed and linked to each genome. Owing to its database of diverse pre-computed results, and the ability to easily incorporate user data, Spfy facilitates hypothesis testing in fields ranging from population genomics to epidemiology, while mitigating the re-computation of analyses. The graph approach of Spfy is flexible, and can accommodate new analysis software modules as they are developed, easily linking new results to those already stored. Spfy provides a database and analyses approach for E. coli that is able to match the rapid accumulation of WGS data in public databases.}, } @article {pmid30204489, year = {2019}, author = {Kavya, VNS and Tayal, K and Srinivasan, R and Sivadasan, N}, title = {Sequence Alignment on Directed Graphs.}, journal = {Journal of computational biology : a journal of computational molecular cell biology}, volume = {26}, number = {1}, pages = {53-67}, doi = {10.1089/cmb.2017.0264}, pmid = {30204489}, issn = {1557-8666}, mesh = {Algorithms ; Sequence Alignment/*methods ; Sequence Analysis, DNA ; }, abstract = {Genomic variations in a reference collection are naturally represented as genome variation graphs. 
Such graphs encode common subsequences as vertices and the variations are captured using additional vertices and directed edges. The resulting graphs are directed graphs possibly with cycles. Existing algorithms for aligning sequences on such graphs make use of partial order alignment (POA) techniques that work on directed acyclic graphs (DAGs). To achieve this, acyclic extensions of the input graphs are first constructed through expensive loop unrolling steps (DAGification). Furthermore, such graph extensions could have considerable blowup in their size and in the worst case the blow-up factor is proportional to the input sequence length. We provide a novel alignment algorithm V-ALIGN that aligns the input sequence directly on the input graph while avoiding such expensive DAGification steps. V-ALIGN is based on a novel dynamic programming (DP) formulation that allows gapped alignment directly on the input graph. It supports affine and linear gaps. We also propose refinements to V-ALIGN for better performance in practice. With the proposed refinements, the time to fill the DP table has linear dependence on the sizes of the sequence, the graph, and its feedback vertex set. We conducted experiments to compare the proposed algorithm against the existing POA-based techniques. We also performed alignment experiments on the genome variation graphs constructed from the 1000 Genomes data. For aligning short sequences, standard approaches restrict the expensive gapped alignment to small filtered subgraphs having high similarity to the input sequence. 
In such cases, the performance of V-ALIGN for gapped alignment on the filtered subgraph depends on the subgraph sizes.}, } @article {pmid30186253, year = {2018}, author = {Chen, X and Zhang, Y and Zhang, Z and Zhao, Y and Sun, C and Yang, M and Wang, J and Liu, Q and Zhang, B and Chen, M and Yu, J and Wu, J and Jin, Z and Xiao, J}, title = {PGAweb: A Web Server for Bacterial Pan-Genome Analysis.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1910}, pmid = {30186253}, issn = {1664-302X}, abstract = {An astronomical increase in microbial genome data in recent years has led to strong demand for bioinformatic tools for pan-genome analysis within and across species. Here, we present PGAweb, a user-friendly, web-based tool for bacterial pan-genome analysis, which is composed of two main pan-genome analysis modules, PGAP and PGAP-X. PGAweb provides key interactive and customizable functions that include orthologous clustering, pan-genome profiling, sequence variation and evolution analysis, and functional classification. PGAweb presents features of genomic structural dynamics and sequence diversity with different visualization methods that are helpful for intuitively understanding the dynamics and evolution of bacterial genomes. 
PGAweb has an intuitive interface with one-click setting of parameters and is freely available at http://PGAweb.vlcc.cn/.}, } @article {pmid30184068, year = {2018}, author = {Syme, RA and Tan, KC and Rybak, K and Friesen, TL and McDonald, BA and Oliver, RP and Hane, JK}, title = {Pan-Parastagonospora Comparative Genome Analysis-Effector Prediction and Genome Evolution.}, journal = {Genome biology and evolution}, volume = {10}, number = {9}, pages = {2443-2457}, pmid = {30184068}, issn = {1759-6653}, mesh = {Ascomycota/*genetics/pathogenicity/physiology ; *Evolution, Molecular ; Fungal Proteins/genetics ; Genetic Loci ; *Genome, Fungal ; Genomics ; Host-Pathogen Interactions ; Phylogeny ; Plant Diseases/*microbiology ; Point Mutation ; Polymorphism, Genetic ; Quantitative Trait Loci ; Triticum/*microbiology ; }, abstract = {We report a fungal pan-genome study involving Parastagonospora spp., including 21 isolates of the wheat (Triticum aestivum) pathogen Parastagonospora nodorum, 10 of the grass-infecting Parastagonospora avenae, and 2 of a closely related undefined sister species. We observed substantial variation in the distribution of polymorphisms across the pan-genome, including repeat-induced point mutations, diversifying selection and gene gains and losses. We also discovered chromosome-scale inter and intraspecific presence/absence variation of some sequences, suggesting the occurrence of one or more accessory chromosomes or regions that may play a role in host-pathogen interactions. The presence of known pathogenicity effector loci SnToxA, SnTox1, and SnTox3 varied substantially among isolates. Three P. nodorum isolates lacked functional versions for all three loci, whereas three P. avenae isolates carried one or both of the SnTox1 and SnTox3 genes, indicating previously unrecognized potential for discovering additional effectors in the P. nodorum-wheat pathosystem. 
We utilized the pan-genomic comparative analysis to improve the prediction of pathogenicity effector candidates, recovering the three confirmed effectors among our top-ranked candidates. We propose applying this pan-genomic approach to identify the effector repertoire involved in other host-microbe interactions involving necrotrophic pathogens in the Pezizomycotina.}, } @article {pmid30177918, year = {2018}, author = {Yang, T and Zhong, J and Zhang, J and Li, C and Yu, X and Xiao, J and Jia, X and Ding, N and Ma, G and Wang, G and Yue, L and Liang, Q and Sheng, Y and Sun, Y and Huang, H and Chen, F}, title = {Pan-Genomic Study of Mycobacterium tuberculosis Reflecting the Primary/Secondary Genes, Generality/Individuality, and the Interconversion Through Copy Number Variations.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1886}, pmid = {30177918}, issn = {1664-302X}, abstract = {Tuberculosis (TB) has surpassed HIV as the leading infectious disease killer worldwide since 2014. The main pathogen, Mycobacterium tuberculosis (Mtb), contains ~4,000 genes that account for ~90% of the genome. However, it is still unclear which of these genes are primary/secondary, which are responsible for generality/individuality, and which interconvert during evolution. Here we utilized a pan-genomic analysis of 36 Mtb genomes to address these questions. We identified 3,679 Mtb core (i.e., primary) genes, determining their phenotypic generality (e.g., virulence, slow growth, dormancy). We also observed 1,122 dispensable and 964 strain-specific secondary genes, reflecting partially shared and lineage-/strain-specific individualities. Among which, five L2 lineage-specific genes might be related to the increased virulence of the L2 lineage. Notably, we discovered 28 Mtb "Super Core Genes" (SCGs: more than a copy in at least 90% strains), which might be of increased importance, and reflected the "super phenotype generality." 
Most SCGs encode PE/PPE, virulence factors, antigens, and transposases, and have been verified as playing crucial roles in Mtb pathogenicity. Further investigation of the 28 SCGs demonstrated the interconversion among SCGs, single-copy core, dispensable, and strain-specific genes through copy number variations (CNVs) during evolution; different mutations on different copies highlight the delicate adaptive-evolution regulation amongst Mtb lineages. This reflects that the importance of genes varied through CNVs, which might be driven by selective pressure from environment/host-adaptation. In addition, compared with Mycobacterium bovis (Mbo), Mtb possesses 48 specific single core genes that partially reflect the differences between Mtb and Mbo individuality.}, } @article {pmid30175026, year = {2018}, author = {Asaf, S and Khan, AL and Khan, MA and Al-Harrasi, A and Lee, IJ}, title = {Complete genome sequencing and analysis of endophytic Sphingomonas sp. LK11 and its potential in plant growth.}, journal = {3 Biotech}, volume = {8}, number = {9}, pages = {389}, pmid = {30175026}, issn = {2190-572X}, abstract = {Our study aimed to elucidate the plant growth-promoting characteristics and the structure and composition of Sphingomonas sp. LK11 genome using the single molecule real-time (SMRT) sequencing technology of Pacific Biosciences. The results revealed that LK11 produces different types of gibberellins (GAs) in pure culture and significantly improves soybean plant growth by influencing endogenous GAs compared with non-inoculated control plants. Detailed genomic analyses revealed that the Sphingomonas sp. LK11 genome consists of a circular chromosome (3.78 Mbp; 66.2% G+C content) and two circular plasmids (122,975 bps and 34,160 bps; 63 and 65% G+C content, respectively). Annotation showed that the LK11 genome consists of 3656 protein-coding genes, 59 tRNAs, and 4 complete rRNA operons. 
Functional analyses predicted that LK11 encodes genes for phosphate solubilization and nitrate/nitrite ammonification, which are beneficial for promoting plant growth. Genes for production of catalases, superoxide dismutase, and peroxidases that confer resistance to oxidative stress in plants were also identified in LK11. Moreover, genes for trehalose and glycine betaine biosynthesis were also found in LK11 genome. Similarly, Sphingomonas spp. analysis revealed an open pan-genome and a total of 8507 genes were identified in the Sphingomonas spp. pan-genome and about 1356 orthologous genes were found to comprise the core genome. However, the number of genomes analyzed was not enough to describe complete gene sets. Our findings indicated that the genetic makeup of Sphingomonas sp. LK11 can be utilized as an eco-friendly bioresource for cleaning contaminated sites and promoting growth of plants confronted with environmental perturbations.}, } @article {pmid30166979, year = {2018}, author = {Kiu, R and Hall, LJ}, title = {Response: Commentary: Probing Genomic Aspects of the Multi-Host Pathogen Clostridium perfringens Reveals Significant Pangenome Diversity, and a Diverse Array of Virulence Factors.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1857}, pmid = {30166979}, issn = {1664-302X}, support = {/WT_/Wellcome Trust/United Kingdom ; BBS/E/F/00044409/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/000PR10353/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/000PR10356/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, } @article {pmid30165579, year = {2019}, author = {Inman, JM and Sutton, GG and Beck, E and Brinkac, LM and Clarke, TH and Fouts, DE}, title = {Large-scale comparative analysis of microbial pan-genomes using PanOCT.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {6}, pages = {1049-1050}, pmid = 
{30165579}, issn = {1367-4811}, support = {U19 AI110819/AI/NIAID NIH HHS/United States ; }, mesh = {Cluster Analysis ; *Genome, Bacterial ; *Genome, Microbial ; Prokaryotic Cells ; Software ; }, abstract = {SUMMARY: The JCVI pan-genome pipeline is a collection of programs to run PanOCT and tools that support and extend the capabilities of PanOCT. PanOCT (pan-genome ortholog clustering tool) is a tool for pan-genome analysis of closely related prokaryotic species or strains. The JCVI Pan-Genome Pipeline wrapper invokes command-line utilities that prepare input genomes, invoke third-party tools such as NCBI Blast+, run PanOCT, generate a consensus pan-genome, annotate features of the pan-genome, detect sets of genes of interest such as antimicrobial resistance (AMR) genes and generate figures, tables and html pages to visualize the results. The pipeline can run in a hierarchical mode, lowering the RAM and compute resources used.

Source code, demo data, and detailed documentation are freely available at https://github.com/JCVenterInstitute/PanGenomePipeline.}, } @article {pmid30154769, year = {2018}, author = {Mehdizadeh Gohari, I and Prescott, JF}, title = {Commentary: Probing Genomic Aspects of the Multi-Host Pathogen Clostridium perfringens Reveals Significant Pangenome Diversity, and a Diverse Array of Virulence Factors.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1856}, pmid = {30154769}, issn = {1664-302X}, } @article {pmid30129229, year = {2018}, author = {Ou, L and Li, D and Lv, J and Chen, W and Zhang, Z and Li, X and Yang, B and Zhou, S and Yang, S and Li, W and Gao, H and Zeng, Q and Yu, H and Ouyang, B and Li, F and Liu, F and Zheng, J and Liu, Y and Wang, J and Wang, B and Dai, X and Ma, Y and Zou, X}, title = {Pan-genome of cultivated pepper (Capsicum) and its use in gene presence-absence variation analyses.}, journal = {The New phytologist}, volume = {220}, number = {2}, pages = {360-363}, doi = {10.1111/nph.15413}, pmid = {30129229}, issn = {1469-8137}, support = {2016YFD0101704//National Key R&D Program of China/International ; }, mesh = {Capsicum/*genetics/*growth & development ; Genes, Plant ; Genetic Variation ; *Genome, Plant ; Genome-Wide Association Study ; Internet ; Molecular Sequence Annotation ; }, } @article {pmid30126366, year = {2018}, author = {Argemi, X and Matelska, D and Ginalski, K and Riegel, P and Hansmann, Y and Bloom, J and Pestel-Caron, M and Dahyot, S and Lebeurre, J and Prévost, G}, title = {Comparative genomic analysis of Staphylococcus lugdunensis shows a closed pan-genome and multiple barriers to horizontal gene transfer.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {621}, pmid = {30126366}, issn = {1471-2164}, support = {2014/15/B/NZ1/03357//Polish National Science Centre/ ; }, mesh = {CRISPR-Cas Systems/genetics ; Gene Transfer, Horizontal/*genetics ; *Genome, Bacterial ; Humans ; Phylogeny ; 
Sequence Analysis, DNA ; Staphylococcal Infections/microbiology ; Staphylococcus lugdunensis/*genetics ; Virulence ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: Coagulase negative staphylococci (CoNS) are commensal bacteria on human skin. Staphylococcus lugdunensis is a unique CoNS which produces various virulence factors and may, like S. aureus, cause severe infections, particularly in hospital settings. Unlike other staphylococci, it remains highly susceptible to antimicrobials, and genome-based phylogenetic studies have evidenced a highly conserved genome that distinguishes it from all other staphylococci.

RESULTS: We demonstrate that S. lugdunensis possesses a closed pan-genome with a very limited number of new genes, in contrast to other staphylococci that have an open pan-genome. Whole-genome nucleotide and amino acid identity levels are also higher than in other staphylococci. We identified numerous genetic barriers to horizontal gene transfer that might explain this result. The S. lugdunensis genome has multiple operons encoding for restriction-modification, CRISPR/Cas and toxin/antitoxin systems. We also identified a new PIN-like domain-associated protein that might belong to a larger operon, comprising a metalloprotease, that could function as a new toxin/antitoxin or detoxification system.

CONCLUSION: We show that S. lugdunensis has a unique genome profile within staphylococci, with a closed pan-genome and several systems to prevent horizontal gene transfer. Its virulence in clinical settings does not rely on its ability to acquire and exchange antibiotic resistance genes or other virulence factors as shown for other staphylococci.}, } @article {pmid30116789, year = {2018}, author = {Pena-Gonzalez, A and Rodriguez-R, LM and Marston, CK and Gee, JE and Gulvik, CA and Kolton, CB and Saile, E and Frace, M and Hoffmaster, AR and Konstantinidis, KT}, title = {Genomic Characterization and Copy Number Variation of Bacillus anthracis Plasmids pXO1 and pXO2 in a Historical Collection of 412 Strains.}, journal = {mSystems}, volume = {3}, number = {4}, pages = {}, pmid = {30116789}, issn = {2379-5077}, abstract = {Bacillus anthracis plasmids pXO1 and pXO2 carry the main virulence factors responsible for anthrax. However, the extent of copy number variation within the species and how the plasmids are related to pXO1/pXO2-like plasmids in other species of the Bacillus cereus sensu lato group remain unclear. To gain new insights into these issues, we sequenced 412 B. anthracis strains representing the total phylogenetic and ecological diversity of the species. Our results revealed that B. anthracis genomes carried, on average, 3.86 and 2.29 copies of pXO1 and pXO2, respectively, and also revealed a positive linear correlation between the copy numbers of pXO1 and pXO2. No correlation between the plasmid copy number and the phylogenetic relatedness of the strains was observed. However, genomes of strains isolated from animal tissues generally maintained a higher plasmid copy number than genomes of strains from environmental sources (P < 0.05 [Welch two-sample t test]). Comparisons against B. 
cereus genomes carrying complete or partial pXO1-like and pXO2-like plasmids showed that the plasmid-based phylogeny recapitulated that of the main chromosome, indicating limited plasmid horizontal transfer between or within these species. Comparisons of gene content revealed a closed pXO1 and pXO2 pangenome; e.g., plasmids encode <8 unique genes, on average, and a single large fragment deletion of pXO1 in one B. anthracis strain (2000031682) was detected. Collectively, our results provide a more complete view of the genomic diversity of B. anthracis plasmids, their copy number variation, and the virulence potential of other Bacillus species carrying pXO1/pXO2-like plasmids. IMPORTANCE Bacillus anthracis microorganisms are of historical and epidemiological importance and are among the most homogenous bacterial groups known, even though the B. anthracis genome is rich in mobile elements. Mobile elements can trigger the diversification of lineages; therefore, characterizing the extent of genomic variation in a large collection of strains is critical for a complete understanding of the diversity and evolution of the species. Here, we sequenced a large collection of B. anthracis strains (>400) that were recovered from human, animal, and environmental sources around the world. Our results confirmed the remarkable stability of gene content and synteny of the anthrax plasmids and revealed no signal of plasmid exchange between B. anthracis and pathogenic B. cereus isolates but rather predominantly vertical descent. These findings advance our understanding of the biology and pathogenomic evolution of B. 
anthracis and its plasmids.}, } @article {pmid30115097, year = {2018}, author = {Thind, AK and Wicker, T and Müller, T and Ackermann, PM and Steuernagel, B and Wulff, BBH and Spannagl, M and Twardziok, SO and Felder, M and Lux, T and Mayer, KFX and {International Wheat Genome Sequencing Consortium} and Keller, B and Krattinger, SG}, title = {Chromosome-scale comparative sequence analysis unravels molecular mechanisms of genome dynamics between two wheat cultivars.}, journal = {Genome biology}, volume = {19}, number = {1}, pages = {104}, pmid = {30115097}, issn = {1474-760X}, mesh = {Base Pairing/genetics ; Chromosomes, Plant/*genetics ; Crossing Over, Genetic ; DNA Breaks, Double-Stranded ; DNA Copy Number Variations/genetics ; DNA Repair/genetics ; Gene Flow ; Genes, Plant ; *Genome, Plant ; Haplotypes/genetics ; Multigene Family ; Polymorphism, Single Nucleotide/genetics ; *Sequence Analysis, DNA ; Sequence Deletion/genetics ; Synteny/genetics ; Triticum/*genetics ; }, abstract = {BACKGROUND: Recent improvements in DNA sequencing and genome scaffolding have paved the way to generate high-quality de novo assemblies of pseudomolecules representing complete chromosomes of wheat and its wild relatives. These assemblies form the basis to compare the dynamics of wheat genomes on a megabase scale.

RESULTS: Here, we provide a comparative sequence analysis of the 700-megabase chromosome 2D between two bread wheat genotypes—the old landrace Chinese Spring and the elite Swiss spring wheat line 'CH Campala Lr22a'. Both chromosomes were assembled into megabase-sized scaffolds. There is a high degree of sequence conservation between the two chromosomes. Analysis of large structural variations reveals four large indels of more than 100 kb. Based on the molecular signatures at the breakpoints, unequal crossing over and double-strand break repair were identified as the molecular mechanisms that caused these indels. Three of the large indels affect copy number of NLRs, a gene family involved in plant immunity. Analysis of SNP density reveals four haploblocks of 4, 8, 9 and 48 Mb with a 35-fold increased SNP density compared to the rest of the chromosome. Gene content across the two chromosomes was highly conserved. Ninety-nine percent of the genic sequences were present in both genotypes and the fraction of unique genes ranged from 0.4 to 0.7%.

CONCLUSIONS: This comparative analysis of two high-quality chromosome assemblies enabled a comprehensive assessment of large structural variations and gene content. The insight obtained from this analysis will form the basis of future wheat pan-genome studies.}, } @article {pmid30106690, year = {2019}, author = {Cleary, A and Ramaraj, T and Kahanda, I and Mudge, J and Mumey, B}, title = {Exploring Frequented Regions in Pan-Genomic Graphs.}, journal = {IEEE/ACM transactions on computational biology and bioinformatics}, volume = {16}, number = {5}, pages = {1424-1435}, doi = {10.1109/TCBB.2018.2864564}, pmid = {30106690}, issn = {1557-9964}, mesh = {Algorithms ; Computer Graphics ; Databases, Genetic ; Genome/*genetics ; Genomics/*methods ; Saccharomyces cerevisiae/genetics ; Sequence Analysis, DNA/*methods ; Staphylococcus aureus/genetics ; }, abstract = {We consider the problem of identifying regions within a pan-genome De Bruijn graph that are traversed by many sequence paths. We define such regions and the subpaths that traverse them as frequented regions (FRs). In this work, we formalize the FR problem and describe an efficient algorithm for finding FRs. Subsequently, we propose some applications of FRs based on machine-learning and pan-genome graph simplification. We demonstrate the effectiveness of these applications using data sets for the organisms Staphylococcus aureus (bacterium) and Saccharomyces cerevisiae (yeast). 
We corroborate the biological relevance of FRs such as identifying introgressions in yeast that aid in alcohol tolerance, and show that FRs are useful for classification of yeast strains by industrial use and visualizing pan-genomic space.}, } @article {pmid30104693, year = {2018}, author = {Das, S and Pettersson, BMF and Behra, PRK and Mallick, A and Cheramie, M and Ramesh, M and Shirreff, L and DuCote, T and Dasgupta, S and Ennis, DG and Kirsebom, LA}, title = {Extensive genomic diversity among Mycobacterium marinum strains revealed by whole genome sequencing.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {12040}, pmid = {30104693}, issn = {2045-2322}, mesh = {Animals ; Base Sequence ; Fishes/classification/*microbiology ; Genetic Variation/*genetics ; Genome, Bacterial/*genetics ; Humans ; Mycobacterium Infections, Nontuberculous/*veterinary ; Mycobacterium marinum/*genetics/*isolation & purification ; Phylogeny ; Plasmids/genetics ; Whole Genome Sequencing ; }, abstract = {Mycobacterium marinum is the causative agent for the tuberculosis-like disease mycobacteriosis in fish and skin lesions in humans. Ubiquitous in its geographical distribution, M. marinum is known to occupy diverse fish as hosts. However, information about its genomic diversity is limited. Here, we provide the genome sequences for 15 M. marinum strains isolated from infected humans and fish. Comparative genomic analysis of these and four available genomes of the M. marinum strains M, E11, MB2 and Europe reveal high genomic diversity among the strains, leading to the conclusion that M. marinum should be divided into two different clusters, the "M"- and the "Aronson"-type. We suggest that these two clusters should be considered to represent two M. marinum subspecies. Our data also show that the M. marinum pan-genome for both groups is open and expanding and we provide data showing high number of mutational hotspots in M. 
marinum relative to other mycobacteria such as Mycobacterium tuberculosis. This high genomic diversity might be related to the ability of M. marinum to occupy different ecological niches.}, } @article {pmid30075151, year = {2018}, author = {Romero, P and Benhamo, V and Deniziaut, G and Fuhrmann, L and Berger, F and Manié, E and Bhalshankar, J and Vacher, S and Laurent, C and Marangoni, E and Gruel, N and MacGrogan, G and Rouzier, R and Delattre, O and Popova, T and Reyal, F and Stern, MH and Stoppa-Lyonnet, D and Marchiò, C and Bièche, I and Vincent-Salomon, A}, title = {Medullary Breast Carcinoma, a Triple-Negative Breast Cancer Associated with BCLG Overexpression.}, journal = {The American journal of pathology}, volume = {188}, number = {10}, pages = {2378-2391}, doi = {10.1016/j.ajpath.2018.06.021}, pmid = {30075151}, issn = {1525-2191}, mesh = {BRCA2 Protein/genetics ; Carcinoma, Medullary/*genetics ; DNA, Neoplasm/metabolism ; Female ; Gene Expression Profiling ; Genes, Neoplasm/genetics ; Humans ; Loss of Heterozygosity/genetics ; Proto-Oncogene Proteins c-bcl-2/*genetics ; RNA, Neoplasm/metabolism ; Retrospective Studies ; Reverse Transcriptase Polymerase Chain Reaction ; Triple Negative Breast Neoplasms/*genetics ; Ubiquitin-Protein Ligases/genetics ; }, abstract = {Medullary breast carcinoma (MBC) is a rare subtype of triple-negative breast cancer with specific genomic features within the spectrum of basal-like carcinoma (BLC). In this study of 19 MBCs and 36 non-MBC BLCs, we refined the transcriptomic and genomic knowledge about this entity. Unsupervised and supervised analysis of transcriptomic profiles confirmed that MBC clearly differs from non-MBC BLC, with 92 genes overexpressed and 154 genes underexpressed in MBC compared with non-MBC BLC. Immunity-related pathways are the most differentially represented pathways in MBC compared with non-MBC BLC. 
The proapoptotic gene BCLG (official name BCL2L14) is by far the most intensely overexpressed gene in MBC. A quantitative RT-PCR validation study conducted in 526 breast tumors corresponding to all molecular subtypes documented the specificity of BCLG overexpression in MBC, which was confirmed at the protein level by immunohistochemistry. We also found that most MBCs belong to the immunomodulatory triple-negative breast cancer subtype. Using pan-genomic analysis, it was found that MBC harbors more losses of heterozygosity than non-MBC BLC. These observations corroborate the notion that MBC remains a distinct entity that could benefit from specific treatment strategies (such as deescalation or targeted therapy) adapted to this rare tumor type.}, } @article {pmid30074293, year = {2018}, author = {Pluta, R and Espinosa, M}, title = {Antisense and yet sensitive: Copy number control of rolling circle-replicating plasmids by small RNAs.}, journal = {Wiley interdisciplinary reviews. RNA}, volume = {9}, number = {6}, pages = {e1500}, doi = {10.1002/wrna.1500}, pmid = {30074293}, issn = {1757-7012}, mesh = {DNA Copy Number Variations ; *DNA Replication ; DNA, Bacterial/genetics ; Plasmids/*genetics ; RNA, Antisense/*genetics ; }, abstract = {Bacterial plasmids constitute a wealth of shared DNA amounting to about 20% of the total prokaryotic pangenome. Plasmids replicate autonomously and control their replication by maintaining a fairly constant number of copies within a given host. Plasmids should acquire a good fitness to their hosts so that they do not constitute a genetic load. Here we review some basic concepts in plasmid biology, pertaining to the control of replication and distribution of plasmid copies among daughter cells. A particular class of plasmids is constituted by those that replicate by the rolling circle mode (rolling circle-replicating [RCR]-plasmids). They are small double-stranded DNA molecules, with a rather high number of copies in the original host. 
RCR-plasmids control their replication by means of a small short-lived antisense RNA, alone or in combination with a plasmid-encoded transcriptional repressor protein. Two plasmid prototypes have been studied in depth, namely the staphylococcal plasmid pT181 and the streptococcal plasmid pMV158, each corresponding to the two types of replication control circuits, respectively. We further discuss possible applications of the plasmid-encoded antisense RNAs and address some future directions that, in our opinion, should be pursued in the study of these small molecules. This article is categorized under: Regulatory RNAs/RNAi/Riboswitches > Regulatory RNAs RNA Structure and Dynamics > Influence of RNA Structure in Biological Systems.}, } @article {pmid30072959, year = {2018}, author = {González-Torres, P and Gabaldón, T}, title = {Genome Variation in the Model Halophilic Bacterium Salinibacter ruber.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1499}, pmid = {30072959}, issn = {1664-302X}, abstract = {The halophilic bacterium Salinibacter ruber is an abundant and ecologically important member of halophilic communities worldwide. Given its broad distribution and high intraspecific genetic diversity, S. ruber is considered one of the main models for ecological and evolutionary studies of bacterial adaptation to hypersaline environments. However, current insights on the genomic diversity of this species is limited to the comparison of the genomes of two co-isolated strains. Here, we present a comparative genomic analysis of eight S. ruber strains isolated at two different time points in each of two different Mediterranean solar salterns. Our results show an open pangenome with contrasting evolutionary patterns in the core and accessory genomes. We found that the core genome is shaped by extensive homologous recombination (HR), which results in limited sequence variation within population clusters. 
In contrast, the accessory genome is modulated by horizontal gene transfer (HGT), with genomic islands and plasmids acting as gateways to the rest of the genome. In addition, both types of genetic exchange are modulated by restriction and modification (RM) or CRISPR-Cas systems. Finally, genes differentially impacted by such processes reveal functional processes potentially relevant for environmental interactions and adaptation to extremophilic conditions. Altogether, our results support scenarios that conciliate "Neutral" and "Constant Diversity" models of bacterial evolution.}, } @article {pmid30066860, year = {2018}, author = {Gattolliat, CH and Couvé, S and Meurice, G and Oréar, C and Droin, N and Chiquet, M and Ferlicot, S and Verkarre, V and Vasiliu, V and Molinié, V and Méjean, A and Dessen, P and Giraud, S and Bressac-De-Paillerets, B and Gardie, B and Tean Teh, B and Richard, S and Gad, S}, title = {Integrative analysis of dysregulated microRNAs and mRNAs in multiple recurrent synchronized renal tumors from patients with von Hippel-Lindau disease.}, journal = {International journal of oncology}, volume = {53}, number = {4}, pages = {1455-1468}, pmid = {30066860}, issn = {1791-2423}, mesh = {Adult ; Aged ; Aged, 80 and over ; Carcinoma, Renal Cell/epidemiology/*genetics/pathology ; Case-Control Studies ; Cell Line, Tumor ; Female ; Gene Expression Profiling/methods ; *Gene Expression Regulation, Neoplastic ; Humans ; Kidney/pathology ; Kidney Neoplasms/epidemiology/*genetics/pathology ; Male ; MicroRNAs/metabolism ; Middle Aged ; Neoplasm Recurrence, Local/epidemiology/*genetics/pathology ; Neoplasms, Multiple Primary/epidemiology/*genetics/pathology ; Oligonucleotide Array Sequence Analysis/methods ; RNA, Messenger/metabolism ; Von Hippel-Lindau Tumor Suppressor Protein/genetics ; Young Adult ; von Hippel-Lindau Disease/*genetics ; }, abstract = {Von Hippel-Lindau (VHL) disease is a rare autosomal dominant syndrome that is the main cause of inherited 
clear-cell renal cell carcinoma (ccRCC), which generally occurs in the form of multiple recurrent synchronized tumors. Affected patients are carriers of a germline mutation in the VHL tumor suppressor gene. Somatic mutations of this gene are also found in sporadic ccRCC and numerous pan-genomic studies have reported a dysregulation of microRNA (miRNA) expression in these sporadic tumors. In order to investigate the molecular mechanisms underlying the pathogenesis of VHL-associated ccRCC, particularly in the context of multiple tumors, the present study characterized the mRNA and miRNA transcriptome through an integrative analysis compared with sporadic renal tumors. In the present study, two series of ccRCC samples were used. The first set consisted of several samples from different tumors occurring in the same patient, for two independent patients affected with VHL disease. The second set consisted of 12 VHL-associated tumors and 22 sporadic ccRCC tumors compared with a pool of normal renal tissue. For each sample series, an expression analysis of miRNAs and mRNAs was conducted using microarrays. The results indicated that multiple tumors within the kidney of a patient with VHL disease featured a similar pattern of miRNA and gene expression. In addition, the expression levels of miRNA were able to distinguish VHL-associated tumors from sporadic ccRCC, and it was identified that 103 miRNAs and 2,474 genes were differentially expressed in the ccRCC series compared with in normal renal tissue. The majority of dysregulated genes were implicated in 'immunity' and 'metabolism' pathways. Taken together, these results allow a better understanding of the occurrence of ccRCC in patients with VHL disease, by providing insights into dysregulated miRNA and mRNA. 
In the set of patients with VHL disease, there were few differences in miRNA and mRNA expression, thus indicating a similar molecular evolution of these synchronous tumors and suggesting that the same molecular mechanisms underlie the pathogenesis of these hereditary tumors.}, } @article {pmid30061736, year = {2018}, author = {Springer, NM and Anderson, SN and Andorf, CM and Ahern, KR and Bai, F and Barad, O and Barbazuk, WB and Bass, HW and Baruch, K and Ben-Zvi, G and Buckler, ES and Bukowski, R and Campbell, MS and Cannon, EKS and Chomet, P and Dawe, RK and Davenport, R and Dooner, HK and Du, LH and Du, C and Easterling, KA and Gault, C and Guan, JC and Hunter, CT and Jander, G and Jiao, Y and Koch, KE and Kol, G and Köllner, TG and Kudo, T and Li, Q and Lu, F and Mayfield-Jones, D and Mei, W and McCarty, DR and Noshay, JM and Portwood, JL and Ronen, G and Settles, AM and Shem-Tov, D and Shi, J and Soifer, I and Stein, JC and Stitzer, MC and Suzuki, M and Vera, DL and Vollbrecht, E and Vrebalov, JT and Ware, D and Wei, S and Wimalanathan, K and Woodhouse, MR and Xiong, W and Brutnell, TP}, title = {The maize W22 genome provides a foundation for functional genomics and transposon biology.}, journal = {Nature genetics}, volume = {50}, number = {9}, pages = {1282-1288}, doi = {10.1038/s41588-018-0158-0}, pmid = {30061736}, issn = {1546-1718}, mesh = {Chromatin/genetics ; Chromosomes, Plant/genetics ; DNA Copy Number Variations/genetics ; DNA Methylation/genetics ; DNA Transposable Elements/*genetics ; DNA, Plant/genetics ; Genes, Plant/*genetics ; Genome, Plant/*genetics ; Genomics/methods ; Open Reading Frames/genetics ; Sequence Analysis, DNA/methods ; Zea mays/*genetics ; }, abstract = {The maize W22 inbred has served as a platform for maize genetics since the mid twentieth century. To streamline maize genome analyses, we have sequenced and de novo assembled a W22 reference genome using short-read sequencing technologies. 
We show that significant structural heterogeneity exists in comparison to the B73 reference genome at multiple scales, from transposon composition and copy number variation to single-nucleotide polymorphisms. The generation of this reference genome enables accurate placement of thousands of Mutator (Mu) and Dissociation (Ds) transposable element insertions for reverse and forward genetics studies. Annotation of the genome has been achieved using RNA-seq analysis, differential nuclease sensitivity profiling and bisulfite sequencing to map open reading frames, open chromatin sites and DNA methylation profiles, respectively. Collectively, the resources developed here integrate W22 as a community reference genome for functional genomics and provide a foundation for the maize pan-genome.}, } @article {pmid30056958, year = {2018}, author = {Datta, AR and Burall, LS}, title = {Serotype to genotype: The changing landscape of listeriosis outbreak investigations.}, journal = {Food microbiology}, volume = {75}, number = {}, pages = {18-27}, doi = {10.1016/j.fm.2017.06.013}, pmid = {30056958}, issn = {1095-9998}, mesh = {Disease Outbreaks ; Genotype ; Humans ; Listeria monocytogenes/classification/genetics/*isolation & purification ; Listeriosis/epidemiology/*microbiology ; Phylogeny ; Serogroup ; }, abstract = {The classical definition of a disease outbreak is the occurrence of cases of disease in excess of what would normally be expected in a community, geographical area or time period. The establishment of an outbreak then starts with the identification of an incidence of cases above the normally expected threshold during a given time period. Subsequently, the cases are examined using a variety of subtyping methods to identify potential linkages. As listeriosis disease has a long incubation period, relating a single source or multiple sources of contaminated food to clinical disease is challenging and time consuming. 
The vast majority of human listeriosis cases are caused by three serotypes, 1/2a, 1/2b, and 4b. Thus serotyping of isolates from suspected foods and clinical samples, although useful for eliminating some food sources, has a very limited discriminatory power. The advent of faster and more affordable sequencing technology, coupled with increased computational power, has permitted comparisons of whole Listeria genome sequences from isolates recovered from clinical, food, and environmental sources. These analyses made it possible to identify outbreaks and the source much more accurately and faster, thus leading to a reduction in number of illnesses as well as a reduction in economic losses. Initial DNA sequence information also facilitated the development of a simple molecular serotype protocol which allowed for the identification of major disease causing serotypes of L. monocytogenes, including a clade of 4b variant (4bV) strains of L. monocytogenes involved in at least 3 more recent listeriosis outbreaks in the US. Furthermore, data generated using whole genome sequence (WGS) analyses was successfully utilized to develop a pan-genomic DNA microarray as well as a single nucleotide polymorphism (SNP) based analysis. 
Herein, we present and compare, the two recently developed sub-typing technologies and discuss how these methods are not only important in outbreak investigations, but could also shed light on possible adaptations to different foods and environments.}, } @article {pmid30055586, year = {2018}, author = {Wolf, IR and Paschoal, AR and Quiroga, C and Domingues, DS and de Souza, RF and Pretto-Giordano, LG and Vilas-Boas, LA}, title = {Functional annotation and distribution overview of RNA families in 27 Streptococcus agalactiae genomes.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {556}, pmid = {30055586}, issn = {1471-2164}, mesh = {*Genome, Bacterial ; Molecular Sequence Annotation ; RNA, Untranslated/classification/*genetics ; Streptococcus agalactiae/*genetics ; }, abstract = {BACKGROUND: Streptococcus agalactiae, also known as Group B Streptococcus (GBS), is a Gram-positive bacterium that colonizes the gastrointestinal and genitourinary tract of humans. This bacterium has also been isolated from various animals, such as fish and cattle. Non-coding RNAs (ncRNAs) can act as regulators of gene expression in bacteria, such as Streptococcus pneumoniae and Streptococcus pyogenes. However, little is known about the genomic distribution of ncRNAs and RNA families in S. agalactiae.

RESULTS: Comparative genome analysis of 27 S. agalactiae strains showed more than 5 thousand genomic regions identified and classified as Core, Exclusive, and Shared genome sequences. We identified 27 to 89 RNA families per genome distributed over these regions, from these, 25 were in Core regions while Shared and Exclusive regions showed variations amongst strains. We propose that the amount and type of ncRNA present in each genome can provide a pattern to contribute in the identification of the clonal types.

CONCLUSIONS: The identification of RNA families provides an insight over ncRNAs, sRNAs and ribozymes function, that can be further explored as targets for antibiotic development or studied in gene regulation of cellular processes. RNA families could be considered as markers to determine infection capabilities of different strains. Lastly, pan-genome analysis of GBS including the full range of functional transcripts provides a broader approach in the understanding of this pathogen.}, } @article {pmid30050512, year = {2018}, author = {Luo, Y and Cheng, Y and Yi, J and Zhang, Z and Luo, Q and Zhang, D and Li, Y}, title = {Complete Genome Sequence of Industrial Biocontrol Strain Paenibacillus polymyxa HY96-2 and Further Analysis of Its Biocontrol Mechanism.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1520}, pmid = {30050512}, issn = {1664-302X}, abstract = {Paenibacillus polymyxa (formerly known as Bacillus polymyxa) has been extensively studied for agricultural applications as a plant-growth-promoting rhizobacterium and is also an important biocontrol agent. Our team has developed the P. polymyxa strain HY96-2 from the tomato rhizosphere as the first microbial biopesticide based on P. polymyxa for controlling plant diseases around the world, leading to the commercialization of this microbial biopesticide in China. However, further research is essential for understanding its precise biocontrol mechanisms. In this paper, we report the complete genome sequence of HY96-2 and the results of a comparative genomic analysis between different P. polymyxa strains. The complete genome size of HY96-2 was found to be 5.75 Mb and 5207 coding sequences were predicted. HY96-2 was compared with seven other P. polymyxa strains for which complete genome sequences have been published, using phylogenetic tree, pan-genome, and nucleic acid co-linearity analysis. 
In addition, the genes and gene clusters involved in biofilm formation, antibiotic synthesis, and systemic resistance inducer production were compared between strain HY96-2 and two other strains, namely, SC2 and E681. The results revealed that all three of the P. polymyxa strains have the ability to control plant diseases via the mechanisms of colonization (biofilm formation), antagonism (antibiotic production), and induced resistance (systemic resistance inducer production). However, the variation of the corresponding genes or gene clusters between the three strains may lead to different antimicrobial spectra and biocontrol efficacies. Two possible pathways of biofilm formation in P. polymyxa were reported for the first time after searching the KEGG database. This study provides a scientific basis for the further optimization of the field applications and quality standards of industrial microbial biopesticides based on HY96-2. It may also serve as a reference for studying the differences in antimicrobial spectra and biocontrol capability between different biocontrol agents.}, } @article {pmid30042742, year = {2018}, author = {Aherfi, S and Andreani, J and Baptiste, E and Oumessoum, A and Dornas, FP and Andrade, ACDSP and Chabriere, E and Abrahao, J and Levasseur, A and Raoult, D and La Scola, B and Colson, P}, title = {A Large Open Pangenome and a Small Core Genome for Giant Pandoraviruses.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1486}, pmid = {30042742}, issn = {1664-302X}, abstract = {Giant viruses of amoebae are distinct from classical viruses by the giant size of their virions and genomes. Pandoraviruses are the record holders in size of genomes and number of predicted genes. Three strains, P. salinus, P. dulcis, and P. inopinatum, have been described to date. We isolated three new ones, namely P. massiliensis, P. braziliensis, and P. pampulha, from environmental samples collected in Brazil. 
We describe here their genomes, the transcriptome and proteome of P. massiliensis, and the pangenome of the group encompassing the six pandoravirus isolates. Genome sequencing was performed with an Illumina MiSeq instrument. Genome annotation was performed using GeneMarkS and Prodigal softwares and comparative genomic analyses. The core genome and pangenome were determined using notably ProteinOrtho and CD-HIT programs. Transcriptomics was performed for P. massiliensis with the Illumina MiSeq instrument; proteomics was also performed for this virus using 1D/2D gel electrophoresis and mass spectrometry on a Synapt G2Si Q-TOF traveling wave mobility spectrometer. The genomes of the three new pandoraviruses are comprised between 1.6 and 1.8 Mbp. The genomes of P. massiliensis, P. pampulha, and P. braziliensis were predicted to harbor 1,414, 2,368, and 2,696 genes, respectively. These genes comprise up to 67% of ORFans. Phylogenomic analyses showed that P. massiliensis and P. braziliensis were more closely related to each other than to the other pandoraviruses. The core genome of pandoraviruses comprises 352 clusters of genes, and the ratio core genome/pangenome is less than 0.05. The extinction curve shows clearly that the pangenome is still open. A quarter of the gene content of P. massiliensis was detected by transcriptomics. In addition, a product for a total of 162 open reading frames were found by proteomic analysis of P. massiliensis virions, including notably the products of 28 ORFans, 99 hypothetical proteins, and 90 core genes. 
Further analyses should allow to gain a better knowledge and understanding of the evolution and origin of these giant pandoraviruses, and of their relationships with viruses and cellular microorganisms.}, } @article {pmid30041921, year = {2018}, author = {Alcorta, J and Espinoza, S and Viver, T and Alcamán-Arias, ME and Trefault, N and Rosselló-Móra, R and Díez, B}, title = {Temperature modulates Fischerella thermalis ecotypes in Porcelana Hot Spring.}, journal = {Systematic and applied microbiology}, volume = {41}, number = {6}, pages = {531-543}, doi = {10.1016/j.syapm.2018.05.006}, pmid = {30041921}, issn = {1618-0984}, mesh = {Chile ; Cyanobacteria/*genetics/isolation & purification ; DNA, Bacterial/genetics ; *Ecotype ; Evolution, Molecular ; Hot Springs/*microbiology ; *Hot Temperature ; Metagenome ; Phylogeny ; Proteome/genetics ; RNA, Ribosomal, 16S/genetics ; Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization ; }, abstract = {In the Porcelana Hot Spring (Northern Patagonia), true-branching cyanobacteria are the dominant primary producers in microbial mats, and they are mainly responsible for carbon and nitrogen fixation. However, little is known about their metabolic and genomic adaptations at high temperatures. Therefore, in this study, a total of 81 Fischerella thermalis strains (also known as Mastigocladus laminosus) were isolated from mat samples in a thermal gradient between 61-46°C. The complementary use of proteomic comparisons from these strains, and comparative genomics of F. thermalis pangenomes, suggested that at least two different ecotypes were present within these populations. MALDI-TOF MS analysis separated the strains into three clusters; two with strains obtained from mats within the upper temperature range (61 and 54°C), and a third obtained from mats within the lower temperature range (51 and 46°C). Both groups possessed different but synonymous nifH alleles. 
The main proteomic differences were associated with the abundance of photosynthesis-related proteins. Three F. thermalis metagenome assembled genomes (MAGs) were described from 66, 58 and 48°C metagenomes. These pangenomes indicated a divergence of orthologous genes and a high abundance of exclusive genes at 66°C. These results improved the current understanding of thermal adaptation of F. thermalis and the evolution of these thermophilic cyanobacterial species.}, } @article {pmid30035711, year = {2018}, author = {Fleshman, A and Mullins, K and Sahl, J and Hepp, C and Nieto, N and Wiggins, K and Hornstra, H and Kelly, D and Chan, TC and Phetsouvanh, R and Dittrich, S and Panyanivong, P and Paris, D and Newton, P and Richards, A and Pearson, T}, title = {Comparative pan-genomic analyses of Orientia tsutsugamushi reveal an exceptional model of bacterial evolution driving genomic diversity.}, journal = {Microbial genomics}, volume = {4}, number = {9}, pages = {}, pmid = {30035711}, issn = {2057-5858}, support = {//Wellcome Trust/United Kingdom ; }, mesh = {*Evolution, Molecular ; Gene Duplication ; Gene Transfer, Horizontal ; *Genetic Variation ; *Genome, Bacterial ; Genomics ; Models, Genetic ; Orientia tsutsugamushi/classification/*genetics ; Phylogeny ; Polymorphism, Single Nucleotide ; Recombination, Genetic ; }, abstract = {Orientia tsutsugamushi, formerly Rickettsia tsutsugamushi, is an obligate intracellular pathogen that causes scrub typhus, an underdiagnosed acute febrile disease with high morbidity. Scrub typhus is transmitted by the larval stage (chigger) of Leptotrombidium mites and is irregularly distributed across endemic regions of Asia, Australia and islands of the western Pacific Ocean. Previous work to understand population genetics in O. tsutsugamushi has been based on sub-genomic sampling methods and whole-genome characterization of two genomes. 
In this study, we compared 40 genomes from geographically dispersed areas and confirmed patterns of extensive homologous recombination likely driven by transposons, conjugative elements and repetitive sequences. High rates of lateral gene transfer (LGT) among O. tsutsugamushi genomes appear to have effectively eliminated a detectable clonal frame, but not our ability to infer evolutionary relationships and phylogeographical clustering. Pan-genomic comparisons using 31 082 high-quality bacterial genomes from 253 species suggests that genomic duplication in O. tsutsugamushi is almost unparalleled. Unlike other highly recombinant species where the uptake of exogenous DNA largely drives genomic diversity, the pan-genome of O. tsutsugamushi is driven by duplication and divergence. Extensive gene innovation by duplication is most commonly attributed to plants and animals and, in contrast with LGT, is thought to be only a minor evolutionary mechanism for bacteria. The near unprecedented evolutionary characteristics of O. 
tsutsugamushi, coupled with extensive intra-specific LGT, expand our present understanding of rapid bacterial evolutionary adaptive mechanisms.}, } @article {pmid30033331, year = {2018}, author = {Zhou, Z and Lundstrøm, I and Tran-Dien, A and Duchêne, S and Alikhan, NF and Sergeant, MJ and Langridge, G and Fotakis, AK and Nair, S and Stenøien, HK and Hamre, SS and Casjens, S and Christophersen, A and Quince, C and Thomson, NR and Weill, FX and Ho, SYW and Gilbert, MTP and Achtman, M}, title = {Pan-genome Analysis of Ancient and Modern Salmonella enterica Demonstrates Genomic Stability of the Invasive Para C Lineage for Millennia.}, journal = {Current biology : CB}, volume = {28}, number = {15}, pages = {2420-2428.e10}, pmid = {30033331}, issn = {1879-0445}, support = {BB/L027801/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; MR/M50161X/1/MRC_/Medical Research Council/United Kingdom ; R01 GM114817/GM/NIGMS NIH HHS/United States ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; 202792/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; BB/L020319/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {DNA, Ancient/*analysis ; DNA, Bacterial/*analysis ; Female ; *Genomic Instability ; Genomic Islands ; Humans ; Norway ; Salmonella enterica/*genetics ; Typhoid Fever/*microbiology ; }, abstract = {Salmonella enterica serovar Paratyphi C causes enteric (paratyphoid) fever in humans. Its presentation can range from asymptomatic infections of the blood stream to gastrointestinal or urinary tract infection or even a fatal septicemia [1]. Paratyphi C is very rare in Europe and North America except for occasional travelers from South and East Asia or Africa, where the disease is more common [2, 3]. However, early 20th-century observations in Eastern Europe [3, 4] suggest that Paratyphi C enteric fever may once have had a wide-ranging impact on human societies. 
Here, we describe a draft Paratyphi C genome (Ragna) recovered from the 800-year-old skeleton (SK152) of a young woman in Trondheim, Norway. Paratyphi C sequences were recovered from her teeth and bones, suggesting that she died of enteric fever and demonstrating that these bacteria have long caused invasive salmonellosis in Europeans. Comparative analyses against modern Salmonella genome sequences revealed that Paratyphi C is a clade within the Para C lineage, which also includes serovars Choleraesuis, Typhisuis, and Lomita. Although Paratyphi C only infects humans, Choleraesuis causes septicemia in pigs and boar [5] (and occasionally humans), and Typhisuis causes epidemic swine salmonellosis (chronic paratyphoid) in domestic pigs [2, 3]. These different host specificities likely evolved in Europe over the last ∼4,000 years since the time of their most recent common ancestor (tMRCA) and are possibly associated with the differential acquisitions of two genomic islands, SPI-6 and SPI-7. The tMRCAs of these bacterial clades coincide with the timing of pig domestication in Europe [6].}, } @article {pmid30026808, year = {2018}, author = {Zhong, C and Han, M and Yu, S and Yang, P and Li, H and Ning, K}, title = {Pan-genome analyses of 24 Shewanella strains re-emphasize the diversification of their functions yet evolutionary dynamics of metal-reducing pathway.}, journal = {Biotechnology for biofuels}, volume = {11}, number = {}, pages = {193}, pmid = {30026808}, issn = {1754-6834}, support = {R34 AA021502/AA/NIAAA NIH HHS/United States ; }, abstract = {BACKGROUND: Shewanella strains are important dissimilatory metal-reducing bacteria which are widely distributed in diverse habitats. Despite efforts to genomically characterize Shewanella, knowledge of the molecular components, functional information and evolutionary patterns remain lacking, especially for their compatibility in the metal-reducing pathway. 
The increasing number of genome sequences of Shewanella strains offers a basis for pan-genome studies.

RESULTS: A comparative pan-genome analysis was conducted to study genomic diversity and evolutionary relationships among 24 Shewanella strains. Results revealed an open pan-genome of 13,406 non-redundant genes and a core-genome of 1878 non-redundant genes. Selective pressure acted on the invariant members of core genome, in which purifying selection drove evolution in the housekeeping mechanisms. Shewanella strains exhibited extensive genome variability, with high levels of gene gain and loss during the evolution, which affected variable gene sets and facilitated the rapid evolution. Additionally, genes related to metal reduction were diversely distributed in Shewanella strains and evolved under purifying selection, which highlighted the basic conserved functionality and specificity of respiratory systems.

CONCLUSIONS: The diversity of genes present in the accessory and specific genomes of Shewanella strains indicates that each strain uses different strategies to adapt to diverse environments. Horizontal gene transfer is an important evolutionary force in shaping Shewanella genomes. Purifying selection plays an important role in the stability of the core-genome and also drives evolution in mtr-omc cluster of different Shewanella strains.}, } @article {pmid30014838, year = {2018}, author = {Beyrouthy, R and Barets, M and Marion, E and Dananché, C and Dauwalder, O and Robin, F and Gauthier, L and Jousset, A and Dortet, L and Guérin, F and Bénet, T and Cassier, P and Vanhems, P and Bonnet, R}, title = {Novel Enterobacter Lineage as Leading Cause of Nosocomial Outbreak Involving Carbapenemase-Producing Strains.}, journal = {Emerging infectious diseases}, volume = {24}, number = {8}, pages = {1505-1515}, pmid = {30014838}, issn = {1080-6059}, mesh = {Aged ; Aged, 80 and over ; Anti-Bacterial Agents/*pharmacology ; *Cross Infection ; *Disease Outbreaks ; Drug Resistance, Multiple, Bacterial ; Enterobacter cloacae/drug effects/*enzymology/genetics ; Enterobacteriaceae Infections/*microbiology ; Female ; Gene Expression Regulation, Bacterial ; Gene Expression Regulation, Enzymologic ; Humans ; Male ; Middle Aged ; beta-Lactamases/genetics/*metabolism ; }, abstract = {We investigated unusual carbapenemase-producing Enterobacter cloacae complex isolates (n = 8) in the novel sequence type (ST) 873, which caused nosocomial infections in 2 hospitals in France. Whole-genome sequence typing showed the 1-year persistence of the epidemic strain, which harbored a blaVIM-4 ST1-IncHI2 plasmid, in 1 health institution and 2 closely related strains harboring blaCTX-M-15 in the other. These isolates formed a new subgroup in the E. hormaechei metacluster, according to their hsp60 sequences and phylogenomic analysis. 
The average nucleotide identities, specific biochemical properties, and pangenomic and functional investigations of isolates suggested isolates of a novel species that had acquired genes associated with adhesion and mobility. The emergence of this novel Enterobacter phylogenetic lineage within hospitals should be closely monitored because of its ability to persist and spread.}, } @article {pmid30013519, year = {2018}, author = {Collins, FWJ and Mesa-Pereira, B and O'Connor, PM and Rea, MC and Hill, C and Ross, RP}, title = {Reincarnation of Bacteriocins From the Lactobacillus Pangenomic Graveyard.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1298}, pmid = {30013519}, issn = {1664-302X}, abstract = {Bacteria commonly produce narrow spectrum bacteriocins as a means of inhibiting closely related species competing for similar resources in an environment. The increasing availability of genomic data means that it is becoming easier to identify bacteriocins encoded within genomes. Often, however, the presence of bacteriocin genes in a strain does not always translate into biological antimicrobial activity. For example, when analysing the Lactobacillus pangenome we identified strains encoding ten pediocin-like bacteriocin structural genes which failed to display inhibitory activity. Nine of these bacteriocins were novel whilst one was identified as the previously characterized bacteriocin "penocin A." The composition of these bacteriocin operons varied between strains, often with key components missing which are required for bacteriocin production, such as dedicated bacteriocin transporters and accessory proteins. In an effort to functionally express these bacteriocins, the structural genes for the ten pediocin homologs were cloned alongside the dedicated pediocin PA-1 transporter in both Escherichia coli and Lactobacillus paracasei heterologous hosts. 
Each bacteriocin was cloned with its native leader sequence and as a fusion protein with the pediocin PA-1 leader sequence. Several of these bacteriocins displayed a broader spectrum of inhibition than the original pediocin PA-1. We show how potentially valuable bacteriocins can easily be "reincarnated" from in silico data and produced in vitro despite often lacking the necessary accompanying machinery. Moreover, the study demonstrates how genomic datasets such as the Lactobacillus pangenome harbor a potential "arsenal" of antimicrobial activity with the possibility of being activated when expressed in more genetically amenable hosts.}, } @article {pmid30011247, year = {2018}, author = {Holley, G and Wittler, R and Stoye, J and Hach, F}, title = {Dynamic Alignment-Free and Reference-Free Read Compression.}, journal = {Journal of computational biology : a journal of computational molecular cell biology}, volume = {25}, number = {7}, pages = {825-836}, doi = {10.1089/cmb.2018.0068}, pmid = {30011247}, issn = {1557-8666}, mesh = {Algorithms ; Computational Biology/*methods ; Data Compression ; Genome/genetics ; Genomics/*methods ; High-Throughput Nucleotide Sequencing/*trends ; *Software ; }, abstract = {The advent of high throughput sequencing (HTS) technologies raises a major concern about storage and transmission of data produced by these technologies. In particular, large-scale sequencing projects generate an unprecedented volume of genomic sequences ranging from tens to several thousands of genomes per species. These collections contain highly similar and redundant sequences, also known as pangenomes. The ideal way to represent and transfer pangenomes is through compression. A number of HTS-specific compression tools have been developed to reduce the storage and communication costs of HTS data, yet none of them is designed to process a pangenome. 
In this article, we present dynamic alignment-free and reference-free read compression (DARRC), a new alignment-free and reference-free compression method. It addresses the problem of pangenome compression by encoding the sequences of a pangenome as a guided de Bruijn graph. The novelty of this method is its ability to incrementally update DARRC archives with new genome sequences without full decompression of the archive. DARRC can compress both single-end and paired-end read sequences of any length using all symbols of the IUPAC nucleotide code. On a large Pseudomonas aeruginosa data set, our method outperforms all other tested tools. It provides a 30% compression ratio improvement in single-end mode compared with the best performing state-of-the-art HTS-specific compression method in our experiments.}, } @article {pmid30010394, year = {2019}, author = {Behzadi, P and Ranjbar, R}, title = {DNA microarray technology and bioinformatic web services.}, journal = {Acta microbiologica et immunologica Hungarica}, volume = {66}, number = {1}, pages = {19-30}, doi = {10.1556/030.65.2018.028}, pmid = {30010394}, issn = {1588-2640}, mesh = {Computational Biology/*methods ; *Internet ; Microarray Analysis/*methods ; Oligonucleotide Array Sequence Analysis/*methods ; Oligonucleotide Probes/genetics ; }, abstract = {The pan-genomic microarray technique is used for environmental and/or clinical studies. Although microarray is an accurate and sharp diagnostic tool, the expertized bioinformaticians were able to minimize the outcome biases and maximize the flexibility and accuracy of the technique. The knowledge of bioinformatics plays a key role in association with probe designing and the utilization of correct probe sets and platforms. This technique is divided into two parts as dry lab (in silico studies) and wet lab (in vitro studies). Each part covers the other and are known as complementary divisions. 
In the case of microarray probe designing, a wide range of software, tools, and databases are necessary. Obviously, the application of right databases, software, and tools decreases the probable biases in the outcomes. Due to the importance of suitable probe designing, this article has focused its look onto a variety of online/offline databases, software, and tools.}, } @article {pmid30005805, year = {2018}, author = {Driscoll, CB and Meyer, KA and Šulčius, S and Brown, NM and Dick, GJ and Cao, H and Gasiūnas, G and Timinskas, A and Yin, Y and Landry, ZC and Otten, TG and Davis, TW and Watson, SB and Dreher, TW}, title = {A closely-related clade of globally distributed bloom-forming cyanobacteria within the Nostocales.}, journal = {Harmful algae}, volume = {77}, number = {}, pages = {93-107}, doi = {10.1016/j.hal.2018.05.009}, pmid = {30005805}, issn = {1878-1470}, mesh = {Bacterial Proteins/analysis ; Cyanobacteria/*classification/genetics ; *Genome, Bacterial ; Harmful Algal Bloom ; Phylogeny ; }, abstract = {In order to better understand the relationships among current Nostocales cyanobacterial blooms, eight genomes were sequenced from cultured isolates or from environmental metagenomes of recent planktonic Nostocales blooms. Phylogenomic analysis of publicly available sequences placed the new genomes among a group of 15 genomes from four continents in a distinct ADA clade (Anabaena/Dolichospermum/Aphanizomenon) within the Nostocales. This clade contains four species-level groups, two of which include members with both Anabaena-like and Aphanizomenon flos-aquae-like morphology. The genomes contain many repetitive genetic elements and a sizable pangenome, in which ABC-type transporters are highly represented. Alongside common core genes for photosynthesis, the differentiation of N2-fixing heterocysts, and the uptake and incorporation of the major nutrients P, N and S, we identified several gene pathways in the pangenome that may contribute to niche partitioning. 
Genes for problematic secondary metabolites—cyanotoxins and taste-and-odor compounds—were sporadically present, as were other polyketide synthase (PKS) and nonribosomal peptide synthetase (NRPS) gene clusters. By contrast, genes predicted to encode the ribosomally generated bacteriocin peptides were found in all genomes.}, } @article {pmid29997890, year = {2018}, author = {Subedi, R and Kolodkina, V and Sutcliffe, IC and Simpson-Louredo, L and Hirata, R and Titov, L and Mattos-Guaraldi, AL and Burkovski, A and Sangal, V}, title = {Genomic analyses reveal two distinct lineages of Corynebacterium ulcerans strains.}, journal = {New microbes and new infections}, volume = {25}, number = {}, pages = {7-13}, pmid = {29997890}, issn = {2052-2975}, abstract = {Corynebacterium ulcerans is an important zoonotic pathogen which is causing diphtheria-like disease in humans globally. In this study, the genomes of three recently isolated C. ulcerans strains, 4940, 2590 and BR-AD 2649, respectively from an asymptomatic carrier, a patient with pharyngitis and a canine host, were sequenced to investigate their virulence potential. A comparative analysis was performed including the published genome sequences of 16 other C. ulcerans isolates. C. ulcerans strains belong to two lineages; 13 strains are grouped together in lineage 1, and six strains comprise lineage 2. Consistent with the zoonotic nature of C. ulcerans infections, isolates from both the human and canine hosts clustered in both the lineages. Most of the strains possessed spaDEF and spaBC gene clusters along with the virulence genes cpp, pld, cwlH, nanH, rpfI, tspA and vsp1. The gene encoding Shiga-like toxin was only present in one strain, and 11 strains carried the tox gene encoding the diphtheria-like toxin. However, none of strains 4940, 2590 and BR-AD 2649 carried any toxin genes. 
These strains varied in the number of prophages in their genomes, which suggests that they play an important role in introducing diversity in C. ulcerans. The pan-genomic analyses revealed a variation in the number of membrane-associated and secreted proteins that may contribute to the variation in pathogenicity among different strains.}, } @article {pmid29994032, year = {2019}, author = {Rizzi, R and Cairo, M and Makinen, V and Tomescu, AI and Valenzuela, D}, title = {Hardness of Covering Alignment: Phase Transition in Post-Sequence Genomics.}, journal = {IEEE/ACM transactions on computational biology and bioinformatics}, volume = {16}, number = {1}, pages = {23-30}, doi = {10.1109/TCBB.2018.2831691}, pmid = {29994032}, issn = {1557-9964}, mesh = {Algorithms ; Diploidy ; Genomics/*methods ; Sequence Alignment/*methods ; Sequence Analysis, DNA/methods ; }, abstract = {Covering alignment problems arise from recent developments in genomics; so called pan-genome graphs are replacing reference genomes, and advances in haplotyping enable full content of diploid genomes to be used as basis of sequence analysis. In this paper, we show that the computational complexity will change for natural extensions of alignments to pan-genome representations and to diploid genomes. More broadly, our approach can also be seen as a minimal extension of sequence alignment to labelled directed acyclic graphs (labeled DAGs). Namely, we show that finding a covering alignment of two labeled DAGs is NP-hard even on binary alphabets. A covering alignment asks for two paths R1 (red) and G1 (green) in DAG D1 and two paths R2 (red) and G2 (green) in DAG D2 that cover the nodes of the graphs and maximize the sum of the global alignment scores: as(sp(R1),sp(R2))+as(sp(G1),sp(G2)), where sp(P) is the concatenation of labels on the path P. 
Pair-wise alignment of haplotype sequences forming a diploid chromosome can be converted to a two-path coverable labelled DAG, and then the covering alignment models the similarity of two diploids over arbitrary recombinations. We also give a reduction to the other direction, to show that such a recombination-oblivious diploid alignment is NP-hard on alphabets of size 3.}, } @article {pmid29978435, year = {2018}, author = {Tetz, G and Tetz, V}, title = {Tetz's theory and law of longevity.}, journal = {Theory in biosciences = Theorie in den Biowissenschaften}, volume = {137}, number = {2}, pages = {145-154}, pmid = {29978435}, issn = {1611-7530}, mesh = {Aging/*genetics ; Animals ; DNA/analysis ; Humans ; Longevity/*physiology ; Microbiota ; Models, Theoretical ; Mutation ; Symbiosis/*physiology ; Time Factors ; }, abstract = {Here, we present new theory and law of longevity intended to evaluate fundamental factors that control lifespan. This theory is based on the fact that genes affecting host organism longevity are represented by subpopulations: genes of host eukaryotic cells, commensal microbiota, and non-living genetic elements. Based on Tetz's theory of longevity, we propose that lifespan and aging are defined by the accumulation of alterations over all genes of macroorganism and microbiome and the non-living genetic elements associated with them. Tetz's law of longevity states that longevity is limited by the accumulation of alterations to the limiting value that is not compatible with life. Based on theory and law, we also propose a novel model to calculate several parameters, including the rate of aging and the remaining lifespan of individuals. We suggest that this theory and model have explanatory and predictive potential to eukaryotic organisms, allowing the influence of diseases, medication, and medical procedures to be re-examined in relation to longevity. 
Such estimates also provide a framework to evaluate new fundamental aspects that control aging and lifespan.}, } @article {pmid29975997, year = {2018}, author = {Choi, S and Jin, GD and Park, J and You, I and Kim, EB}, title = {Pan-Genomics of Lactobacillus plantarum Revealed Group-Specific Genomic Profiles without Habitat Association.}, journal = {Journal of microbiology and biotechnology}, volume = {28}, number = {8}, pages = {1352-1359}, doi = {10.4014/jmb.1803.03029}, pmid = {29975997}, issn = {1738-8872}, mesh = {Animals ; Databases, Genetic ; Ecosystem ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; *Genomics ; Lactobacillus plantarum/*classification/*genetics ; Molecular Sequence Annotation ; *Phylogeny ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {Lactobacillus plantarum is a lactic acid bacterium that promotes animal intestinal health as a probiotic and is found in a wide variety of habitats. Here, we investigated the genomic features of different clusters of L. plantarum strains via pan-genomic analysis. We compared the genomes of 108 L. plantarum strains that were available from the NCBI GenBank database. These genomes were 2.9-3.7 Mbp in size and 44-45% in G+C content. A total of 8,847 orthologs were collected, and 1,709 genes were identified to be shared as core genes by all the strains analyzed. On the basis of SNPs from the core genes, 108 strains were clustered into five major groups (G1-G5) that are different from previous reports and are not clearly associated with habitats. Analysis of group-specific enriched or depleted genes revealed that G1 and G2 were rich in genes for carbohydrate utilization (L-arabinose, L-rhamnose, and fructooligosaccharides) and that G3, G4, and G5 possessed more genes for the restriction-modification system and MazEF toxin-antitoxin. These results indicate that there are critical differences in gene content and survival strategies among genetically clustered L. 
plantarum strains, regardless of habitats.}, } @article {pmid29961804, year = {2018}, author = {Lemos Junior, WJF and da Silva Duarte, V and Treu, L and Campanaro, S and Nadai, C and Giacomini, A and Corich, V}, title = {Whole genome comparison of two Starmerella bacillaris strains with other wine yeasts uncovers genes involved in modulating important winemaking traits.}, journal = {FEMS yeast research}, volume = {18}, number = {7}, pages = {}, doi = {10.1093/femsyr/foy069}, pmid = {29961804}, issn = {1567-1364}, mesh = {Biosynthetic Pathways/genetics ; Ethanol/metabolism ; Fermentation/*genetics ; *Genes, Fungal ; Genetic Variation ; Genome, Fungal/*genetics ; Genomics ; Glycerol/metabolism ; Osmoregulation/genetics ; Phenotype ; Phylogeny ; Saccharomyces cerevisiae/genetics ; Saccharomycetales/classification/*genetics/metabolism ; Wine/*microbiology ; }, abstract = {Starmerella bacillaris is an osmotolerant yeast with interesting winemaking traits such as low-ethanol and high-glycerol production, previously considered as wine spoilage and recently proposed to improve the sensory quality of wine. This is the first work performing a whole-genome analysis of the variants identified by comparing two S. bacillaris strains (PAS13 and FRI751). Additionally, an extensive search for orthologous genes against Saccharomyces and non-Saccharomyces yeasts produced a detailed reconstruction of the pan-genome for yeast species used in winemaking. Starmerella bacillaris PAS13 was able to produce 36% more glycerol than S. bacillaris FRI751 without increasing ethanol level over 5% (v/v). Orthologous genes revealed new insights in the response to osmotic stress determined by the mitogen-activated protein kinase (MAPK) from S. bacillaris strains. The comparison between the two S. bacillaris genomes revealed 33 771 high-quality variants that were ranked considering their predicted impact on gene functions. 
Furthermore, analysis of structural variations in the genome revealed five translocations. The absence of some transcriptional factors involved in the regulation of GPD (glycerol-3-phosphate dehydrogenase), like the protein kinases YpK1p and YpK2p, and the identification of a tandem duplication increasing the GPP1 (glycerol-3-phosphate phosphatase) gene copy number suggest a remarkably different regulation of the glycerol pathway for S. bacillaris in comparison to S. cerevisiae.}, } @article {pmid29949970, year = {2018}, author = {Her, HL and Wu, YW}, title = {A pan-genome-based machine learning approach for predicting antimicrobial resistance activities of the Escherichia coli strains.}, journal = {Bioinformatics (Oxford, England)}, volume = {34}, number = {13}, pages = {i89-i95}, pmid = {29949970}, issn = {1367-4811}, mesh = {Anti-Bacterial Agents/*pharmacology ; Drug Resistance, Bacterial/*genetics ; Escherichia coli/drug effects/*genetics ; Genome, Bacterial ; *Machine Learning ; Whole Genome Sequencing/*methods ; }, abstract = {MOTIVATION: Antimicrobial resistance (AMR) is becoming a huge problem in both developed and developing countries, and identifying strains resistant or susceptible to certain antibiotics is essential in fighting against antibiotic-resistant pathogens. Whole-genome sequences have been collected for different microbial strains in order to identify crucial characteristics that allow certain strains to become resistant to antibiotics; however, a global inspection of the gene content responsible for AMR activities remains to be done.

RESULTS: We propose a pan-genome-based approach to characterize antibiotic-resistant microbial strains and test this approach on the bacterial model organism Escherichia coli. By identifying core and accessory gene clusters and predicting AMR genes for the E. coli pan-genome, we not only showed that certain classes of genes are unevenly distributed between the core and accessory parts of the pan-genome but also demonstrated that only a portion of the identified AMR genes belong to the accessory genome. Application of machine learning algorithms to predict whether specific strains were resistant to antibiotic drugs yielded the best prediction accuracy for the set of AMR genes within the accessory part of the pan-genome, suggesting that these gene clusters were most crucial to AMR activities in E. coli. Selecting subsets of AMR genes for different antibiotic drugs based on a genetic algorithm (GA) achieved better prediction performances than the gene sets established in the literature, hinting that the gene sets selected by the GA may warrant further analysis in investigating more details about how E. coli fight against antibiotics.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid29949657, year = {2018}, author = {Mathieu, S and Cusant, L and Roux, C and Corradi, N}, title = {Arbuscular mycorrhizal fungi: intraspecific diversity and pangenomes.}, journal = {The New phytologist}, volume = {220}, number = {4}, pages = {1129-1134}, doi = {10.1111/nph.15275}, pmid = {29949657}, issn = {1469-8137}, support = {//Natural Sciences and Engineering Research Council of Canada/International ; ER13-09-190//Ontario Ministry of Research and Innovation/International ; ANR-10-LABX-41//French Ministry of Higher Education, Research and Innovation (MESRI)/International ; }, mesh = {*Genetic Variation ; *Genome, Fungal ; Mycorrhizae/*genetics ; Phenotype ; Plants/microbiology ; Species Specificity ; }, abstract = {Contents Summary 1129 I. Introduction 1129 II. Intraspecific phenotypic variation and the plant host 1130 III. High inter-isolate genetic diversity in model AMF 1130 IV. Genome diversity within the model AM fungus Rhizophagus irregularis 1131 V. Pangenomes and the future of AMF ecological genomics 1131 Acknowledgements 1133 References 1133 SUMMARY: Arbuscular mycorrhizal fungi (AMF) are ubiquitous plant symbionts with an intriguing population biology. Conspecific AMF strains can vary substantially at the genetic and phenotypic levels, leading to direct and quantifiable variation in plant growth. Recent studies have shown that high intraspecific diversity is very common in AMF, and not only found in model species. Studies have also revealed how the phenotype of conspecific isolates varies depending on the plant host, highlighting the functional relevance of intraspecific phenotypic plasticity for the AMF ecology and mycorrhizal symbiosis. 
Recent work has also demonstrated that conspecific isolates of the model AMF Rhizophagus irregularis harbor large and highly variable pangenomes, highlighting the potential role of intraspecific genome diversity for the ecological adaptation of these symbionts.}, } @article {pmid29946138, year = {2018}, author = {Gemmell, MR and Berry, S and Mukhopadhya, I and Hansen, R and Nielsen, HL and Bajaj-Elliott, M and Nielsen, H and Hold, GL}, title = {Comparative genomics of Campylobacter concisus: Analysis of clinical strains reveals genome diversity and pathogenic potential.}, journal = {Emerging microbes \& infections}, volume = {7}, number = {1}, pages = {116}, pmid = {29946138}, issn = {2222-1751}, mesh = {Campylobacter/*classification/*genetics/isolation & purification/pathogenicity ; Campylobacter Infections/*microbiology ; Cluster Analysis ; Computational Biology/methods ; Feces/microbiology ; *Genetic Variation ; *Genome, Bacterial ; *Genomics/methods ; Host-Pathogen Interactions ; Humans ; Molecular Sequence Annotation ; Phenotype ; Phylogeny ; Reproducibility of Results ; Virulence ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {In recent years, an increasing number of Campylobacter species have been associated with human gastrointestinal (GI) diseases including gastroenteritis, inflammatory bowel disease, and colorectal cancer. Campylobacter concisus, an oral commensal historically linked to gingivitis and periodontitis, has been increasingly detected in the lower GI tract. In the present study, we generated robust genome sequence data from C. concisus strains and undertook a comprehensive pangenome assessment to identify C. concisus virulence properties and to explain potential adaptations acquired while residing in specific ecological niche(s) of the GI tract. Genomes of 53 new C. 
concisus strains were sequenced, assembled, and annotated including 36 strains from gastroenteritis patients, 13 strains from Crohn's disease patients and four strains from colitis patients (three collagenous colitis and one lymphocytic colitis). When compared with previous published sequences, strains clustered into two main groups/genomospecies (GS) with phylogenetic clustering explained neither by disease phenotype nor sample location. Paired oral/faecal isolates, from the same patient, indicated that there are few genetic differences between oral and gut isolates which suggests that gut isolates most likely reflect oral strain relocation. Type IV and VI secretion systems genes, genes known to be important for pathogenicity in the Campylobacter genus, were present in the genomes assemblies, with 82% containing Type VI secretion system genes. Our findings indicate that C. concisus strains are genetically diverse, and the variability in bacterial secretion system content may play an important role in their virulence potential.}, } @article {pmid29945570, year = {2018}, author = {Clarke, TH and Brinkac, LM and Inman, JM and Sutton, G and Fouts, DE}, title = {PanACEA: a bioinformatics tool for the exploration and visualization of bacterial pan-chromosomes.}, journal = {BMC bioinformatics}, volume = {19}, number = {1}, pages = {246}, pmid = {29945570}, issn = {1471-2105}, support = {U19 AI110819/AI/NIAID NIH HHS/United States ; }, mesh = {Chromosomes/*genetics ; Computational Biology/*methods ; Genomics/*methods ; Humans ; }, abstract = {BACKGROUND: Bacterial pan-genomes, comprised of conserved and variable genes across multiple sequenced bacterial genomes, allow for identification of genomic regions that are phylogenetically discriminating or functionally important. Pan-genomes consist of large amounts of data, which can restrict researchers ability to locate and analyze these regions. 
Multiple software packages are available to visualize pan-genomes, but currently their ability to address these concerns are limited by using only pre-computed data sets, prioritizing core over variable gene clusters, or by not accounting for pan-chromosome positioning in the viewer.

RESULTS: We introduce PanACEA (Pan-genome Atlas with Chromosome Explorer and Analyzer), which utilizes locally-computed interactive web-pages to view ordered pan-genome data. It consists of multi-tiered, hierarchical display pages that extend from pan-chromosomes to both core and variable regions to single genes. Regions and genes are functionally annotated to allow for rapid searching and visual identification of regions of interest with the option that user-supplied genomic phylogenies and metadata can be incorporated. PanACEA's memory and time requirements are within the capacities of standard laptops. The capability of PanACEA as a research tool is demonstrated by highlighting a variable region important in differentiating strains of Enterobacter hormaechei.

CONCLUSIONS: PanACEA can rapidly translate the results of pan-chromosome programs into an intuitive and interactive visual representation. It will empower researchers to visually explore and identify regions of the pan-chromosome that are most biologically interesting, and to obtain publication quality images of these regions.}, } @article {pmid29942087, year = {2018}, author = {Veras, A and Araujo, F and Pinheiro, K and Guimarães, L and Azevedo, V and Soares, S and da Costa da Silva, A and Ramos, R}, title = {Pan4Draft: A Computational Tool to Improve the Accuracy of Pan-Genomic Analysis Using Draft Genomes.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {9670}, pmid = {29942087}, issn = {2045-2322}, mesh = {Databases, Genetic ; Genomics/*methods/standards ; Molecular Sequence Annotation ; Reference Standards ; *Software ; User-Computer Interface ; }, abstract = {High-throughput sequencing technologies are a milestone in molecular biology for facilitating great advances in genomics by enabling the deposit of large volumes of biological data to public databases. The availability of such data has made possible the comparative genomic analysis through pipelines, using the entire gene repertoire of genomes. However, a large number of unfinished genomes exist in public databases; their number is approximately 16-fold higher than the number of complete genomes, which creates bias during comparative analyses. Therefore, the present work proposes a new tool called Pan4Drafts, an automated pipeline for pan-genomic analysis of draft prokaryotic genomes to maximize the representation and accuracy of the gene repertoire of unfinished genomes by using reads from sequencing data. Pan4Draft allows to perform comparative analyses using different methodologies such as combining complete and draft genomes, using only draft genomes or only complete genomes. 
Pan4Draft is available at http://www.computationalbiology.ufpa.br/pan4drafts and the test dataset is available at https://sourceforge.net/projects/pan4drafts .}, } @article {pmid29940840, year = {2018}, author = {Matey-Hernandez, ML and {Danish Pan Genome Consortium} and Brunak, S and Izarzugaza, JMG}, title = {Benchmarking the HLA typing performance of Polysolver and Optitype in 50 Danish parental trios.}, journal = {BMC bioinformatics}, volume = {19}, number = {1}, pages = {239}, pmid = {29940840}, issn = {1471-2105}, mesh = {Benchmarking/*methods ; Family ; Genomics/*methods ; Genotyping Techniques/*methods ; HLA Antigens/*genetics ; Histocompatibility Testing ; Humans ; Parents ; Sweden ; }, abstract = {BACKGROUND: The adaptive immune response intrinsically depends on hypervariable human leukocyte antigen (HLA) genes. Concomitantly, correct HLA phenotyping is crucial for successful donor-patient matching in organ transplantation. The cost and technical limitations of current laboratory techniques, together with advances in next-generation sequencing (NGS) methodologies, have increased the need for precise computational typing methods.

RESULTS: We tested two widespread HLA typing methods using high quality full genome sequencing data from 150 individuals in 50 family trios from the Genome Denmark project. First, we computed descendant accuracies assessing the agreement in the inheritance of alleles from parents to offspring. Second, we compared the locus-specific homozygosity rates as well as the allele frequencies; and we compared those to the observed values in related populations. We provide guidelines for testing the accuracy of HLA typing methods by comparing family information, which is independent of the availability of curated alleles.

CONCLUSIONS: Although current computational methods for HLA typing generally provide satisfactory results, our benchmark - using data with ultra-high sequencing depth - demonstrates the incompleteness of current reference databases, and highlights the importance of providing genomic databases addressing current sequencing standards, a problem yet to be resolved before benefiting fully from personalised medicine approaches HLA phenotyping is essential.}, } @article {pmid29939210, year = {2018}, author = {Cislak, A and Grabowski, S and Holub, J}, title = {SOPanG: online text searching over a pan-genome.}, journal = {Bioinformatics (Oxford, England)}, volume = {34}, number = {24}, pages = {4290-4292}, doi = {10.1093/bioinformatics/bty506}, pmid = {29939210}, issn = {1367-4811}, mesh = {Algorithms ; *Genome/genetics ; *Genomics/methods ; Information Storage and Retrieval ; Internet ; *Software/standards ; }, abstract = {MOTIVATION: The many thousands of high-quality genomes available now-a-days imply a shift from single genome to pan-genomic analyses. A basic algorithmic building brick for such a scenario is online search over a collection of similar texts, a problem with surprisingly few solutions presented so far.

RESULTS: We present SOPanG, a simple tool for exact pattern matching over an elastic-degenerate string, a recently proposed simplified model for the pan-genome. Thanks to bit-parallelism, it achieves pattern matching speeds above 400 MB/s, more than an order of magnitude higher than of other software.

SOPanG is available for free from: https://github.com/MrAlexSee/sopang.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid29937764, year = {2018}, author = {Zhang, X and Liu, Z and Wei, G and Yang, F and Liu, X}, title = {In Silico Genome-Wide Analysis Reveals the Potential Links Between Core Genome of Acidithiobacillus thiooxidans and Its Autotrophic Lifestyle.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1255}, pmid = {29937764}, issn = {1664-302X}, abstract = {The coinage "pan-genome" was first introduced dating back to 2005, and was used to elaborate the entire gene repertoire of any given species. Core genome consists of genes shared by all bacterial strains studied and is considered to encode essential functions associated with species' basic biology and phenotypes, yet its relatedness with bacterial lifestyle of the species remains elusive. We performed the pan-genome analysis of sulfur-oxidizing acidophile Acidithiobacillus thiooxidans as a case study to highlight species' core genome and its relevance with autotrophic lifestyle of bacterial species. The mathematical modeling based on bacterial genomes of A. thiooxidans species, including a novel strain ZBY isolated from Zambian copper mine plus eight other recognized strains, was attempted to extrapolate the expansion of its pan-genome, suggesting that A. thiooxidans pan-genome is closed. Further investigation revealed a common set of genes, many of which were assigned to metabolic profiles, notably with respect to energy metabolism, amino acid metabolism, and carbohydrate metabolism. The predicted metabolic profiles of A. thiooxidans were characterized by the fixation of inorganic carbon, assimilation of nitrogen compounds, and aerobic oxidation of various sulfur species. Notably, several hydrogenase (H2ase)-like genes dispersed in core genome might represent the novel classes due to the potential functional disparities, despite being closely related homologous genes that code for H2ase. 
Overall, the findings shed light on the distinguishing features of A. thiooxidans genomes on a global scale, and extend the understanding of its conserved core genome pertaining to autotrophic lifestyle.}, } @article {pmid29925429, year = {2018}, author = {Tschitschko, B and Erdmann, S and DeMaere, MZ and Roux, S and Panwar, P and Allen, MA and Williams, TJ and Brazendale, S and Hancock, AM and Eloe-Fadrosh, EA and Cavicchioli, R}, title = {Genomic variation and biogeography of Antarctic haloarchaea.}, journal = {Microbiome}, volume = {6}, number = {1}, pages = {113}, pmid = {29925429}, issn = {2049-2618}, mesh = {Antarctic Regions ; Archaeal Viruses/*genetics/isolation & purification ; Base Sequence ; Genetic Variation/genetics ; Genome, Archaeal/*genetics ; Genomic Islands/genetics ; Geography ; Halorubrum/classification/*genetics/isolation & purification ; Lakes/microbiology ; Metagenome/genetics ; Microbiota/*genetics ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: The genomes of halophilic archaea (haloarchaea) often comprise multiple replicons. Genomic variation in haloarchaea has been linked to viral infection pressure and, in the case of Antarctic communities, can be caused by intergenera gene exchange. To expand understanding of genome variation and biogeography of Antarctic haloarchaea, here we assessed genomic variation between two strains of Halorubrum lacusprofundi that were isolated from Antarctic hypersaline lakes from different regions (Vestfold Hills and Rauer Islands). To assess variation in haloarchaeal populations, including the presence of genomic islands, metagenomes from six hypersaline Antarctic lakes were characterised.

RESULTS: The sequence of the largest replicon of each Hrr. lacusprofundi strain (primary replicon) was highly conserved, while each of the strains' two smaller replicons (secondary replicons) were highly variable. Intergenera gene exchange was identified, including the sharing of a type I-B CRISPR system. Evaluation of infectivity of an Antarctic halovirus provided experimental evidence for the differential susceptibility of the strains, bolstering inferences that strain variation is important for modulating interactions with viruses. A relationship was found between genomic structuring and the location of variation within replicons and genomic islands, demonstrating that the way in which haloarchaea accommodate genomic variability relates to replicon structuring. Metagenome read and contig mapping and clustering and scaling analyses demonstrated biogeographical patterning of variation consistent with environment and distance effects. The metagenome data also demonstrated that specific haloarchaeal species dominated the hypersaline systems indicating they are endemic to Antarctica.

CONCLUSION: The study describes how genomic variation manifests in Antarctic-lake haloarchaeal communities and provides the basis for future assessments of Antarctic regional and global biogeography of haloarchaea.}, } @article {pmid29915568, year = {2018}, author = {Yu, J and Zhao, J and Song, Y and Zhang, J and Yu, Z and Zhang, H and Sun, Z}, title = {Comparative Genomics of the Herbivore Gut Symbiont Lactobacillus reuteri Reveals Genetic Diversity and Lifestyle Adaptation.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1151}, pmid = {29915568}, issn = {1664-302X}, abstract = {Lactobacillus reuteri is a catalase-negative, Gram-positive, non-motile, obligately heterofermentative bacterial species that has been used as a model to describe the ecology and evolution of vertebrate gut symbionts. However, the genetic features and evolutionary strategies of L. reuteri from the gastrointestinal tract of herbivores remain unknown. Therefore, 16 L. reuteri strains isolated from goat, sheep, cow, and horse in Inner Mongolia, China were sequenced in this study. A comparative genomic approach was used to assess genetic diversity and gain insight into the distinguishing features related to the different hosts based on 21 published genomic sequences. Genome size, G + C content, and average nucleotide identity values of the L. reuteri strains from different hosts indicated that the strains have broad genetic diversity. The pan-genome of 37 L. reuteri strains contained 8,680 gene families, and the core genome contained 726 gene families. A total of 92,270 nucleotide mutation sites were discovered among 37 L. reuteri strains, and all core genes displayed a Ka/Ks ratio much lower than 1, suggesting strong purifying selective pressure (negative selection). 
A highly robust maximum likelihood tree based on the core genes shown in the herbivore isolates were divided into three clades; clades A and B contained most of the herbivore isolates and were more closely related to human isolates and vastly distinct from clade C. Some functional genes may be attributable to host-specific of the herbivore, omnivore, and sourdough groups. Moreover, the numbers of genes encoding cell surface proteins and active carbohydrate enzymes were host-specific. This study provides new insight into the adaptation of L. reuteri to the intestinal habitat of herbivores, suggesting that the genomic diversity of L. reuteri from different ecological origins is closely associated with their living environment.}, } @article {pmid29915429, year = {2018}, author = {Sibbesen, JA and Maretty, L and {Danish Pan-Genome Consortium} and Krogh, A}, title = {Accurate genotyping across variant classes and lengths using variant graphs.}, journal = {Nature genetics}, volume = {50}, number = {7}, pages = {1054-1059}, doi = {10.1038/s41588-018-0145-5}, pmid = {29915429}, issn = {1546-1718}, mesh = {Genetic Variation/*genetics ; Genome, Human/*genetics ; Genotype ; High-Throughput Nucleotide Sequencing/methods ; Humans ; Sequence Analysis, DNA/methods ; }, abstract = {Genotype estimates from short-read sequencing data are typically based on the alignment of reads to a linear reference, but reads originating from more complex variants (for example, structural variants) often align poorly, resulting in biased genotype estimates. This bias can be mitigated by first collecting a set of candidate variants across discovery methods, individuals and databases, and then realigning the reads to the variants and reference simultaneously. However, this realignment problem has proved computationally difficult. 
Here, we present a new method (BayesTyper) that uses exact alignment of read k-mers to a graph representation of the reference and variants to efficiently perform unbiased, probabilistic genotyping across the variation spectrum. We demonstrate that BayesTyper generally provides superior variant sensitivity and genotyping accuracy relative to existing methods when used to integrate variants across discovery approaches and individuals. Finally, we demonstrate that including a 'variation-prior' database containing already known variants significantly improves sensitivity.}, } @article {pmid29915111, year = {2018}, author = {Kawasaki, M and Delamare-Deboutteville, J and Bowater, RO and Walker, MJ and Beatson, S and Ben Zakour, NL and Barnes, AC}, title = {Microevolution of Streptococcus agalactiae ST-261 from Australia Indicates Dissemination via Imported Tilapia and Ongoing Adaptation to Marine Hosts or Environment.}, journal = {Applied and environmental microbiology}, volume = {84}, number = {16}, pages = {}, pmid = {29915111}, issn = {1098-5336}, mesh = {Acclimatization ; Animals ; Aquaculture ; Communicable Diseases, Imported/microbiology/*veterinary ; *Evolution, Molecular ; Fish Diseases/microbiology/*transmission ; Food Microbiology ; Genome, Bacterial ; Genotype ; Marine Biology ; Phylogeny ; Polymorphism, Single Nucleotide ; Queensland ; Serogroup ; Streptococcal Infections/microbiology/transmission/*veterinary ; Streptococcus agalactiae/*genetics/isolation & purification/pathogenicity ; Tilapia/*microbiology ; Virulence ; Virulence Factors ; }, abstract = {Streptococcus agalactiae (group B Streptococcus [GBS]) causes disease in a wide range of animals. The serotype Ib lineage is highly adapted to aquatic hosts, exhibiting substantial genome reduction compared with terrestrial conspecifics. 
Here, we sequence genomes from 40 GBS isolates, including 25 isolates from wild fish and captive stingrays in Australia, six local veterinary or human clinical isolates, and nine isolates from farmed tilapia in Honduras, and compared them with 42 genomes from public databases. Phylogenetic analysis based on nonrecombinant core-genome single nucleotide polymorphisms (SNPs) indicated that aquatic serotype Ib isolates from Queensland were distantly related to local veterinary and human clinical isolates. In contrast, Australian aquatic isolates are most closely related to a tilapia isolate from Israel, differing by only 63 core-genome SNPs. A consensus minimum spanning tree based on core-genome SNPs indicates the dissemination of sequence type 261 (ST-261) from an ancestral tilapia strain, which is congruent with several introductions of tilapia into Australia from Israel during the 1970s and 1980s. Pangenome analysis identified 1,440 genes as core, with the majority being dispensable or strain specific, with non-protein-coding intergenic regions (IGRs) divided among core and strain-specific genes. Aquatic serotype Ib strains have lost many virulence factors during adaptation, but six adhesins were well conserved across the aquatic isolates and might be critical for virulence in fish and for targets in vaccine development. The close relationship among recent ST-261 isolates from Ghana, the United States, and China with the Israeli tilapia isolate from 1988 implicates the global trade in tilapia seed for aquaculture in the widespread dissemination of serotype Ib fish-adapted GBS.

IMPORTANCE: Streptococcus agalactiae (GBS) is a significant pathogen of humans and animals. Some lineages have become adapted to particular hosts, and serotype Ib is highly specialized to fish. 
Here, we show that this lineage is likely to have been distributed widely by the global trade in tilapia for aquaculture, with probable introduction into Australia in the 1970s and subsequent dissemination in wild fish populations. We report here the variability in the polysaccharide capsule among this lineage but identify a cohort of common surface proteins that may be a focus of future vaccine development to reduce the biosecurity risk in international fish trade.}, } @article {pmid29913357, year = {2018}, author = {Badhan, S and Kole, P and Ball, A and Mantri, N}, title = {RNA sequencing of leaf tissues from two contrasting chickpea genotypes reveals mechanisms for drought tolerance.}, journal = {Plant physiology and biochemistry : PPB}, volume = {129}, number = {}, pages = {295-304}, doi = {10.1016/j.plaphy.2018.06.007}, pmid = {29913357}, issn = {1873-2690}, mesh = {Cicer/*genetics/physiology ; Dehydration ; Genes, Plant/genetics/physiology ; Genotype ; Oxidation-Reduction ; Photosynthesis ; Plant Growth Regulators/physiology ; Plant Leaves/*genetics/physiology ; Plant Stomata/physiology ; RNA, Plant/*genetics/physiology ; Real-Time Polymerase Chain Reaction ; Sequence Analysis, RNA ; Transcriptome ; }, abstract = {Chickpea (Cicer arietinum L.) is the second most important winter crop which is consumed globally due to its high nutritional value. Chickpea as one of the leguminous crop is important in crop rotation with cereal crops like wheat and barley. The main constraints for chickpea production are abiotic stresses such as drought, salinity, and heat. Among these, drought is a major cause of the decline in chickpea production in worldwide. Studies conducted so far have provided a limited insight into different genetic pathways associated with drought tolerance/response. 
In this study, the leaf tissue from shoots apical meristem stage of drought tolerant (ICC8261) and drought sensitive (ICC283) genotypes were analysed using RNA sequencing to identify genes/pathways associated with drought tolerance/sensitivity in both genotypes. It was observed that genes related to ethylene response, MYB-related protein, xyloglucan endotransglycosylase, alkane hydroxylase MAH-like, BON-1 associated, peroxidase 3, cysteine-rich and transmembrane domain, vignain and mitochondrial uncoupling were specifically up-regulated in the tolerant genotype whereas, same genes were down-regulated in sensitive genotype. The crosstalk between the different hormones and transcriptional factors involved in drought tolerance and sensitivity in both genotypes make them great candidates for future research.}, } @article {pmid29905870, year = {2018}, author = {Rodriguez-R, LM and Gunturu, S and Harvey, WT and Rosselló-Mora, R and Tiedje, JM and Cole, JR and Konstantinidis, KT}, title = {The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene diversity analysis of Archaea and Bacteria at the whole genome level.}, journal = {Nucleic acids research}, volume = {46}, number = {W1}, pages = {W282-W288}, pmid = {29905870}, issn = {1362-4962}, mesh = {Classification ; Genetic Variation/genetics ; Genome, Archaeal/genetics ; Genome, Bacterial/genetics ; *Genomics ; *Internet ; Phylogeny ; RNA, Ribosomal, 16S/*genetics ; *Software ; }, abstract = {The small subunit ribosomal RNA gene (16S rRNA) has been successfully used to catalogue and study the diversity of prokaryotic species and communities but it offers limited resolution at the species and finer levels, and cannot represent the whole-genome diversity and fluidity. 
To overcome these limitations, we introduced the Microbial Genomes Atlas (MiGA), a webserver that allows the classification of an unknown query genomic sequence, complete or partial, against all taxonomically classified taxa with available genome sequences, as well as comparisons to other related genomes including uncultivated ones, based on the genome-aggregate Average Nucleotide and Amino Acid Identity (ANI/AAI) concepts. MiGA integrates best practices in sequence quality trimming and assembly and allows input to be raw reads or assemblies from isolate genomes, single-cell sequences, and metagenome-assembled genomes (MAGs). Further, MiGA can take as input hundreds of closely related genomes of the same or closely related species (a so-called 'Clade Project') to assess their gene content diversity and evolutionary relationships, and calculate important clade properties such as the pangenome and core gene sets. Therefore, MiGA is expected to facilitate a range of genome-based taxonomic and diversity studies, and quality assessment across environmental and clinical settings. MiGA is available at http://microbial-genomes.org/.}, } @article {pmid29895899, year = {2018}, author = {Mahfouz, N and Caucci, S and Achatz, E and Semmler, T and Guenther, S and Berendonk, TU and Schroeder, M}, title = {High genomic diversity of multi-drug resistant wastewater Escherichia coli.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {8928}, pmid = {29895899}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacteria/classification/drug effects/genetics ; Drug Resistance, Multiple, Bacterial/drug effects/*genetics ; Escherichia coli/drug effects/*genetics ; *Genetic Variation ; Genome, Bacterial/*genetics ; Genomics/methods ; Germany ; Microbial Sensitivity Tests/methods ; Waste Disposal, Fluid/methods ; Wastewater/*microbiology ; }, abstract = {Wastewater treatment plants play an important role in the emergence of antibiotic resistance. 
They provide a hot spot for exchange of resistance within and between species. Here, we analyse and quantify the genomic diversity of the indicator Escherichia coli in a German wastewater treatment plant and we relate it to isolates' antibiotic resistance. Our results show a surprisingly large pan-genome, which mirrors how rich an environment a treatment plant is. We link the genomic analysis to a phenotypic resistance screen and pinpoint genomic hot spots, which correlate with a resistance phenotype. Besides well-known resistance genes, this forward genomics approach generates many novel genes, which correlated with resistance and which are partly completely unknown. A surprising overall finding of our analyses is that we do not see any difference in resistance and pan genome size between isolates taken from the inflow of the treatment plant and from the outflow. This means that while treatment plants reduce the amount of bacteria released into the environment, they do not reduce the potential for antibiotic resistance of these bacteria.}, } @article {pmid29892946, year = {2018}, author = {Li, X and Huang, X and Chen, G and Zou, L and Wei, L and Hua, J}, title = {Complete genome sequence of the sesame pathogen Ralstonia solanacearum strain SEPPX 05.}, journal = {Genes \& genomics}, volume = {40}, number = {6}, pages = {657-668}, pmid = {29892946}, issn = {2092-9293}, support = {31360428//National Natural Science Foundation of China/International ; 20121BBF60015//Key Technology Research and Development Program Jiangxi Proveince/International ; 20142C13S006//Innovation Fund for the Doctoral Program of Jiangxi Academy of Agricultural Sciences/International ; CARS-14//National Industry Technical System of Secondary Centre of Oil Crops/International ; }, mesh = {Bacterial Proteins/genetics ; Base Composition/genetics ; Base Sequence/genetics ; Genome, Bacterial/genetics ; Genomics/methods ; Phylogeny ; Plant Diseases/microbiology ; Ralstonia/genetics ; Ralstonia 
solanacearum/classification/*genetics ; Sequence Analysis, DNA/methods ; Sesamum/microbiology ; Virulence/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing/methods ; }, abstract = {Ralstonia solanacearum is a soil-borne phytopathogen associated with bacterial wilt disease of sesame. R. solanacearum is the predominant agent causing damping-off from tropical to temperate regions. Because bacterial wilt has decreased the sesame industry yield, we sequenced the SEPPX05 genome using PacBio and Illumina HiSeq 2500 systems and revealed that R. solanacearum strain SEPPX05 carries a bipartite genome consisting of a 3,930,849 bp chromosome and a 2,066,085 bp megaplasmid with 66.84% G+C content that harbors 5,427 coding sequences. Based on the whole genome, phylogenetic analysis showed that strain SEPPX05 is grouped with two phylotype I strains (EP1 and GMI1000). Pan-genomic analysis shows that R. solanacearum is a complex species with high biological diversity and was able to colonize various environments during evolution. Despite deletions, insertions, and inversions, most genes of strain SEPPX05 have relatively high levels of synteny compared with strain GMI1000. We identified 104 genes involved in virulence-related factors in the SEPPX05 genome and eight absent genes encoding T3Es of GMI1000. Comparing SEPPX05 with other species, we found highly conserved secretion systems central to modulating interactions of host bacteria. These data may provide important clues for understanding underlying pathogenic mechanisms of R. 
solanacearum and help in the control of sesame bacterial wilt.}, } @article {pmid29891839, year = {2018}, author = {Legendre, M and Fabre, E and Poirot, O and Jeudy, S and Lartigue, A and Alempic, JM and Beucher, L and Philippe, N and Bertaux, L and Christo-Foroux, E and Labadie, K and Couté, Y and Abergel, C and Claverie, JM}, title = {Diversity and evolution of the emerging Pandoraviridae family.}, journal = {Nature communications}, volume = {9}, number = {1}, pages = {2285}, pmid = {29891839}, issn = {2041-1723}, mesh = {Acanthamoeba/*virology ; DNA Viruses/*classification/*genetics/physiology ; DNA, Viral/genetics ; Environmental Microbiology ; Evolution, Molecular ; Gene Duplication ; Gene Transfer, Horizontal ; Genetic Variation ; Genome, Viral ; Molecular Sequence Annotation ; Phylogeny ; Proteomics ; Sequence Analysis, DNA ; Virion/ultrastructure ; Virus Replication ; }, abstract = {With DNA genomes reaching 2.5 Mb packed in particles of bacterium-like shape and dimension, the first two Acanthamoeba-infecting pandoraviruses remained up to now the most complex viruses since their discovery in 2013. Our isolation of three new strains from distant locations and environments is now used to perform the first comparative genomics analysis of the emerging worldwide-distributed Pandoraviridae family. Thorough annotation of the genomes combining transcriptomic, proteomic, and bioinformatic analyses reveals many non-coding transcripts and significantly reduces the former set of predicted protein-coding genes. Here we show that the pandoraviruses exhibit an open pan-genome, the enormous size of which is not adequately explained by gene duplications or horizontal transfers. 
As most of the strain-specific genes have no extant homolog and exhibit statistical features comparable to intergenic regions, we suggest that de novo gene creation could contribute to the evolution of the giant pandoravirus genomes.}, } @article {pmid29890970, year = {2018}, author = {Fang, X and Monk, JM and Mih, N and Du, B and Sastry, AV and Kavvas, E and Seif, Y and Smarr, L and Palsson, BO}, title = {Escherichia coli B2 strains prevalent in inflammatory bowel disease patients have distinct metabolic capabilities that enable colonization of intestinal mucosa.}, journal = {BMC systems biology}, volume = {12}, number = {1}, pages = {66}, pmid = {29890970}, issn = {1752-0509}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Escherichia coli/genetics/*metabolism/physiology ; Genomics ; Humans ; Inflammatory Bowel Diseases/*microbiology ; Intestinal Mucosa/*microbiology ; }, abstract = {BACKGROUND: Escherichia coli is considered a leading bacterial trigger of inflammatory bowel disease (IBD). E. coli isolates from IBD patients primarily belong to phylogroup B2. Previous studies have focused on broad comparative genomic analysis of E. coli B2 isolates, and identified virulence factors that allow B2 strains to reside within human intestinal mucosa. Metabolic capabilities of E. coli strains have been shown to be related to their colonization site, but remain unexplored in IBD-associated strains.

RESULTS: In this study, we utilized pan-genome analysis and genome-scale models (GEMs) of metabolism to study metabolic capabilities of IBD-associated E. coli B2 strains. The study yielded three results: i) Pan-genome analysis of 110 E. coli strains (including 53 isolates from IBD studies) revealed discriminating metabolic genes between B2 strains and other strains; ii) Both comparative genomic analysis and GEMs suggested that B2 strains have an advantage in degrading and utilizing sugars derived from mucus glycan, and iii) GEMs revealed distinct metabolic features in B2 strains that potentially allow them to utilize energy more efficiently. For example, B2 strains lack the enzymes to degrade amadori products, but instead rely on neighboring bacteria to convert these substrates into a more readily usable and potentially less sought after product.

CONCLUSIONS: Taken together, these results suggest that the metabolic capabilities of B2 strains vary significantly from those of other strains, enabling B2 strains to colonize intestinal mucosa. The results from this study motivate a broad experimental assessment of the nutritional effects on E. coli B2 pathophysiology in IBD patients.}, } @article {pmid29887853, year = {2018}, author = {Wilkinson, TJ and Huws, SA and Edwards, JE and Kingston-Smith, AH and Siu-Ting, K and Hughes, M and Rubino, F and Friedersdorff, M and Creevey, CJ}, title = {CowPI: A Rumen Microbiome Focussed Version of the PICRUSt Functional Inference Software.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {1095}, pmid = {29887853}, issn = {1664-302X}, abstract = {Metataxonomic 16S rDNA based studies are a commonplace and useful tool in the research of the microbiome, but they do not provide the full investigative power of metagenomics and metatranscriptomics for revealing the functional potential of microbial communities. However, the use of metagenomic and metatranscriptomic technologies is hindered by high costs and skills barrier necessary to generate and interpret the data. To address this, a tool for Phylogenetic Investigation of Communities by Reconstruction of Unobserved States (PICRUSt) was developed for inferring the functional potential of an observed microbiome profile, based on 16S data. This allows functional inferences to be made from metataxonomic 16S rDNA studies with little extra work or cost, but its accuracy relies on the availability of completely sequenced genomes of representative organisms from the community being investigated. The rumen microbiome is an example of a community traditionally underrepresented in genome and sequence databases, but recent efforts by projects such as the Global Rumen Census and Hungate 1000 have resulted in a wide sampling of 16S rDNA profiles and almost 500 fully sequenced microbial genomes from this environment. 
Using this information, we have developed "CowPI," a focused version of the PICRUSt tool provided for use by the wider scientific community in the study of the rumen microbiome. We evaluated the accuracy of CowPI and PICRUSt using two 16S datasets from the rumen microbiome: one generated from rDNA and the other from rRNA where corresponding metagenomic and metatranscriptomic data was also available. We show that the functional profiles predicted by CowPI better match estimates for both the meta-genomic and transcriptomic datasets than PICRUSt, and capture the higher degree of genetic variation and larger pangenomes of rumen organisms. Nonetheless, whilst being closer in terms of predictive power for the rumen microbiome, there were differences when compared to both the metagenomic and metatranscriptome data and so we recommend, where possible, functional inferences from 16S data should not replace metagenomic and metatranscriptomic approaches. The tool can be accessed at http://www.cowpi.org and is provided to the wider scientific community for use in the study of the rumen microbiome.}, } @article {pmid29867869, year = {2018}, author = {Eldarov, MA and Beletsky, AV and Tanashchuk, TN and Kishkovskaya, SA and Ravin, NV and Mardanov, AV}, title = {Whole-Genome Analysis of Three Yeast Strains Used for Production of Sherry-Like Wines Revealed Genetic Traits Specific to Flor Yeasts.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {965}, pmid = {29867869}, issn = {1664-302X}, abstract = {Flor yeast strains represent a specialized group of Saccharomyces cerevisiae yeasts used for biological wine aging. We have sequenced the genomes of three flor strains originated from different geographic regions and used for production of sherry-like wines in Russia. According to the obtained phylogeny of 118 yeast strains, flor strains form very tight cluster adjacent to the main wine clade. 
SNP analysis versus available genomes of wine and flor strains revealed 2,270 genetic variants in 1,337 loci specific to flor strains. Gene ontology analysis in combination with gene content evaluation revealed a complex landscape of possibly adaptive genetic changes in flor yeast, related to genes associated with cell morphology, mitotic cell cycle, ion homeostasis, DNA repair, carbohydrate metabolism, lipid metabolism, and cell wall biogenesis. Pangenomic analysis discovered the presence of several well-known "non-reference" loci of potential industrial importance. Events of gene loss included deletions of asparaginase genes, maltose utilization locus, and FRE-FIT locus involved in iron transport. The latter in combination with a flor-yeast-specific mutation in the Aft1 transcription factor gene is likely to be responsible for the discovered phenotype of increased iron sensitivity and improved iron uptake of analyzed strains. Expansion of the coding region of the FLO11 flocculin gene and alteration of the balance between members of the FLO gene family are likely to positively affect the well-known propensity of flor strains for velum formation. Our study provides new insights into the nature of genetic variation in flor yeast strains and demonstrates that different adaptive properties of flor yeast strains could have evolved through different mechanisms of genetic variation.}, } @article {pmid29867794, year = {2018}, author = {de Moraes, MH and Soto, EB and Salas González, I and Desai, P and Chu, W and Porwollik, S and McClelland, M and Teplitski, M}, title = {Genome-Wide Comparative Functional Analyses Reveal Adaptations of Salmonella sv. Newport to a Plant Colonization Lifestyle.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {877}, pmid = {29867794}, issn = {1664-302X}, abstract = {Outbreaks of salmonellosis linked to the consumption of vegetables have been disproportionately associated with strains of serovar Newport. 
We tested the hypothesis that strains of sv. Newport have evolved unique adaptations to persistence in plants that are not shared by strains of other Salmonella serovars. We used a genome-wide mutant screen to compare growth in tomato fruit of a sv. Newport strain from an outbreak traced to tomatoes, and a sv. Typhimurium strain from animals. Most genes in the sv. Newport strain that were selected during persistence in tomatoes were shared with, and similarly selected in, the sv. Typhimurium strain. Many of their functions are linked to central metabolism, including amino acid biosynthetic pathways, iron acquisition, and maintenance of cell structure. One exception was a greater need for the core genes involved in purine metabolism in sv. Typhimurium than in sv. Newport. We discovered a gene, papA, that was unique to sv. Newport and contributed to the strain's fitness in tomatoes. The papA gene was present in about 25% of sv. Newport Group III genomes and generally absent from other Salmonella genomes. 
Homologs of papA were detected in the genomes of Pantoea, Dickeya, and Pectobacterium, members of the Enterobacteriaceae family that can colonize both plants and animals.}, } @article {pmid29859036, year = {2018}, author = {Adamek, M and Alanjary, M and Sales-Ortells, H and Goodfellow, M and Bull, AT and Winkler, A and Wibberg, D and Kalinowski, J and Ziemert, N}, title = {Comparative genomics reveals phylogenetic distribution patterns of secondary metabolites in Amycolatopsis species.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {426}, pmid = {29859036}, issn = {1471-2164}, support = {TTU 9.704//Deutsches Zentrum für Infektionsforschung/ ; 031A533//Bielefeld-Gießen Center for Microbial Bioinformatics/ ; Emeritus Fellowship//Leverhulme Trust/ ; }, mesh = {Actinomycetales/*genetics/*metabolism ; Genome, Bacterial/genetics ; *Genomics ; Multigene Family/genetics ; *Phylogeny ; Secondary Metabolism/*genetics ; }, abstract = {BACKGROUND: Genome mining tools have enabled us to predict biosynthetic gene clusters that might encode compounds with valuable functions for industrial and medical applications. With the continuously increasing number of genomes sequenced, we are confronted with an overwhelming number of predicted clusters. In order to guide the effective prioritization of biosynthetic gene clusters towards finding the most promising compounds, knowledge about diversity, phylogenetic relationships and distribution patterns of biosynthetic gene clusters is necessary.

RESULTS: Here, we provide a comprehensive analysis of the model actinobacterial genus Amycolatopsis and its potential for the production of secondary metabolites. A phylogenetic characterization, together with a pan-genome analysis, showed that within this highly diverse genus, four major lineages could be distinguished which differed in their potential to produce secondary metabolites. Furthermore, we were able to distinguish gene cluster families whose distribution correlated with phylogeny, indicating that vertical gene transfer plays a major role in the evolution of secondary metabolite gene clusters. Still, the vast majority of the diverse biosynthetic gene clusters were derived from clusters unique to the genus, and also unique in comparison to a database of known compounds. Our study on the locations of biosynthetic gene clusters in the genomes of Amycolatopsis strains showed that clusters acquired by horizontal gene transfer tend to be incorporated into non-conserved regions of the genome, thereby allowing us to distinguish core and hypervariable regions in Amycolatopsis genomes.

CONCLUSIONS: Using a comparative genomics approach, it was possible to determine the potential of the genus Amycolatopsis to produce a huge diversity of secondary metabolites. Furthermore, the analysis demonstrates that horizontal and vertical gene transfer play an important role in the acquisition and maintenance of valuable secondary metabolites. Our results cast light on the interconnections between secondary metabolite gene clusters and provide a way to prioritize biosynthetic pathways in the search and discovery of novel compounds.}, } @article {pmid29858585, year = {2018}, author = {Zhao, Q and Feng, Q and Lu, H and Li, Y and Wang, A and Tian, Q and Zhan, Q and Lu, Y and Zhang, L and Huang, T and Wang, Y and Fan, D and Zhao, Y and Wang, Z and Zhou, C and Chen, J and Zhu, C and Li, W and Weng, Q and Xu, Q and Wang, ZX and Wei, X and Han, B and Huang, X}, title = {Publisher Correction: Pan-genome analysis highlights the extent of genomic variation in cultivated and wild rice.}, journal = {Nature genetics}, volume = {50}, number = {8}, pages = {1196}, doi = {10.1038/s41588-018-0136-6}, pmid = {29858585}, issn = {1546-1718}, abstract = {When published, this article did not initially appear open access. 
This error has been corrected, and the open access status of the paper is noted in all versions of the paper.}, } @article {pmid29857590, year = {2018}, author = {Angermeyer, A and Das, MM and Singh, DV and Seed, KD}, title = {Analysis of 19 Highly Conserved Vibrio cholerae Bacteriophages Isolated from Environmental and Patient Sources Over a Twelve-Year Period.}, journal = {Viruses}, volume = {10}, number = {6}, pages = {}, pmid = {29857590}, issn = {1999-4915}, support = {R01 AI127652/AI/NIAID NIH HHS/United States ; }, mesh = {Bacteriophages/*genetics/*isolation & purification ; Bangladesh/epidemiology ; CRISPR-Cas Systems ; Cholera/epidemiology/virology ; Feces/microbiology/*virology ; Genes, Bacterial ; Genetic Variation ; *Genome, Viral ; Humans ; Phylogeny ; Vibrio cholerae O1/*virology ; *Water Microbiology ; }, abstract = {The Vibrio cholerae biotype "El Tor" is responsible for all of the current epidemic and endemic cholera outbreaks worldwide. These outbreaks are clonal, and it is hypothesized that they originate from the coastal areas near the Bay of Bengal, where the lytic bacteriophage ICP1 (International Centre for Diarrhoeal Disease Research, Bangladesh cholera phage 1) specifically preys upon these pathogenic outbreak strains. ICP1 has also been the dominant bacteriophage found in cholera patient stools since 2001. However, little is known about the genomic differences between the ICP1 strains that have been collected over time. Here, we elucidate the pan-genome and the phylogeny of the ICP1 strains by aligning, annotating, and analyzing the genomes of 19 distinct isolates that were collected between 2001 and 2012. Our results reveal that the ICP1 isolates are highly conserved and possess a large core-genome as well as a smaller, somewhat flexible accessory-genome. 
Despite its overall conservation, ICP1 strains have managed to acquire a number of unknown genes, as well as a CRISPR-Cas system which is known to be critical for its ongoing struggle for co-evolutionary dominance over its host. This study describes a foundation on which to construct future molecular and bioinformatic studies of these V. cholerae-associated bacteriophages.}, } @article {pmid29855598, year = {2018}, author = {Bulagonda, EP and Manivannan, B and Mahalingam, N and Lama, M and Chanakya, PP and Khamari, B and Jadhao, S and Vasudevan, M and Nagaraja, V}, title = {Comparative genomic analysis of a naturally competent Elizabethkingia anophelis isolated from an eye infection.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {8447}, pmid = {29855598}, issn = {2045-2322}, mesh = {Aged ; Anti-Bacterial Agents/pharmacology ; Comparative Genomic Hybridization ; Drug Resistance, Bacterial/drug effects/genetics ; Endophthalmitis/*microbiology/pathology ; Flavobacteriaceae/classification/*genetics/metabolism/pathogenicity ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Humans ; Phylogeny ; Virulence/genetics ; }, abstract = {Elizabethkingia anophelis has now emerged as an opportunistic human pathogen. However, its mechanisms of transmission remain unexplained. Comparative genomic (CG) analysis of E. anophelis endophthalmitis strain surprisingly found from an eye infection patient with twenty-five other E. anophelis genomes revealed its potential to participate in horizontal gene transfer. CG analysis revealed that the study isolate has an open pan genome and has undergone extensive gene rearrangements. We demonstrate that the strain is naturally competent, hitherto not reported in any members of Elizabethkingia. Presence of competence related genes, mobile genetic elements, Type IV, VI secretory systems and a unique virulence factor arylsulfatase suggests a different lineage of the strain. Deciphering the genome of E. 
anophelis having a reservoir of antibiotic resistance genes and virulence factors associated with diverse human infections may open up avenues to deal with the myriad of its human infections and devise strategies to combat the pathogen.}, } @article {pmid29850478, year = {2018}, author = {Oyedara, OO and Segura-Cabrera, A and Guo, X and Elufisan, TO and Cantú González, RA and Rodríguez Pérez, MA}, title = {Whole-Genome Sequencing and Comparative Genome Analysis Provided Insight into the Predatory Features and Genetic Diversity of Two Bdellovibrio Species Isolated from Soil.}, journal = {International journal of genomics}, volume = {2018}, number = {}, pages = {9402073}, pmid = {29850478}, issn = {2314-436X}, abstract = {Bdellovibrio spp. are predatory bacteria with great potential as antimicrobial agents. Studies have shown that members of the genus Bdellovibrio exhibit peculiar characteristics that influence their ecological adaptations. In this study, whole genomes of two different Bdellovibrio spp. designated SKB1291214 and SSB218315 isolated from soil were sequenced. The core genes shared by all the Bdellovibrio spp. considered for the pangenome analysis including the epibiotic B. exovorus were 795. The number of unique genes identified in Bdellovibrio spp. SKB1291214, SSB218315, W, and B. exovorus JJS was 1343, 113, 857, and 1572, respectively. These unique genes encode hydrolytic, chemotaxis, and transporter proteins which might be useful for predation in the Bdellovibrio strains. Furthermore, the two Bdellovibrio strains exhibited differences based on the % GC content, amino acid identity, and 16S rRNA gene sequence. The 16S rRNA gene sequence of Bdellovibrio sp. SKB1291214 shared 99% identity with that of an uncultured Bdellovibrio sp. clone 12L 106 (a pairwise distance of 0.008) and 95-97% identity (a pairwise distance of 0.043) with that of other culturable terrestrial Bdellovibrio spp., including strain SSB218315. In Bdellovibrio sp. 
SKB1291214, a 174 bp sequence was inserted at the host interaction (hit) locus region usually attributed to prey attachment, invasion, and development of host independent Bdellovibrio phenotypes. Also, a gene equivalent to Bd0108 in B. bacteriovorus HD100 was not conserved in Bdellovibrio sp. SKB1291214. The results of this study provided information on the genetic characteristics and diversity of the genus Bdellovibrio that can contribute to their successful applications as a biocontrol agent.}, } @article {pmid29806079, year = {2018}, author = {Gias, E and Brosnahan, CL and Orr, D and Binney, B and Ha, HJ and Preece, MA and Jones, B}, title = {In vivo growth and genomic characterization of rickettsia-like organisms isolated from farmed Chinook salmon (Oncorhynchus tshawytscha) in New Zealand.}, journal = {Journal of fish diseases}, volume = {}, number = {}, pages = {}, doi = {10.1111/jfd.12817}, pmid = {29806079}, issn = {1365-2761}, abstract = {A rickettsia-like organism, designated NZ-RLO2, was isolated from Chinook salmon (Oncorhynchus tshawytscha) farmed in the South Island, New Zealand. In vivo growth showed NZ-RLO2 was able to grow in CHSE-214, EPC, BHK-21, C6/36 and Sf21 cell lines, while Piscirickettsia salmonis LF-89[T] grew in all but BHK-21 and Sf21. NZ-RLO2 grew optimally in EPC at 15°C, CHSE-214 and EPC at 18°C. The growth of LF-89[T] was optimal at 15°C, 18°C and 22°C in CHSE-214, but appeared less efficient in EPC cells at all temperatures. Pan-genome comparison of predicted proteomes shows that available Chilean strains of P. salmonis grouped into two clusters (p-value = 94%). NZ-RLO2 was genetically different from previously described NZ-RLO1, and both strains grouped separately from the Chilean strains in one of the two clusters (p-value = 88%), but were closely related to each other. TaqMan and Sybr Green real-time PCR targeting RNA polymerase (rpoB) and DNA primase (dnaG), respectively, were developed to detect NZ-RLO2. 
This study indicates that the New Zealand strains showed a closer genetic relationship to one of the Chilean P. salmonis clusters; however, more Piscirickettsia genomes from wider geographical regions and diverse hosts are needed to better understand the classification within this genus.}, } @article {pmid29802996, year = {2018}, author = {Hurtado, R and Carhuaricra, D and Soares, S and Viana, MVC and Azevedo, V and Maturrano, L and Aburjaile, F}, title = {Pan-genomic approach shows insight of genetic divergence and pathogenic-adaptation of Pasteurella multocida.}, journal = {Gene}, volume = {670}, number = {}, pages = {193-206}, doi = {10.1016/j.gene.2018.05.084}, pmid = {29802996}, issn = {1879-0038}, mesh = {Animals ; Gene Transfer, Horizontal ; Genetic Drift ; Genome, Bacterial ; Genomics/*methods ; Pasteurella Infections/*microbiology ; Pasteurella multocida/*classification/genetics/isolation & purification/pathogenicity ; Phylogeny ; }, abstract = {Pasteurella multocida is a gram-negative, non-motile bacterial pathogen, which is associated with chronic and acute infections such as snuffles, pneumonia, atrophic rhinitis, fowl cholera and hemorrhagic septicemia. These diseases affect a wide range of domestic animals, leading to significant morbidity and mortality and causing significant economic losses worldwide. Due to the interest in deciphering the genetic diversity and adaptive processes between P. multocida strains, this work aimed to perform a pan-genome analysis to evidence horizontal gene transfer and positive selection among 23 P. multocida strains isolated from distinct diseases and hosts. The results revealed an open pan-genome containing 3585 genes and an accessory genome presenting 1200 genes. 
The phylogenomic analysis based on the presence/absence of genes and islands exhibits high levels of plasticity, which reflects a high intraspecific diversity and a possible adaptive mechanism responsible for the specific disease manifestation between the established groups (pneumonia, fowl cholera, hemorrhagic septicemia and snuffles). Additionally, we identified differences in accessory genes among groups, which are involved in sugar metabolism and transport systems, virulence-related genes and a high concentration of hypothetical proteins. However, there was no specific indispensable functional mechanism to decisively correlate the presence of genes and their adaptation to a specific host/disease. Also, positive selection was found only for two genes from sub-group hemorrhagic septicemia, serotype B. This comprehensive comparative genome analysis will provide new insights of horizontal gene transfers that play an essential role in the diversification and adaptation mechanism into P. multocida species to a specific disease.}, } @article {pmid29801938, year = {2018}, author = {Belahbib, H and Summers, ZM and Fardeau, ML and Joseph, M and Tamburini, C and Dolla, A and Ollivier, B and Armougom, F}, title = {Towards a congruent reclassification and nomenclature of the thermophilic species of the genus Pseudothermotoga within the order Thermotogales.}, journal = {Systematic and applied microbiology}, volume = {41}, number = {6}, pages = {555-563}, doi = {10.1016/j.syapm.2018.04.007}, pmid = {29801938}, issn = {1618-0984}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Gram-Negative Anaerobic Straight, Curved, and Helical Rods/*classification/genetics ; Nucleic Acid Hybridization ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Temperature ; }, abstract = {The phylum Thermotogae gathers thermophilic, hyperthermophilic, mesophilic, and thermo-acidophilic anaerobic bacteria that mostly originate from 
geothermally heated environments. The metabolic and phenotypic properties harbored by the Thermotogae species question the evolutionary events driving the emergence of this early branch of the universal tree of life. Recent reshaping of the Thermotogae taxonomy has led to the description of a new genus, Pseudothermotoga, a sister group of the genus Thermotoga within the order Thermotogales. Comparative genomics of both Pseudothermotoga and Thermotoga spp., including 16S-rRNA-based phylogenetic, pan-genomic analysis as well as signature indel conservation, provided evidence that Thermotoga caldifontis and Thermotoga profunda species should be reclassified within the genus Pseudothermotoga and renamed as Pseudothermotoga caldifontis comb. nov. (type strain=AZM44c09[T]) and Pseudothermotoga profunda comb. nov. (type strain=AZM34c06[T]), respectively. In addition, based upon whole-genome relatedness indices and DNA-DNA hybridization results, the reclassification of Pseudothermotoga lettingae and Pseudothermotoga subterranea as later heterotypic synonyms of Pseudothermotoga elfii is proposed. Finally, potential genetic elements resulting from the distinct evolutionary story of the Thermotoga and Pseudothermotoga clades are discussed.}, } @article {pmid29795803, year = {2018}, author = {Abreu, VAC and Popin, RV and Alvarenga, DO and Schaker, PDC and Hoff-Risseti, C and Varani, AM and Fiore, MF}, title = {Corrigendum: Genomic and Genotypic Characterization of Cylindrospermopsis raciborskii: Toward an Intraspecific Phylogenetic Evaluation by Comparative Genomics.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {979}, doi = {10.3389/fmicb.2018.00979}, pmid = {29795803}, issn = {1664-302X}, abstract = {[This corrects the article on p. 306 in vol. 
9, PMID: 29535689.].}, } @article {pmid29795552, year = {2018}, author = {Jiao, J and Ni, M and Zhang, B and Zhang, Z and Young, JPW and Chan, TF and Chen, WX and Lam, HM and Tian, CF}, title = {Coordinated regulation of core and accessory genes in the multipartite genome of Sinorhizobium fredii.}, journal = {PLoS genetics}, volume = {14}, number = {5}, pages = {e1007428}, pmid = {29795552}, issn = {1553-7404}, mesh = {Adaptation, Biological/*genetics ; Bacterial Proteins/genetics ; *Gene Expression Regulation, Bacterial ; Genes, Bacterial/genetics ; Genome, Bacterial ; Nitrogen Fixation/genetics ; Plasmids/*genetics ; Replicon/genetics ; Sinorhizobium fredii/*genetics ; Soybeans/microbiology ; Symbiosis/*genetics ; Transcriptome ; }, abstract = {Prokaryotes benefit from having accessory genes, but it is unclear how accessory genes can be linked with the core regulatory network when developing adaptations to new niches. Here we determined hierarchical core/accessory subsets in the multipartite pangenome (composed of genes from the chromosome, chromid and plasmids) of the soybean microsymbiont Sinorhizobium fredii by comparing twelve Sinorhizobium genomes. Transcriptomes of two S. fredii strains at mid-log and stationary growth phases and in symbiotic conditions were obtained. The average level of gene expression, variation of expression between different conditions, and gene connectivity within the co-expression network were positively correlated with the gene conservation level from strain-specific accessory genes to genus core. Condition-dependent transcriptomes exhibited adaptive transcriptional changes in pangenome subsets shared by the two strains, while strain-dependent transcriptomes were enriched with accessory genes on the chromid. Proportionally more chromid genes than plasmid genes were co-expressed with chromosomal genes, while plasmid genes had a higher within-replicon connectivity in expression than chromid ones. 
However, key nitrogen fixation genes on the symbiosis plasmid were characterized by high connectivity in both within- and between-replicon analyses. Among those genes with host-specific upregulation patterns, chromosomal znu and mdt operons, encoding a conserved high-affinity zinc transporter and an accessory multi-drug efflux system, respectively, were experimentally demonstrated to be involved in host-specific symbiotic adaptation. These findings highlight the importance of integrative regulation of hierarchical core/accessory components in the multipartite genome of bacteria during niche adaptation and in shaping the prokaryotic pangenome in the long run.}, } @article {pmid29792377, year = {2018}, author = {Satti, M and Tanizawa, Y and Endo, A and Arita, M}, title = {Comparative analysis of probiotic bacteria based on a new definition of core genome.}, journal = {Journal of bioinformatics and computational biology}, volume = {16}, number = {3}, pages = {1840012}, doi = {10.1142/S0219720018400127}, pmid = {29792377}, issn = {1757-6334}, mesh = {Bacterial Proteins/genetics ; Base Composition ; Bifidobacterium/*genetics ; Carbohydrate Metabolism/genetics ; Gene Library ; Genome Size ; *Genome, Bacterial ; Genomics/*methods/statistics & numerical data ; Lactobacillus/*genetics ; Multigene Family ; Odds Ratio ; Probiotics ; }, abstract = {The commensal genus Bifidobacterium has probiotic properties. We prepared a public library of the gene functions of the genus Bifidobacterium for its online annotation. Orthologous gene cluster analysis showed that the pan genomes of Bifidobacterium and Lactobacillus exhibit striking similarities when mapped to the Clusters of Orthologous Group (COG) database of proteins. When the core genes in each genus were selected based on our statistical definition of "core genome", core genes were present in at least 92% of 52 Bifidobacterium and in 97% of 178 Lactobacillus genomes. 
Functional comparison of the core genes of the two genera revealed a significant difference in the categories "amino acid transport and metabolism" representing their difference in niche specificity. Over-represented Bifidobacterium protein families were primarily involved in host interactions, the complex compound metabolism, and in stress responses. These findings coincide with the published information and validate our bias-resilient definition of the core genome.}, } @article {pmid29788909, year = {2018}, author = {Lacey, JA and Allnutt, TR and Vezina, B and Van, TTH and Stent, T and Han, X and Rood, JI and Wade, B and Keyburn, AL and Seemann, T and Chen, H and Haring, V and Johanesen, PA and Lyras, D and Moore, RJ}, title = {Whole genome analysis reveals the diversity and evolutionary relationships between necrotic enteritis-causing strains of Clostridium perfringens.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {379}, pmid = {29788909}, issn = {1471-2164}, support = {1.1.2//Poultry Cooperative Research Centre/ ; }, mesh = {Animals ; Chickens/microbiology ; Chromosomes/genetics ; Clostridium perfringens/*genetics/*physiology ; Enteritis/complications/*microbiology ; *Evolution, Molecular ; *Genetic Variation ; Necrosis/complications ; Plasmids/genetics ; }, abstract = {BACKGROUND: Clostridium perfringens causes a range of diseases in animals and humans including necrotic enteritis in chickens and food poisoning and gas gangrene in humans. Necrotic enteritis is of concern in commercial chicken production due to the cost of the implementation of infection control measures and to productivity losses. This study has focused on the genomic analysis of a range of chicken-derived C. perfringens isolates, from around the world and from different years. The genomes were sequenced and compared with 20 genomes available from public databases, which were from a diverse collection of isolates from chickens, other animals, and humans. 
We used a distance based phylogeny that was constructed based on gene content rather than sequence identity. Similarity between strains was defined as the number of genes that they have in common divided by their total number of genes. In this type of phylogenetic analysis, evolutionary distance can be interpreted in terms of evolutionary events such as acquisition and loss of genes, whereas the underlying properties (the gene content) can be interpreted in terms of function. We also compared these methods to the sequence-based phylogeny of the core genome.

RESULTS: Distinct pathogenic clades of necrotic enteritis-causing C. perfringens were identified. They were characterised by variable regions encoded on the chromosome, with predicted roles in capsule production, adhesion, inhibition of related strains, phage integration, and metabolism. Some strains have almost identical genomes, even though they were isolated from different geographic regions at various times, while other highly distant genomes appear to result in similar outcomes with regard to virulence and pathogenesis.

CONCLUSIONS: The high level of diversity in chicken isolates suggests there is no reliable factor that defines a chicken strain of C. perfringens, however, disease-causing strains can be defined by the presence of netB-encoding plasmids. This study reveals that horizontal gene transfer appears to play a significant role in genetic variation of the C. perfringens chromosome as well as the plasmid content within strains.}, } @article {pmid29785479, year = {2018}, author = {Kulsum, U and Kapil, A and Singh, H and Kaur, P}, title = {NGSPanPipe: A Pipeline for Pan-genome Identification in Microbial Strains from Experimental Reads.}, journal = {Advances in experimental medicine and biology}, volume = {1052}, number = {}, pages = {39-49}, doi = {10.1007/978-981-10-7572-8_4}, pmid = {29785479}, issn = {0065-2598}, mesh = {Bacteria/classification/*genetics/isolation & purification ; Databases, Genetic ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; }, abstract = {Recent advancements in sequencing technologies have decreased both time span and cost for sequencing the whole bacterial genome. High-throughput Next-Generation Sequencing (NGS) technology has led to the generation of enormous data concerning microbial populations publically available across various repositories. As a consequence, it has become possible to study and compare the genomes of different bacterial strains within a species or genus in terms of evolution, ecology and diversity. Studying the pan-genome provides insights into deciphering microevolution, global composition and diversity in virulence and pathogenesis of a species. It can also assist in identifying drug targets and proposing vaccine candidates. The effective analysis of these large genome datasets necessitates the development of robust tools. 
Current methods to develop pan-genome do not support direct input of raw reads from the sequencer machine but require preprocessing of reads as an assembled protein/gene sequence file or the binary matrix of orthologous genes/proteins. We have designed an easy-to-use integrated pipeline, NGSPanPipe, which can directly identify the pan-genome from short reads. The output from the pipeline is compatible with other pan-genome analysis tools. We evaluated our pipeline with other methods for developing pan-genome, i.e. reference-based assembly and de novo assembly using simulated reads of Mycobacterium tuberculosis. The single script pipeline (pipeline.pl) is applicable for all bacterial strains. It integrates multiple in-house Perl scripts and is freely accessible from https://github.com/Biomedinformatics/NGSPanPipe .}, } @article {pmid29773867, year = {2018}, author = {Kim, YB and Kim, JY and Song, HS and Lee, C and Ahn, SW and Lee, SH and Jung, MY and Rhee, JK and Kim, J and Hyun, DW and Bae, JW and Roh, SW}, title = {Novel haloarchaeon Natrinema thermophila having the highest growth temperature among haloarchaea with a large genome size.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {7777}, pmid = {29773867}, issn = {2045-2322}, mesh = {Climate ; Genome Size ; Genome, Archaeal ; Halobacteriaceae/genetics/growth & development/*physiology ; Phylogeny ; *Temperature ; Thermotolerance/genetics ; }, abstract = {Environmental temperature is one of the most important factors for the growth and survival of microorganisms. Here we describe a novel extremely halophilic archaeon (haloarchaea) designated as strain CBA1119[T] isolated from solar salt. Strain CBA1119[T] had the highest maximum and optimal growth temperatures (66 °C and 55 °C, respectively) and one of the largest genome sizes among haloarchaea (5.1 Mb). 
It also had the largest number of strain-specific pan-genome orthologous groups and unique pathways among members of the genus Natrinema in the class Halobacteria. A dendrogram based on the presence/absence of genes and a phylogenetic tree constructed based on OrthoANI values highlighted the particularities of strain CBA1119[T] as compared to other Natrinema species and other haloarchaea members. The large genome of strain CBA1119[T] may provide information on genes that confer tolerance to extreme environmental conditions, which may lead to the discovery of other thermophilic strains with potential applications in industrial biotechnology.}, } @article {pmid29768136, year = {2018}, author = {Sánchez-Vallet, A and Fouché, S and Fudal, I and Hartmann, FE and Soyer, JL and Tellier, A and Croll, D}, title = {The Genome Biology of Effector Gene Evolution in Filamentous Plant Pathogens.}, journal = {Annual review of phytopathology}, volume = {56}, number = {}, pages = {21-40}, doi = {10.1146/annurev-phyto-080516-035303}, pmid = {29768136}, issn = {1545-2107}, mesh = {Adaptation, Biological ; *Evolution, Molecular ; Fungi/*genetics ; Genes, Fungal/genetics ; *Genome ; Genome, Fungal ; Oomycetes/*genetics ; Plant Diseases/microbiology/*prevention & control ; *Polymorphism, Genetic ; }, abstract = {Filamentous pathogens, including fungi and oomycetes, pose major threats to global food security. Crop pathogens cause damage by secreting effectors that manipulate the host to the pathogen's advantage. Genes encoding such effectors are among the most rapidly evolving genes in pathogen genomes. Here, we review how the major characteristics of the emergence, function, and regulation of effector genes are tightly linked to the genomic compartments where these genes are located in pathogen genomes. 
The presence of repetitive elements in these compartments is associated with elevated rates of point mutations and sequence rearrangements with a major impact on effector diversification. The expression of many effectors converges on an epigenetic control mediated by the presence of repetitive elements. Population genomics analyses showed that rapidly evolving pathogens show high rates of turnover at effector loci and display a mosaic in effector presence-absence polymorphism among strains. We conclude that effective pathogen containment strategies require a thorough understanding of the effector genome biology and the pathogen's potential for rapid adaptation.}, } @article {pmid29765358, year = {2018}, author = {Vinuesa, P and Ochoa-Sánchez, LE and Contreras-Moreira, B}, title = {GET_PHYLOMARKERS, a Software Package to Select Optimal Orthologous Clusters for Phylogenomics and Inferring Pan-Genome Phylogenies, Used for a Critical Geno-Taxonomic Revision of the Genus Stenotrophomonas.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {771}, pmid = {29765358}, issn = {1664-302X}, abstract = {The massive accumulation of genome-sequences in public databases promoted the proliferation of genome-level phylogenetic analyses in many areas of biological research. However, due to diverse evolutionary and genetic processes, many loci have undesirable properties for phylogenetic reconstruction. These, if undetected, can result in erroneous or biased estimates, particularly when estimating species trees from concatenated datasets. To deal with these problems, we developed GET_PHYLOMARKERS, a pipeline designed to identify high-quality markers to estimate robust genome phylogenies from the orthologous clusters, or the pan-genome matrix (PGM), computed by GET_HOMOLOGUES. In the first context, a set of sequential filters are applied to exclude recombinant alignments and those producing anomalous or poorly resolved trees. 
Multiple sequence alignments and maximum likelihood (ML) phylogenies are computed in parallel on multi-core computers. A ML species tree is estimated from the concatenated set of top-ranking alignments at the DNA or protein levels, using either FastTree or IQ-TREE (IQT). The latter is used by default due to its superior performance revealed in an extensive benchmark analysis. In addition, parsimony and ML phylogenies can be estimated from the PGM. We demonstrate the practical utility of the software by analyzing 170 Stenotrophomonas genome sequences available in RefSeq and 10 new complete genomes of Mexican environmental S. maltophilia complex (Smc) isolates reported herein. A combination of core-genome and PGM analyses was used to revise the molecular systematics of the genus. An unsupervised learning approach that uses a goodness of clustering statistic identified 20 groups within the Smc at a core-genome average nucleotide identity (cgANIb) of 95.9% that are perfectly consistent with strongly supported clades on the core- and pan-genome trees. In addition, we identified 16 misclassified RefSeq genome sequences, 14 of them labeled as S. maltophilia, demonstrating the broad utility of the software for phylogenomics and geno-taxonomic studies. The code, a detailed manual and tutorials are freely available for Linux/UNIX servers under the GNU GPLv3 license at https://github.com/vinuesa/get_phylomarkers. 
A docker image bundling GET_PHYLOMARKERS with GET_HOMOLOGUES is available at https://hub.docker.com/r/csicunam/get_homologues/, which can be easily run on any platform.}, } @article {pmid29764365, year = {2018}, author = {Valenzuela, D and Norri, T and Välimäki, N and Pitkänen, E and Mäkinen, V}, title = {Towards pan-genome read alignment to improve variation calling.}, journal = {BMC genomics}, volume = {19}, number = {Suppl 2}, pages = {87}, pmid = {29764365}, issn = {1471-2164}, mesh = {Access to Information ; *Genetic Variation ; Genome, Human ; Humans ; Internet ; Sequence Alignment ; Sequence Analysis, DNA/*methods ; Software ; Workflow ; }, abstract = {BACKGROUND: Typical human genome differs from the reference genome at 4-5 million sites. This diversity is increasingly catalogued in repositories such as ExAC/gnomAD, consisting of >15,000 whole-genomes and >126,000 exome sequences from different individuals. Despite this enormous diversity, resequencing data workflows are still based on a single human reference genome. Identification and genotyping of genetic variants is typically carried out on short-read data aligned to a single reference, disregarding the underlying variation.

RESULTS: We propose a new unified framework for variant calling with short-read data utilizing a representation of human genetic variation - a pan-genomic reference. We provide a modular pipeline that can be seamlessly incorporated into existing sequencing data analysis workflows. Our tool is open source and available online: https://gitlab.com/dvalenzu/PanVC .

CONCLUSIONS: Our experiments show that by replacing a standard human reference with a pan-genomic one we achieve an improvement in single-nucleotide variant calling accuracy and in short indel calling accuracy over the widely adopted Genome Analysis Toolkit (GATK) in difficult genomic regions.}, } @article {pmid29755426, year = {2018}, author = {Howat, AM and Vollmers, J and Taubert, M and Grob, C and Dixon, JL and Todd, JD and Chen, Y and Kaster, AK and Murrell, JC}, title = {Comparative Genomics and Mutational Analysis Reveals a Novel XoxF-Utilizing Methylotroph in the Roseobacter Group Isolated From the Marine Environment.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {766}, pmid = {29755426}, issn = {1664-302X}, abstract = {The Roseobacter group comprises a significant group of marine bacteria which are involved in global carbon and sulfur cycles. Some members are methylotrophs, using one-carbon compounds as a carbon and energy source. It has recently been shown that methylotrophs generally require a rare earth element when using the methanol dehydrogenase enzyme XoxF for growth on methanol. Addition of lanthanum to methanol enrichments of coastal seawater facilitated the isolation of a novel methylotroph in the Roseobacter group: Marinibacterium anthonyi strain La 6. Mutation of xoxF5 revealed the essential nature of this gene during growth on methanol and ethanol. Physiological characterization demonstrated the metabolic versatility of this strain. Genome sequencing revealed that strain La 6 has the largest genome of all Roseobacter group members sequenced to date, at 7.18 Mbp. Multilocus sequence analysis (MLSA) showed that whilst it displays the highest core gene sequence similarity with subgroup 1 of the Roseobacter group, it shares very little of its pangenome, suggesting unique genetic adaptations. 
This research revealed that the addition of lanthanides to isolation procedures was key to cultivating novel XoxF-utilizing methylotrophs from the marine environment, whilst genome sequencing and MLSA provided insights into their potential genetic adaptations and relationship to the wider community.}, } @article {pmid29743119, year = {2018}, author = {Zolfo, M and Asnicar, F and Manghi, P and Pasolli, E and Tett, A and Segata, N}, title = {Profiling microbial strains in urban environments using metagenomic sequencing data.}, journal = {Biology direct}, volume = {13}, number = {1}, pages = {9}, pmid = {29743119}, issn = {1745-6150}, mesh = {Acinetobacter/genetics ; Genome, Bacterial/genetics ; Humans ; Metagenome/*genetics ; Metagenomics/*methods ; Microbiota/genetics ; Phylogeny ; }, abstract = {BACKGROUND: The microbial communities populating human and natural environments have been extensively characterized with shotgun metagenomics, which provides an in-depth representation of the microbial diversity within a sample. Microbes thriving in urban environments may be crucially important for human health, but have received less attention than those of other environments. Ongoing efforts started to target urban microbiomes at a large scale, but the most recent computational methods to profile these metagenomes have never been applied in this context. It is thus currently unclear whether such methods, that have proven successful at distinguishing even closely related strains in human microbiomes, are also effective in urban settings for tasks such as cultivation-free pathogen detection and microbial surveillance. Here, we aimed at a) testing the currently available metagenomic profiling tools on urban metagenomics; b) characterizing the organisms in urban environment at the resolution of single strain and c) discussing the biological insights that can be inferred from such methods.

RESULTS: We applied three complementary methods on the 1614 metagenomes of the CAMDA 2017 challenge. With MetaMLST we identified 121 known sequence-types from 15 species of clinical relevance. For instance, we identified several Acinetobacter strains that were close to the nosocomial opportunistic pathogen A. nosocomialis. With StrainPhlAn, a generalized version of the MetaMLST approach, we inferred the phylogenetic structure of Pseudomonas stutzeri strains and suggested that the strain-level heterogeneity in environmental samples is higher than in the human microbiome. Finally, we also probed the functional potential of the different strains with PanPhlAn. We further showed that SNV-based and pangenome-based profiling provide complementary information that can be combined to investigate the evolutionary trajectories of microbes and to identify specific genetic determinants of virulence and antibiotic resistances within closely related strains.

CONCLUSION: We show that strain-level methods developed primarily for the analysis of human microbiomes can be effective for city-associated microbiomes. In fact, (opportunistic) pathogens can be tracked and monitored across many hundreds of urban metagenomes. However, while more effort is needed to profile strains of currently uncharacterized species, this work poses the basis for high-resolution analyses of microbiomes sampled in city and mass transportation environments.

REVIEWERS: This article was reviewed by Alexandra Bettina Graf, Daniel Huson and Trevor Cickovski.}, } @article {pmid29721151, year = {2018}, author = {Kumar, R and Acharya, V and Singh, D and Kumar, S}, title = {Strategies for high-altitude adaptation revealed from high-quality draft genome of non-violacein producing Janthinobacterium lividum ERGS5:01.}, journal = {Standards in genomic sciences}, volume = {13}, number = {}, pages = {11}, pmid = {29721151}, issn = {1944-3277}, abstract = {A light pink coloured bacterial strain ERGS5:01 isolated from glacial stream water of Sikkim Himalaya was affiliated to Janthinobacterium lividum based on 16S rRNA gene sequence identity and phylogenetic clustering. Whole genome sequencing was performed for the strain to confirm its taxonomy as it lacked the typical violet pigmentation of the genus and also to decipher its survival strategy at the aquatic ecosystem of high elevation. The PacBio RSII sequencing generated genome of 5,168,928 bp with 4575 protein-coding genes and 118 RNA genes. Whole genome-based multilocus sequence analysis clustering, in silico DDH similarity value of 95.1% and, the ANI value of 99.25% established the identity of the strain ERGS5:01 (MCC 2953) as a non-violacein producing J. lividum. The genome comparisons across genus Janthinobacterium revealed an open pan-genome with the scope of the addition of new orthologous cluster to complete the genomic inventory. The genomic insight provided the genetic basis of freezing and frequent freeze-thaw cycle tolerance and, for industrially important enzymes. 
Extended insight into the genome provided clues of crucial genes associated with adaptation in the harsh aquatic ecosystem of high altitude.}, } @article {pmid29716534, year = {2018}, author = {Oliver, A and Kay, M and Cooper, KK}, title = {Comparative genomics of cocci-shaped Sporosarcina strains with diverse spatial isolation.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {310}, pmid = {29716534}, issn = {1471-2164}, support = {P30 CA062203/CA/NCI NIH HHS/United States ; S10 OD010794/OD/NIH HHS/United States ; S10 RR025496/RR/NCRR NIH HHS/United States ; }, mesh = {DNA Methylation ; Genome, Bacterial/genetics ; *Genomics ; Phylogeny ; Spatial Analysis ; Sporosarcina/*genetics ; Synteny ; }, abstract = {BACKGROUND: Cocci-shaped Sporosarcina strains are currently one of the few known cocci-shaped spore-forming bacteria, yet we know very little about the genomics. The goal of this study is to utilize comparative genomics to investigate the diversity of cocci-shaped Sporosarcina strains that differ in their geographical isolation and show different nutritional requirements.

RESULTS: For this study, we sequenced 28 genomes of cocci-shaped Sporosarcina strains isolated from 13 different locations around the world. We generated the first six complete genomes and methylomes utilizing PacBio sequencing, and an additional 22 draft genomes using Illumina sequencing. Genomic analysis revealed that cocci-shaped Sporosarcina strains contained an average genome of 3.3 Mb comprised of 3222 CDS, 54 tRNAs and 6 rRNAs, while only two strains contained plasmids. The cocci-shaped Sporosarcina genome on average contained 2.3 prophages and 15.6 IS elements, while methylome analysis supported the diversity of these strains as only one of 31 methylation motifs were shared under identical growth conditions. Analysis with a 90% identity cut-off revealed 221 core genes or ~ 7% of the genome, while a 30% identity cut-off generated a pan-genome of 8610 genes. The phylogenetic relationship of the cocci-shaped Sporosarcina strains based on either core genes, accessory genes or spore-related genes consistently resulted in the 29 strains being divided into eight clades.

CONCLUSIONS: This study begins to unravel the phylogenetic relationship of cocci-shaped Sporosarcina strains, and the comparative genomics of these strains supports identification of several new species.}, } @article {pmid29695866, year = {2018}, author = {Wang, W and Mauleon, R and Hu, Z and Chebotarov, D and Tai, S and Wu, Z and Li, M and Zheng, T and Fuentes, RR and Zhang, F and Mansueto, L and Copetti, D and Sanciangco, M and Palis, KC and Xu, J and Sun, C and Fu, B and Zhang, H and Gao, Y and Zhao, X and Shen, F and Cui, X and Yu, H and Li, Z and Chen, M and Detras, J and Zhou, Y and Zhang, X and Zhao, Y and Kudrna, D and Wang, C and Li, R and Jia, B and Lu, J and He, X and Dong, Z and Xu, J and Li, Y and Wang, M and Shi, J and Li, J and Zhang, D and Lee, S and Hu, W and Poliakov, A and Dubchak, I and Ulat, VJ and Borja, FN and Mendoza, JR and Ali, J and Li, J and Gao, Q and Niu, Y and Yue, Z and Naredo, MEB and Talag, J and Wang, X and Li, J and Fang, X and Yin, Y and Glaszmann, JC and Zhang, J and Li, J and Hamilton, RS and Wing, RA and Ruan, J and Zhang, G and Wei, C and Alexandrov, N and McNally, KL and Li, Z and Leung, H}, title = {Genomic variation in 3,010 diverse accessions of Asian cultivated rice.}, journal = {Nature}, volume = {557}, number = {7703}, pages = {43-49}, pmid = {29695866}, issn = {1476-4687}, mesh = {Asia ; Crops, Agricultural/*classification/*genetics ; Evolution, Molecular ; Genes, Plant/genetics ; *Genetic Variation ; Genetics, Population ; Genome, Plant/*genetics ; Genomics ; Haplotypes ; INDEL Mutation/genetics ; Oryza/*classification/*genetics ; Phylogeny ; Plant Breeding ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {Here we analyse genetic variation, population structure and diversity among 3,010 diverse Asian cultivated rice (Oryza sativa L.) genomes from the 3,000 Rice Genomes Project. 
Our results are consistent with the five major groups previously recognized, but also suggest several unreported subpopulations that correlate with geographic location. We identified 29 million single nucleotide polymorphisms, 2.4 million small indels and over 90,000 structural variations that contribute to within- and between-population variation. Using pan-genome analyses, we identified more than 10,000 novel full-length protein-coding genes and a high number of presence-absence variations. The complex patterns of introgression observed in domestication genes are consistent with multiple independent rice domestication events. The public availability of data from the 3,000 Rice Genomes Project provides a resource for rice genomics research and breeding.}, } @article {pmid29695424, year = {2018}, author = {Rodrigues, RAL and Andreani, J and Andrade, ACDSP and Machado, TB and Abdi, S and Levasseur, A and Abrahão, JS and La Scola, B}, title = {Morphologic and Genomic Analyses of New Isolates Reveal a Second Lineage of Cedratviruses.}, journal = {Journal of virology}, volume = {92}, number = {13}, pages = {}, pmid = {29695424}, issn = {1098-5514}, mesh = {Acanthamoeba castellanii/*virology ; DNA, Viral ; *Evolution, Molecular ; *Genome, Viral ; Genomics/*methods ; Giant Viruses/*classification/*genetics ; Phylogeny ; Sequence Analysis, DNA/methods ; Virion/*genetics ; }, abstract = {Giant viruses have been isolated and characterized in different environments, expanding our knowledge about the biology of these unique microorganisms. In the last 2 years, a new group was discovered, the cedratviruses, currently composed of only two isolates and members of a putative new family, "Pithoviridae," along with previously known pithoviruses. Here we report the isolation and biological and genomic characterization of two novel cedratviruses isolated from samples collected in France and Brazil. 
Both viruses were isolated using Acanthamoeba castellanii as a host cell and exhibit ovoid particles with corks at either extremity of the particle. Curiously, the Brazilian cedratvirus is ∼20% smaller and presents a shorter genome of 460,038 bp, coding for fewer proteins than other cedratviruses. In addition, it has a completely asyntenic genome and presents a lower amino acid identity of orthologous genes (∼73%). Pangenome analysis comprising the four cedratviruses revealed an increase in the pangenome concomitant with a decrease in the core genome with the addition of the two novel viruses. Finally, phylogenetic analyses clustered the Brazilian virus in a separate branch within the group of cedratviruses, while the French isolate is closer to the previously reported Cedratvirus lausannensis. Taking all together, we propose the existence of a second lineage of this emerging viral genus and provide new insights into the biodiversity and ubiquity of these giant viruses. IMPORTANCE: Various giant viruses have been described in recent years, revealing a unique part of the virosphere. A new group among the giant viruses has recently been described, the cedratviruses, which is currently composed of only two isolates. In this paper, we describe two novel cedratviruses isolated from French and Brazilian samples. Biological and genomic analyses showed viruses with different particle sizes, genome lengths, and architecture, revealing the existence of a second lineage of this new group of giant viruses. 
Our results provide new insights into the biodiversity of cedratviruses and highlight the importance of ongoing efforts to prospect for and characterize new giant viruses.}, } @article {pmid29695124, year = {2018}, author = {Nguyen, TL and Kim, DH}, title = {Genome-Wide Comparison Reveals a Probiotic Strain Lactococcus Lactis WFLU12 Isolated from the Gastrointestinal Tract of Olive Flounder (Paralichthys Olivaceus) Harboring Genes Supporting Probiotic Action.}, journal = {Marine drugs}, volume = {16}, number = {5}, pages = {}, pmid = {29695124}, issn = {1660-3397}, mesh = {Animals ; Chromosome Mapping ; Chromosomes, Bacterial ; Flounder/*microbiology ; Gene Expression Regulation, Bacterial/*physiology ; *Genome, Bacterial ; Lactococcus lactis/*genetics/*metabolism ; *Probiotics ; }, abstract = {Our previous study has shown that dietary supplementation with Lactococcus lactis WFLU12 can enhance the growth of olive flounder and its resistance against streptococcal infection. The objective of the present study was to use comparative genomics tools to investigate genomic characteristics of strain WFLU12 and the presence of genes supporting its probiotic action using sequenced genomes of L. lactis strains. Dispensable and singleton genes of strain WFLU12 were found to be more enriched in genes associated with metabolism (e.g., energy production and conversion, and carbohydrate transport and metabolism) than pooled dispensable and singleton genes in other L. lactis strains, reflecting WFLU12 strain-specific ecosystem origin and its ability to metabolize different energy sources. Strain WFLU12 produced antimicrobial compounds that could inhibit several bacterial fish pathogens. It possessed the nisin gene cluster (nisZBTCIPRKFEG) and genes encoding lysozyme and colicin V. However, only three other strains (CV56, IO-1, and SO) harbor a complete nisin gene cluster. We also found that L. 
lactis WFLU12 possessed many other important functional genes involved in stress responses to the gastrointestinal tract environment, dietary energy extraction, and metabolism to support the probiotic action of this strain found in our previous study. This strongly indicates that not all L. lactis strains can be used as probiotics. This study highlights comparative genomics approaches as very useful and powerful tools to select probiotic candidates and predict their probiotic effects.}, } @article {pmid29694430, year = {2018}, author = {Chen, C and Wu, L and Cao, Q and Shao, H and Li, X and Zhang, Y and Wang, H and Tan, X}, title = {Genome comparison of different Zymomonas mobilis strains provides insights on conservation of the evolution.}, journal = {PloS one}, volume = {13}, number = {4}, pages = {e0195994}, pmid = {29694430}, issn = {1932-6203}, mesh = {Ethanol/metabolism ; Evolution, Molecular ; Genome Size ; *Genome, Bacterial ; Hydrogen-Ion Concentration ; Phylogeny ; Sequence Analysis, DNA/*methods ; Zymomonas/*classification/genetics/metabolism ; }, abstract = {Zymomonas mobilis has the special Entner-Doudoroff (ED) pathway and it has excellent industrial characteristics, including low cell mass formation, high-specific productivity, ethanol yield, notable ethanol tolerance and wide pH range, a relatively small genome size. In this study, the genome sequences of NRRL B-14023 and NRRL B-12526 were sequenced and compared with other strains to explore their evolutionary relationships and the genetic basis of Z. mobilis. The comparative genomic analyses revealed that the 8 strains share a conserved core chromosomal backbone. ZM4, NRRL B-12526, NRRL B-14023, NCIMB 11163 and NRRL B-1960 share 98% sequence identity across the whole genome sequences. Highly similar plasmids and CRISPR repeats were detected in these strains. 
A whole-genome phylogenetic tree of the 8 strains indicated that NRRL B-12526, NRRL B-14023 and ATCC 10988 had a close evolutionary relationship with the strain ZM4. Furthermore, strains ATCC29191 and ATCC29192 had distinctive CRISPR with a far distant relationship. The size of the pan-genome was 1945 genes, including 1428 core genes and 517 accessory genes. The genomes of Z. mobilis were highly conserved; particularly strains ZM4, NRRL B-12526, NRRL B-14023, NCIMB 11163 and NRRL B-1960 had a close genomic relationship. This comparative study of Z. mobilis presents a foundation for future functional analyses and applications.}, } @article {pmid29690879, year = {2018}, author = {Inglin, RC and Meile, L and Stevens, MJA}, title = {Clustering of Pan- and Core-genome of Lactobacillus provides Novel Evolutionary Insights for Differentiation.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {284}, pmid = {29690879}, issn = {1471-2164}, support = {145214//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung/ ; }, mesh = {Algorithms ; Cluster Analysis ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Genomics ; Lactobacillus/*genetics ; }, abstract = {BACKGROUND: Bacterial taxonomy aims to classify bacteria based on true evolutionary events and relies on a polyphasic approach that includes phenotypic, genotypic and chemotaxonomic analyses. Until now, complete genomes are largely ignored in taxonomy. The genus Lactobacillus consists of 173 species and many genomes are available to study taxonomy and evolutionary events.

RESULTS: We analyzed and clustered 98 completely sequenced genomes of the genus Lactobacillus and 234 draft genomes of 5 different Lactobacillus species, i.e. L. reuteri, L. delbrueckii, L. plantarum, L. rhamnosus and L. helveticus. The core-genome of the genus Lactobacillus contains 266 genes and the pan-genome 20'800 genes. Clustering of the Lactobacillus pan- and core-genome resulted in two highly similar trees. This shows that evolutionary history is traceable in the core-genome and that clustering of the core-genome is sufficient to explore relationships. Clustering of core- and pan-genomes at species' level resulted in similar trees as well. Detailed analyses of the core-genomes showed that the functional class "genetic information processing" is conserved in the core-genome but that "signaling and cellular processes" is not. The latter class encodes functions that are involved in environmental interactions. Evolution of lactobacilli seems therefore directed by the environment. The type species L. delbrueckii was analyzed in detail and its pan-genome based tree contained two major clades whose members contained different genes yet identical functions. In addition, evidence for horizontal gene transfer between strains of L. delbrueckii, L. plantarum, and L. rhamnosus, and between species of the genus Lactobacillus is presented. Our data provide evidence for evolution of some lactobacilli according to a parapatric-like model for species differentiation.

CONCLUSIONS: Core-genome trees are useful to detect evolutionary relationships in lactobacilli and might be useful in taxonomic analyses. Lactobacillus' evolution is directed by the environment and HGT.}, } @article {pmid29688542, year = {2018}, author = {Kupczok, A and Neve, H and Huang, KD and Hoeppner, MP and Heller, KJ and Franz, CMAP and Dagan, T}, title = {Rates of Mutation and Recombination in Siphoviridae Phage Genome Evolution over Three Decades.}, journal = {Molecular biology and evolution}, volume = {35}, number = {5}, pages = {1147-1159}, pmid = {29688542}, issn = {1537-1719}, support = {281357/ERC_/European Research Council/International ; }, mesh = {*Evolution, Molecular ; *Genome, Viral ; Lactococcus lactis/virology ; Mutation Rate ; Recombination, Genetic ; Siphoviridae/*genetics ; }, abstract = {The evolution of asexual organisms is driven not only by the inheritance of genetic modification but also by the acquisition of foreign DNA. The contribution of vertical and horizontal processes to genome evolution depends on their rates per year and is quantified by the ratio of recombination to mutation. These rates have been estimated for bacteria; however, no estimates have been reported for phages. Here, we delineate the contribution of mutation and recombination to dsDNA phage genome evolution. We analyzed 34 isolates of the 936 group of Siphoviridae phages using a Lactococcus lactis strain from a single dairy over 29 years. We estimate a constant substitution rate of 1.9 × 10^{-4} substitutions per site per year due to mutation that is within the range of estimates for eukaryotic RNA and DNA viruses. The reconstruction of recombination events reveals a constant rate of five recombination events per year and 4.5 × 10^{-3} nucleotide alterations due to recombination per site per year. Thus, the recombination rate exceeds the substitution rate, resulting in a relative effect of recombination to mutation (r/m) of ∼24 that is homogenous over time. 
Especially in the early transcriptional region, we detect frequent gene loss and regain due to recombination with phages of the 936 group, demonstrating the role of the 936 group pangenome as a reservoir of genetic variation. The observed substitution rate homogeneity conforms to the neutral theory of evolution; hence, the neutral theory can be applied to phage genome evolution and also to genetic variation brought about by recombination.}, } @article {pmid29680695, year = {2018}, author = {Parry-Hanson Kunadu, A and Holmes, M and Miller, EL and Grant, AJ}, title = {Microbiological quality and antimicrobial resistance characterization of Salmonella spp. in fresh milk value chains in Ghana.}, journal = {International journal of food microbiology}, volume = {277}, number = {}, pages = {41-49}, doi = {10.1016/j.ijfoodmicro.2018.04.025}, pmid = {29680695}, issn = {1879-3460}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Cheese/*microbiology ; Ciprofloxacin/pharmacology ; *Drug Resistance, Bacterial ; Escherichia coli O157/drug effects/*isolation & purification ; Food Microbiology ; Ghana ; Listeria monocytogenes/drug effects/*isolation & purification ; Microbial Sensitivity Tests ; Milk/*microbiology ; Phylogeny ; Salmonella enterica/classification/drug effects/*isolation & purification ; Staphylococcus aureus/drug effects/*isolation & purification ; }, abstract = {Consumer perception of poor hygiene of fresh milk products is a major barrier to promotion of milk consumption as an intervention to alleviate the burden of malnutrition in Ghana. Fresh milk is retailed raw, boiled, or processed into unfermented cheese and spontaneously fermented products in unlicensed outlets. In this study, we have determined microbiological quality of informally retailed fresh milk products and characterized the genomic diversity and antimicrobial resistance (AMR) patterns of non-typhoidal Salmonella (NTS) in implicated products. 
A total of 159 common dairy products were purchased from five traditional milk markets in Accra. Samples were analysed for concentrations of aerobic bacteria, total and fecal coliforms, Escherichia coli, staphylococci, lactic acid bacteria and yeast and moulds. The presence of Salmonella, E. coli O157:H7, Listeria monocytogenes and Staphylococcus aureus were determined. AMR of Salmonella against 18 antibiotics was experimentally determined. Genome sequencing of 19 Salmonella isolates allowed determination of serovars, antigenic profiles, prediction of AMR genes in silico and inference of phylogenetic relatedness between strains. Raw and heat-treated milk did not differ significantly in overall bacterial quality (P = 0.851). E. coli O157:H7 and Staphylococcus aureus were present in 34.3% and 12.9% of dairy products respectively. Multidrug resistant (MDR) Salmonella enterica serovars Muenster and Legon were identified in 11.8% and 5.9% of unfermented cheese samples respectively. Pan genome analysis revealed a total of 3712 core genes. All Salmonella strains were resistant to Trimethoprim/Sulfamethoxazole, Cefoxitin, Cefuroxime Axetil and Cefuroxime. Resistance to Chloramphenicol (18%) and Ciprofloxacin (100%), which are first line antibiotics used in treatment of NTS bacteremia in Ghana, was evident. AMR was attributed to presence and/or mutations in the following genes: golS, sdiA for cephalosporins, aac(6')-Iy, ant(9) for aminoglycosides, mdtK, gyrA, gyrB, parC, parE for quinolones and cat1, cat4 for phenicols. Phylogenetic analysis based on accessory genes clustered S. Legon strains separately from the S. Muenster strains. These strains were from different markets suggesting local circulation of related strains. 
Our study justifies consumer resistance to consumption of unripened soft cheese without further lethal heat treatment, and provides evidence that supports the Ghana Health Service recommendation for use of 3rd generation cephalosporins for the treatment of MDR NTS infections.}, } @article {pmid29678917, year = {2018}, author = {Jibrin, MO and Potnis, N and Timilsina, S and Minsavage, GV and Vallad, GE and Roberts, PD and Jones, JB and Goss, EM}, title = {Genomic Inference of Recombination-Mediated Evolution in Xanthomonas euvesicatoria and X. perforans.}, journal = {Applied and environmental microbiology}, volume = {84}, number = {13}, pages = {}, pmid = {29678917}, issn = {1098-5336}, mesh = {Bacterial Proteins/genetics ; Base Sequence ; Breeding ; *Evolution, Molecular ; Florida ; Gene Transfer, Horizontal ; *Genome, Bacterial ; *Genomics ; Homologous Recombination ; Host-Pathogen Interactions ; India ; Italy ; Solanum lycopersicum/microbiology ; Nigeria ; Phylogeny ; Piper/microbiology ; Plant Diseases/microbiology ; *Recombination, Genetic ; Type III Secretion Systems/genetics ; Virulence Factors/genetics ; Xanthomonas/classification/*genetics/pathogenicity ; }, abstract = {Recombination is a major driver of evolution in bacterial populations, because it can spread and combine independently evolved beneficial mutations. Recombinant lineages of bacterial pathogens of plants are typically associated with the colonization of novel hosts and the emergence of new diseases. Here we show that recombination between evolutionarily and phenotypically distinct plant-pathogenic lineages generated recombinant lineages with unique combinations of pathogenicity and virulence factors. Xanthomonas euvesicatoria and Xanthomonas perforans are two closely related lineages causing bacterial spot disease on tomato and pepper worldwide. 
We sequenced the genomes of atypical strains collected from tomato in Nigeria and observed recombination in the type III secretion system and effector genes, which showed alleles from both X. euvesicatoria and X. perforans. Wider horizontal gene transfer was indicated by the fact that the lipopolysaccharide cluster of one strain was most similar to that of a distantly related Xanthomonas pathogen of barley. This strain and others have experienced extensive genomewide homologous recombination, and both species exhibited dynamic open pangenomes. Variation in effector gene repertoires within and between species must be taken into consideration when one is breeding tomatoes for disease resistance. Resistance breeding strategies that target specific effectors must consider possibly dramatic variation in bacterial spot populations across global production regions, as illustrated by the recombinant strains observed here. IMPORTANCE The pathogens that cause bacterial spot of tomato and pepper are extensively studied models of plant-microbe interactions and cause problematic disease worldwide. Atypical bacterial spot strains collected from tomato in Nigeria, and other strains from Italy, India, and Florida, showed evidence of genomewide recombination that generated genetically distinct pathogenic lineages. The strains from Nigeria and Italy were found to have a mix of type III secretion system genes from X. perforans and X. euvesicatoria, as well as effectors from Xanthomonas gardneri. These genes and effectors are important in the establishment of disease, and effectors are common targets of resistance breeding. 
Our findings point to global diversity in the genomes of bacterial spot pathogens, which is likely to affect the host-pathogen interaction and influence management decisions.}, } @article {pmid29674528, year = {2018}, author = {Loux, V and Coeuret, G and Zagorec, M and Champomier Vergès, MC and Chaillou, S}, title = {Complete and Draft Genome Sequences of Nine Lactobacillus sakei Strains Selected from the Three Known Phylogenetic Lineages and Their Main Clonal Complexes.}, journal = {Genome announcements}, volume = {6}, number = {16}, pages = {}, pmid = {29674528}, issn = {2169-8287}, abstract = {We present here the complete and draft genome sequences of nine Lactobacillus sakei strains, selected from the entire range of clonal complexes from the three known lineages of the species. The strains were chosen to provide a wide view of pangenomic and plasmidic diversity for this important foodborne species.}, } @article {pmid29657968, year = {2018}, author = {Baby, V and Lachance, JC and Gagnon, J and Lucier, JF and Matteau, D and Knight, T and Rodrigue, S}, title = {Inferring the Minimal Genome of Mesoplasma florum by Comparative Genomics and Transposon Mutagenesis.}, journal = {mSystems}, volume = {3}, number = {3}, pages = {}, pmid = {29657968}, issn = {2379-5077}, support = {27307C0010/ES/NIEHS NIH HHS/United States ; }, abstract = {The creation and comparison of minimal genomes will help better define the most fundamental mechanisms supporting life. Mesoplasma florum is a near-minimal, fast-growing, nonpathogenic bacterium potentially amenable to genome reduction efforts. In a comparative genomic study of 13 M. florum strains, including 11 newly sequenced genomes, we have identified the core genome and open pangenome of this species. Our results show that all of the strains have approximately 80% of their gene content in common. Of the remaining 20%, 17% of the genes were found in multiple strains and 3% were unique to any given strain. 
On the basis of random transposon mutagenesis, we also estimated that ~290 out of 720 genes are essential for M. florum L1 in rich medium. We next evaluated different genome reduction scenarios for M. florum L1 by using gene conservation and essentiality data, as well as comparisons with the first working approximation of a minimal organism, Mycoplasma mycoides JCVI-syn3.0. Our results suggest that 409 of the 473 M. mycoides JCVI-syn3.0 genes have orthologs in M. florum L1. Conversely, 57 putatively essential M. florum L1 genes have no homolog in M. mycoides JCVI-syn3.0. This suggests differences in minimal genome compositions, even for these evolutionarily closely related bacteria. IMPORTANCE The last years have witnessed the development of whole-genome cloning and transplantation methods and the complete synthesis of entire chromosomes. Recently, the first minimal cell, Mycoplasma mycoides JCVI-syn3.0, was created. Despite these milestone achievements, several questions remain to be answered. For example, is the composition of minimal genomes virtually identical in phylogenetically related species? On the basis of comparative genomics and transposon mutagenesis, we investigated this question by using an alternative model, Mesoplasma florum, that is also amenable to genome reduction efforts. 
Our results suggest that the creation of additional minimal genomes could help reveal different gene compositions and strategies that can support life, even within closely related species.}, } @article {pmid29636744, year = {2018}, author = {Zhang, X and Liu, X and Yang, F and Chen, L}, title = {Pan-Genome Analysis Links the Hereditary Variation of Leptospirillum ferriphilum With Its Evolutionary Adaptation.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {577}, pmid = {29636744}, issn = {1664-302X}, abstract = {Niche adaptation has long been recognized to drive intra-species differentiation and speciation, yet knowledge about its relatedness with hereditary variation of microbial genomes is relatively limited. Using Leptospirillum ferriphilum species as a case study, we present a detailed analysis of genomic features of five recognized strains. Genome-to-genome distance calculation preliminarily determined the roles of spatial distance and environmental heterogeneity that potentially contribute to intra-species variation within L. ferriphilum species at the genome level. Mathematical models were further constructed to extrapolate the expansion of L. ferriphilum genomes (an 'open' pan-genome), indicating the emergence of novel genes with new sequenced genomes. The identification of diverse mobile genetic elements (MGEs) (such as transposases, integrases, and phage-associated genes) revealed the prevalence of horizontal gene transfer events, which is an important evolutionary mechanism that provides avenues for the recruitment of novel functionalities and further for the genetic divergence of microbial genomes. Comprehensive analysis also demonstrated that the genome reduction by gene loss in a broad sense might contribute to the observed diversification. We thus inferred a plausible explanation to address this observation: the community-dependent adaptation that potentially economizes the limiting resources of the entire community. 
Now that the introduction of new genes is accompanied by a parallel abandonment of some other ones, our results provide snapshots on the biological fitness cost of environmental adaptation within the L. ferriphilum genomes. In short, our genome-wide analyses bridge the relation between genetic variation of L. ferriphilum with its evolutionary adaptation.}, } @article {pmid29635365, year = {2018}, author = {Holm, KO and Bækkedal, C and Söderberg, JJ and Haugen, P}, title = {Complete Genome Sequences of Seven Vibrio anguillarum Strains as Derived from PacBio Sequencing.}, journal = {Genome biology and evolution}, volume = {10}, number = {4}, pages = {1127-1131}, pmid = {29635365}, issn = {1759-6653}, mesh = {DNA Transposable Elements/*genetics ; Genome, Bacterial/genetics ; High-Throughput Nucleotide Sequencing ; Molecular Sequence Annotation ; Sequence Analysis, DNA ; Vibrio/*genetics ; *Whole Genome Sequencing ; }, abstract = {We report here the complete genome sequences of seven Vibrio anguillarum strains isolated from multiple geographic locations, thus increasing the total number of genomes of finished quality to 11. The genomes were de novo assembled from long-sequence PacBio reads. Including draft genomes, a total of 44 V. anguillarum genomes are currently available in the genome databases. They represent an important resource in the study of, for example, genetic variations and for identifying virulence determinants. In this article, we present the genomes and basic genome comparisons of the 11 complete genomes, including a BRIG analysis, and pan genome calculation. We also describe some structural features of superintegrons on chromosome 2 s, and associated insertion sequence (IS) elements, including 18 new ISs (ISVa3 - ISVa20), both of importance in the complement of V. 
anguillarum genomes.}, } @article {pmid29635296, year = {2018}, author = {Thorpe, HA and Bayliss, SC and Sheppard, SK and Feil, EJ}, title = {Piggy: a rapid, large-scale pan-genome analysis tool for intergenic regions in bacteria.}, journal = {GigaScience}, volume = {7}, number = {4}, pages = {1-11}, pmid = {29635296}, issn = {2047-217X}, support = {G0801929/MRC_/Medical Research Council/United Kingdom ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; G1000803//Medical Research Council/United Kingdom ; }, mesh = {DNA, Intergenic ; Escherichia coli/*genetics ; *Genome, Bacterial ; Genomics/*methods ; Staphylococcus aureus/*genetics ; }, abstract = {BACKGROUND: The concept of the "pan-genome," which refers to the total complement of genes within a given sample or species, is well established in bacterial genomics. Rapid and scalable pipelines are available for managing and interpreting pan-genomes from large batches of annotated assemblies. However, despite overwhelming evidence that variation in intergenic regions in bacteria can directly influence phenotypes, most current approaches for analyzing pan-genomes focus exclusively on protein-coding sequences.

FINDINGS: To address this we present Piggy, a novel pipeline that emulates Roary except that it is based only on intergenic regions. A key utility provided by Piggy is the detection of highly divergent ("switched") intergenic regions (IGRs) upstream of genes. We demonstrate the use of Piggy on large datasets of clinically important lineages of Staphylococcus aureus and Escherichia coli.

CONCLUSIONS: For S. aureus, we show that highly divergent (switched) IGRs are associated with differences in gene expression and we establish a multilocus reference database of IGR alleles (igMLST; implemented in BIGSdb).}, } @article {pmid29629856, year = {2018}, author = {Croxen, MA and Lee, TD and Azana, R and Hoang, LM}, title = {Use of genomics to design a diagnostic assay to discriminate between Streptococcus pneumoniae and Streptococcus pseudopneumoniae.}, journal = {Microbial genomics}, volume = {4}, number = {7}, pages = {}, pmid = {29629856}, issn = {2057-5858}, mesh = {Genetic Markers ; Genome, Bacterial ; *Genomics ; Phylogeny ; Real-Time Polymerase Chain Reaction ; Serogroup ; Streptococcal Infections/*diagnosis ; Streptococcus mitis/*genetics ; Streptococcus pneumoniae/*genetics ; Whole Genome Sequencing ; }, abstract = {Distinguishing the species of mitis group streptococci is challenging due to ambiguous phenotypic characteristics and high degree of genetic similarity. This has been particularly true for resolving atypical Streptococcus pneumoniae and Streptococcus pseudopneumoniae. We used phylogenetic clustering to demonstrate specific and separate clades for both S. pneumoniae and S. pseudopneumoniae genomes. The genomes that clustered within these defined clades were used to extract species-specific genes from the pan-genome. The S. pneumoniae marker was detected in 8027 out of 8051 (>99.7 %) S. pneumoniae genomes. The S. pseudopneumoniae marker was specific for all genomes that clustered in the S. pseudopneumoniae clade, including unresolved species of the genus Streptococcus sequenced by the BC Centre for Disease Control Public Health Laboratory that previously could not be distinguished by other methods. Other than the presence of the S. pseudopneumoniae marker in six of 8051 (<0.08 %) S. pneumoniae genomes, both the S. pneumoniae and S. 
pseudopneumoniae markers showed little to no detectable cross-reactivity to the genomes of any other species of the genus Streptococcus or to a panel of over 46 000 genomes from viral, fungal, bacterial pathogens and microbiota commonly found in the respiratory tract. A real-time PCR assay was designed targeting these two markers. Genomics provides a useful technique for PCR assay design and development.}, } @article {pmid29621323, year = {2018}, author = {Bonnet, E and Moutet, ML and Baulard, C and Bacq-Daian, D and Sandron, F and Mesrob, L and Fin, B and Delépine, M and Palomares, MA and Jubin, C and Blanché, H and Meyer, V and Boland, A and Olaso, R and Deleuze, JF}, title = {Performance comparison of three DNA extraction kits on human whole-exome data from formalin-fixed paraffin-embedded normal and tumor samples.}, journal = {PloS one}, volume = {13}, number = {4}, pages = {e0195471}, pmid = {29621323}, issn = {1932-6203}, mesh = {Colon/metabolism ; Colonic Neoplasms/metabolism ; Computational Biology ; Cryopreservation ; DNA/analysis/*isolation & purification ; DNA, Neoplasm/analysis/*isolation & purification ; Fixatives ; Formaldehyde ; Humans ; INDEL Mutation ; Liver/metabolism ; Liver Neoplasms/metabolism ; Paraffin Embedding ; Polymorphism, Single Nucleotide ; *Tissue Fixation ; *Exome Sequencing ; }, abstract = {Next-generation sequencing (NGS) studies are becoming routinely used for the detection of novel and clinically actionable DNA variants at a pangenomic scale. Such analyses are now used in the clinical practice to enable precision medicine. Formalin-fixed paraffin-embedded (FFPE) tissues are still one of the most abundant source of cancer clinical specimen, unfortunately this method of preparation is known to degrade DNA and therefore compromise subsequent analysis. 
Some studies have reported that variant detection can be performed on FFPE samples sequenced with NGS techniques, but few or none have done an in-depth coverage analysis and compared the influence of different state-of-the-art FFPE DNA extraction kits on the quality of the variant calling. Here, we generated 42 human whole-exome sequencing data sets from fresh-frozen (FF) and FFPE samples. These samples include normal and tumor tissues from two different organs (liver and colon), that we extracted with three different FFPE extraction kits (QIAamp DNA FFPE Tissue kit and GeneRead DNA FFPE kit from Qiagen, Maxwell™ RSC DNA FFPE Kit from Promega). We determined the rate of concordance of called variants between matched FF and FFPE samples on all common variants (representing at least 86% of the total number of variants for SNVs). The concordance rate is very high between all matched FF / FFPE pairs, with equivalent values for the three kits we analyzed. On the other hand, when looking at the difference between the total number of variants in FF and FFPE, we find a significant variation for the three different FFPE DNA extraction kits. Coverage analysis shows that FFPE samples have less good indicators than FF samples, yet the coverage quality remains above accepted thresholds. We detect limited but statistically significant variations in coverage indicator values between the three FFPE extraction kits. Globally, the GeneRead and QIAamp kits have better variant calling and coverage indicators than the Maxwell kit on the samples used in this study, although this kit performs better on some indicators and has advantages in terms of practical usage. 
Taken together, our results confirm the potential of FFPE samples analysis for clinical genomic studies, but also indicate that the choice of a FFPE DNA extraction kit should be done with careful testing and analysis beforehand in order to maximize the accuracy of the results.}, } @article {pmid29617810, year = {2018}, author = {Murillo, T and Ramírez-Vargas, G and Riedel, T and Overmann, J and Andersen, JM and Guzmán-Verri, C and Chaves-Olarte, E and Rodríguez, C}, title = {Two Groups of Cocirculating, Epidemic Clostridiodes difficile Strains Microdiversify through Different Mechanisms.}, journal = {Genome biology and evolution}, volume = {10}, number = {3}, pages = {982-998}, pmid = {29617810}, issn = {1759-6653}, mesh = {Clostridioides difficile/*genetics ; Clostridium Infections/*genetics/microbiology ; Disease Outbreaks ; Drug Resistance, Bacterial/genetics ; Gene Transfer, Horizontal/*genetics ; *Genetic Variation ; Genome, Bacterial ; Genotype ; Humans ; Mutation ; Virulence/genetics ; }, abstract = {Clostridioides difficile strains from the NAPCR1/ST54 and NAP1/ST01 types have caused outbreaks despite their notable differences in genome diversity. By comparing whole genome sequences of 32 NAPCR1/ST54 isolates and 17 NAP1/ST01 recovered from patients infected with C. difficile we assessed whether mutation, homologous recombination (r) or nonhomologous recombination (NHR) through lateral gene transfer (LGT) have differentially shaped the microdiversification of these strains. The average number of single nucleotide polymorphisms (SNPs) in coding sequences (NAPCR1/ST54 = 24; NAP1/ST01 = 19) and SNP densities (NAPCR1/ST54 = 0.54/kb; NAP1/ST01 = 0.46/kb) in the NAPCR1/ST54 and NAP1/ST01 isolates were comparable. However, the NAP1/ST01 isolates showed 3× higher average dN/dS rates (8.35) than the NAPCR1/ST54 isolates (2.62). 
Regarding r, whereas 31 of the NAPCR1/ST54 isolates showed 1 recombination block (3,301-8,226 bp), the NAP1/ST01 isolates showed no bases in recombination. As to NHR, the pangenome of the NAPCR1/ST54 isolates was larger (4,802 gene clusters, 26% noncore genes) and more heterogeneous (644 ± 33 gene content changes) than that of the NAP1/ST01 isolates (3,829 gene clusters, ca. 6% noncore genes, 129 ± 37 gene content changes). Nearly 55% of the gene content changes seen among the NAPCR1/ST54 isolates (355 ± 31) were traced back to MGEs with putative genes for antimicrobial resistance and virulence factors that were only detected in single isolates or isolate clusters. Congruently, the LGT/SNP rate calculated for the NAPCR1/ST54 isolates (26.8 ± 2.8) was 4× higher than the one obtained for the NAP1/ST1 isolates (6.8 ± 2.0). We conclude that NHR-LGT has had a greater role in the microdiversification of the NAPCR1/ST54 strains, opposite to the NAP1/ST01 strains, where mutation is known to play a more prominent role.}, } @article {pmid29617440, year = {2018}, author = {Azarian, T and Grant, LR and Arnold, BJ and Hammitt, LL and Reid, R and Santosham, M and Weatherholtz, R and Goklish, N and Thompson, CM and Bentley, SD and O'Brien, KL and Hanage, WP and Lipsitch, M}, title = {The impact of serotype-specific vaccination on phylodynamic parameters of Streptococcus pneumoniae and the pneumococcal pan-genome.}, journal = {PLoS pathogens}, volume = {14}, number = {4}, pages = {e1006966}, pmid = {29617440}, issn = {1553-7374}, support = {/WT_/Wellcome Trust/United Kingdom ; R01 AI048935/AI/NIAID NIH HHS/United States ; R01 AI106786/AI/NIAID NIH HHS/United States ; }, mesh = {Adolescent ; Adult ; Aged ; Child ; Genetics, Population ; *Genome, Bacterial ; Heptavalent Pneumococcal Conjugate Vaccine/*administration & dosage ; Humans ; Middle Aged ; Nasopharynx/microbiology ; Phylogeny ; Pneumococcal Infections/epidemiology/immunology/microbiology/*prevention & control ; 
Pneumococcal Vaccines/*administration & dosage ; Population Dynamics ; Prospective Studies ; *Serogroup ; Serotyping ; Streptococcus pneumoniae/genetics/*immunology ; Vaccination ; Young Adult ; }, abstract = {In the United States, the introduction of the heptavalent pneumococcal conjugate vaccine (PCV) largely eliminated vaccine serotypes (VT); non-vaccine serotypes (NVT) subsequently increased in carriage and disease. Vaccination also disrupts the composition of the pneumococcal pangenome, which includes mobile genetic elements and polymorphic non-capsular antigens important for virulence, transmission, and pneumococcal ecology. Antigenic proteins are of interest for future vaccines; yet, little is known about how they are affected by PCV use. To investigate the evolutionary impact of vaccination, we assessed recombination, evolution, and pathogen demographic history of 937 pneumococci collected from 1998-2012 among Navajo and White Mountain Apache Native American communities. We analyzed changes in the pneumococcal pangenome, focusing on metabolic loci and 19 polymorphic protein antigens. We found the impact of PCV on the pneumococcal population could be observed in reduced diversity, a smaller pangenome, and changing frequencies of accessory clusters of orthologous groups (COGs). Post-PCV7, diversity rebounded through clonal expansion of NVT lineages and inferred in-migration of two previously unobserved lineages. Accessory COGs frequencies trended toward pre-PCV7 values with increasing time since vaccine introduction. Contemporary frequencies of protein antigen variants are better predicted by pre-PCV7 values (1998-2000) than the preceding period (2006-2008), suggesting balancing selection may have acted in maintaining variant frequencies in this population. Overall, we present the largest genomic analysis of pneumococcal carriage in the United States to date, which includes a snapshot of a true vaccine-naïve community prior to the introduction of PCV7. 
These data improve our understanding of pneumococcal evolution and emphasize the need to consider pangenome composition when inferring the impact of vaccination and developing future protein-based pneumococcal vaccines.}, } @article {pmid29610707, year = {2018}, author = {Åvall-Jääskeläinen, S and Taponen, S and Kant, R and Paulin, L and Blom, J and Palva, A and Koort, J}, title = {Comparative genome analysis of 24 bovine-associated Staphylococcus isolates with special focus on the putative virulence genes.}, journal = {PeerJ}, volume = {6}, number = {}, pages = {e4560}, pmid = {29610707}, issn = {2167-8359}, abstract = {Non-aureus staphylococci (NAS) are most commonly isolated from subclinical mastitis. Different NAS species may, however, have diverse effects on the inflammatory response in the udder. We determined the genome sequences of 20 staphylococcal isolates from clinical or subclinical bovine mastitis, belonging to the NAS species Staphylococcus agnetis, S. chromogenes, and S. simulans, and focused on the putative virulence factor genes present in the genomes. For comparison we used our previously published genome sequences of four S. aureus isolates from bovine mastitis. The pan-genome and core genomes of the non-aureus isolates were characterized. After that, putative virulence factor orthologues were searched in silico. We compared the presence of putative virulence factors in the NAS species and S. aureus and evaluated the potential association between bacterial genotype and type of mastitis (clinical vs. subclinical). The NAS isolates had much less virulence gene orthologues than the S. aureus isolates. One third of the virulence genes were detected only in S. aureus. About 100 virulence genes were present in all S. aureus isolates, compared to about 40 to 50 in each NAS isolate. S. simulans differed the most. Several of the virulence genes detected among NAS were harbored only by S. simulans, but it also lacked a number of genes present both in S. 
agnetis and S. chromogenes. The type of mastitis was not associated with any specific virulence gene profile. It seems that the virulence gene profiles or cumulative number of different virulence genes are not directly associated with the type of mastitis (clinical or subclinical), indicating that host derived factors such as the immune status play a pivotal role in the manifestation of mastitis.}, } @article {pmid29593678, year = {2018}, author = {Moldovan, MA and Gelfand, MS}, title = {Pangenomic Definition of Prokaryotic Species and the Phylogenetic Structure of Prochlorococcus spp.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {428}, pmid = {29593678}, issn = {1664-302X}, abstract = {The pangenome is the collection of all groups of orthologous genes (OGGs) from a set of genomes. We apply the pangenome analysis to propose a definition of prokaryotic species based on identification of lineage-specific gene sets. While being similar to the classical biological definition based on allele flow, it does not rely on DNA similarity levels and does not require analysis of homologous recombination. Hence this definition is relatively objective and independent of arbitrary thresholds. A systematic analysis of 110 accepted species with the largest numbers of sequenced strains yields results largely consistent with the existing nomenclature. However, it has revealed that abundant marine cyanobacteria Prochlorococcus marinus should be divided into two species. As a control we have confirmed the paraphyletic origin of Yersinia pseudotuberculosis (with embedded, monophyletic Y. pestis) and Burkholderia pseudomallei (with B. mallei). We also demonstrate that by our definition and in accordance with recent studies Escherichia coli and Shigella spp. 
are one species.}, } @article {pmid29584736, year = {2018}, author = {Kelly, AC and Ward, TJ}, title = {Population genomics of Fusarium graminearum reveals signatures of divergent evolution within a major cereal pathogen.}, journal = {PloS one}, volume = {13}, number = {3}, pages = {e0194616}, pmid = {29584736}, issn = {1932-6203}, mesh = {Bacterial Toxins/classification/metabolism ; Bayes Theorem ; *Biological Evolution ; Fusarium/classification/*genetics/isolation & purification ; Genetic Variation ; Genetics, Population ; *Genome, Bacterial ; Haplotypes ; Phylogeny ; Plant Diseases/microbiology ; Polymorphism, Single Nucleotide ; Trichothecenes/biosynthesis/classification ; Triticum/metabolism/*microbiology ; }, abstract = {The cereal pathogen Fusarium graminearum is the primary cause of Fusarium head blight (FHB) and a significant threat to food safety and crop production. To elucidate population structure and identify genomic targets of selection within major FHB pathogen populations in North America we sequenced the genomes of 60 diverse F. graminearum isolates. We also assembled the first pan-genome for F. graminearum to clarify population-level differences in gene content potentially contributing to pathogen diversity. Bayesian and phylogenomic analyses revealed genetic structure associated with isolates that produce the novel NX-2 mycotoxin, suggesting a North American population that has remained genetically distinct from other endemic and introduced cereal-infecting populations. Genome scans uncovered distinct signatures of selection within populations, focused in high diversity, frequently recombining regions. These patterns suggested selection for genomic divergence at the trichothecene toxin gene cluster and thirteen additional regions containing genes potentially involved in pathogen specialization. Gene content differences further distinguished populations, in that 121 genes showed population-specific patterns of conservation. 
Genes that differentiated populations had predicted functions related to pathogenesis, secondary metabolism and antagonistic interactions, though a subset had unique roles in temperature and light sensitivity. Our results indicated that F. graminearum populations are distinguished by dozens of genes with signatures of selection and an array of dispensable accessory genes, suggesting that FHB pathogen populations may be equipped with different traits to exploit the agroecosystem. These findings provide insights into the evolutionary processes and genomic features contributing to population divergence in plant pathogens, and highlight candidate genes for future functional studies of pathogen specialization across evolutionarily and ecologically diverse fungi.}, } @article {pmid29575852, year = {2018}, author = {Dutkiewicz, J and Zając, V and Sroka, J and Wasiński, B and Cisak, E and Sawczyn, A and Kloc, A and Wójcik-Fatla, A}, title = {Streptococcus suis: a re-emerging pathogen associated with occupational exposure to pigs or pork products. Part II - Pathogenesis.}, journal = {Annals of agricultural and environmental medicine : AAEM}, volume = {25}, number = {1}, pages = {186-203}, doi = {10.26444/aaem/85651}, pmid = {29575852}, issn = {1898-2263}, mesh = {Agricultural Workers' Diseases/*microbiology ; Animals ; Humans ; Occupational Exposure/*adverse effects ; Streptococcal Infections/*microbiology/transmission ; Streptococcus suis/genetics/isolation & purification/pathogenicity/*physiology ; Swine ; Swine Diseases/*microbiology ; Virulence ; Virulence Factors/genetics/metabolism ; Zoonoses/*microbiology/transmission ; }, abstract = {Streptococcus suis is a re-emerging zoonotic pathogen that may cause severe disease, mostly meningitis, in pigs and in humans having occupational contact with pigs and pork, such as farmers, slaughterhouse workers and butchers. 
The first stage of the pathogenic process, similar in pigs and humans, is adherence to and colonisation of mucosal and/or epithelial surface(s) of the host. The second stage is invasion into deeper tissue and extracellular translocation of bacterium in the bloodstream, either free in circulation or attached to the surface of monocytes. If S. suis present in blood fails to cause fatal septicaemia, it is able to progress into the third stage comprising penetration into host's organs, mostly by crossing the blood-brain barrier and/or blood-cerebrospinal fluid barrier to gain access to the central nervous system (CNS) and cause meningitis. The fourth stage is inflammation that plays a key role in the pathogenesis of both systemic and CNS infections caused by S. suis. The pathogen may induce the overproduction of pro-inflammatory cytokines that cause septic shock and/or the recruitment and activation of different leukocyte populations, causing acute inflammation of the CNS. Streptococcus suis can also evoke - through activation of microglial cells, astrocytes and possibly other cell types - a fulminant inflammatory reaction of the brain which leads to intracranial complications, including brain oedema, increased intracranial pressure, cerebrovascular insults, and deafness, as a result of cochlear sepsis. In all stages of the pathogenic process, S. suis interacts with many types of immunocompetent host's cells, such as polymorphonuclear leukocytes, mononuclear macrophages, lymphocytes, dendritic cells and microglia, using a range of versatile virulence factors for evasion of the innate and adaptive immune defence of the host, and for overcoming environmental stress. It is estimated that S. suis produces more than 100 different virulence factors that could be classified into 4 groups: surface components or secreted elements, enzymes, transcription factors or regulatory systems and transporter factors or secretion systems. 
A major virulence factor is capsular polysaccharide (CPS) that protects bacteria from phagocytosis. However, it hampers adhesion to and invasion of host's cells, release of inflammatory cytokines and formation of the resistant biofilm which, in many cases, is vital for the persistence of bacteria. It has been demonstrated that the arising by mutation unencapsulated S. suis clones, which are more successful in penetration to and propagation within the host's cells, may coexist in the organism of a single host together with those that are encapsulated. Both 'complementary' clones assist each other in the successful colonization of host's tissues and persistence therein. S. suis has an open pan-genome characterized by a frequent gene transfer and a large diversity. Of the genetic determinants of S. suis pathogenicity, the most important are pathogenicity islands (PAI), in particular, a novel DNA segment of 89 kb length with evident pathogenic traits that has been designated as 89K PAI. It has been estimated that more than one-third of the S. suis virulence factors is associated with this PAI. It has been proved that the virulent S. suis strains possess smaller genomes, compared to avirulent ones, but more genes associated with virulence. 
Overall, the evolution of the species most probably aims towards increased pathogenicity, and hence the most significant task of the current research is an elaboration of a vaccine, efficient both for humans and pigs.}, } @article {pmid29556550, year = {2018}, author = {Medema, MH}, title = {Computational Genomics of Specialized Metabolism: from Natural Product Discovery to Microbiome Ecology.}, journal = {mSystems}, volume = {3}, number = {2}, pages = {}, pmid = {29556550}, issn = {2379-5077}, support = {U01 GM110699/GM/NIGMS NIH HHS/United States ; U01 GM110706/GM/NIGMS NIH HHS/United States ; }, abstract = {Microbial and plant specialized metabolites, also known as natural products, are key mediators of microbe-microbe and host-microbe interactions and constitute a rich resource for drug development. In the past decade, genome mining has emerged as a prominent strategy for natural product discovery. Initially, such mining was performed on the basis of individual microbial genome sequences. Now, these efforts are being scaled up to fully genome-sequenced strain collections, pangenomes of bacterial genera, and large sets of metagenome-assembled genomes from microbial communities. The Medema research group aims to play a leading role in these developments by developing and applying computational approaches to identify, classify, and prioritize specialized metabolite biosynthetic gene clusters and pathways and to connect them to specific molecules and microbiome-associated phenotypes. 
Moreover, we are extending the scope of genome mining from microbes to plants, which will allow more comprehensive interpretation of the chemical language between hosts and microbes in a microbiome setting.}, } @article {pmid29555923, year = {2018}, author = {Kaashyap, M and Ford, R and Kudapa, H and Jain, M and Edwards, D and Varshney, R and Mantri, N}, title = {Differential Regulation of Genes Involved in Root Morphogenesis and Cell Wall Modification is Associated with Salinity Tolerance in Chickpea.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {4855}, pmid = {29555923}, issn = {2045-2322}, mesh = {Cell Wall/*metabolism ; Cicer/cytology/*genetics/growth & development/*physiology ; *Gene Expression Profiling ; Genotype ; Organogenesis, Plant/*genetics ; Plant Roots/*growth & development ; Salt Stress/genetics ; Salt Tolerance/*genetics ; }, abstract = {Salinity is a major constraint for intrinsically salt sensitive grain legume chickpea. Chickpea exhibits large genetic variation amongst cultivars, which show better yields in saline conditions but still need to be improved further for sustainable crop production. Based on previous multi-location physiological screening, JG 11 (salt tolerant) and ICCV 2 (salt sensitive) were subjected to salt stress to evaluate their physiological and transcriptional responses. A total of ~480 million RNA-Seq reads were sequenced from root tissues which resulted in identification of 3,053 differentially expressed genes (DEGs) in response to salt stress. Reproductive stage shows high number of DEGs suggesting major transcriptional reorganization in response to salt to enable tolerance. Importantly, cationic peroxidase, Aspartic ase, NRT1/PTR, phosphatidylinositol phosphate kinase, DREB1E and ERF genes were significantly up-regulated in tolerant genotype. 
In addition, we identified a suite of important genes involved in cell wall modification and root morphogenesis such as dirigent proteins, expansin and casparian strip membrane proteins that could potentially confer salt tolerance. Further, phytohormonal cross-talk between ERF and PIN-FORMED genes which modulate the root growth was observed. The gene set enrichment analysis and functional annotation of these genes suggests they may be utilised as potential candidates for improving chickpea salt tolerance.}, } @article {pmid29551444, year = {2018}, author = {Cundon, CC and Ameal, A and Maubecín, E and Bentancor, A}, title = {[Characterization of extraintestinal pathogenic Escherichia coli strains isolated from household dogs and cats in Buenos Aires, Argentina].}, journal = {Revista Argentina de microbiologia}, volume = {50}, number = {3}, pages = {290-294}, doi = {10.1016/j.ram.2017.11.003}, pmid = {29551444}, issn = {0325-7541}, mesh = {Animals ; Argentina ; Cats/microbiology ; Dogs/microbiology ; *Escherichia coli Infections/veterinary ; *Extraintestinal Pathogenic Escherichia coli/isolation & purification ; Phylogeny ; Virulence ; Virulence Factors ; }, abstract = {The pangenome of Escherichia coli is composed of a conserved core and variable genomic regions. The constant genetic component allows to determine the phylogeny of the microorganism, while genetic variability promoted the emergence of intestinal pathogenic strains and extraintestinal strains. In this study we characterized 85 extraintestinal pathogenic isolates genetically isolated from canines and felines. We used the Clermont scheme that includes intestinal (A and B1) and extraintestinal (B2 and D) phylogroups, virulence markers (pap1-2, pap3-4, sfa, afa, hlyA, aer and cnf) and hybrid pathogens. A percentage of 69.4% of the isolates belonged to phylogroup A; 1.2% to phylogroup B1; 16.5% to phylogroup B2 and 12.9% to phylogroup D. 
The most commonly found gene was sfa (21/85), followed by pap1-2 and cnf (20/85) and pap3-4 (19/85). No hybrids were detected. Animal isolates should be studied due to the zoonotic potential of the microorganism.}, } @article {pmid29535689, year = {2018}, author = {Abreu, VAC and Popin, RV and Alvarenga, DO and Schaker, PDC and Hoff-Risseti, C and Varani, AM and Fiore, MF}, title = {Genomic and Genotypic Characterization of Cylindrospermopsis raciborskii: Toward an Intraspecific Phylogenetic Evaluation by Comparative Genomics.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {306}, pmid = {29535689}, issn = {1664-302X}, abstract = {Cylindrospermopsis raciborskii is a freshwater cyanobacterial species with increasing bloom reports worldwide that are likely due to factors related to climate change. In addition to the deleterious effects of blooms on aquatic ecosystems, the majority of ecotypes can synthesize toxic secondary metabolites causing public health issues. To overcome the harmful effects of C. raciborskii blooms, it is important to advance knowledge of diversity, genetic variation, and evolutionary processes within populations. An efficient approach to exploring this diversity and understanding the evolution of C. raciborskii is to use comparative genomics. Here, we report two new draft genomes of C. raciborskii (strains CENA302 and CENA303) from Brazilian isolates of different origins and explore their molecular diversity, phylogeny, and evolutionary diversification by comparing their genomes with sequences from other strains available in public databases. The results obtained by comparing seven C. raciborskii and the Raphidiopsis brookii D9 genomes revealed a set of conserved core genes and a variable set of accessory genes, such as those involved in the biosynthesis of natural products, heterocyte glycolipid formation, and nitrogen fixation. 
Gene cluster arrangements related to the biosynthesis of the antifungal cyclic glycosylated lipopeptide hassallidin were identified in four C. raciborskii genomes, including the non-nitrogen fixing strain CENA303. Shifts in gene clusters involved in toxin production according to geographic origins were observed, as well as a lack of nitrogen fixation (nif) and heterocyte glycolipid (hgl) gene clusters in some strains. Single gene phylogeny (16S rRNA sequences) was congruent with phylogeny based on 31 concatenated housekeeping protein sequences, and both analyses have shown, with high support values, that the species C. raciborskii is monophyletic. This comparative genomics study allowed a species-wide view of the biological diversity of C. raciborskii and in some cases linked genome differences to phenotype.}, } @article {pmid29522508, year = {2018}, author = {Beck, C and Knoop, H and Steuer, R}, title = {Modules of co-occurrence in the cyanobacterial pan-genome reveal functional associations between groups of ortholog genes.}, journal = {PLoS genetics}, volume = {14}, number = {3}, pages = {e1007239}, pmid = {29522508}, issn = {1553-7404}, mesh = {Bacterial Proteins/*genetics/metabolism ; Cyanobacteria/*genetics ; Gene Regulatory Networks ; *Genome, Bacterial ; Molecular Sequence Annotation ; Multigene Family ; Phylogeny ; }, abstract = {Cyanobacteria are a monophyletic phylogenetic group of global importance and have received considerable attention as potential host organisms for the renewable synthesis of chemical bulk products from atmospheric CO2. The cyanobacterial phylum exhibits enormous metabolic diversity with respect to morphology, lifestyle and habitat. As yet, however, research has mostly focused on few model strains and cyanobacterial diversity is insufficiently understood. 
In this respect, the increasing availability of fully sequenced bacterial genomes opens new and unprecedented opportunities to investigate the genetic inventory of organisms in the context of their pan-genome. Here, we seek to understand cyanobacterial diversity using a comparative genome analysis of 77 fully sequenced and assembled cyanobacterial genomes. We use phylogenetic profiling to analyze the co-occurrence of clusters of likely ortholog genes (CLOGs) and reveal novel functional associations between CLOGs that are not captured by co-localization of genes. Going beyond pair-wise co-occurrences, we propose a network approach that allows us to identify modules of co-occurring CLOGs. The extracted modules exhibit a high degree of functional coherence and reveal known as well as previously unknown functional associations. We argue that the high functional coherence observed for the modules is a consequence of the similar-yet-diverse nature of cyanobacteria. Our approach highlights the importance of a multi-strain analysis to understand gene functions and environmental adaptations, with implications beyond the cyanobacterial phylum. The analysis is augmented with a simple toolbox that facilitates further analysis to investigate the co-occurrence neighborhood of specific CLOGs of interest.}, } @article {pmid29515533, year = {2018}, author = {Dias, GM and Bidault, A and Le Chevalier, P and Choquet, G and Der Sarkissian, C and Orlando, L and Medigue, C and Barbe, V and Mangenot, S and Thompson, CC and Thompson, FL and Jacq, A and Pichereau, V and Paillard, C}, title = {Vibrio tapetis Displays an Original Type IV Secretion System in Strains Pathogenic for Bivalve Molluscs.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {227}, pmid = {29515533}, issn = {1664-302X}, abstract = {The Brown Ring Disease (BRD) caused high mortality rates since 1986 in the Manila clam Venerupis philippinarum introduced and cultured in Western Europe from the 1970s. 
The causative agent of BRD is a Gram-Negative bacterium, Vibrio tapetis, which is also pathogenic to fish. Here we report the first assembly of the complete genome of V. tapetis CECT4600[T], together with the genome sequences of 16 additional strains isolated across a broad host and geographic range. Our extensive genome dataset allowed us to describe the pathogen pan- and core genomes and to identify putative virulence factors. The V. tapetis core genome consists of 3,352 genes, including multiple potential virulence factors represented by haemolysins, transcriptional regulators, Type I restriction modification system, GGDEF domain proteins, several conjugative plasmids, and a Type IV secretion system. Future research on the coevolutionary arms race between V. tapetis virulence factors and host resistance mechanisms will improve our understanding of how pathogenicity develops in this emerging pathogen.}, } @article {pmid29509989, year = {2018}, author = {Méric, G and Mageiros, L and Pascoe, B and Woodcock, DJ and Mourkas, E and Lamble, S and Bowden, R and Jolley, KA and Raymond, B and Sheppard, SK}, title = {Lineage-specific plasmid acquisition and the evolution of specialized pathogens in Bacillus thuringiensis and the Bacillus cereus group.}, journal = {Molecular ecology}, volume = {27}, number = {7}, pages = {1524-1540}, pmid = {29509989}, issn = {1365-294X}, support = {MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; MR/M501608/1/MRC_/Medical Research Council/United Kingdom ; BB/L00819X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Alleles ; Bacillus cereus/*genetics/isolation & purification ; Bacillus thuringiensis/*genetics ; Bacillus thuringiensis Toxins ; Bacterial Proteins/genetics ; Bacterial Toxins/metabolism ; Chromosomes, Bacterial/genetics ; Endotoxins/genetics ; Genetic Variation ; Genome, Bacterial ; Hemolysin Proteins/genetics ; *Phylogeny ; Plasmids/*genetics ; }, abstract = {Bacterial 
plasmids can vary from small selfish genetic elements to large autonomous replicons that constitute a significant proportion of total cellular DNA. By conferring novel function to the cell, plasmids may facilitate evolution but their mobility may be opposed by co-evolutionary relationships with chromosomes or encouraged via the infectious sharing of genes encoding public goods. Here, we explore these hypotheses through large-scale examination of the association between plasmids and chromosomal DNA in the phenotypically diverse Bacillus cereus group. This complex group is rich in plasmids, many of which encode essential virulence factors (Cry toxins) that are known public goods. We characterized population genomic structure, gene content and plasmid distribution to investigate the role of mobile elements in diversification. We analysed coding sequence within the core and accessory genome of 190 B. cereus group isolates, including 23 novel sequences and genes from 410 reference plasmid genomes. While cry genes were widely distributed, those with invertebrate toxicity were predominantly associated with one sequence cluster (clade 2) and phenotypically defined Bacillus thuringiensis. Cry toxin plasmids in clade 2 showed evidence of recent horizontal transfer and variable gene content, a pattern of plasmid segregation consistent with transfer during infectious cooperation. Nevertheless, comparison between clades suggests that co-evolutionary interactions may drive association between plasmids and chromosomes and limit wider transfer of key virulence traits. Proliferation of successful plasmid and chromosome combinations is a feature of specialized pathogens with characteristic niches (Bacillus anthracis, B. thuringiensis) and has occurred multiple times in the B. 
cereus group.}, } @article {pmid29495323, year = {2018}, author = {Argemi, X and Nanoukon, C and Affolabi, D and Keller, D and Hansmann, Y and Riegel, P and Baba-Moussa, L and Prévost, G}, title = {Comparative Genomics and Identification of an Enterotoxin-Bearing Pathogenicity Island, SEPI-1/SECI-1, in Staphylococcus epidermidis Pathogenic Strains.}, journal = {Toxins}, volume = {10}, number = {3}, pages = {}, pmid = {29495323}, issn = {2072-6651}, mesh = {Enterotoxins/*genetics ; Genomic Islands ; Genomics ; Staphylococcus aureus/genetics ; Staphylococcus epidermidis/*genetics ; }, abstract = {Staphylococcus epidermidis is a leading cause of nosocomial infections, majorly resistant to beta-lactam antibiotics, and may transfer several mobile genetic elements among the members of its own species, as well as to Staphylococcus aureus; however, a genetic exchange from S. aureus to S. epidermidis remains controversial. We recently identified two pathogenic clinical strains of S. epidermidis that produce a staphylococcal enterotoxin C3-like (SEC) similar to that by S. aureus pathogenicity islands. This study aimed to determine the genetic environment of the SEC-coding sequence and to identify the mobile genetic elements. Whole-genome sequencing and annotation of the S. epidermidis strains were performed using Illumina technology and a bioinformatics pipeline for assembly, which provided evidence that the SEC-coding sequences were located in a composite pathogenicity island that was previously described in the S. epidermidis strain FRI909, called SePI-1/SeCI-1, with 83.8-89.7% nucleotide similarity. Various other plasmids were identified, particularly p_3_95 and p_4_95, which carry antibiotic resistance genes (hsrA and dfrG, respectively), and share homologies with SAP085A and pUSA04-2-SUR11, two plasmids described in S. aureus. Eventually, one complete prophage was identified, ΦSE90, sharing 30 out of 52 coding sequences with the Acinetobacter phage vB_AbaM_IME200. 
Thus, the SePI-1/SeCI-1 pathogenicity island was identified in two pathogenic strains of S. epidermidis that produced a SEC enterotoxin causing septic shock. These findings suggest the existence of in vivo genetic exchange from S. aureus to S. epidermidis.}, } @article {pmid29491851, year = {2018}, author = {Stice, SP and Stumpf, SD and Gitaitis, RD and Kvitko, BH and Dutta, B}, title = {Pantoea ananatis Genetic Diversity Analysis Reveals Limited Genomic Diversity as Well as Accessory Genes Correlated with Onion Pathogenicity.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {184}, pmid = {29491851}, issn = {1664-302X}, abstract = {Pantoea ananatis is a member of the family Enterobacteriaceae and an enigmatic plant pathogen with a broad host range. Although P. ananatis strains can be aggressive on onion causing foliar necrosis and onion center rot, previous genomic analysis has shown that P. ananatis lacks the primary virulence secretion systems associated with other plant pathogens. We assessed a collection of fifty P. ananatis strains collected from Georgia over three decades to determine genetic factors that correlated with onion pathogenic potential. Previous genetic analysis studies have compared strains isolated from different hosts with varying diseases potential and isolation sources. Strains varied greatly in their pathogenic potential and aggressiveness on different cultivated Allium species like onion, leek, shallot, and chive. Using multi-locus sequence analysis (MLSA) and repetitive extragenic palindrome repeat (rep)-PCR techniques, we did not observe any correlation between onion pathogenic potential and genetic diversity among strains. Whole genome sequencing and pan-genomic analysis of a sub-set of 10 strains aided in the identification of a novel series of genetic regions, likely plasmid borne, and correlating with onion pathogenicity observed on single contigs of the genetic assemblies. 
We named these loci Onion Virulence Regions (OVR) A-D. The OVR loci contain genes involved in redox regulation as well as pectate lyase and rhamnogalacturonase genes. Previous studies have not identified distinct genetic loci or plasmids correlating with onion foliar pathogenicity or pathogenicity on a single host pathosystem. The lack of focus on a single host system for this phytopathogenic disease necessitates the pan-genomic analysis performed in this study.}, } @article {pmid29475877, year = {2018}, author = {Jouinot, A and Bertherat, J}, title = {MANAGEMENT OF ENDOCRINE DISEASE: Adrenocortical carcinoma: differentiating the good from the poor prognosis tumors.}, journal = {European journal of endocrinology}, volume = {178}, number = {5}, pages = {R215-R230}, doi = {10.1530/EJE-18-0027}, pmid = {29475877}, issn = {1479-683X}, mesh = {Adrenal Cortex/drug effects/metabolism/*pathology/surgery ; Adrenalectomy ; Adrenocortical Carcinoma/*diagnosis/metabolism/pathology/therapy ; Age Factors ; Antineoplastic Agents/therapeutic use ; Biomarkers, Tumor/blood/metabolism ; Combined Modality Therapy ; Humans ; Neoplasm Grading ; Neoplasm Staging ; *Precision Medicine ; Prognosis ; }, abstract = {Adrenocortical carcinoma (ACC) is a rare malignancy with a poor prognosis, the five-years overall survival being below 40%. However, there is great variability of outcomes and we have now a better view of the heterogeneity of tumor aggressiveness. The extent of the disease at the time of diagnosis, best assayed by the European Network for the Study of Adrenal Tumors (ENSAT) Staging Score, is a major determinant of survival. The tumor grade, including the mitotic count and the Ki67 proliferation index, also appears as a strong prognostic factor. The assessment of tumor grade, even by expert pathologists, still suffers from inter-observer reproducibility. The emergence of genomics in the last decade has revolutionized the knowledge of molecular biology and genetics of cancers. 
In ACC, genomic approaches - including pan-genomic studies of gene expression (transcriptome), recurrent mutations (exome or whole-genome sequencing), chromosome alterations, DNA methylation (methylome), miRNA expression (miRnome) - converge in a new classification of ACC, characterized by distinct molecular profiles and very different outcomes. Targeted measurements of a few discriminant molecular alterations have been developed in the perspective of clinical routine, and thus, may help defining therapeutic strategy. By individualizing patients' prognosis and tumor biology, these recent progresses appear as an important step forward towards precision medicine.}, } @article {pmid29475869, year = {2018}, author = {Lee, LL and Blumer-Schuette, SE and Izquierdo, JA and Zurawski, JV and Loder, AJ and Conway, JM and Elkins, JG and Podar, M and Clum, A and Jones, PC and Piatek, MJ and Weighill, DA and Jacobson, DA and Adams, MWW and Kelly, RM}, title = {Genus-Wide Assessment of Lignocellulose Utilization in the Extremely Thermophilic Genus Caldicellulosiruptor by Genomic, Pangenomic, and Metagenomic Analyses.}, journal = {Applied and environmental microbiology}, volume = {84}, number = {9}, pages = {}, pmid = {29475869}, issn = {1098-5336}, support = {T32 GM008776/GM/NIGMS NIH HHS/United States ; }, mesh = {Cellulose/metabolism ; Firmicutes/classification/*genetics/*metabolism ; *Genome, Bacterial ; Genomics ; Lignin/*metabolism ; *Metagenome ; Metagenomics ; }, abstract = {Metagenomic data from Obsidian Pool (Yellowstone National Park, USA) and 13 genome sequences were used to reassess genus-wide biodiversity for the extremely thermophilic Caldicellulosiruptor. The updated core genome contains 1,401 ortholog groups (average genome size for 13 species = 2,516 genes). The pangenome, which remains open with a revised total of 3,493 ortholog groups, encodes a variety of multidomain glycoside hydrolases (GHs). 
These include three cellulases with GH48 domains that are colocated in the glucan degradation locus (GDL) and are specific determinants for microcrystalline cellulose utilization. Three recently sequenced species, Caldicellulosiruptor sp. strain Rt8.B8 (renamed here Caldicellulosiruptor morganii), Thermoanaerobacter cellulolyticus strain NA10 (renamed here Caldicellulosiruptor naganoensis), and Caldicellulosiruptor sp. strain Wai35.B1 (renamed here Caldicellulosiruptor danielii), degraded Avicel and lignocellulose (switchgrass). C. morganii was more efficient than Caldicellulosiruptor bescii in this regard and differed from the other 12 species examined, both based on genome content and organization and in the specific domain features of conserved GHs. Metagenomic analysis of lignocellulose-enriched samples from Obsidian Pool revealed limited new information on genus biodiversity. Enrichments yielded genomic signatures closely related to that of Caldicellulosiruptor obsidiansis, but there was also evidence for other thermophilic fermentative anaerobes (Caldanaerobacter, Fervidobacterium, Caloramator, and Clostridium). One enrichment, containing 89.8% Caldicellulosiruptor and 9.7% Caloramator, had a capacity for switchgrass solubilization comparable to that of C. bescii. These results refine the known biodiversity of Caldicellulosiruptor and indicate that microcrystalline cellulose degradation at temperatures above 70°C, based on current information, is limited to certain members of this genus that produce GH48 domain-containing enzymes. IMPORTANCE: The genus Caldicellulosiruptor contains the most thermophilic bacteria capable of lignocellulose deconstruction, which are promising candidates for consolidated bioprocessing for the production of biofuels and bio-based chemicals. 
The focus here is on the extant capability of this genus for plant biomass degradation and the extent to which this can be inferred from the core and pangenomes, based on analysis of 13 species and metagenomic sequence information from environmental samples. Key to microcrystalline hydrolysis is the content of the glucan degradation locus (GDL), a set of genes encoding glycoside hydrolases (GHs), several of which have GH48 and family 3 carbohydrate binding module domains, that function as primary cellulases. Resolving the relationship between the GDL and lignocellulose degradation will inform efforts to identify more prolific members of the genus and to develop metabolic engineering strategies to improve this characteristic.}, } @article {pmid29472910, year = {2018}, author = {Castillo, D and Pérez-Reytor, D and Plaza, N and Ramírez-Araya, S and Blondel, CJ and Corsini, G and Bastías, R and Loyola, DE and Jaña, V and Pavez, L and García, K}, title = {Exploring the Genomic Traits of Non-toxigenic Vibrio parahaemolyticus Strains Isolated in Southern Chile.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {161}, pmid = {29472910}, issn = {1664-302X}, abstract = {Vibrio parahaemolyticus is the leading cause of seafood-borne gastroenteritis worldwide. As reported in other countries, after the rise and fall of the pandemic strain in Chile, other post-pandemic strains have been associated with clinical cases, including strains lacking the major toxins TDH and TRH. Since the presence or absence of tdh and trh genes has been used for diagnostic purposes and as a proxy of the virulence of V. parahaemolyticus isolates, the understanding of virulence in V. parahaemolyticus strains lacking toxins is essential to detect these strains present in water and marine products to avoid possible food-borne infection. In this study, we characterized the genome of four environmental and two clinical non-toxigenic strains (tdh-, trh-, and T3SS2-). 
Using whole-genome sequencing, phylogenetic, and comparative genome analysis, we identified the core and pan-genome of V. parahaemolyticus of strains of southern Chile. The phylogenetic tree based on the core genome showed low genetic diversity but the analysis of the pan-genome revealed that all strains harbored genomic islands carrying diverse virulence and fitness factors or prophage-like elements that encode toxins like Zot and RTX. Interestingly, the three strains carrying Zot-like toxin have a different sequence, although the alignment showed some conserved areas with the zot sequence found in V. cholerae. In addition, we identified an unexpected diversity in the genetic architecture of the T3SS1 gene cluster and the presence of the T3SS2 gene cluster in a non-pandemic environmental strain. Our study sheds light on the diversity of V. parahaemolyticus strains from the southern Pacific which increases our current knowledge regarding the global diversity of this organism.}, } @article {pmid29467746, year = {2018}, author = {Duchaud, E and Rochat, T and Habib, C and Barbier, P and Loux, V and Guérin, C and Dalsgaard, I and Madsen, L and Nilsen, H and Sundell, K and Wiklund, T and Strepparava, N and Wahli, T and Caburlotto, G and Manfrin, A and Wiens, GD and Fujiwara-Nagata, E and Avendaño-Herrera, R and Bernardet, JF and Nicolas, P}, title = {Genomic Diversity and Evolution of the Fish Pathogen Flavobacterium psychrophilum.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {138}, pmid = {29467746}, issn = {1664-302X}, abstract = {Flavobacterium psychrophilum, the etiological agent of rainbow trout fry syndrome and bacterial cold-water disease in salmonid fish, is currently one of the main bacterial pathogens hampering the productivity of salmonid farming worldwide. In this study, the genomic diversity of the F. psychrophilum species is analyzed using a set of 41 genomes, including 30 newly sequenced isolates. 
These were selected on the basis of available MLST data with the two-fold objective of maximizing the coverage of the species diversity and of allowing a focus on the main clonal complex (CC-ST10) infecting farmed rainbow trout (Oncorhynchus mykiss) worldwide. The results reveal a bacterial species harboring a limited genomic diversity both in terms of nucleotide diversity, with ~0.3% nucleotide divergence inside CDSs in pairwise genome comparisons, and in terms of gene repertoire, with the core genome accounting for ~80% of the genes in each genome. The pan-genome seems nevertheless "open" according to the scaling exponent of a power-law fitted on the rate of new gene discovery when genomes are added one-by-one. Recombination is a key component of the evolutionary process of the species as seen in the high level of apparent homoplasy in the core genome. Using a Hidden Markov Model to delineate recombination tracts in pairs of closely related genomes, the average recombination tract length was estimated to ~4.0 Kbp and the typical ratio of the contributions of recombination and mutations to nucleotide-level differentiation (r/m) was estimated to ~13. Within CC-ST10, evolutionary distances computed on non-recombined regions and comparisons between 22 isolates sampled up to 27 years apart suggest a most recent common ancestor in the second half of the nineteenth century in North America with subsequent diversification and transmission of this clonal complex coinciding with the worldwide expansion of rainbow trout farming. With the goal to promote the development of tools for the genetic manipulation of F. psychrophilum, a particular attention was also paid to plasmids. 
Their extraction and sequencing to completion revealed plasmid diversity that remained hidden to classical plasmid profiling due to size similarities.}, } @article {pmid29438517, year = {2018}, author = {Juergens, H and Varela, JA and Gorter de Vries, AR and Perli, T and Gast, VJM and Gyurchev, NY and Rajkumar, AS and Mans, R and Pronk, JT and Morrissey, JP and Daran, JG}, title = {Genome editing in Kluyveromyces and Ogataea yeasts using a broad-host-range Cas9/gRNA co-expression plasmid.}, journal = {FEMS yeast research}, volume = {18}, number = {3}, pages = {}, pmid = {29438517}, issn = {1567-1364}, mesh = {CRISPR-Associated Protein 9/*genetics ; Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; Fungal Proteins/genetics ; Gene Deletion ; *Gene Editing ; Gene Expression ; Kluyveromyces/*genetics ; Plasmids/genetics ; RNA, Guide, Kinetoplastida/*genetics ; Saccharomycetales/*genetics ; }, abstract = {While CRISPR-Cas9-mediated genome editing has transformed yeast research, current plasmids and cassettes for Cas9 and guide-RNA expression are species specific. CRISPR tools that function in multiple yeast species could contribute to the intensifying research on non-conventional yeasts. A plasmid carrying a pangenomic origin of replication and two constitutive expression cassettes for Cas9 and ribozyme-flanked gRNAs was constructed. Its functionality was tested by analyzing inactivation of the ADE2 gene in four yeast species. In two Kluyveromyces species, near-perfect targeting (≥96%) and homologous repair (HR) were observed in at least 24% of transformants. In two Ogataea species, Ade- mutants were not observed directly after transformation, but prolonged incubation of transformed cells resulted in targeting efficiencies of 9% to 63% mediated by non-homologous end joining (NHEJ). In an Ogataea parapolymorpha ku80 mutant, deletion of OpADE2 mediated by HR was achieved, albeit at low efficiencies (<1%). 
Furthermore the expression of a dual polycistronic gRNA array enabled simultaneous interruption of OpADE2 and OpYNR1 demonstrating flexibility of ribozyme-flanked gRNA design for multiplexing. While prevalence of NHEJ prevented HR-mediated editing in Ogataea, such targeted editing was possible in Kluyveromyces. This broad-host-range CRISPR/gRNA system may contribute to exploration of Cas9-mediated genome editing in other Saccharomycotina yeasts.}, } @article {pmid29436707, year = {2018}, author = {Huntsman, DG and Ladanyi, M}, title = {The molecular pathology of cancer: from pan-genomics to post-genomics.}, journal = {The Journal of pathology}, volume = {244}, number = {5}, pages = {509--511}, doi = {10.1002/path.5057}, pmid = {29436707}, issn = {1096-9896}, mesh = {Animals ; Biomarkers, Tumor/*genetics ; Genetic Predisposition to Disease ; Genomics/*methods ; Humans ; Neoplasms/*genetics/*pathology/therapy ; Pathology, Molecular/*methods ; Phenotype ; Predictive Value of Tests ; Prognosis ; }, abstract = {As the cancer genomics of most major cancer types have been comprehensively catalogued over the past decade through a variety of national and international efforts, the delineation of cancer subtypes has been refined, and our understanding of critical cancer drivers and of the potentially targetable vulnerabilities that they create has grown tremendously. The 2018 Annual Review Issue of the Journal of Pathology provides in-depth assessments of how these pan-genomic approaches have enabled advances in cancer classification, targeted therapy selection, and assessment of cancer progression, all of which are now genomically informed, using several cancer types as examples. 
Beyond these areas of by now conventional pan-genomic tumour analysis, there are also reviews of diverse 'post-genomic' areas, such as the analysis of circulating free tumour DNA in plasma, concurrent germline cancer predisposition profiling in the setting of apparently sporadic cancer, genetic alterations in epigenetic control and DNA repair, proteomics of tumour heterogeneity, computational pathology, and the roles of the cellular stress response and the microbiome in human cancers. As we are able to derive more and more biologically useful information from diverse human biospecimens, these many advances are informing and transforming the practice of cancer pathology. Copyright © 2018 Pathological Society of Great Britain and Ireland. Published by John Wiley & Sons, Ltd.}, } @article {pmid29433445, year = {2018}, author = {Lin, H and Yu, M and Wang, X and Zhang, XH}, title = {Comparative genomic analysis reveals the evolution and environmental adaptation strategies of vibrios.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {135}, pmid = {29433445}, issn = {1471-2164}, support = {41730530//National Natural Science Foundation of China/International ; 41476112//National Natural Science Foundation of China/International ; 41521064//National Natural Science Foundation of China/International ; 41506154//National Natural Science Foundation of China/International ; }, mesh = {Adaptation, Physiological/*genetics ; *Evolution, Molecular ; Genetic Variation ; Genome, Bacterial/*genetics ; Genomics/*methods ; High-Throughput Nucleotide Sequencing ; Phylogeny ; Seawater/microbiology ; Species Specificity ; Vibrio/classification/*genetics ; }, abstract = {BACKGROUND: Vibrios are among the most diverse and ecologically important marine bacteria, which have evolved many characteristics and lifestyles to occupy various niches. 
The relationship between genome features and environmental adaptation strategies is an essential part for understanding the ecological functions of vibrios in the marine system. The advent of complete genome sequencing technology has provided an important method of examining the genetic characteristics of vibrios on the genomic level.

RESULTS: Two Vibrio genomes were sequenced and found to occupy many unique orthologues families which absent from the previously genes pool of the complete genomes of vibrios. Comparative genomics analysis found vibrios encompass a steady core-genome and tremendous pan-genome with substantial gene gain and horizontal gene transfer events in the evolutionary history. Evolutionary analysis based on the core-genome tree suggested that V. fischeri emerged ~ 385 million years ago, along with the occurrence of cephalopods and the flourish of fish. The relatively large genomes, the high number of 16S rRNA gene copies, and the presence of R-M systems and CRISPR system help vibrios live in various marine environments. Chitin-degrading related genes are carried in nearly all the Vibrio genomes. The number of chitinase genes in vibrios has been extremely expanded compared to which in the most recent ancestor of the genus. The chitinase A genes were estimated to have evolved along with the genus, and have undergone significant purifying selective force to conserve the ancestral state.

CONCLUSIONS: Vibrios have experienced extremely genome expansion events during their evolutionary history, allowing them to develop various functions to spread globally. Despite their close phylogenetic relationships, vibrios were found to have a tremendous pan-genome with a steady core-genome, which indicates the highly plastic genome of the genus. Additionally, the existence of various chitin-degrading related genes and the expansion of chitinase A in the genus demonstrate the importance of the chitin utilization for vibrios. Defensive systems in the Vibrio genomes may protect them from the invasion of external DNA. These genomic features investigated here provide a better knowledge of how the evolutionary process has forged Vibrio genomes to occupy various niches.}, } @article {pmid29429564, year = {2018}, author = {Viver, T and Orellana, L and González-Torres, P and Díaz, S and Urdiain, M and Farías, ME and Benes, V and Kaempfer, P and Shahinpei, A and Ali Amoozegar, M and Amann, R and Antón, J and Konstantinidis, KT and Rosselló-Móra, R}, title = {Genomic comparison between members of the Salinibacteraceae family, and description of a new species of Salinibacter (Salinibacter altiplanensis sp. nov.) 
isolated from high altitude hypersaline environments of the Argentinian Altiplano.}, journal = {Systematic and applied microbiology}, volume = {41}, number = {3}, pages = {198--212}, doi = {10.1016/j.syapm.2017.12.004}, pmid = {29429564}, issn = {1618-0984}, mesh = {Altitude ; Argentina ; Bacterial Typing Techniques ; Bacteroidetes/*classification/genetics/isolation & purification ; CRISPR-Cas Systems ; DNA, Bacterial/genetics ; *Genome, Bacterial ; Lakes/*microbiology ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Rhodopsin/genetics ; *Salinity ; Sequence Analysis, DNA ; Type VI Secretion Systems/genetics ; Water Microbiology ; }, abstract = {The application of tandem MALDI-TOF MS screening with 16S rRNA gene sequencing of selected isolates has been demonstrated to be an excellent approach for retrieving novelty from large-scale culturing. The application of such methodologies in different hypersaline samples allowed the isolation of the culture-recalcitrant Salinibacter ruber second phylotype (EHB-2) for the first time, as well as a new species recently isolated from the Argentinian Altiplano hypersaline lakes. In this study, the genome sequences of the different species of the phylum Rhodothermaeota were compared and the genetic repertoire along the evolutionary gradient was analyzed together with each intraspecific variability. Altogether, the results indicated an open pan-genome for the family Salinibacteraceae, as well as the codification of relevant traits such as diverse rhodopsin genes, CRISPR-Cas systems and spacers, and one T6SS secretion system that could give ecological advantages to an EHB-2 isolate. For the new Salinibacter species, we propose the name Salinibacter altiplanensis sp. nov. 
(the designated type strain is AN15[T]=CECT 9105[T]=IBRC-M 11031[T]).}, } @article {pmid29423345, year = {2018}, author = {Delmont, TO and Eren, AM}, title = {Linking pangenomes and metagenomes: the Prochlorococcus metapangenome.}, journal = {PeerJ}, volume = {6}, number = {}, pages = {e4320}, pmid = {29423345}, issn = {2167-8359}, support = {P30 DK042086/DK/NIDDK NIH HHS/United States ; }, abstract = {Pangenomes offer detailed characterizations of core and accessory genes found in a set of closely related microbial genomes, generally by clustering genes based on sequence homology. In comparison, metagenomes facilitate highly resolved investigations of the relative distribution of microbial genomes and individual genes across environments through read recruitment analyses. Combining these complementary approaches can yield unique insights into the functional basis of microbial niche partitioning and fitness, however, advanced software solutions are lacking. Here we present an integrated analysis and visualization strategy that provides an interactive and reproducible framework to generate pangenomes and to study them in conjunction with metagenomes. To investigate its utility, we applied this strategy to a Prochlorococcus pangenome in the context of a large-scale marine metagenomic survey. The resulting Prochlorococcus metapangenome revealed remarkable differential abundance patterns between very closely related isolates that belonged to the same phylogenetic cluster and that differed by only a small number of gene clusters in the pangenome. While the relationships between these genomes based on gene clusters correlated with their environmental distribution patterns, phylogenetic analyses using marker genes or concatenated single-copy core genes did not recapitulate these patterns. 
The metapangenome also revealed a small set of core genes that mostly occurred in hypervariable genomic islands of the Prochlorococcus populations, which systematically lacked read recruitment from surface ocean metagenomes. Notably, these core gene clusters were all linked to sugar metabolism, suggesting potential benefits to Prochlorococcus from a high sequence diversity of sugar metabolism genes. The rapidly growing number of microbial genomes and increasing availability of environmental metagenomes provide new opportunities to investigate the functioning and the ecology of microbial populations, and metapangenomes can provide unique insights for any taxon and biome for which genomic and sufficiently deep metagenomic data are available.}, } @article {pmid29419787, year = {2018}, author = {Heavner, GLW and Mansfeldt, CB and Debs, GE and Hellerstedt, ST and Rowe, AR and Richardson, RE}, title = {Biomarkers' Responses to Reductive Dechlorination Rates and Oxygen Stress in Bioaugmentation Culture KB-1[TM].}, journal = {Microorganisms}, volume = {6}, number = {1}, pages = {}, pmid = {29419787}, issn = {2076-2607}, support = {T32 GM008283/GM/NIGMS NIH HHS/United States ; }, abstract = {Using mRNA transcript levels for key functional enzymes as proxies for the organohalide respiration (OHR) rate, is a promising approach for monitoring bioremediation populations in situ at chlorinated solvent-contaminated field sites. However, to date, no correlations have been empirically derived for chlorinated solvent respiring, Dehalococcoides mccartyi (DMC) containing, bioaugmentation cultures. In the current study, genome-wide transcriptome and proteome data were first used to confirm the most highly expressed OHR-related enzymes in the bioaugmentation culture, KB-1[TM], including several reductive dehalogenases (RDases) and a Ni-Fe hydrogenase, Hup. 
Different KB-1™ DMC strains could be resolved at the RNA and protein level through differences in the sequence of a common RDase (DET1545-like homologs) and differences in expression of their vinyl chloride-respiring RDases. The dominant strain expresses VcrA, whereas the minor strain utilizes BvcA. We then used quantitative reverse-transcriptase PCR (qRT-PCR) as a targeted approach for quantifying transcript copies in the KB-1[TM] consortium operated under a range of TCE respiration rates in continuously-fed, pseudo-steady-state reactors. These candidate biomarkers from KB-1[TM] demonstrated a variety of trends in terms of transcript abundance as a function of respiration rate over the range: 7.7 × 10[-12] to 5.9 × 10[-10] microelectron equivalents per cell per hour (μeeq/cell∙h). Power law trends were observed between the respiration rate and transcript abundance for the main DMC RDase (VcrA) and the hydrogenase HupL (R[2] = 0.83 and 0.88, respectively), but not transcripts for 16S rRNA or three other RDases examined: TceA, BvcA or the RDase DET1545 homologs in KB1[TM]. Overall, HupL transcripts appear to be the most robust activity biomarker across multiple DMC strains and in mixed communities including DMC co-cultures such as KB1[TM]. The addition of oxygen induced cell stress that caused respiration rates to decline immediately (>95% decline within one hour). Although transcript levels did decline, they did so more slowly than the respiration rate observed (transcript decay rates between 0.02 and 0.03 per hour). 
Data from strain-specific probes on the pangenome array strains suggest that a minor DMC strain in KB-1™ that harbors a bvcA homolog preferentially recovered following oxygen stress relative to the dominant, vcrA-containing strain.}, } @article {pmid29403020, year = {2018}, author = {Wilkinson, DA and O'Donnell, AJ and Akhter, RN and Fayaz, A and Mack, HJ and Rogers, LE and Biggs, PJ and French, NP and Midwinter, AC}, title = {Updating the genomic taxonomy and epidemiology of Campylobacter hyointestinalis.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {2393}, pmid = {29403020}, issn = {2045-2322}, mesh = {Animals ; Campylobacter Infections/epidemiology/microbiology/*veterinary ; Campylobacter hyointestinalis/*classification/*genetics/isolation & purification ; Cattle ; Cattle Diseases/epidemiology/microbiology ; Deer ; Evolution, Molecular ; Gene Transfer, Horizontal ; *Genetic Variation ; Genome, Bacterial ; Genomics ; New Zealand/epidemiology ; *Phylogeny ; Sheep ; Sheep Diseases/epidemiology/microbiology ; Zoonoses/*epidemiology/*microbiology ; }, abstract = {Campylobacter hyointestinalis is a member of an emerging group of zoonotic Campylobacter spp. that are increasingly identified in both gastric and non-gastric disease in humans. Here, we discovered C. hyointestinalis in three separate classes of New Zealand ruminant livestock; cattle, sheep and deer. To investigate the relevance of these findings we performed a systematic literature review on global C. hyointestinalis epidemiology and used comparative genomics to better understand and classify members of the species. We found that C. hyointestinalis subspecies hyointestinalis has an open pangenome, with accessory gene contents involved in many essential processes such as metabolism, virulence and defence. 
We observed that horizontal gene transfer is likely to have played an overwhelming role in species diversification, favouring a public-goods-like mechanism of gene 'acquisition and resampling' over a tree-of-life-like vertical inheritance model of evolution. As a result, simplistic gene-based inferences of taxonomy by similarity are likely to be misleading. Such genomic plasticity will also mean that local evolutionary histories likely influence key species characteristics, such as host-association and virulence. This may help explain geographical differences in reported C. hyointestinalis epidemiology and limits what characteristics may be generalised, requiring further genomic studies of C. hyointestinalis in areas where it causes disease.}, } @article {pmid29402214, year = {2018}, author = {Darracq, A and Vitte, C and Nicolas, S and Duarte, J and Pichon, JP and Mary-Huard, T and Chevalier, C and Bérard, A and Le Paslier, MC and Rogowsky, P and Charcosset, A and Joets, J}, title = {Sequence analysis of European maize inbred line F2 provides new insights into molecular and chromosomal characteristics of presence/absence variants.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {119}, pmid = {29402214}, issn = {1471-2164}, support = {ANR-10-GENM-0003//Agence Nationale de la Recherche/International ; ANR-10-BTBR-01-01//Agence Nationale de la Recherche/International ; }, mesh = {*Chromosomes, Plant ; Computational Biology/methods ; DNA Copy Number Variations ; DNA Transposable Elements ; Evolution, Molecular ; *Genetic Variation ; *Genome, Plant ; Genomics/methods ; *Inbreeding ; Linkage Disequilibrium ; Poaceae/genetics ; Sequence Analysis, DNA ; Zea mays/*genetics ; }, abstract = {BACKGROUND: Maize is well known for its exceptional structural diversity, including copy number variants (CNVs) and presence/absence variants (PAVs), and there is growing evidence for the role of structural variation in maize adaptation. 
While PAVs have been described in this important crop species, they have been only scarcely characterized at the sequence level and the extent of presence/absence variation and relative chromosomal landscape of inbred-specific regions remain to be elucidated.

RESULTS: De novo genome sequencing of the French F2 maize inbred line revealed 10,044 novel genomic regions larger than 1 kb, making up 88 Mb of DNA, that are present in F2 but not in B73 (PAV). This set of maize PAV sequences allowed us to annotate PAV content and to analyze sequence breakpoints. Using PAV genotyping on a collection of 25 temperate lines, we also analyzed Linkage Disequilibrium in PAVs and flanking regions, and PAV frequencies within maize genetic groups.

CONCLUSIONS: We highlight the possible role of MMEJ-type double strand break repair in maize PAV formation and discover 395 new genes with transcriptional support. Pattern of linkage disequilibrium within PAVs strikingly differs from this of flanking regions and is in accordance with the intuition that PAVs may recombine less than other genomic regions. We show that most PAVs are ancient, while some are found only in European Flint material, thus pinpointing structural features that may be at the origin of adaptive traits involved in the success of this material. Characterization of such PAVs will provide useful material for further association genetic studies in European and temperate maize.}, } @article {pmid29392354, year = {2018}, author = {Nejat, N and Ramalingam, A and Mantri, N}, title = {Advances in Transcriptomics of Plants.}, journal = {Advances in biochemical engineering/biotechnology}, volume = {164}, number = {}, pages = {161--185}, doi = {10.1007/10_2017_52}, pmid = {29392354}, issn = {0724-6145}, mesh = {Gene Editing ; *Gene Expression Profiling/trends ; MicroRNAs/genetics ; Plant Breeding ; *Plants/genetics ; RNA, Small Interfering ; Stress, Physiological/genetics ; }, abstract = {The current global population of 7.3 billion is estimated to reach 9.7 billion in the year 2050. Rapid population growth is driving up global food demand. Additionally, global climate change, environmental degradation, drought, emerging diseases, and salty soils are the current threats to global food security. In order to mitigate the adverse effects of these diverse agricultural productivity constraints and enhance crop yield and stress-tolerance in plants, we need to go beyond traditional and molecular plant breeding. 
The powerful new tools for genome editing, Transcription Activator-Like Effector Nucleases (TALENs) and Clustered Regulatory Interspaced Short Palindromic Repeats (CRISPR)/Cas systems (CRISPR-Cas9), have been hailed as a quantum leap forward in the development of stress-resistant plants. Plant breeding techniques, however, have several drawbacks. Hence, identification of transcriptional regulatory elements and deciphering mechanisms underlying transcriptional regulation are crucial to avoiding unintended consequences in modified crop plants, which could ultimately have negative impacts on human health. RNA splicing as an essential regulated post-transcriptional process, alternative polyadenylation as an RNA-processing mechanism, along with non-coding RNAs (microRNAs, small interfering RNAs and long non-coding RNAs) have been identified as major players in gene regulation. In this chapter, we highlight new findings on the essential roles of alternative splicing and alternative polyadenylation in plant development and response to biotic and abiotic stresses. We also discuss biogenesis and the functions of microRNAs (miRNAs) and small interfering RNAs (siRNAs) in plants and recent advances in our knowledge of the roles of miRNAs and siRNAs in plant stress response. 
Graphical Abstract.}, } @article {pmid29385986, year = {2018}, author = {Khorramdelazad, M and Bar, I and Whatmore, P and Smetham, G and Bhaaskaria, V and Yang, Y and Bai, SH and Mantri, N and Zhou, Y and Ford, R}, title = {Transcriptome profiling of lentil (Lens culinaris) through the first 24 hours of Ascochyta lentis infection reveals key defence response genes.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {108}, pmid = {29385986}, issn = {1471-2164}, support = {CUR00014//Grains Research and Development Corporation/International ; CUR00023//Grains Research and Development Corporation/International ; 1059775//National Health and Medical Research Council/International ; 1083450//National Health and Medical Research Council/International ; }, mesh = {Ascomycota/genetics/immunology/pathogenicity ; Gene Expression Profiling ; *Gene Expression Regulation, Plant ; *Genes, Plant ; Genotype ; High-Throughput Nucleotide Sequencing/methods ; Immunity, Innate/*genetics ; Lens Plant/*genetics/growth & development ; Mycoses/*genetics/microbiology ; Plant Diseases/*genetics/immunology/microbiology ; }, abstract = {BACKGROUND: Ascochyta blight, caused by the fungus Ascochyta lentis, is one of the most destructive lentil diseases worldwide, resulting in over $16 million AUD annual loss in Australia alone. The use of resistant cultivars is currently considered the most effective and environmentally sustainable strategy to control this disease. However, little is known about the genes and molecular mechanisms underlying lentil resistance against A. lentis.

RESULTS: To uncover the genetic basis of lentil resistance to A. lentis, differentially expressed genes were profiled in lentil plants during the early stages of A. lentis infection. The resistant 'ILL7537' and susceptible 'ILL6002' lentil genotypes were examined at 2, 6, and 24 h post inoculation utilising high throughput RNA-Sequencing. Genotype and time-dependent differential expression analysis identified genes which play key roles in several functions of the defence response: fungal elicitors recognition and early signalling; structural response; biochemical response; transcription regulators; hypersensitive reaction and cell death; and systemic acquired resistance. Overall, the resistant genotype displayed an earlier and faster detection and signalling response to the A. lentis infection and demonstrated higher expression levels of structural defence-related genes.

CONCLUSIONS: This study presents a first-time defence-related transcriptome of lentil to A. lentis, including a comprehensive characterisation of the molecular mechanism through which defence against A. lentis is induced in the resistant lentil genotype.}, } @article {pmid29382867, year = {2018}, author = {Kirk, KF and Méric, G and Nielsen, HL and Pascoe, B and Sheppard, SK and Thorlacius-Ussing, O and Nielsen, H}, title = {Molecular epidemiology and comparative genomics of Campylobacter concisus strains from saliva, faeces and gut mucosal biopsies in inflammatory bowel disease.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {1902}, pmid = {29382867}, issn = {2045-2322}, support = {MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Adult ; Aged ; Biopsy/methods ; Campylobacter/*genetics ; Campylobacter Infections/*microbiology ; Feces/*microbiology ; Female ; Gastroenteritis/microbiology ; Gastrointestinal Microbiome/*genetics ; Genetic Variation/genetics ; Genomics/methods ; Humans ; Inflammatory Bowel Diseases/*microbiology ; Male ; Middle Aged ; Molecular Epidemiology/methods ; Mouth/microbiology ; Multilocus Sequence Typing/methods ; Phenotype ; Phylogeny ; Saliva/*microbiology ; Young Adult ; }, abstract = {Campylobacter concisus is an emerging pathogen associated with inflammatory bowel disease (IBD), yet little is known about the genetic diversity of C. concisus in relation to host niches and disease. We isolated 104 C. concisus isolates from saliva, mucosal biopsies and faecal samples from 41 individuals (26 IBD, 3 Gastroenteritis (GE), 12 Healthy controls (HC)). Whole genomes were sequenced and the dataset pan-genome examined, and genomic information was used for typing using multi-locus-sequence typing (MLST). C. concisus isolates clustered into two main groups/genomospecies (GS) with 71 distinct sequence types (STs) represented. 
Sampling site (p < 0.001), rather than disease phenotype (p = 1.00) was associated with particular GS. We identified 97 candidate genes associated with increase or decrease in prevalence during the anatomical descent from the oral cavity to mucosal biopsies to faeces. Genes related to cell wall/membrane biogenesis were more common in oral isolates, whereas genes involved in cell transport, metabolism and secretory pathways were more prevalent in enteric isolates. Furthermore, there was no correlation between individual genetic diversity and clinical phenotype. This study confirms the genetic heterogeneity of C. concisus and provides evidence that genomic variation is related to the source of isolation, but not clinical phenotype.}, } @article {pmid29379215, year = {2018}, author = {Poole, P and Ramachandran, V and Terpolilli, J}, title = {Rhizobia: from saprophytes to endosymbionts.}, journal = {Nature reviews. Microbiology}, volume = {16}, number = {5}, pages = {291--303}, pmid = {29379215}, issn = {1740-1534}, support = {BB/F004753/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/F013159/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Fabaceae/*microbiology ; Nitrogen Fixation/physiology ; Plant Roots/microbiology ; Rhizobium/*physiology ; Symbiosis/*physiology ; }, abstract = {Rhizobia are some of the best-studied plant microbiota. These oligotrophic Alphaproteobacteria or Betaproteobacteria form symbioses with their legume hosts. Rhizobia must exist in soil and compete with other members of the microbiota before infecting legumes and forming N2-fixing bacteroids. These dramatic lifestyle and developmental changes are underpinned by large genomes and even more complex pan-genomes, which encompass the whole population and are subject to rapid genetic exchange. 
The ability to respond to plant signals and chemoattractants and to colonize nutrient-rich roots are crucial for the competitive success of these bacteria. The availability of a large body of genomic, physiological, biochemical and ecological studies makes rhizobia unique models for investigating community interactions and plant colonization.}, } @article {pmid29371424, year = {2018}, author = {Doron, S and Melamed, S and Ofir, G and Leavitt, A and Lopatina, A and Keren, M and Amitai, G and Sorek, R}, title = {Systematic discovery of antiphage defense systems in the microbial pangenome.}, journal = {Science (New York, N.Y.)}, volume = {359}, number = {6379}, pages = {}, pmid = {29371424}, issn = {1095-9203}, support = {681203/ERC_/European Research Council/International ; }, mesh = {Bacillus subtilis/genetics/*immunology/*virology ; Bacteriophages/*immunology/*pathogenicity ; Escherichia coli/genetics/*immunology/*virology ; Genes, Bacterial/*physiology ; Genome, Bacterial ; Multigene Family ; }, abstract = {The arms race between bacteria and phages led to the development of sophisticated antiphage defense systems, including CRISPR-Cas and restriction-modification systems. Evidence suggests that known and unknown defense systems are located in "defense islands" in microbial genomes. Here, we comprehensively characterized the bacterial defensive arsenal by examining gene families that are clustered next to known defense genes in prokaryotic genomes. Candidate defense systems were systematically engineered and validated in model bacteria for their antiphage activities. We report nine previously unknown antiphage systems and one antiplasmid system that are widespread in microbes and strongly protect against foreign invaders. These include systems that adopted components of the bacterial flagella and condensin complexes. 
Our data also suggest a common, ancient ancestry of innate immunity components shared between animals, plants, and bacteria.}, } @article {pmid29363431, year = {2018}, author = {Zhao, Y and Sun, C and Zhao, D and Zhang, Y and You, Y and Jia, X and Yang, J and Wang, L and Wang, J and Fu, H and Kang, Y and Chen, F and Yu, J and Wu, J and Xiao, J}, title = {PGAP-X: extension on pan-genome analysis pipeline.}, journal = {BMC genomics}, volume = {19}, number = {Suppl 1}, pages = {36}, pmid = {29363431}, issn = {1471-2164}, mesh = {Chlamydia trachomatis/classification/*genetics ; Computer Graphics ; *Evolution, Molecular ; *Genetic Variation ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; *Software ; Streptococcus pneumoniae/classification/*genetics ; }, abstract = {BACKGROUND: Since PGAP (pan-genome analysis pipeline) was published in 2012, it has been widely employed in bacterial genomics research. Though PGAP has integrated several modules for pan-genomics analysis, how to properly and effectively interpret and visualize the results data is still a challenge.

RESULT: To better present bacterial genomic characteristics, a novel cross-platform software was developed, named PGAP-X. Four kinds of data analysis modules were developed and integrated: whole genome sequences alignment, orthologous genes clustering, pan-genome profile analysis, and genetic variants analysis. The results from these analyses can be directly visualized in PGAP-X. The modules for data visualization in PGAP-X include: comparison of genome structure, gene distribution by conservation, pan-genome profile curve and variation on genic and genomic region. Meanwhile, result data produced by other programs with similar function can be imported to be further analyzed and visualized in PGAP-X. To test the performance of PGAP-X, we comprehensively analyzed 14 Streptococcus pneumoniae strains and 14 Chlamydia trachomatis strains. The results show that S. pneumoniae strains have higher diversity on genome structure and gene contents than C. trachomatis strains. In addition, S. pneumoniae strains might have suffered many evolutionary events, such as genomic rearrangements, frequent horizontal gene transfer, homologous recombination, and other evolutionary processes.

CONCLUSION: Briefly, PGAP-X directly presents the characteristics of bacterial genomic diversity with different visualization methods, which could help us to intuitively understand dynamics and evolution in bacterial genomes. The source code and the pre-compiled executable programs are freely available from http://pgapx.ybzhao.com .}, } @article {pmid29358148, year = {2018}, author = {Xu, H and Wang, X and Yu, X and Zhang, J and Guo, L and Huang, C and Jiang, X and Li, X and Feng, Y and Zheng, B}, title = {First detection and genomics analysis of KPC-2-producing Citrobacter isolates from river sediments.}, journal = {Environmental pollution (Barking, Essex : 1987)}, volume = {235}, number = {}, pages = {931-937}, doi = {10.1016/j.envpol.2017.12.084}, pmid = {29358148}, issn = {1873-6424}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/*biosynthesis ; China ; Citrobacter/drug effects/enzymology/genetics/*isolation & purification ; DNA, Bacterial ; Electrophoresis, Gel, Pulsed-Field ; Escherichia coli/genetics ; Genomics ; Genotype ; Geologic Sediments/*microbiology ; Klebsiella pneumoniae ; Multilocus Sequence Typing ; Plasmids ; Polymerase Chain Reaction ; Rivers/microbiology ; beta-Lactamases/*biosynthesis ; }, abstract = {The wide spread of carbapenemase-producing Enterobacteriaceae (CPE) in the environment is an emerging environmental issue with potentially-serious public health implications. However, carbapenemase-producing Citrobacter from environment has rarely been investigated. Here we report the isolation and comparative genomics of carbapenemase-producing Citrobacter isolates from river sediment in China. Potential CPE was isolated by selective MacConkey agar plates containing 2 mg/L meropenem. The presence of carbapenemase genes was detected by PCR and sequencing. 
The clonal relatedness of Klebsiella pneumoniae carbapenemase (KPC-2)-producing Citrobacter isolates was assessed by pulsed-field gel electrophoresis (PFGE) and multilocus sequence typing. Plasmid analysis of KPC-2-producing Citrobacter isolates was performed by S1-PFGE, Southern blotting, and whole genome sequencing. A total of four KPC-2-producing Citrobacter and three Aeromonas isolates were recovered from 54 sediment cultures of Shifeng River. Notably, all KPC-producing isolates were isolated from sampling sites near a waste water treatment plant. Antimicrobial susceptibility testing showed that three of the four sequenced isolates (C1710, C191, and C196) resistant to multiple antibiotics. Genotyping and pan-genome analyses revealed that the C191 and C196 C. freundii isolates exhibited a high level of genetic similarity. Plasmid analysis confirmed that the blaKPC-2 gene is located on either IncF or IncN3 plasmids in all isolates. The blaKPC-2 gene of C1710, C181 and C191 was successfully transferred with E. coli EC600 as the recipient strain. In silico analysis further suggested that pKPC-191 is a novel IncF plasmid, with 99% identity to two previously described IncFII plasmids at 71% coverage. We report here the presence of diverse conjugative blaKPC-2 plasmids from environmental Citrobacter isolates, which poses the possible dissemination of antimicrobial resistance into clinical isolates. 
To our knowledge, this is the first study to culture and characterize KPC-2-producing Citrobacter isolates from river sediments in China.}, } @article {pmid29355972, year = {2018}, author = {Chen, ECH and Morin, E and Beaudet, D and Noel, J and Yildirir, G and Ndikumana, S and Charron, P and St-Onge, C and Giorgi, J and Krüger, M and Marton, T and Ropars, J and Grigoriev, IV and Hainaut, M and Henrissat, B and Roux, C and Martin, F and Corradi, N}, title = {High intraspecific genome diversity in the model arbuscular mycorrhizal symbiont Rhizophagus irregularis.}, journal = {The New phytologist}, volume = {220}, number = {4}, pages = {1161-1171}, doi = {10.1111/nph.14989}, pmid = {29355972}, issn = {1469-8137}, support = {//Natural Sciences and Engineering Research Council of Canada/International ; //Early Researcher Award/International ; ER13-09-190//Ontario Ministry of Research and Innovation/International ; DEB 1441677//National Science Foundation/International ; ANR-11-LABX-0002-01//Laboratory of Excellence Advanced Research on the Biology of Tree and Forest Ecosystems/International ; //Region Lorraine Research Council/International ; //European Commission/International ; //European Regional Development Fund/International ; //European Social Fund/International ; CZ.1.07/2.3.00/30.0048//Operational Programme Education for Competitiveness (OPEC)/International ; DE-AC02-05CH11231//US DOE JGI/International ; }, mesh = {Adaptation, Physiological/genetics ; DNA Transposable Elements/genetics ; Fungal Proteins/chemistry ; Genes, Fungal ; *Genetic Variation ; *Genome, Fungal ; Glomeromycota/*genetics/isolation & purification ; *Models, Biological ; Molecular Sequence Annotation ; Mycorrhizae/*genetics ; Phylogeny ; Protein Domains ; Species Specificity ; Symbiosis/*genetics ; }, abstract = {Arbuscular mycorrhizal fungi (AMF) are known to improve plant fitness through the establishment of mycorrhizal symbioses. 
Genetic and phenotypic variations among closely related AMF isolates can significantly affect plant growth, but the genomic changes underlying this variability are unclear. To address this issue, we improved the genome assembly and gene annotation of the model strain Rhizophagus irregularis DAOM197198, and compared its gene content with five isolates of R. irregularis sampled in the same field. All isolates harbor striking genome variations, with large numbers of isolate-specific genes, gene family expansions, and evidence of interisolate genetic exchange. The observed variability affects all gene ontology terms and PFAM protein domains, as well as putative mycorrhiza-induced small secreted effector-like proteins and other symbiosis differentially expressed genes. High variability is also found in active transposable elements. Overall, these findings indicate a substantial divergence in the functioning capacity of isolates harvested from the same field, and thus their genetic potential for adaptation to biotic and abiotic changes. 
Our data also provide a first glimpse into the genome diversity that resides within natural populations of these symbionts, and open avenues for future analyses of plant-AMF interactions that link AMF genome variation with plant phenotype and fitness.}, } @article {pmid29347966, year = {2018}, author = {Zhou, W and Gay, N and Oh, J}, title = {ReprDB and panDB: minimalist databases with maximal microbial representation.}, journal = {Microbiome}, volume = {6}, number = {1}, pages = {15}, pmid = {29347966}, issn = {2049-2618}, support = {DP2 GM126893/GM/NIGMS NIH HHS/United States ; K22 AI119231/AI/NIAID NIH HHS/United States ; }, mesh = {Access to Information ; Algorithms ; Computational Biology/methods ; *Databases, Genetic ; *Gastrointestinal Microbiome ; Humans ; Metagenomics/*methods ; Phylogeny ; Sequence Alignment ; Sequence Analysis, DNA ; Skin/*microbiology ; }, abstract = {BACKGROUND: Profiling of shotgun metagenomic samples is hindered by a lack of unified microbial reference genome databases that (i) assemble genomic information from all open access microbial genomes, (ii) have relatively small sizes, and (iii) are compatible to various metagenomic read mapping tools. Moreover, computational tools to rapidly compile and update such databases to accommodate the rapid increase in new reference genomes do not exist. As a result, database-guided analyses often fail to profile a substantial fraction of metagenomic shotgun sequencing reads from complex microbiomes.

RESULTS: We report pipelines that efficiently traverse all open access microbial genomes and assemble non-redundant genomic information. The pipelines result in two species-resolution microbial reference databases of relatively small sizes: reprDB, which assembles microbial representative or reference genomes, and panDB, for which we developed a novel iterative alignment algorithm to identify and assemble non-redundant genomic regions in multiple sequenced strains. With the databases, we managed to assign taxonomic labels and genome positions to the majority of metagenomic reads from human skin and gut microbiomes, demonstrating a significant improvement over a previous database-guided analysis on the same datasets.

CONCLUSIONS: reprDB and panDB leverage the rapid increases in the number of open access microbial genomes to more fully profile metagenomic samples. Additionally, the databases exclude redundant sequence information to avoid inflated storage or memory space and indexing or analyzing time. Finally, the novel iterative alignment algorithm significantly increases efficiency in pan-genome identification and can be useful in comparative genomic analyses.}, } @article {pmid29340898, year = {2018}, author = {Mirande, C and Bizine, I and Giannetti, A and Picot, N and van Belkum, A}, title = {Epidemiological aspects of healthcare-associated infections and microbial genomics.}, journal = {European journal of clinical microbiology \& infectious diseases : official publication of the European Society of Clinical Microbiology}, volume = {37}, number = {5}, pages = {823-831}, pmid = {29340898}, issn = {1435-4373}, mesh = {Bacteria/classification/genetics ; Bacterial Typing Techniques/methods ; Cross Infection/*epidemiology/*microbiology/prevention & control ; Disease Outbreaks ; Genome, Bacterial ; Humans ; *Metagenome ; *Metagenomics/methods ; Microbiota/*genetics ; Molecular Epidemiology ; Whole Genome Sequencing ; }, abstract = {Hospital-acquired infections (HAIs) are a cause of continuously increasing morbidity and mortality. Most of these infections are caused by a limited set of bacterial species, which share the capability to efficiently spread from patient to patient and to easily acquire antibiotic resistance determinants. This renders correct and rapid species identification and antibiotic susceptibility testing (AST) important and underscores the relevance of bacterial epidemiological typing. The latter is needed for the sensitive detection and exact tracing of nosocomial spread of these potentially multidrug-resistant microorganisms (MDRO). 
Many microbial typing technologies have been developed and put to some level of executive practice, but it seems that the continued evolution in methodology has currently reached an apex: there is likely to be scientific and practical consensus on the ultimate typing potential of bacterial whole-genome sequencing (WGS). The possibility to perform pan-genomic nucleotide-to-nucleotide comparisons between strains belonging to a single species and to detect even minute changes in nucleotide order will identify closely related organisms, while upon accumulation of such mutations, independent descent can be assumed. Calibration of difference levels [i.e. number of single nucleotide polymorphisms (SNPs)] into categories of inter-strain relatedness needs to be performed in order to generate robust, portable typing schemes. Here, we will briefly discuss the state of affairs regarding bacterial epidemiology based upon WGS, its relatedness with the nomenclature of former typing approaches and the continuing need for a global typing language.}, } @article {pmid29335547, year = {2018}, author = {Zhao, Q and Feng, Q and Lu, H and Li, Y and Wang, A and Tian, Q and Zhan, Q and Lu, Y and Zhang, L and Huang, T and Wang, Y and Fan, D and Zhao, Y and Wang, Z and Zhou, C and Chen, J and Zhu, C and Li, W and Weng, Q and Xu, Q and Wang, ZX and Wei, X and Han, B and Huang, X}, title = {Pan-genome analysis highlights the extent of genomic variation in cultivated and wild rice.}, journal = {Nature genetics}, volume = {50}, number = {2}, pages = {278-284}, doi = {10.1038/s41588-018-0041-z}, pmid = {29335547}, issn = {1546-1718}, mesh = {Crops, Agricultural/*genetics ; Domestication ; *Genetic Variation ; *Genome, Plant ; Genomics/*methods ; High-Throughput Nucleotide Sequencing ; Oryza/classification/*genetics ; Plant Breeding ; Sequence Analysis, DNA ; }, abstract = {The rich genetic diversity in Oryza sativa and Oryza rufipogon serves as the main sources in rice breeding. 
Large-scale resequencing has been undertaken to discover allelic variants in rice, but much of the information for genetic variation is often lost by direct mapping of short sequence reads onto the O. sativa japonica Nipponbare reference genome. Here we constructed a pan-genome dataset of the O. sativa-O. rufipogon species complex through deep sequencing and de novo assembly of 66 divergent accessions. Intergenomic comparisons identified 23 million sequence variants in the rice genome. This catalog of sequence variations includes many known quantitative trait nucleotides and will be helpful in pinpointing new causal variants that underlie complex traits. In particular, we systemically investigated the whole set of coding genes using this pan-genome data, which revealed extensive presence and absence of variation among rice accessions. This pan-genome resource will further promote evolutionary and functional studies in rice.}, } @article {pmid29335226, year = {2018}, author = {Stanborough, T and Fegan, N and Powell, SM and Singh, T and Tamplin, M and Chandry, PS}, title = {Genomic and metabolic characterization of spoilage-associated Pseudomonas species.}, journal = {International journal of food microbiology}, volume = {268}, number = {}, pages = {61-72}, doi = {10.1016/j.ijfoodmicro.2018.01.005}, pmid = {29335226}, issn = {1879-3460}, mesh = {Acetates/metabolism ; Animals ; Cattle ; Esters/metabolism ; Food Contamination/*analysis ; Food Microbiology ; Genome, Bacterial/*genetics ; Genomics ; Meat/*analysis/microbiology ; Polymorphism, Single Nucleotide/genetics ; Pseudomonas fragi/*classification/genetics/isolation & purification/*metabolism ; Volatile Organic Compounds/*analysis ; }, abstract = {Pseudomonas are common spoilage agents of aerobically stored fresh foods. Their ability to cause spoilage is species- and may be strain-specific. 
To improve our understanding of the meat and milk spoilage agents Pseudomonas fragi and Pseudomonas lundensis, we sequenced the genomes of 12 P. fragi and seven P. lundensis isolates. These genomes provided a dataset for genomic analyses. Key volatile organic compounds (VOCs) produced or metabolised by the isolates were determined during their growth on a beef paste and where possible, metabolic activity was associated with gene repertoire. Genome analyses showed that the isolates included in this work may belong to more than two Pseudomonas species with possible spoilage potential. Pan-genome analyses demonstrated a high degree of diversity among the P. fragi and genetic flexibility and diversity may be traits of both species. Growth of the P. lundensis isolates was characterised by the production of large amounts of 1-undecene, 5-methyl-2-hexanone and methyl-2-butenoic acid. P. fragi isolates produced extensive amounts of methyl and ethyl acetate and the production of methyl esters predominated over ethyl esters. Some of the P. fragi produced extremely low levels of VOCs, highlighting the importance of strain-specific studies in food matrices. Furthermore, although usually not considered to be denitrifiers, all isolates generated molecular nitrogen, indicating that at least some steps of this pathway are intact.}, } @article {pmid29334898, year = {2018}, author = {Jandrasits, C and Dabrowski, PW and Fuchs, S and Renard, BY}, title = {seq-seq-pan: building a computational pan-genome data structure on whole genome alignment.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {47}, pmid = {29334898}, issn = {1471-2164}, mesh = {Genomics/*methods ; Phylogeny ; Sequence Alignment/*methods ; Software ; }, abstract = {BACKGROUND: The increasing application of next generation sequencing technologies has led to the availability of thousands of reference genomes, often providing multiple genomes for the same or closely related species. 
The current approach to represent a species or a population with a single reference sequence and a set of variations cannot represent their full diversity and introduces bias towards the chosen reference. There is a need for the representation of multiple sequences in a composite way that is compatible with existing data sources for annotation and suitable for established sequence analysis methods. At the same time, this representation needs to be easily accessible and extendable to account for the constant change of available genomes.

RESULTS: We introduce seq-seq-pan, a framework that provides methods for adding or removing new genomes from a set of aligned genomes and uses these to construct a whole genome alignment. Throughout the sequential workflow the alignment is optimized for generating a representative linear presentation of the aligned set of genomes, that enables its usage for annotation and in downstream analyses.

CONCLUSIONS: By providing dynamic updates and optimized processing, our approach enables the usage of whole genome alignment in the field of pan-genomics. In addition, the sequential workflow can be used as a fast alternative to existing whole genome aligners for aligning closely related genomes. seq-seq-pan is freely available at https://gitlab.com/rki_bioinformatics.}, } @article {pmid29331569, year = {2018}, author = {López-Hermoso, C and de la Haba, RR and Sánchez-Porro, C and Ventosa, A}, title = {Salinivibrio kushneri sp. nov., a moderately halophilic bacterium isolated from salterns.}, journal = {Systematic and applied microbiology}, volume = {41}, number = {3}, pages = {159-166}, doi = {10.1016/j.syapm.2017.12.001}, pmid = {29331569}, issn = {1618-0984}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Genes, Bacterial ; Nucleic Acid Hybridization ; *Phylogeny ; Ponds/*microbiology ; RNA, Ribosomal, 16S/genetics ; *Salinity ; Sequence Analysis, DNA ; Spain ; Vibrionaceae/*classification/genetics/isolation & purification ; }, abstract = {Ten Gram-stain-negative, facultatively anaerobic, moderately halophilic bacterial strains, designated AL184[T], IB560, IB563, IC202, IC317, MA421, ML277, ML318, ML328A and ML331, were isolated from water ponds of five salterns located in Spain. The cells were motile, curved rods and oxidase and catalase positive. All of them grew optimally at 37°C, at pH 7.2-7.4 and in the presence of 7.5% (w/v) NaCl. Based on phylogenetic analyses of the 16S rRNA, the isolates were most closely related to Salinivibrio sharmensis BAG[T] (99.6-98.2% 16S rRNA gene sequence similarity) and Salinivibrio costicola subsp. costicola ATCC 35508[T] (99.0-98.1%). According to the MLSA analyses based on four (gyrB, recA, rpoA and rpoD) and eight (ftsZ, gapA, gyrB, mreB, pyrH, recA, rpoA and topA) concatenated gene sequences, the closest relatives were S. 
siamensis JCM 14472[T] (96.8-95.4% and 94.9-94.7%, respectively) and S. sharmensis DSM 18182[T] (94.0-92.6% and 92.9-92.7%, respectively). In silico DNA-DNA hybridization (GGDC) and average nucleotide identity (ANI) showed values of 23.3-44.8% and 80.2-91.8%, respectively with the related species demonstrating that the ten isolates constituted a single novel species of the genus Salinivibrio. Its pangenome and core genome consist of 6041 and 1230 genes, respectively. The phylogeny based on the concatenated orthologous core genes revealed that the ten strains form a coherent phylogroup well separated from the rest of the species of the genus Salinivibrio. The major cellular fatty acids of strain AL184[T] were C16:0 and C18:1. The DNA G+C content range was 51.9-52.5mol% (Tm) and 50.2-50.9mol% (genome). Based on the phylogenetic-phylogenomic, phenotypic and chemotaxonomic data, the ten isolates represent a novel species of the genus Salinivibrio, for which the name Salinivibrio kushneri sp. nov. is proposed. The type strain is AL184[T] (=CECT 9177[T]=LMG 29817[T]).}, } @article {pmid29325559, year = {2018}, author = {Plissonneau, C and Hartmann, FE and Croll, D}, title = {Pangenome analyses of the wheat pathogen Zymoseptoria tritici reveal the structural basis of a highly plastic eukaryotic genome.}, journal = {BMC biology}, volume = {16}, number = {1}, pages = {5}, pmid = {29325559}, issn = {1741-7007}, mesh = {Ascomycota/*genetics ; Eukaryotic Cells/microbiology/*physiology ; Genetic Variation/*genetics ; Genome, Fungal/*genetics ; Humans ; Plant Diseases/genetics/microbiology ; Triticum/*genetics/microbiology ; }, abstract = {BACKGROUND: Structural variation contributes substantially to polymorphism within species. Chromosomal rearrangements that impact genes can lead to functional variation among individuals and influence the expression of phenotypic traits. 
Genomes of fungal pathogens show substantial chromosomal polymorphism that can drive virulence evolution on host plants. Assessing the adaptive significance of structural variation is challenging, because most studies rely on inferences based on a single reference genome sequence.

RESULTS: We constructed and analyzed the pangenome of Zymoseptoria tritici, a major pathogen of wheat that evolved host specialization by chromosomal rearrangements and gene deletions. We used single-molecule real-time sequencing and high-density genetic maps to assemble multiple genomes. We annotated the gene space based on transcriptomics data that covered the infection life cycle of each strain. Based on a total of five telomere-to-telomere genomes, we constructed a pangenome for the species and identified a core set of 9149 genes. However, an additional 6600 genes were exclusive to a subset of the isolates. The substantial accessory genome encoded on average fewer expressed genes but a larger fraction of the candidate effector genes that may interact with the host during infection. We expanded our analyses of the pangenome to a worldwide collection of 123 isolates of the same species. We confirmed that accessory genes were indeed more likely to show deletion polymorphisms and loss-of-function mutations compared to core genes.

CONCLUSIONS: The pangenome construction of a highly polymorphic eukaryotic pathogen showed that a single reference genome significantly underestimates the gene space of a species. The substantial accessory genome provides a cradle for adaptive evolution.}, } @article {pmid29324905, year = {2018}, author = {Sela, U and Euler, CW and Correa da Rosa, J and Fischetti, VA}, title = {Strains of bacterial species induce a greatly varied acute adaptive immune response: The contribution of the accessory genome.}, journal = {PLoS pathogens}, volume = {14}, number = {1}, pages = {e1006726}, pmid = {29324905}, issn = {1553-7374}, support = {UL1 TR000043/TR/NCATS NIH HHS/United States ; 8 UL1 TR000043/NH/NIH HHS/United States ; }, mesh = {*Adaptive Immunity ; Adult ; B-Lymphocytes/cytology/*immunology/metabolism/microbiology ; Biomarkers/metabolism ; Cell Proliferation ; Cells, Cultured ; Gene Knockout Techniques ; *Genome, Bacterial ; Humans ; Leukocytes, Mononuclear/cytology/immunology/metabolism/microbiology ; Methicillin Resistance ; Methicillin-Resistant Staphylococcus aureus/genetics/immunology/metabolism/pathogenicity ; Reproducibility of Results ; Species Specificity ; Staphylococcus aureus/genetics/*immunology/metabolism/pathogenicity ; Streptococcus pyogenes/genetics/*immunology/metabolism/pathogenicity ; T-Lymphocytes/cytology/*immunology/metabolism/microbiology ; Th1 Cells/cytology/immunology/metabolism/microbiology ; Th17 Cells/cytology/immunology/metabolism/microbiology ; Vancomycin Resistance ; }, abstract = {A fundamental question in human susceptibility to bacterial infections is to what extent variability is a function of differences in the pathogen species or in individual humans. To focus on the pathogen species, we compared in the same individual the human adaptive T and B cell immune response to multiple strains of two major human pathogens, Staphylococcus aureus and Streptococcus pyogenes. 
We found wide variability in the acute adaptive immune response induced by various strains of a species, with a unique combination of activation within the two arms of the adaptive response. Further, this was also accompanied by a dramatic difference in the intensity of the specific protective T helper (Th) response. Importantly, the same immune response differences induced by the individual strains were maintained across multiple healthy human donors. A comparison of isogenic phage KO strains demonstrated that, of the pangenome, prophages were the major contributor to inter-strain immune heterogeneity, as the T cell response to the remaining "core genome" was noticeably blunted. Therefore, these findings extend and modify the notion of an adaptive response to a pathogenic bacterium, by implying that the adaptive immune response signature of a bacterial species should be defined either per strain or alternatively to the species' 'core genome', common to all of its strains. Further, our results demonstrate that the acquired immune response variation is as wide among different strains within a single pathogenic species as it is among different humans, and therefore may explain in part the clinical heterogeneity observed in patients infected with the same species.}, } @article {pmid29321769, year = {2017}, author = {Cortés, MP and Mendoza, SN and Travisany, D and Gaete, A and Siegel, A and Cambiazo, V and Maass, A}, title = {Analysis of Piscirickettsia salmonis Metabolism Using Genome-Scale Reconstruction, Modeling, and Testing.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {2462}, pmid = {29321769}, issn = {1664-302X}, abstract = {Piscirickettsia salmonis is an intracellular bacterial fish pathogen that causes piscirickettsiosis, a disease with highly adverse impact in the Chilean salmon farming industry. The development of effective treatment and control methods for piscirickettsiosis is still a challenge. 
To meet it the number of studies on P. salmonis has grown in the last couple of years but many aspects of the pathogen's biology are still poorly understood. Studies on its metabolism are scarce and only recently a metabolic model for reference strain LF-89 was developed. We present a new genome-scale model for P. salmonis LF-89 with more than twice as many genes as in the previous model and incorporating specific elements of the fish pathogen metabolism. Comparative analysis with models of different bacterial pathogens revealed a lower flexibility in P. salmonis metabolic network. Through constraint-based analysis, we determined essential metabolites required for its growth and showed that it can benefit from different carbon sources tested experimentally in new defined media. We also built an additional model for strain A1-15972, and together with an analysis of P. salmonis pangenome, we identified metabolic features that differentiate two main species clades. Both models constitute a knowledge-base for P. 
salmonis metabolism and can be used to guide the efficient culture of the pathogen and the identification of specific drug targets.}, } @article {pmid29321635, year = {2018}, author = {Brüggemann, H and Jensen, A and Nazipi, S and Aslan, H and Meyer, RL and Poehlein, A and Brzuszkiewicz, E and Al-Zeer, MA and Brinkmann, V and Söderquist, B}, title = {Pan-genome analysis of the genus Finegoldia identifies two distinct clades, strain-specific heterogeneity, and putative virulence factors.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {266}, pmid = {29321635}, issn = {2045-2322}, mesh = {Base Composition ; Gene Order ; *Genetic Heterogeneity ; Genetic Loci ; Genome Size ; *Genome, Bacterial ; Genomics/methods ; *Genotype ; Gram-Positive Bacterial Infections/microbiology ; Gram-Positive Cocci/*classification/*genetics/pathogenicity/ultrastructure ; Host-Pathogen Interactions ; Phylogeny ; Virulence Factors/genetics ; }, abstract = {Finegoldia magna, a Gram-positive anaerobic coccus, is an opportunistic pathogen, associated with medical device-related infections. F. magna is the only described species of the genus Finegoldia. We report the analysis of 17 genomes of Finegoldia isolates. Phylogenomic analyses showed that the Finegoldia population can be divided into two distinct clades, with an average nucleotide identity of 90.7%. One clade contains strains of F. magna, whereas the other clade includes more heterogeneous strains, hereafter tentatively named "Finegoldia nericia". The latter species appears to be more abundant in the human microbiome. Surface structure differences between strains of F. magna and "F. nericia" were detected by microscopy. Strain-specific heterogeneity is high and previously identified host-interacting factors are present only in subsets of "F. nericia" and F. magna strains. 
However, all genomes encode multiple host factor-binding proteins such as albumin-, collagen-, and immunoglobulin-binding proteins, and two to four copies of CAMP (Christie-Atkins-Munch-Petersen) factors; in accordance, most strains show a positive CAMP reaction for co-hemolysis. Our work sheds new light of the genus Finegoldia and its ability to bind host components. Future research should explore if the genomic differences identified here affect the potential of different Finegoldia species and strains to cause opportunistic infections.}, } @article {pmid29315617, year = {2018}, author = {Rouleau, FD and Vincent, AT and Charette, SJ}, title = {Genomic and phenotypic characterization of an atypical Aeromonas salmonicida strain isolated from a lumpfish and producing unusual granular structures.}, journal = {Journal of fish diseases}, volume = {41}, number = {4}, pages = {673--681}, doi = {10.1111/jfd.12769}, pmid = {29315617}, issn = {1365-2761}, mesh = {Aeromonas salmonicida/genetics/*physiology ; Animals ; Fish Diseases/*microbiology ; *Fishes ; *Genotype ; Gram-Negative Bacterial Infections/microbiology/*veterinary ; *Phenotype ; Phylogeny ; Quebec ; Sequence Analysis, DNA/veterinary ; }, abstract = {Aeromonas salmonicida strains are roughly classified into two categories, typical and atypical strains. The latter mainly regroup isolates that present unusual phenotypes or hosts, comparatively to the typical strains that belong to the salmonicida subspecies. This study focuses on an uncharacterized atypical strain, M18076-11, isolated from lumpfish (Cyclopterus lumpus) and not part of the four recognized Aeromonas salmonicida subspecies. This isolate presents an unreported phenotype in the A. salmonicida species: the formation of large granular aggregates. Granules are formed of a heterogeneous mix of live and dead cells, with live cells composing the majority of the population. 
Even if no mechanism was determined to cause cellular aggregation, small globular structures at the cell surface were observed, which might affect granular formation. Pan-genome phylogenetic analysis indicated that this strain groups alongside the masoucida subspecies. However, phenotypic tests showed that these strains have diverging phenotypes, suggesting that M18076-11 might belong to a new subspecies. Also, a pAsal1-like plasmid, which was only reported in strains of the subspecies salmonicida, was discovered in M18076-11. This study sheds light on unsuspected diversity in A. salmonicida subspecies and stresses the need of thorough identification when a new strain is encountered, as unique traits might be discovered.}, } @article {pmid29312269, year = {2017}, author = {Liu, Y and Zhang, DF and Zhou, X and Xu, L and Zhang, L and Shi, X}, title = {Comprehensive Analysis Reveals Two Distinct Evolution Patterns of Salmonella Flagellin Gene Clusters.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {2604}, pmid = {29312269}, issn = {1664-302X}, abstract = {Salmonella is one of the primary causes of foodborne disease, especially Salmonella enterica subsp. enterica (I) which has caused ~99% of clinical salmonellosis cases for humans and domestic mammals. The flagella genes, fliC and fljB, which encode the Salmonella phase 1 and phase 2 antigens respectively, are considered as the Salmonella serotype determinant genes, and contribute to the virulence of Salmonella. However, the evolution of the two flagellin genes is still not well-understood. In this study, the fliC and fljB gene clusters were analyzed among 205 S. enterica subspecies I genomes. The dataset covered 87 different serovars of S. enterica subsp. enterica and included 9 genomes (six serovars) of four other Salmonella subspecies. 
Based on a pan-genome definition and flanked gene linkages, the fliC and fljB gene clusters were identified in 207 (91 serovars) and 138 (61 serovars) genomes, respectively. A phylogenetic tree constructed based on SNPs (Single Nucleotide Polymorphisms) of core genes were used to reflect the essential evolutionary relationships among various serovars. Congruence analysis was performed among the core genome and each gene of fliC and fljB gene clusters, with only fliA and fliS showing congruence to Salmonella core genome. Congruence was also observed among fliB, fliC/fljB, and fliD genes, and their phylogeny revealed a division into two major groups, which strongly corresponded to monophasic and biphasic serovars. Besides, homologous recombination events referring fliB, fliC, and fliD were found to have mainly occurred within each group. These results suggested two distinct evolutionary patterns of Salmonella flagellin gene clusters. Further insight on the evolutionary implication of the two patterns and a framework for phase variation mechanism are needed to be further processed.}, } @article {pmid29312242, year = {2017}, author = {Assis, FL and Franco-Luiz, APM and Dos Santos, RN and Campos, FS and Dornas, FP and Borato, PVM and Franco, AC and Abrahao, JS and Colson, P and Scola, B}, title = {Genome Characterization of the First Mimiviruses of Lineage C Isolated in Brazil.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {2562}, pmid = {29312242}, issn = {1664-302X}, abstract = {The family Mimiviridae, comprised by giant DNA viruses, has been increasingly studied since the isolation of the Acanthamoeba polyphaga mimivirus (APMV), in 2003. In this work, we describe the genome analysis of two new mimiviruses, each isolated from a distinct Brazilian environment. 
Furthermore, for the first time, we are reporting the genomic characterization of mimiviruses of group C in Brazil (Br-mimiC), where a predominance of mimiviruses from group A has been previously reported. The genomes of the Br-mimiC isolates Mimivirus gilmour (MVGM) and Mimivirus golden (MVGD) are composed of double-stranded DNA molecules of ∼1.2 Mb, each encoding more than 1,100 open reading frames. Genome functional annotations highlighted the presence of mimivirus group C hallmark genes, such as the set of seven aminoacyl-tRNA synthetases. However, the set of tRNA encoded by the Br-mimiC was distinct from those of other group C mimiviruses. Differences could also be observed in a genome synteny analysis, which demonstrated the presence of inversions and loci translocations at both extremities of Br-mimiC genomes. Both phylogenetic and phyletic analyses corroborate previous results, undoubtedly grouping the new Brazilian isolates into mimivirus group C. Finally, an updated pan-genome analysis of genus Mimivirus was performed including all new genomes available until the present moment. This last analysis showed a slight increase in the number of clusters of orthologous groups of proteins among mimiviruses of group A, with a larger increase after addition of sequences from mimiviruses of groups B and C, as well as a plateau tendency after the inclusion of the last four mimiviruses of group C, including the Br-mimiC isolates. 
Future prospective studies will help us to understand the genetic diversity among mimiviruses.}, } @article {pmid29312194, year = {2017}, author = {Kiu, R and Caim, S and Alexander, S and Pachori, P and Hall, LJ}, title = {Probing Genomic Aspects of the Multi-Host Pathogen Clostridium perfringens Reveals Significant Pangenome Diversity, and a Diverse Array of Virulence Factors.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {2485}, pmid = {29312194}, issn = {1664-302X}, support = {//Wellcome Trust/United Kingdom ; BB/J004529/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/00044409/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Clostridium perfringens is an important cause of animal and human infections, however information about the genetic makeup of this pathogenic bacterium is currently limited. In this study, we sought to understand and characterise the genomic variation, pangenomic diversity, and key virulence traits of 56 C. perfringens strains which included 51 public, and 5 newly sequenced and annotated genomes using Whole Genome Sequencing. Our investigation revealed that C. perfringens has an "open" pangenome comprising 11667 genes and 12.6% of core genes, identified as the most divergent single-species Gram-positive bacterial pangenome currently reported. Our computational analyses also defined C. perfringens phylogeny (16S rRNA gene) in relation to some 25 Clostridium species, with C. baratii and C. sardiniense determined to be the closest relatives. Profiling virulence-associated factors confirmed presence of well-characterised C. perfringens-associated exotoxins genes including α-toxin (plc), enterotoxin (cpe), and Perfringolysin O (pfo or pfoA), although interestingly there did not appear to be a close correlation with encoded toxin type and disease phenotype. 
Furthermore, genomic analysis indicated significant horizontal gene transfer events as defined by presence of prophage genomes, and notably absence of CRISPR defence systems in >70% (40/56) of the strains. In relation to antimicrobial resistance mechanisms, tetracycline resistance genes (tet) and anti-defensins genes (mprF) were consistently detected in silico (tet: 75%; mprF: 100%). However, pre-antibiotic era strain genomes did not encode for tet, thus implying antimicrobial selective pressures in C. perfringens evolutionary history over the past 80 years. This study provides new genomic understanding of this genetically divergent multi-host bacterium, and further expands our knowledge on this medically and veterinary important pathogen.}, } @article {pmid29311580, year = {2018}, author = {Boulding, T and McCuaig, RD and Tan, A and Hardy, K and Wu, F and Dunn, J and Kalimutho, M and Sutton, CR and Forwood, JK and Bert, AG and Goodall, GJ and Malik, L and Yip, D and Dahlstrom, JE and Zafar, A and Khanna, KK and Rao, S}, title = {LSD1 activation promotes inducible EMT programs and modulates the tumour microenvironment in breast cancer.}, journal = {Scientific reports}, volume = {8}, number = {1}, pages = {73}, pmid = {29311580}, issn = {2045-2322}, mesh = {Biomarkers ; Breast Neoplasms/*genetics/metabolism/*pathology ; Cell Line, Tumor ; Cell Nucleus/metabolism ; Chromatin/genetics/metabolism ; Drug Resistance, Neoplasm/genetics ; Epigenesis, Genetic ; Epithelial-Mesenchymal Transition/*genetics ; Female ; *Gene Expression Regulation, Neoplastic ; Gene Regulatory Networks ; Histone Demethylases/*genetics/metabolism ; Histones/metabolism ; Humans ; Neoplastic Stem Cells/metabolism ; Phenotype ; Protein Transport ; Signal Transduction ; *Transcriptional Activation ; Tumor Microenvironment/*genetics ; }, abstract = {Complex regulatory networks control epithelial-to-mesenchymal transition (EMT) but the underlying epigenetic control is poorly understood. 
Lysine-specific demethylase 1 (LSD1) is a key histone demethylase that alters the epigenetic landscape. Here we explored the role of LSD1 in global epigenetic regulation of EMT, cancer stem cells (CSCs), the tumour microenvironment, and therapeutic resistance in breast cancer. LSD1 induced pan-genomic gene expression in networks implicated in EMT and selectively elicits gene expression programs in CSCs whilst repressing non-CSC programs. LSD1 phosphorylation at serine-111 (LSD1-s111p) by chromatin anchored protein kinase C-theta (PKC-θ), is critical for its demethylase and EMT promoting activity and LSD1-s111p is enriched in chemoresistant cells in vivo. LSD1 couples to PKC-θ on the mesenchymal gene epigenetic template promotes LSD1-mediated gene induction. In vivo, chemotherapy reduced tumour volume, and when combined with an LSD1 inhibitor, abrogated the mesenchymal signature and promoted an innate, M1 macrophage-like tumouricidal immune response. Circulating tumour cells (CTCs) from metastatic breast cancer (MBC) patients were enriched with LSD1 and pharmacological blockade of LSD1 suppressed the mesenchymal and stem-like signature in these patient-derived CTCs. 
Overall, LSD1 inhibition may serve as a promising epigenetic adjuvant therapy to subvert its pleiotropic roles in breast cancer progression and treatment resistance.}, } @article {pmid29310748, year = {2018}, author = {Thibeaux, R and Iraola, G and Ferrés, I and Bierque, E and Girault, D and Soupé-Gilbert, ME and Picardeau, M and Goarant, C}, title = {Deciphering the unexplored Leptospira diversity from soils uncovers genomic evolution to virulence.}, journal = {Microbial genomics}, volume = {4}, number = {1}, pages = {}, pmid = {29310748}, issn = {2057-5858}, mesh = {*Biodiversity ; *Evolution, Molecular ; Gene Duplication ; *Genome, Bacterial ; Humans ; Leptospira/classification/*genetics/isolation & purification/*pathogenicity ; Leptospirosis/diagnosis/*epidemiology/*microbiology/mortality ; New Caledonia/epidemiology ; Phylogeny ; Prevalence ; Repetitive Sequences, Nucleic Acid/physiology ; *Soil Microbiology ; Statistics, Nonparametric ; Virulence/genetics ; Whole Genome Sequencing ; }, abstract = {Despite recent advances in our understanding of the genomics of members of the genus Leptospira, little is known on how virulence has emerged in this heterogeneous bacterial genus as well as on the lifestyle of pathogenic members of the genus Leptospira outside animal hosts. Here, we isolated 12 novel species of the genus Leptospira from tropical soils, significantly increasing the number of known species to 35 and finding evidence of highly unexplored biodiversity in the genus. Extended comparative phylogenomics and pan-genome analyses at the genus level by incorporating 26 novel genomes, revealed that, the traditional leptospiral 'pathogens' cluster, as defined by their phylogenetic position, can be split in two groups with distinct virulence potential and accessory gene patterns. These genomic distinctions are strongly linked to the ability to cause or not severe infections in animal models and humans. 
Our results not only provide new insights into virulence evolution in the members of the genus Leptospira, but also lay the foundations for refining the classification of the pathogenic species.}, } @article {pmid29310579, year = {2018}, author = {Arboleya, S and Bottacini, F and O'Connell-Motherway, M and Ryan, CA and Ross, RP and van Sinderen, D and Stanton, C}, title = {Gene-trait matching across the Bifidobacterium longum pan-genome reveals considerable diversity in carbohydrate catabolism among human infant strains.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {33}, pmid = {29310579}, issn = {1471-2164}, support = {SFI/12/RC/2273//Science Foundation Ireland/Ireland ; 10FDairy//Department of Agriculture, Food and the Marine/International ; }, mesh = {Bifidobacterium longum/*genetics/*metabolism ; Biodiversity ; *Carbohydrate Metabolism ; Databases, Genetic ; Gastrointestinal Microbiome ; *Genes, Bacterial ; *Genome, Bacterial ; Humans ; Infant ; Infant, Newborn ; Phylogeny ; Probiotics ; Quantitative Trait Loci ; *Quantitative Trait, Heritable ; }, abstract = {BACKGROUND: Bifidobacterium longum is a common member of the human gut microbiota and is frequently present at high numbers in the gut microbiota of humans throughout life, thus indicative of a close symbiotic host-microbe relationship. Different mechanisms may be responsible for the high competitiveness of this taxon in its human host to allow stable establishment in the complex and dynamic intestinal microbiota environment. The objective of this study was to assess the genetic and metabolic diversity in a set of 20 B. longum strains, most of which had previously been isolated from infants, by performing whole genome sequencing and comparative analysis, and to analyse their carbohydrate utilization abilities using a gene-trait matching approach.

RESULTS: We analysed their pan-genome and their phylogenetic relatedness. All strains clustered in the B. longum ssp. longum phylogenetic subgroup, except for one individual strain which was found to cluster in the B. longum ssp. suis phylogenetic group. The examined strains exhibit genomic diversity, while they also varied in their sugar utilization profiles. This allowed us to perform a gene-trait matching exercise enabling the identification of five gene clusters involved in the utilization of xylo-oligosaccharides, arabinan, arabinoxylan, galactan and fucosyllactose, the latter of which is an abundant human milk oligosaccharide (HMO).

CONCLUSIONS: The results showed high diversity in terms of genes and predicted glycosyl-hydrolases, as well as the ability to metabolize a large range of sugars. Moreover, we corroborate the capability of B. longum ssp. longum to metabolise HMOs. Ultimately, their intraspecific genomic diversity and the ability to consume a wide assortment of carbohydrates, ranging from plant-derived carbohydrates to HMOs, may provide an explanation for the competitive advantage and persistence of B. longum in the human gut microbiome.}, } @article {pmid29309930, year = {2018}, author = {Schürch, AC and Arredondo-Alonso, S and Willems, RJL and Goering, RV}, title = {Whole genome sequencing options for bacterial strain typing and epidemiologic analysis based on single nucleotide polymorphism versus gene-by-gene-based approaches.}, journal = {Clinical microbiology and infection : the official publication of the European Society of Clinical Microbiology and Infectious Diseases}, volume = {24}, number = {4}, pages = {350--354}, doi = {10.1016/j.cmi.2017.12.016}, pmid = {29309930}, issn = {1469-0691}, mesh = {Bacterial Infections/*diagnosis/*epidemiology ; Bacteriological Techniques/methods ; Genotyping Techniques/*methods ; Humans ; Molecular Epidemiology/*methods ; Molecular Typing/*methods ; Polymorphism, Single Nucleotide ; Sequence Homology ; Whole Genome Sequencing/*methods ; }, abstract = {BACKGROUND: Whole genome sequence (WGS)-based strain typing finds increasing use in the epidemiologic analysis of bacterial pathogens in both public health as well as more localized infection control settings.

AIMS: This minireview describes methodologic approaches that have been explored for WGS-based epidemiologic analysis and considers the challenges and pitfalls of data interpretation.

SOURCES: Personal collection of relevant publications.

CONTENT: When applying WGS to study the molecular epidemiology of bacterial pathogens, genomic variability between strains is translated into measures of distance by determining single nucleotide polymorphisms in core genome alignments or by indexing allelic variation in hundreds to thousands of core genes, assigning types to unique allelic profiles. Interpreting isolate relatedness from these distances is highly organism specific, and attempts to establish species-specific cutoffs are unlikely to be generally applicable. In cases where single nucleotide polymorphism or core gene typing do not provide the resolution necessary for accurate assessment of the epidemiology of bacterial pathogens, inclusion of accessory gene or plasmid sequences may provide the additional required discrimination.

IMPLICATIONS: As with all epidemiologic analysis, realizing the full potential of the revolutionary advances in WGS-based approaches requires understanding and dealing with issues related to the fundamental steps of data generation and interpretation.}, } @article {pmid29304019, year = {2018}, author = {Anand, S and Pang, E and Livanos, G and Mantri, N}, title = {Characterization of Physico-Chemical Properties and Antioxidant Capacities of Bioactive Honey Produced from Australian Grown Agastache rugosa and its Correlation with Colour and Poly-Phenol Content.}, journal = {Molecules (Basel, Switzerland)}, volume = {23}, number = {1}, pages = {}, pmid = {29304019}, issn = {1420-3049}, mesh = {Agastache/*chemistry ; Australia ; Benzothiazoles/chemistry ; Biphenyl Compounds/chemistry ; Free Radical Scavengers/*chemistry/isolation & purification ; Free Radicals/chemistry ; Honey/*analysis ; Picrates/chemistry ; Polyphenols/*chemistry/isolation & purification ; Sulfonic Acids/chemistry ; }, abstract = {The antioxidant and antimicrobial components of honey vary based on sourced of nectar. Medicinal plants with the therapeutic value have potential to produce honey with greater bioactivity. The aim of the present study was to characterize the physico-chemical and antioxidant capacities of Agastache honey produced from Agastache rugosa and compare them with other popular commercial honeys sold in Australia. The total phenolics, total flavonoids, moisture content, colour, pH, protein content and antioxidant capacity were evaluated for Agastache, Manuka, Jelly bush, Tea tree, Super manuka and Jarrah honeys. The results reveal that the moisture content ranged from 17-21%, pH ranged from 3.8-4.3 and estimated protein content ranged from 900-2200 µg/g. The DPPH•, ABTS•+, ORAC and FRAP methods were used to measure the antioxidant capacity of the honey samples. 
The DPPH• % inhibition, ABTS•+, ORAC and FRAP values for Agastache honey were 9.85 (±1.98 µmol TE/g), 26.88 (±0.32 µmol TE/g), 19.78 (±1.1 µmol TE/g) and 3.61 (±0.02 µmol TE/g) whereas the highest antioxidant capacity values obtained were 18.69 (±0.9 µmol TE/g), 30.72 (±0.27 µmol TE/g), 26.95 (±0.9 µmol TE/g) and 3.68 (±0.04 µmol TE/g), respectively. There was a positive correlation between colour, total phenolic content and DPPH• scavenging activity for most of the honeys except Tea tree honey. However, there was no clear correlation with ABTS•+, ORAC and FRAP values. The measured antioxidant capacity of samples varied with the assays used. The DPPH• assay clearly indicated that the phenolic compounds contribute to the scavenging activity of the honeys. Nevertheless, all assays confirm that Agastache honey has significant antioxidant capacity. Therefore, Agastache honey can be important to human nutrition and health.}, } @article {pmid29298677, year = {2018}, author = {Gámez, G and Castro, A and Gómez-Mejia, A and Gallego, M and Bedoya, A and Camargo, M and Hammerschmidt, S}, title = {The variome of pneumococcal virulence factors and regulators.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {10}, pmid = {29298677}, issn = {1471-2164}, support = {CIEMB-097-13//Committee for Development of Research (CODI)/International ; }, mesh = {Bacterial Proteins/genetics ; Chromosome Mapping ; Genes, Bacterial ; Genes, Regulator ; *Genetic Variation ; Genome, Bacterial ; Phylogeny ; Streptococcus pneumoniae/classification/*genetics/pathogenicity ; Virulence Factors/*genetics ; }, abstract = {BACKGROUND: In recent years, the idea of a highly immunogenic protein-based vaccine to combat Streptococcus pneumoniae and its severe invasive infectious diseases has gained considerable interest. 
However, the target proteins to be included in a vaccine formulation have to accomplish several genetic and immunological characteristics, (such as conservation, distribution, immunogenicity and protective effect), in order to ensure its suitability and effectiveness. This study aimed to get comprehensive insights into the genomic organization, population distribution and genetic conservation of all pneumococcal surface-exposed proteins, genetic regulators and other virulence factors, whose important function and role in pathogenesis has been demonstrated or hypothesized.

RESULTS: After retrieving the complete set of DNA and protein sequences reported in the databases GenBank, KEGG, VFDB, P2CS and Uniprot for pneumococcal strains whose genomes have been fully sequenced and annotated, a comprehensive bioinformatic analysis and systematic comparison has been performed for each virulence factor, stand-alone regulator and two-component regulatory system (TCS) encoded in the pan-genome of S. pneumoniae. A total of 25 S. pneumoniae strains, representing different pneumococcal phylogenetic lineages and serotypes, were considered. A set of 92 different genes and proteins were identified, classified and studied to construct a pan-genomic variability map (variome) for S. pneumoniae. Both, pneumococcal virulence factors and regulatory genes, were well-distributed in the pneumococcal genome and exhibited a conserved feature of genome organization, where replication and transcription are co-oriented. The analysis of the population distribution for each gene and protein showed that 49 of them are part of the core genome in pneumococci, while 43 belong to the accessory-genome. Estimating the genetic variability revealed that pneumolysin, enolase and Usp45 (SP_2216 in S. p. TIGR4) are the pneumococcal virulence factors with the highest conservation, while TCS08, TCS05, and TCS02 represent the most conserved pneumococcal genetic regulators.

CONCLUSIONS: The results identified well-distributed and highly conserved pneumococcal virulence factors as well as regulators, representing promising candidates for a new generation of serotype-independent protein-based vaccine(s) to combat pneumococcal infections.}, } @article {pmid29277863, year = {2018}, author = {Setubal, JC and Almeida, NF and Wattam, AR}, title = {Comparative Genomics for Prokaryotes.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1704}, number = {}, pages = {55--78}, doi = {10.1007/978-1-4939-7463-4_3}, pmid = {29277863}, issn = {1940-6029}, support = {HHSN272201400027C/AI/NIAID NIH HHS/United States ; }, mesh = {*Algorithms ; Computational Biology ; Evolution, Molecular ; Genes, Archaeal ; Genes, Bacterial ; *Genome, Archaeal ; *Genome, Bacterial ; Genomics/*methods ; Phylogeny ; Sequence Alignment ; Sequence Analysis, DNA ; Software ; }, abstract = {Bacteria and archaea, collectively known as prokaryotes, have in general genomes that are much smaller than those of eukaryotes. As a result, thousands of these genomes have been sequenced. In prokaryotes, gene architecture lacks the intron-exon structure of eukaryotic genes (with an occasional exception). These two facts mean that there is an abundance of data for prokaryotic genomes, and that they are easier to study than the more complex eukaryotic genomes. In this chapter, we provide an overview of genome comparison tools that have been developed primarily (sometimes exclusively) for prokaryotic genomes. 
We cover methods that use only the DNA sequences, methods that use only the gene content, and methods that use both data types.}, } @article {pmid29277862, year = {2018}, author = {Zekic, T and Holley, G and Stoye, J}, title = {Pan-Genome Storage and Analysis Techniques.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1704}, number = {}, pages = {29--53}, doi = {10.1007/978-1-4939-7463-4_2}, pmid = {29277862}, issn = {1940-6029}, mesh = {*Algorithms ; Cluster Analysis ; Computational Biology/methods ; Databases, Genetic ; *Genome, Microbial ; Genomics/*methods ; Phylogeny ; Sequence Analysis, DNA/*methods ; Software ; }, abstract = {Computational pan-genome analysis has emerged from the rapid increase of available genome sequencing data. Starting from a microbial pan-genome, the concept has spread to a variety of species, such as plants or viruses. Characterizing a pan-genome provides insights into intra-species evolution, functions, and diversity. However, researchers face challenges such as processing and maintaining large datasets while providing accurate and efficient analysis approaches. Comparative genomics methods are required for detecting conserved and unique regions between a set of genomes. This chapter gives an overview of tools available for indexing pan-genomes, identifying the sub-regions of a pan-genome and offering a variety of downstream analysis methods. These tools are categorized into two groups, gene-based and sequence-based, according to the pan-genome identification method. 
We highlight the differences, advantages, and disadvantages between the tools, and provide information about the general workflow, methodology of pan-genome identification, covered functionalities, usability and availability of the tools.}, } @article {pmid29277352, year = {2019}, author = {Valeriano, VDV and Oh, JK and Bagon, BB and Kim, H and Kang, DK}, title = {Comparative genomic analysis of Lactobacillus mucosae LM1 identifies potential niche-specific genes and pathways for gastrointestinal adaptation.}, journal = {Genomics}, volume = {111}, number = {1}, pages = {24--33}, doi = {10.1016/j.ygeno.2017.12.009}, pmid = {29277352}, issn = {1089-8646}, mesh = {*Adaptation, Physiological/genetics ; Bacterial Adhesion ; Ecosystem ; Folic Acid/biosynthesis ; *Genome, Bacterial ; Genomic Islands ; Genomics ; Glycogen/metabolism ; Glycoside Hydrolases/metabolism ; Lactobacillus/*genetics/*metabolism ; Phylogeny ; Probiotics ; Proteomics ; Whole Genome Sequencing ; }, abstract = {Lactobacillus mucosae is currently of interest as putative probiotics due to their metabolic capabilities and ability to colonize host mucosal niches. L. mucosae LM1 has been studied in its functions in cell adhesion and pathogen inhibition, etc. It demonstrated unique abilities to use energy from carbohydrate and non-carbohydrate sources. Due to these functions, we report the first complete genome sequence of an L. mucosae strain, L. mucosae LM1. Analysis of the pan-genome in comparison with closely-related Lactobacillus species identified a complete glycogen metabolism pathway, as well as folate biosynthesis, complementing previous proteomic data on the LM1 strain. It also revealed common and unique niche-adaptation genes among the various L. mucosae strains. The aim of this study was to derive genomic information that would reveal the probable mechanisms underlying the probiotic effect of L. mucosae LM1, and provide a better understanding of the nature of L. 
mucosae sp.}, } @article {pmid29272410, year = {2018}, author = {Brito, PH and Chevreux, B and Serra, CR and Schyns, G and Henriques, AO and Pereira-Leal, JB}, title = {Genetic Competence Drives Genome Diversity in Bacillus subtilis.}, journal = {Genome biology and evolution}, volume = {10}, number = {1}, pages = {108--124}, pmid = {29272410}, issn = {1759-6653}, mesh = {Bacillus subtilis/*genetics ; Bacterial Proteins/genetics ; Evolution, Molecular ; Gene Transfer, Horizontal ; Genes, Bacterial ; *Genetic Variation ; Genome, Bacterial ; Phylogeny ; }, abstract = {Prokaryote genomes are the result of a dynamic flux of genes, with increases achieved via horizontal gene transfer and reductions occurring through gene loss. The ecological and selective forces that drive this genomic flexibility vary across species. Bacillus subtilis is a naturally competent bacterium that occupies various environments, including plant-associated, soil, and marine niches, and the gut of both invertebrates and vertebrates. Here, we quantify the genomic diversity of B. subtilis and infer the genome dynamics that explain the high genetic and phenotypic diversity observed. Phylogenomic and comparative genomic analyses of 42 B. subtilis genomes uncover a remarkable genome diversity that translates into a core genome of 1,659 genes and an asymptotic pangenome growth rate of 57 new genes per new genome added. This diversity is due to a large proportion of low-frequency genes that are acquired from closely related species. We find no gene-loss bias among wild isolates, which explains why the cloud genome, 43% of the species pangenome, represents only a small proportion of each genome. We show that B. subtilis can acquire xenologous copies of core genes that propagate laterally among strains within a niche. 
While not excluding the contributions of other mechanisms, our results strongly suggest a process of gene acquisition that is largely driven by competence, where the long-term maintenance of acquired genes depends on local and global fitness effects. This competence-driven genomic diversity provides B. subtilis with its generalist character, enabling it to occupy a wide range of ecological niches and cycle through them.}, } @article {pmid29271914, year = {2017}, author = {Tsukiyama-Kohara, K and Kohara, M}, title = {Hepatitis C Virus: Viral Quasispecies and Genotypes.}, journal = {International journal of molecular sciences}, volume = {19}, number = {1}, pages = {}, pmid = {29271914}, issn = {1422-0067}, mesh = {Animals ; Antiviral Agents/pharmacology/therapeutic use ; Drug Resistance, Viral ; Genotype ; Hepacivirus/drug effects/*genetics ; Hepatitis C/drug therapy/*virology ; Humans ; Mutation ; *Quasispecies ; }, abstract = {Hepatitis C virus (HCV) mainly replicates in the cytoplasm, where it easily establishes persistent infection, resulting in chronic hepatitis, liver cirrhosis, and hepatocellular carcinoma. Due to its high rate of mutation, HCV forms viral quasispecies, categorized based on the highly variable regions in the envelope protein and nonstructural 5A protein. HCV possesses seven major genotypes, among which genotype 1 is the most prevalent globally. The distribution of HCV genotypes varies based on geography, and each genotype has a different sensitivity to interferon treatment. Recently-developed direct-acting antivirals (DAAs), which target viral proteases or polymerases, mediate drastically better antiviral effects than previous therapeutics. 
Although treatment with DAAs has led to the development of drug-resistant HCV mutants, the most recently approved DAAs show improved pan-genomic activity, with a higher barrier to viral resistance.}, } @article {pmid29270248, year = {2017}, author = {Birkeland, NK and Schönheit, P and Poghosyan, L and Fiebig, A and Klenk, HP}, title = {Complete genome sequence analysis of Archaeoglobus fulgidus strain 7324 (DSM 8774), a hyperthermophilic archaeal sulfate reducer from a North Sea oil field.}, journal = {Standards in genomic sciences}, volume = {12}, number = {}, pages = {79}, pmid = {29270248}, issn = {1944-3277}, abstract = {Archaeoglobus fulgidus is the type species of genus Archaeoglobus Stetter 1998, a hyperthermophilic sulfate reducing group within the Archaeoglobi class of the euryarchaeota phylum. Members of this genus grow heterotrophically or chemolithoautotrophically with sulfate or thiosulfate as electron acceptors. Except for A. fulgidus strain 7324 and the candidate species "Archaeoglobus lithotrophicus", which both originate from deep oil-fields, the other members of this genus have been recovered from marine hydrothermal systems. Here we describe the features of the A. fulgidus strain 7324 genome as compared to the A. fulgidus VC16 type strain. The 2.3 Mbp genome sequence of strain 7324 shares about 93.5% sequence identity with that of strain VC16[T] but is about 138 Kbp longer, which is mostly due to two large 'insertions' carrying one extra cdc6 (cell-cycle control protein 6) gene, extra CRISPR elements and mobile genetic elements, a high-GC ncRNA gene (hgcC) and a large number of hypothetical gene functions. A comparison with four other Archaeoglobus spp. 
genomes identified 1001 core Archaeoglobus genes and more than 2900 pan-genome orthologous genes.}, } @article {pmid29270162, year = {2017}, author = {Gomila, M and Busquets, A and Mulet, M and García-Valdés, E and Lalucat, J}, title = {Clarification of Taxonomic Status within the Pseudomonas syringae Species Group Based on a Phylogenomic Analysis.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {2422}, pmid = {29270162}, issn = {1664-302X}, abstract = {The Pseudomonas syringae phylogenetic group comprises 15 recognized bacterial species and more than 60 pathovars. The classification and identification of strains is relevant for practical reasons but also for understanding the epidemiology and ecology of this group of plant pathogenic bacteria. Genome-based taxonomic analyses have been introduced recently to clarify the taxonomy of the whole genus. A set of 139 draft and complete genome sequences of strains belonging to all species of the P. syringae group available in public databases were analyzed, together with the genomes of closely related species used as outgroups. Comparative genomics based on the genome sequences of the species type strains in the group allowed the delineation of phylogenomic species and demonstrated that a high proportion of strains included in the study are misclassified. Furthermore, representatives of at least 7 putative novel species were detected. It was also confirmed that P. ficuserectae, P. meliae, and P. savastanoi are later synonyms of P. amygdali and that "P. 
coronafaciens" should be revived as a nomenspecies.}, } @article {pmid29259172, year = {2017}, author = {Gordon, SP and Contreras-Moreira, B and Woods, DP and Des Marais, DL and Burgess, D and Shu, S and Stritt, C and Roulin, AC and Schackwitz, W and Tyler, L and Martin, J and Lipzen, A and Dochy, N and Phillips, J and Barry, K and Geuten, K and Budak, H and Juenger, TE and Amasino, R and Caicedo, AL and Goodstein, D and Davidson, P and Mur, LAJ and Figueroa, M and Freeling, M and Catalan, P and Vogel, JP}, title = {Extensive gene content variation in the Brachypodium distachyon pan-genome correlates with population structure.}, journal = {Nature communications}, volume = {8}, number = {1}, pages = {2184}, pmid = {29259172}, issn = {2041-1723}, mesh = {Biological Variation, Population/*genetics ; Brachypodium/*genetics ; Chromosomes, Plant/genetics ; DNA Transposable Elements/*genetics ; *Evolution, Molecular ; Genetic Variation/genetics ; Genome, Plant/*genetics ; Phylogeny ; Synteny/genetics ; }, abstract = {While prokaryotic pan-genomes have been shown to contain many more genes than any individual organism, the prevalence and functional significance of differentially present genes in eukaryotes remains poorly understood. Whole-genome de novo assembly and annotation of 54 lines of the grass Brachypodium distachyon yield a pan-genome containing nearly twice the number of genes found in any individual genome. Genes present in all lines are enriched for essential biological functions, while genes present in only some lines are enriched for conditionally beneficial functions (e.g., defense and development), display faster evolutionary rates, lie closer to transposable elements and are less likely to be syntenic with orthologous genes in other grasses. 
Our data suggest that differentially present genes contribute substantially to phenotypic variation within a eukaryote species, these genes have a major influence in population genetics, and transposable elements play a key role in pan-genome evolution.}, } @article {pmid29247056, year = {2018}, author = {Timms, VJ and Rockett, R and Bachmann, NL and Martinez, E and Wang, Q and Chen, SC and Jeoffreys, N and Howard, PJ and Smith, A and Adamson, S and Gilmour, R and Sheppeard, V and Sintchenko, V}, title = {Genome Sequencing Links Persistent Outbreak of Legionellosis in Sydney (New South Wales, Australia) to an Emerging Clone of Legionella pneumophila Sequence Type 211.}, journal = {Applied and environmental microbiology}, volume = {84}, number = {5}, pages = {}, pmid = {29247056}, issn = {1098-5336}, mesh = {*Disease Outbreaks ; Humans ; Legionella pneumophila/*genetics ; Legionnaires' Disease/*epidemiology/microbiology ; Multilocus Sequence Typing ; New South Wales/epidemiology ; Phylogeny ; *Polymorphism, Single Nucleotide ; }, abstract = {The city of Sydney, Australia, experienced a persistent outbreak of Legionella pneumophila serogroup 1 (Lp1) pneumonia in 2016. To elucidate the source and guide public health actions, the genomes of clinical and environmental Lp1 isolates recovered over 7 weeks were examined. A total of 48 isolates from human cases and cooling towers were sequenced and compared using single-nucleotide polymorphism (SNP)-based core-genome multilocus sequencing typing (MLST) and pangenome approaches. All three methods confirmed phylogenetic relatedness between isolates associated with outbreaks in the Central Business District (CBD) in March and May and those in suburb 1. These isolates were designated the "main cluster" and consisted of isolates from two patients from the CBD March outbreak, one patient and one tower isolate from suburb 1, and isolates from two cooling towers and three patients from the CBD May outbreak. 
All main cluster isolates were sequence type 211 (ST211), which previously has only been reported in Canada. Significantly, pangenome analysis identified mobile genetic elements containing a unique type IV A F-type secretion system (T4ASS), which was specific to the main cluster, and cocirculating clinical strains, suggesting a potential mechanism for increased fitness and persistence of the outbreak clone. Genome sequencing enabled linking of the geographically dispersed environmental sources of infection among the spatially and temporally coinciding cases of legionellosis in a highly populated urban setting. The discovery of a unique T4ASS emphasizes the role of genome recombination in the emergence of successful Lp1 clones. IMPORTANCE: A new emerging clone has been responsible for a prolonged legionellosis outbreak in Sydney, Australia. The use of whole-genome sequencing linked two outbreaks thought to be unrelated and confirmed the outliers. These findings led to the resampling and subsequent identification of the source, guiding public health actions and bringing the outbreak to a close. Significantly, the outbreak clone was identified as sequence type 211 (ST211). Our study reports this ST in the Southern Hemisphere and presents a description of ST211 genomes from both clinical and environmental isolates. 
A unique mobile genetic element containing a type IV secretion system was identified in Lp1 ST211 isolates linked to the main cluster and Lp1 ST42 isolates that were cocirculating at the time of the outbreak.}, } @article {pmid29247013, year = {2018}, author = {Kwong, JC and Chow, EPF and Stevens, K and Stinear, TP and Seemann, T and Fairley, CK and Chen, MY and Howden, BP}, title = {Whole-genome sequencing reveals transmission of gonococcal antibiotic resistance among men who have sex with men: an observational study.}, journal = {Sexually transmitted infections}, volume = {94}, number = {2}, pages = {151-157}, pmid = {29247013}, issn = {1472-3263}, mesh = {Adolescent ; Adult ; Anti-Bacterial Agents/pharmacology ; Cohort Studies ; DNA, Bacterial ; *Drug Resistance, Microbial ; Gonorrhea/epidemiology/*transmission ; Humans ; Male ; Microbial Sensitivity Tests ; Middle Aged ; Multilocus Sequence Typing ; Neisseria gonorrhoeae/drug effects/*genetics/isolation & purification ; Prospective Studies ; Sexual Partners ; Sexual and Gender Minorities/*statistics & numerical data ; *Whole Genome Sequencing ; Young Adult ; }, abstract = {OBJECTIVES: Drug-resistant Neisseria gonorrhoeae are now a global public health threat. Direct transmission of antibiotic-resistant gonococci between individuals has been proposed as a driver for the increased transmission of resistance, but direct evidence of such transmission is limited. Whole-genome sequencing (WGS) has superior resolution to investigate outbreaks and disease transmission compared with traditional molecular typing methods such as multilocus sequence typing (MLST) and N. gonorrhoeae multiantigen sequence (NG-MAST). We therefore aimed to systematically investigate the transmission of N. gonorrhoeae between men in sexual partnerships using WGS to compare isolates and their resistance to antibiotics at a genome level.

METHODS: 458 couples from a large prospective cohort of men who have sex with men (MSM) tested for gonorrhoea together between 2005 and 2014 were included, and WGS was conducted on all isolates from couples where both men were culture-positive for N. gonorrhoeae. Resistance-determining sequences were identified from genome assemblies, and comparison of isolates between and within individuals was performed by pairwise single nucleotide polymorphism and pangenome comparisons, and in silico predictions of NG-MAST and MLST.

RESULTS: For 33 of 34 (97%; 95% CI 85% to 100%) couples where both partners were positive for gonorrhoea, the resistance-determining genes and mutations were identical in isolates from each partner (94 isolates in total). Resistance determinants in isolates from 23 of 23 (100%; 95% CI 86% to 100%) men with multisite infections were also identical within an individual. These partner and within-host isolates were indistinguishable by NG-MAST, MLST and whole genomic comparisons.

CONCLUSIONS: These data support the transmission of antibiotic-resistant strains between sexual partners as a key driver of resistance rates in gonorrhoea among MSM. This improved understanding of the transmission dynamics of N. gonorrhoeae between sexual partners will inform treatment and prevention guidelines.}, } @article {pmid29240874, year = {2018}, author = {Rasheed, A and Mujeeb-Kazi, A and Ogbonnaya, FC and He, Z and Rajaram, S}, title = {Wheat genetic resources in the post-genomics era: promise and challenges.}, journal = {Annals of botany}, volume = {121}, number = {4}, pages = {603-616}, pmid = {29240874}, issn = {1095-8290}, mesh = {Crop Production ; Cytogenetics ; Genetic Variation ; Genome, Plant/*genetics ; Genomics ; Triticum/*genetics ; }, abstract = {BACKGROUND: Wheat genetic resources have been used for genetic improvement since 1876, when Stephen Wilson (Transactions and Proceedings of the Botanical Society of Edinburgh 12: 286) consciously made the first wide hybrid involving wheat and rye in Scotland. Wide crossing continued with sporadic attempts in the first half of 19th century and became a sophisticated scientific discipline during the last few decades with considerable impact in farmers' fields. However, a large diversity of untapped genetic resources could contribute in meeting future wheat production challenges.

PERSPECTIVES AND CONCLUSION: Recently the complete reference genome of hexaploid (Chinese Spring) and tetraploid (Triticum turgidum ssp. dicoccoides) wheat became publicly available coupled with on-going international efforts on wheat pan-genome sequencing. We anticipate that an objective appraisal is required in the post-genomics era to prioritize genetic resources for use in the improvement of wheat production if the goal of doubling yield by 2050 is to be met. Advances in genomics have resulted in the development of high-throughput genotyping arrays, improved and efficient methods of gene discovery, genomics-assisted selection and gene editing using endonucleases. Likewise, ongoing advances in rapid generation turnover, improved phenotyping, envirotyping and analytical methods will significantly accelerate exploitation of exotic genes and increase the rate of genetic gain in breeding. We argue that the integration of these advances will significantly improve the precision and targeted identification of potentially useful variation in the wild relatives of wheat, providing new opportunities to contribute to yield and quality improvement, tolerance to abiotic stresses, resistance to emerging biotic stresses and resilience to weather extremes.}, } @article {pmid29238330, year = {2017}, author = {Henri, C and Leekitcharoenphon, P and Carleton, HA and Radomski, N and Kaas, RS and Mariet, JF and Felten, A and Aarestrup, FM and Gerner Smidt, P and Roussel, S and Guillier, L and Mistou, MY and Hendriksen, RS}, title = {An Assessment of Different Genomic Approaches for Inferring Phylogeny of Listeria monocytogenes.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {2351}, pmid = {29238330}, issn = {1664-302X}, abstract = {Background/objectives: Whole genome sequencing (WGS) has proven to be a powerful subtyping tool for foodborne pathogenic bacteria like L. monocytogenes. 
The interests of genome-scale analysis for national surveillance, outbreak detection or source tracking has been largely documented. The genomic data however can be exploited with many different bioinformatics methods like single nucleotide polymorphism (SNP), core-genome multi locus sequence typing (cgMLST), whole-genome multi locus sequence typing (wgMLST) or multi locus predicted protein sequence typing (MLPPST) on either core-genome (cgMLPPST) or pan-genome (wgMLPPST). Currently, there are little comparisons studies of these different analytical approaches. Our objective was to assess and compare different genomic methods that can be implemented in order to cluster isolates of L. monocytogenes. Methods: The clustering methods were evaluated on a collection of 207 L. monocytogenes genomes of food origin representative of the genetic diversity of the Anses collection. The trees were then compared using robust statistical analyses. Results: The backward comparability between conventional typing methods and genomic methods revealed a near-perfect concordance. The importance of selecting a proper reference when calling SNPs was highlighted, although distances between strains remained identical. The analysis also revealed that the topology of the phylogenetic trees between wgMLST and cgMLST were remarkably similar. The comparison between SNP and cgMLST or SNP and wgMLST approaches showed that the topologies of phylogenic trees were statistically similar with an almost equivalent clustering. Conclusion: Our study revealed high concordance between wgMLST, cgMLST, and SNP approaches which are all suitable for typing of L. monocytogenes. 
The comparable clustering is an important observation considering that the two approaches have been variously implemented among reference laboratories.}, } @article {pmid29237792, year = {2018}, author = {Knetsch, CW and Kumar, N and Forster, SC and Connor, TR and Browne, HP and Harmanus, C and Sanders, IM and Harris, SR and Turner, L and Morris, T and Perry, M and Miyajima, F and Roberts, P and Pirmohamed, M and Songer, JG and Weese, JS and Indra, A and Corver, J and Rupnik, M and Wren, BW and Riley, TV and Kuijper, EJ and Lawley, TD}, title = {Zoonotic Transfer of Clostridium difficile Harboring Antimicrobial Resistance between Farm Animals and Humans.}, journal = {Journal of clinical microbiology}, volume = {56}, number = {3}, pages = {}, pmid = {29237792}, issn = {1098-660X}, support = {MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; 098051/WT_/Wellcome Trust/United Kingdom ; MR/K000551/1/MRC_/Medical Research Council/United Kingdom ; PF451/MRC_/Medical Research Council/United Kingdom ; MR/L006758/1/MRC_/Medical Research Council/United Kingdom ; G0902453/MRC_/Medical Research Council/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Animals, Domestic/*microbiology ; Clostridioides difficile/classification/genetics/isolation & purification ; Clostridium Infections/microbiology/*transmission ; Communicable Diseases, Emerging/microbiology/*transmission ; Drug Resistance, Bacterial/*genetics ; Genome, Bacterial/genetics ; Humans ; Phylogeography ; Zoonoses/microbiology/*transmission ; }, abstract = {The emergence of Clostridium difficile as a significant human diarrheal pathogen is associated with the production of highly transmissible spores and the acquisition of antimicrobial resistance genes (ARGs) and virulence factors. Unlike the hospital-associated C. difficile RT027 lineage, the community-associated C. 
difficile RT078 lineage is isolated from both humans and farm animals; however, the geographical population structure and transmission networks remain unknown. Here, we applied whole-genome phylogenetic analysis of 248 C. difficile RT078 strains from 22 countries. Our results demonstrate limited geographical clustering for C. difficile RT078 and extensive coclustering of human and animal strains, thereby revealing a highly linked intercontinental transmission network between humans and animals. Comparative whole-genome analysis reveals indistinguishable accessory genomes between human and animal strains and a variety of antimicrobial resistance genes in the pangenome of C. difficile RT078. Thus, bidirectional spread of C. difficile RT078 between farm animals and humans may represent an unappreciated route disseminating antimicrobial resistance genes between humans and animals. These results highlight the importance of the "One Health" concept to monitor infectious disease emergence and the dissemination of antimicrobial resistance genes.}, } @article {pmid29222100, year = {2018}, author = {Frantzen, CA and Kleppen, HP and Holo, H}, title = {Lactococcus lactis Diversity in Undefined Mixed Dairy Starter Cultures as Revealed by Comparative Genome Analyses and Targeted Amplicon Sequencing of epsD.}, journal = {Applied and environmental microbiology}, volume = {84}, number = {3}, pages = {}, pmid = {29222100}, issn = {1098-5336}, mesh = {Bacterial Proteins/*genetics ; Cheese/microbiology ; Fermentation ; Food Microbiology ; *Genetic Variation ; *Genome, Bacterial ; Lactococcus lactis/*genetics/metabolism ; Leuconostoc/genetics/metabolism ; Odorants ; Reproducibility of Results ; Taste ; Whole Genome Sequencing ; }, abstract = {Undefined mesophilic mixed (DL) starter cultures are used in the production of continental cheeses and contain unknown strain mixtures of Lactococcus lactis and leuconostocs. 
The choice of starter culture affects the taste, aroma, and quality of the final product. To gain insight into the diversity of Lactococcus lactis strains in starter cultures, we whole-genome sequenced 95 isolates from three different starter cultures. Pan-genomic analyses, which included 30 publically available complete genomes, grouped the strains into 21 L. lactis subsp. lactis and 28 L. lactis subsp. cremoris lineages. Only one of the 95 isolates grouped with previously sequenced strains, and the three starter cultures showed no overlap in lineage distributions. The culture diversity was assessed by targeted amplicon sequencing using purR, a core gene, and epsD, present in 93 of the 95 starter culture isolates but absent in most of the reference strains. This enabled an unprecedented discrimination of starter culture Lactococcus lactis and revealed substantial differences between the three starter cultures and compositional shifts during the cultivation of cultures in milk. IMPORTANCE: In contemporary cheese production, standardized frozen seed stock starter cultures are used to ensure production stability, reproducibility, and quality control of the product. The dairy industry experiences significant disruptions of cheese production due to phage attacks, and one commonly used countermeasure to phage attack is to employ a starter rotation strategy, in which two or more starters with minimal overlap in phage sensitivity are used alternately. A culture-independent analysis of the lactococcal diversity in complex undefined starter cultures revealed large differences between the three starter cultures and temporal shifts in lactococcal composition during the production of bulk starters. 
A better understanding of the lactococcal diversity in starter cultures will enable the development of more robust starter cultures and assist in maintaining the efficiency and stability of the production process by ensuring the presence of key bacteria that are important to the characteristics of the product.}, } @article {pmid29216329, year = {2017}, author = {Sekizuka, T and Ogasawara, Y and Ohkusa, T and Kuroda, M}, title = {Characterization of Fusobacterium varium Fv113-g1 isolated from a patient with ulcerative colitis based on complete genome sequence and transcriptome analysis.}, journal = {PloS one}, volume = {12}, number = {12}, pages = {e0189319}, pmid = {29216329}, issn = {1932-6203}, mesh = {Bacterial Proteins/genetics/metabolism ; Colitis, Ulcerative/*microbiology ; Fusobacterium/genetics/*isolation & purification ; *Genome, Bacterial ; Humans ; *Transcriptome ; }, abstract = {Fusobacterium spp. present in the oral and gut flora is carcinogenic and is associated with the risk of pancreatic and colorectal cancers. Fusobacterium spp. is also implicated in a broad spectrum of human pathologies, including Crohn's disease and ulcerative colitis (UC). Here we report the complete genome sequence of Fusobacterium varium Fv113-g1 (genome size, 3.96 Mb) isolated from a patient with UC. Comparative genome analyses totally suggested that Fv113-g1 is basically assigned as F. varium, in particular, it could be reclassified as notable F. varium subsp. similar to F. ulcerans because of partial shared orthologs. Compared with the genome sequences of F. varium ATCC 27725 (genome size, 3.30 Mb) and other strains of Fusobacterium spp., Fv113-g1 possesses many accessary pan-genome sequences with noteworthy multiple virulence factors, including 44 autotransporters (type V secretion system, T5SS) and 13 Fusobacterium adhesion (FadA) paralogs involved in potential mucosal inflammation. 
Indeed, transcriptome analysis demonstrated that Fv113-g1-specific accessary genes, such as multiple T5SS and fadA paralogs, showed notably increased expression with D-MEM cultivation than with brain heart infusion broth. This implied that growth condition may enhance the expression of such potential virulence factors, leading to remarkable survival against other gut microorganisms and to the pathogenicity to human intestinal epithelium.}, } @article {pmid29212929, year = {2018}, author = {Beilstein, F and Blanchet, M and Vaillant, A and Sureau, C}, title = {Nucleic Acid Polymers Are Active against Hepatitis Delta Virus Infection In Vitro.}, journal = {Journal of virology}, volume = {92}, number = {4}, pages = {}, pmid = {29212929}, issn = {1098-5514}, mesh = {Antiviral Agents/*pharmacology ; Cell Line, Tumor ; Hepatitis B virus ; Hepatitis Delta Virus/*drug effects/physiology ; Humans ; Nucleic Acids/*pharmacology ; Polymers/pharmacology ; Viral Envelope Proteins/genetics/*metabolism ; Virion/drug effects ; Virus Internalization/*drug effects ; Virus Replication/drug effects ; }, abstract = {In this study, an in vitro infection model for the hepatitis delta virus (HDV) was used to evaluate the antiviral effects of phosphorothioate nucleic acid polymers (NAPs) and investigate their mechanism of action. The results show that NAPs inhibit HDV infection at concentrations less than 4 μM in cultures of differentiated human hepatoma cells. NAPs were shown to be active at viral entry but inactive postentry on HDV RNA replication. Inhibition was independent of the NAP nucleotide sequence but dependent on both size and amphipathicity of the polymer. NAP antiviral activity was effective against HDV virions bearing the main hepatitis B virus (HBV) immune escape substitutions (D144A and G145R) and was pangenomic with regard to HBV envelope proteins. 
Furthermore, similar to immobilized heparin, immobilized NAPs could bind HDV particles, suggesting that entry inhibition was due, at least in part, to preventing attachment of the virus to cell surface glycosaminoglycans. The results document NAPs as a novel class of antiviral compounds that can prevent HDV propagation. IMPORTANCE: HDV infection causes the most severe form of viral hepatitis in humans and one of the most difficult to cure. Currently, treatments are limited to long-term administration of interferon at high doses, which provide only partial efficacy. There is thus an urgent need for innovative approaches to identify new antiviral against HDV. The significance of our study is in demonstrating that nucleic acid polymers (NAPs) are active against HDV by targeting the envelope of HDV virions. In an in vitro infection assay, NAP activity was recorded at concentrations less than 4 μM in the absence of cell toxicity. Furthermore, the fact that NAPs could block HDV at viral entry suggests their potential to control the spread of HDV in a chronically HBV-infected liver. 
In addition, NAP anti-HDV activity was pangenomic with regard to HBV envelope proteins and not circumvented by HBsAg substitutions associated with HBV immune escape.}, } @article {pmid29208130, year = {2017}, author = {Husain, F and Tang, K and Veeranagouda, Y and Boente, R and Patrick, S and Blakely, G and Wexler, HM}, title = {Novel large-scale chromosomal transfer in Bacteroides fragilis contributes to its pan-genome and rapid environmental adaptation.}, journal = {Microbial genomics}, volume = {3}, number = {11}, pages = {}, pmid = {29208130}, issn = {2057-5858}, support = {R21 AI109545/AI/NIAID NIH HHS/United States ; }, mesh = {Adaptation, Biological/*genetics ; Antigenic Variation/*genetics ; Bacteroides Infections/*microbiology ; Bacteroides fragilis/*genetics/pathogenicity/physiology ; Chromosomes, Bacterial/*genetics ; DNA Transposable Elements ; Gastrointestinal Microbiome/*genetics ; Gene Transfer, Horizontal/*genetics ; Humans ; Polysaccharides, Bacterial/biosynthesis/genetics ; Recombination, Genetic ; }, abstract = {Bacteroides fragilis, an important component of the human gastrointestinal microbiota, can cause lethal extra-intestinal infection upon escape from the gastrointestinal tract. We demonstrated transfer and recombination of large chromosomal segments from B. fragilis HMW615, a multidrug resistant clinical isolate, to B. fragilis 638R. In one example, the transfer of a segment of ~435 Kb/356 genes replaced ~413 Kb/326 genes of the B. fragilis 638R chromosome. In addition to transfer of antibiotic resistance genes, these transfers (1) replaced complete divergent polysaccharide biosynthesis loci; (2) replaced DNA inversion-controlled intergenic shufflons (that control expression of genes encoding starch utilization system outer membrane proteins) with more complex, divergent shufflons; and (3) introduced additional intergenic shufflons encoding divergent Type 1 restriction/modification systems. 
Conjugative transposon-like genes within a transferred segment and within a putative integrative conjugative element (ICE5) ~45 kb downstream from the transferred segment both encode proteins that may be involved in the observed transfer. These data indicate that chromosomal transfer is a driver of antigenic diversity and nutrient adaptation in Bacteroides that (1) contributes to the dissemination of the extensive B. fragilis pan-genome, (2) allows rapid adaptation to a changing environment and (3) can confer pathogenic characteristics to host symbionts.}, } @article {pmid29206296, year = {2018}, author = {Sancho, R and Cantalapiedra, CP and López-Alvarez, D and Gordon, SP and Vogel, JP and Catalán, P and Contreras-Moreira, B}, title = {Comparative plastome genomics and phylogenomics of Brachypodium: flowering time signatures, introgression and recombination in recently diverged ecotypes.}, journal = {The New phytologist}, volume = {218}, number = {4}, pages = {1631-1644}, doi = {10.1111/nph.14926}, pmid = {29206296}, issn = {1469-8137}, mesh = {Base Sequence ; Brachypodium/*classification/*genetics ; *Ecotype ; Evolution, Molecular ; Flowers/*physiology ; Genes, Plant ; Genetic Variation ; *Genome, Plastid ; *Genomics ; Geography ; Haplotypes/genetics ; Mediterranean Region ; *Phylogeny ; Recombination, Genetic/*genetics ; Time Factors ; }, abstract = {Few pan-genomic studies have been conducted in plants, and none of them have focused on the intraspecific diversity and evolution of their plastid genomes. We address this issue in Brachypodium distachyon and its close relatives B. stacei and B. hybridum, for which a large genomic data set has been compiled. We analyze inter- and intraspecific plastid comparative genomics and phylogenomic relationships within a family-wide framework. Major indel differences were detected between Brachypodium plastomes. Within B. 
distachyon, we detected two main lineages, a mostly Extremely Delayed Flowering (EDF+) clade and a mostly Spanish (S+) - Turkish (T+) clade, plus nine chloroplast capture and two plastid DNA (ptDNA) introgression and micro-recombination events. Early Oligocene (30.9 million yr ago (Ma)) and Late Miocene (10.1 Ma) divergence times were inferred for the respective stem and crown nodes of Brachypodium and a very recent Mid-Pleistocene (0.9 Ma) time for the B. distachyon split. Flowering time variation is a main factor driving rapid intraspecific divergence in B. distachyon, although it is counterbalanced by repeated introgression between previously isolated lineages. Swapping of plastomes between the three different genomic groups, EDF+, T+, S+, probably resulted from random backcrossing followed by stabilization through selection pressure.}, } @article {pmid29205771, year = {2018}, author = {Hurgobin, B and Golicz, AA and Bayer, PE and Chan, CK and Tirnaz, S and Dolatabadian, A and Schiessl, SV and Samans, B and Montenegro, JD and Parkin, IAP and Pires, JC and Chalhoub, B and King, GJ and Snowdon, R and Batley, J and Edwards, D}, title = {Homoeologous exchange is a major cause of gene presence/absence variation in the amphidiploid Brassica napus.}, journal = {Plant biotechnology journal}, volume = {16}, number = {7}, pages = {1265-1274}, pmid = {29205771}, issn = {1467-7652}, mesh = {Brassica napus/*genetics ; Diploidy ; Gene Conversion/*genetics ; Gene Deletion ; Gene Duplication ; Genes, Plant/*genetics ; Genetic Variation/genetics ; Genome, Plant/genetics ; Quantitative Trait, Heritable ; }, abstract = {Homoeologous exchanges (HEs) have been shown to generate novel gene combinations and phenotypes in a range of polyploid species. Gene presence/absence variation (PAV) is also a major contributor to genetic diversity. 
In this study, we show that there is an association between these two events, particularly in recent Brassica napus synthetic accessions, and that these represent a novel source of genetic diversity, which can be captured for the improvement of this important crop species. By assembling the pangenome of B. napus, we show that 38% of the genes display PAV behaviour, with some of these variable genes predicted to be involved in important agronomic traits including flowering time, disease resistance, acyl lipid metabolism and glucosinolate metabolism. This study is a first and provides a detailed characterization of the association between HEs and PAVs in B. napus at the pangenome level.}, } @article {pmid29204317, year = {2017}, author = {Cheng, G and Lu, Q and Ma, L and Zhang, G and Xu, L and Zhou, Z}, title = {BGDMdocker: a Docker workflow for data mining and visualization of bacterial pan-genomes and biosynthetic gene clusters.}, journal = {PeerJ}, volume = {5}, number = {}, pages = {e3948}, pmid = {29204317}, issn = {2167-8359}, abstract = {Recently, Docker technology has received increasing attention throughout the bioinformatics community. However, its implementation has not yet been mastered by most biologists; accordingly, its application in biological research has been limited. In order to popularize this technology in the field of bioinformatics and to promote the use of publicly available bioinformatics tools, such as Dockerfiles and Images from communities, government sources, and private owners in the Docker Hub Registry and other Docker-based resources, we introduce here a complete and accurate bioinformatics workflow based on Docker. The present workflow enables analysis and visualization of pan-genomes and biosynthetic gene clusters of bacteria. This provides a new solution for bioinformatics mining of big data from various publicly available biological databases. 
The present step-by-step guide creates an integrative workflow through a Dockerfile to allow researchers to build their own Image and run Container easily.}, } @article {pmid29198880, year = {2018}, author = {Fuchs, S and Mehlan, H and Bernhardt, J and Hennig, A and Michalik, S and Surmann, K and Pané-Farré, J and Giese, A and Weiss, S and Backert, L and Herbig, A and Nieselt, K and Hecker, M and Völker, U and Mäder, U}, title = {AureoWiki -- The repository of the Staphylococcus aureus research and annotation community.}, journal = {International journal of medical microbiology : IJMM}, volume = {308}, number = {6}, pages = {558-568}, doi = {10.1016/j.ijmm.2017.11.011}, pmid = {29198880}, issn = {1618-0607}, mesh = {*Bacterial Proteins ; Computational Biology ; *Databases as Topic ; *Genes, Bacterial ; Genome, Bacterial ; Internet ; *Molecular Sequence Annotation ; Staphylococcal Infections/microbiology ; Staphylococcus aureus/*genetics ; }, abstract = {In light of continuously accumulating data and knowledge on major human pathogens, comprehensive and up-to-date sources of easily accessible information are urgently required. The AureoWiki database (http://aureowiki.med.uni-greifswald.de) provides detailed information on the genes and proteins of clinically and experimentally relevant S. aureus strains, currently covering NCTC 8325, COL, Newman, USA300_FPR3757, and N315. By implementing a pan-genome approach, AureoWiki facilitates the transfer of knowledge gained in studies with different S. aureus strains, thus supporting functional annotation and better understanding of this organism. All data related to a given gene or gene product is compiled on a strain-specific gene page. The gene pages contain sequence-based information complemented by data on, for example, protein function and localization, transcriptional regulation, and gene expression. The information provided is connected via links to other databases and published literature. 
Importantly, orthologous genes of the individual strains, which are linked by a pan-genome gene identifier and a unified gene name, are presented side by side using strain-specific tabs. The respective pan-genome gene page contains an orthologue table for 32 S. aureus strains, a multiple-strain genome viewer, a protein sequence alignment as well as other comparative information. The data collected in AureoWiki is also accessible through various download options in order to support bioinformatics applications. In addition, based on two large-scale gene expression data sets, AureoWiki provides graphical representations of condition-dependent mRNA levels and protein profiles under various laboratory and infection-related conditions.}, } @article {pmid29187731, year = {2017}, author = {Yang, N and Xu, XW and Wang, RR and Peng, WL and Cai, L and Song, JM and Li, W and Luo, X and Niu, L and Wang, Y and Jin, M and Chen, L and Luo, J and Deng, M and Wang, L and Pan, Q and Liu, F and Jackson, D and Yang, X and Chen, LL and Yan, J}, title = {Contributions of Zea mays subspecies mexicana haplotypes to modern maize.}, journal = {Nature communications}, volume = {8}, number = {1}, pages = {1874}, pmid = {29187731}, issn = {2041-1723}, mesh = {*Evolution, Molecular ; Genome, Plant/*genetics ; Haplotypes ; Zea mays/*genetics ; }, abstract = {Maize was domesticated from lowland teosinte (Zea mays ssp. parviglumis), but the contribution of highland teosinte (Zea mays ssp. mexicana, hereafter mexicana) to modern maize is not clear. Here, two genomes for Mo17 (a modern maize inbred) and mexicana are assembled using a meta-assembly strategy after sequencing of 10 lines derived from a maize-teosinte cross. Comparative analyses reveal a high level of diversity between Mo17, B73, and mexicana, including three Mb-size structural rearrangements. 
The maize spontaneous mutation rate is estimated to be $2.17 \times 10^{-8}$--$3.87 \times 10^{-8}$ per site per generation with a nonrandom distribution across the genome. A higher deleterious mutation rate is observed in the pericentromeric regions, and might be caused by differences in recombination frequency. Over 10% of the maize genome shows evidence of introgression from the mexicana genome, suggesting that mexicana contributed to maize adaptation and improvement. Our data offer a rich resource for constructing the pan-genome of Zea mays and genetic improvement of modern maize varieties.}, } @article {pmid29183332, year = {2017}, author = {Hall, AB and Yassour, M and Sauk, J and Garner, A and Jiang, X and Arthur, T and Lagoudas, GK and Vatanen, T and Fornelos, N and Wilson, R and Bertha, M and Cohen, M and Garber, J and Khalili, H and Gevers, D and Ananthakrishnan, AN and Kugathasan, S and Lander, ES and Blainey, P and Vlamakis, H and Xavier, RJ and Huttenhower, C}, title = {A novel Ruminococcus gnavus clade enriched in inflammatory bowel disease patients.}, journal = {Genome medicine}, volume = {9}, number = {1}, pages = {103}, pmid = {29183332}, issn = {1756-994X}, support = {U54 DK102557/DK/NIDDK NIH HHS/United States ; U54DE023798//National Institutes of Health (US)/ ; P30 DK043351/DK/NIDDK NIH HHS/United States ; P30DK43351//National Institute of Diabetes and Digestive and Kidney Diseases/ ; R01 DK092405/DK/NIDDK NIH HHS/United States ; U54 DE023798/DE/NIDCR NIH HHS/United States ; K99 DK113224/DK/NIDDK NIH HHS/United States ; R01DK92405//National Institute of Diabetes and Digestive and Kidney Diseases/ ; MCB-1453942//National Science Foundation (US)/ ; }, mesh = {Adult ; Aged ; Feces/microbiology ; Gastrointestinal Microbiome/genetics ; Genome, Bacterial ; Humans ; Inflammatory Bowel Diseases/*microbiology ; Middle Aged ; Oxidative Stress ; Phylogeny ; Ruminococcus/genetics/*isolation & purification ; Species Specificity ; Young Adult ; }, abstract = {BACKGROUND: 
Inflammatory bowel disease (IBD) is characterized by chronic inflammation of the gastrointestinal tract that is associated with changes in the gut microbiome. Here, we sought to identify strain-specific functional correlates with IBD outcomes.

METHODS: We performed metagenomic sequencing of monthly stool samples from 20 IBD patients and 12 controls (266 total samples). These were taxonomically profiled with MetaPhlAn2 and functionally profiled using HUMAnN2. Differentially abundant species were identified using MaAsLin and strain-specific pangenome haplotypes were analyzed using PanPhlAn.

RESULTS: We found a significantly higher abundance in patients of facultative anaerobes that can tolerate the increased oxidative stress of the IBD gut. We also detected dramatic, yet transient, blooms of Ruminococcus gnavus in IBD patients, often co-occurring with increased disease activity. We identified two distinct clades of R. gnavus strains, one of which is enriched in IBD patients. To study functional differences between these two clades, we augmented the R. gnavus pangenome by sequencing nine isolates from IBD patients. We identified 199 IBD-specific, strain-specific genes involved in oxidative stress responses, adhesion, iron-acquisition, and mucus utilization, potentially conferring an adaptive advantage for this R. gnavus clade in the IBD gut.

CONCLUSIONS: This study adds further evidence to the hypothesis that increased oxidative stress may be a major factor shaping the dysbiosis of the microbiome observed in IBD and suggests that R. gnavus may be an important member of the altered gut community in IBD.}, } @article {pmid29176702, year = {2017}, author = {Vos, M and Eyre-Walker, A}, title = {Are pangenomes adaptive or not?}, journal = {Nature microbiology}, volume = {2}, number = {12}, pages = {1576}, doi = {10.1038/s41564-017-0067-5}, pmid = {29176702}, issn = {2058-5276}, mesh = {*Evolution, Molecular ; *Prokaryotic Cells ; }, } @article {pmid29176701, year = {2017}, author = {McInerney, JO and McNally, A and O'Connell, MJ}, title = {Reply to 'The population genetics of pangenomes'.}, journal = {Nature microbiology}, volume = {2}, number = {12}, pages = {1575}, doi = {10.1038/s41564-017-0068-4}, pmid = {29176701}, issn = {2058-5276}, mesh = {*Genetics, Population ; }, } @article {pmid29176697, year = {2017}, author = {Shapiro, BJ}, title = {The population genetics of pangenomes.}, journal = {Nature microbiology}, volume = {2}, number = {12}, pages = {1574}, doi = {10.1038/s41564-017-0066-6}, pmid = {29176697}, issn = {2058-5276}, mesh = {*Genetics, Population ; *Prokaryotic Cells ; }, } @article {pmid29170656, year = {2017}, author = {Lira, F and Berg, G and Martínez, JL}, title = {Double-Face Meets the Bacterial World: The Opportunistic Pathogen Stenotrophomonas maltophilia.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {2190}, pmid = {29170656}, issn = {1664-302X}, abstract = {Most studies on bacterial virulence focus on the pathogen itself. However, it is important to recall that the in-host behavior and the virulence of bacterial pathogens constitute a complex situation that depends on both the microorganisms and the infected host. 
While healthy people (the community) is infected by classical pathogenic microorganisms, able to cope with the anti-infection defenses of the host, in the case of people with basal diseases, debilitated or immunodepressed, the range of pathogens able to cause infection is wider and includes the so-named opportunistic pathogens, which lack the inherent ability to cause disease in healthy hosts and rarely produce infections in the community. Some of the most relevant opportunistic pathogens, as Stenotrophomonas maltophilia, have an environmental origin and, in occasions, present interesting biotechnological properties. Consequently, it is important knowing whether S. maltophilia isolates recovered from infections constitute a specific phylogenetic branch that has evolved toward acquiring a virulent phenotype as it happens in the case of classical pathogens or rather, any member of this bacterial species is capable of producing infection and its pathogenic behavior is mainly a consequence of the host situation. To address this question, we analyzed a set of environmental and clinical S. maltophilia strains. Our results indicate that this opportunistic pathogen presents a large core genome and that the distribution of genes in general, and of known virulence determinants in particular, is similar among environmental and clinical isolates. The majority of genes not belonging to the S. maltophilia core genome are present in just one or two of the analyzed strains. This indicates that, more than speciation into different lineages (virulent and environmental), the evolution of S. maltophilia is based in the strain-specific acquisition of genes, likely involved in the adaptation of this bacterial species to different microniches. In addition, both environmental and clinical isolates present low susceptibility to several antimicrobials. Altogether our results support that S. 
maltophilia does not present a specific evolutionary branch toward virulence and most likely infection is mainly the consequence of the impaired anti-infective response of the infected patients.}, } @article {pmid29164068, year = {2017}, author = {Nourdin-Galindo, G and Sánchez, P and Molina, CF and Espinoza-Rojas, DA and Oliver, C and Ruiz, P and Vargas-Chacoff, L and Cárcamo, JG and Figueroa, JE and Mancilla, M and Maracaja-Coutinho, V and Yañez, AJ}, title = {Comparative Pan-Genome Analysis of Piscirickettsia salmonis Reveals Genomic Divergences within Genogroups.}, journal = {Frontiers in cellular and infection microbiology}, volume = {7}, number = {}, pages = {459}, pmid = {29164068}, issn = {2235-2988}, mesh = {Animals ; Bacterial Proteins/genetics ; Fish Diseases/microbiology ; Fishes/microbiology ; Gene Ontology ; Genes, Bacterial/*genetics ; Genome Size ; Genome, Bacterial/*genetics ; *Genotype ; Host-Pathogen Interactions ; Kinetics ; Metabolic Networks and Pathways/genetics ; Operon ; Phylogeny ; Piscirickettsia/*genetics/growth & development/isolation & purification/pathogenicity ; Piscirickettsiaceae Infections/microbiology/veterinary ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Piscirickettsia salmonis is the etiological agent of salmonid rickettsial septicemia, a disease that seriously affects the salmonid industry. Despite efforts to genomically characterize P. salmonis, functional information on the life cycle, pathogenesis mechanisms, diagnosis, treatment, and control of this fish pathogen remain lacking. To address this knowledge gap, the present study conducted an in silico pan-genome analysis of 19 P. salmonis strains from distinct geographic locations and genogroups. Results revealed an expected open pan-genome of 3,463 genes and a core-genome of 1,732 genes. 
Two marked genogroups were identified, as confirmed by phylogenetic and phylogenomic relationships to the LF-89 and EM-90 reference strains, as well as by assessments of genomic structures. Different structural configurations were found for the six identified copies of the ribosomal operon in the P. salmonis genome, indicating translocation throughout the genetic material. Chromosomal divergences in genomic localization and quantity of genetic cassettes were also found for the Dot/Icm type IVB secretion system. To determine divergences between core-genomes, additional pan-genome descriptions were compiled for the so-termed LF and EM genogroups. Open pan-genomes composed of 2,924 and 2,778 genes and core-genomes composed of 2,170 and 2,228 genes were respectively found for the LF and EM genogroups. The core-genomes were functionally annotated using the Gene Ontology, KEGG, and Virulence Factor databases, revealing the presence of several shared groups of genes related to basic function of intracellular survival and bacterial pathogenesis. Additionally, the specific pan-genomes for the LF and EM genogroups were defined, resulting in the identification of 148 and 273 exclusive proteins, respectively. Notably, specific virulence factors linked to adherence, colonization, invasion factors, and endotoxins were established. The obtained data suggest that these genes could be directly associated with inter-genogroup differences in pathogenesis and host-pathogen interactions, information that could be useful in designing novel strategies for diagnosing and controlling P. salmonis infection.}, } @article {pmid29154929, year = {2017}, author = {Sankarasubramanian, J and Vishnu, US and Gunasekaran, P and Rajendhran, J}, title = {Identification of genetic variants of Brucella spp. 
through genome-wide association studies.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {56}, number = {}, pages = {92-98}, doi = {10.1016/j.meegid.2017.11.016}, pmid = {29154929}, issn = {1567-7257}, mesh = {Animals ; Brucella/*classification/*genetics ; Brucellosis/microbiology ; Computational Biology/methods ; Genetic Association Studies ; *Genetic Variation ; *Genome, Bacterial ; *Genome-Wide Association Study ; Genomics/methods ; Phylogeny ; Polymorphism, Single Nucleotide ; Zoonoses/microbiology ; }, abstract = {Brucellosis is an important zoonotic disease caused by Brucella spp. We present a phylogeny of 552 strains based on genome-wide single nucleotide polymorphisms (SNPs) determined by an alignment-free k-mer approach. A total of 138,029 SNPs were identified from 552 Brucella genomes. Of these, 31,152 and 106,877 were core and non-core SNPs, respectively. Based on pan-genome analysis 11,937 and 972 genes were identified as pan and core genome, respectively. The pan-genome-wide analysis studies (Pan-GWAS) could not identify the group-specific variants in Brucella spp. Therefore, we focused on SNP based genome-wide association studies (SNP-GWAS) to identify the species-specific genetic determinants in Brucella spp. Phylogenetic tree representing eleven recognized Brucella spp. showed 16 major lineages. We identified 143 species-specific SNPs in Brucella abortus that are conserved in 311 B. abortus genomes. Of these, 141 species-specific SNPs were confined in the positively significant SNPs of B. abortus using SNP-GWAS. Since conserved in all the B. abortus genomes studied, these SNPs might have originated very early during the evolution of B. abortus and might be responsible for the evolution of B. abortus with cattle as the preferred host. Similarly, we identified 383 species-specific SNPs conserved in 132 Brucella melitensis genomes. 
Of these 379 species-specific SNPs were identified as positively associated using GWAS. Interestingly, >98% of the SNPs that are significantly, positively associated with the traits showed 100% sensitivity and 100% specificity. These identified species-specific core-SNPs identified in Brucella genomes could be responsible for the speciation and their respective host adaptation.}, } @article {pmid29142289, year = {2017}, author = {Shariati J, V and Malboobi, MA and Tabrizi, Z and Tavakol, E and Owlia, P and Safari, M}, title = {Comprehensive genomic analysis of a plant growth-promoting rhizobacterium Pantoea agglomerans strain P5.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {15610}, pmid = {29142289}, issn = {2045-2322}, mesh = {Comparative Genomic Hybridization ; Gene Regulatory Networks/genetics ; Genome, Bacterial/*genetics ; *Genomics ; Molecular Sequence Annotation ; Pantoea/*genetics ; Phylogeny ; Plant Diseases/*genetics/microbiology ; Whole Genome Sequencing ; }, abstract = {In this study, we provide a comparative genomic analysis of Pantoea agglomerans strain P5 and 10 closely related strains based on phylogenetic analyses. A next-generation shotgun strategy was implemented using the Illumina HiSeq 2500 technology followed by core- and pan-genome analysis. The genome of P. agglomerans strain P5 contains an assembly size of 5082485 bp with 55.4% G + C content. P. agglomerans consists of 2981 core and 3159 accessory genes for Coding DNA Sequences (CDSs) based on the pan-genome analysis. Strain P5 can be grouped closely with strains PG734 and 299 R using pan and core genes, respectively. All the predicted and annotated gene sequences were allocated to KEGG pathways. Accordingly, genes involved in plant growth-promoting (PGP) ability, including phosphate solubilization, IAA and siderophore production, acetoin and 2,3-butanediol synthesis and bacterial secretion, were assigned. 
This study provides an in-depth view of the PGP characteristics of strain P5, highlighting its potential use in agriculture as a biofertilizer.}, } @article {pmid29126241, year = {2018}, author = {Kajala, I and Bergsveinson, J and Friesen, V and Redekop, A and Juvonen, R and Storgårds, E and Ziola, B}, title = {Lactobacillus backii and Pediococcus damnosus isolated from 170-year-old beer recovered from a shipwreck lack the metabolic activities required to grow in modern lager beer.}, journal = {FEMS microbiology ecology}, volume = {94}, number = {1}, pages = {}, doi = {10.1093/femsec/fix152}, pmid = {29126241}, issn = {1574-6941}, mesh = {Acids/metabolism ; Base Sequence ; Beer/*microbiology ; Biofilms/growth & development ; Biological Transport/genetics ; Finland ; Food Microbiology ; Genome, Bacterial/genetics ; Lactobacillus/*genetics/isolation & purification/metabolism ; Pediococcus/*genetics/isolation & purification/metabolism ; Plasmids/*genetics ; Polysaccharides, Bacterial/genetics ; }, abstract = {In 2010, bottles of beer containing viable bacteria of the common beer-spoilage species Lactobacillus backii and Pediococcus damnosus were recovered from a shipwreck near the Åland Islands, Finland. The 170-year quiescent state maintained by the shipwreck bacteria presented a unique opportunity to study lactic acid bacteria (LAB) evolution vis-a-vis growth and survival in the beer environment. Three shipwreck bacteria (one L. backii strain and two P. damnosus strains) and modern-day beer-spoilage isolates of the same two species were genome sequenced, characterized for hop iso-α-acid tolerance, and growth in degassed lager and wheat beer. In addition, plasmid variants of the modern-day P. damnosus strain were analyzed for the effect of plasmid-encoded genes on growth in lager beer. Coding content on two plasmids was identified as essential for LAB growth in modern lager beer. 
Three chromosomal regions containing genes related to sugar transport and cell wall polysaccharides were shared by pediococci able to grow in beer. Our results show that the three shipwreck bacteria lack the necessary plasmid-located genetic content to grow in modern lager beer, but carry additional genes related to acid tolerance and biofilm formation compared to their modern counterparts.}, } @article {pmid29113013, year = {2018}, author = {Leblois, R and Gautier, M and Rohfritsch, A and Foucaud, J and Burban, C and Galan, M and Loiseau, A and Sauné, L and Branco, M and Gharbi, K and Vitalis, R and Kerdelhué, C}, title = {Deciphering the demographic history of allochronic differentiation in the pine processionary moth Thaumetopoea pityocampa.}, journal = {Molecular ecology}, volume = {27}, number = {1}, pages = {264-278}, doi = {10.1111/mec.14411}, pmid = {29113013}, issn = {1365-294X}, support = {G0900740/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Adaptation, Physiological/genetics ; Animals ; Demography ; *Gene Flow ; Gene Frequency ; Genetic Variation ; Genetics, Population ; Genome, Insect ; Moths/*genetics/*physiology ; Phylogeny ; Polymorphism, Single Nucleotide/genetics ; Principal Component Analysis ; Seasons ; Time Factors ; }, abstract = {Understanding the processes of adaptive divergence, which may ultimately lead to speciation, is a major question in evolutionary biology. Allochronic differentiation refers to a particular situation where gene flow is primarily impeded by temporal isolation between early and late reproducers. This process has been suggested to occur in a large array of organisms, even though it is still overlooked in the literature. We here focused on a well-documented case of incipient allochronic speciation in the winter pine processionary moth Thaumetopoea pityocampa. This species typically reproduces in summer and larval development occurs throughout autumn and winter. 
A unique, phenologically shifted population (SP) was discovered in 1997 in Portugal. It was proved to be strongly differentiated from the sympatric "winter population" (WP), but its evolutionary history could only now be explored. We took advantage of the recent assembly of a draft genome and of the development of pan-genomic RAD-seq markers to decipher the demographic history of the differentiating populations and develop genome scans of adaptive differentiation. We showed that the SP diverged relatively recently, that is, few hundred years ago, and went through two successive bottlenecks followed by population size expansions, while the sympatric WP is currently experiencing a population decline. We identified outlier SNPs that were mapped onto the genome, but none were associated with the phenological shift or with subsequent adaptations. The strong genetic drift that occurred along the SP lineage certainly challenged our capacity to reveal functionally important loci.}, } @article {pmid29100523, year = {2017}, author = {Yu, J and Song, Y and Ren, Y and Qing, Y and Liu, W and Sun, Z}, title = {Genome-level comparisons provide insight into the phylogeny and metabolic diversity of species within the genus Lactococcus.}, journal = {BMC microbiology}, volume = {17}, number = {1}, pages = {213}, pmid = {29100523}, issn = {1471-2180}, mesh = {Base Composition ; *Carbohydrate Metabolism ; DNA, Bacterial/genetics ; *Genetic Variation ; Genome Size ; Genome, Bacterial/*genetics ; Lactococcus/*classification/enzymology/*genetics ; *Phylogeny ; Species Specificity ; Whole Genome Sequencing ; }, abstract = {BACKGROUND: The genomic diversity of different species within the genus Lactococcus and the relationships between genomic differentiation and environmental factors remain unclear. In this study, type isolates of ten Lactococcus species/subspecies were sequenced to assess their genomic characteristics, metabolic diversity, and phylogenetic relationships.

RESULTS: The total genome sizes varied between 1.99 (Lactococcus plantarum) and 2.46 megabases (Mb; L. lactis subsp. lactis), and the G + C content ranged from 34.81 (L. lactis subsp. hordniae) to 39.67% (L. raffinolactis) with an average value of 37.02%. Analysis of genome dynamics indicated that the genus Lactococcus has an open pan-genome, while the core genome size decreased with sequential addition at the genus and species group levels. A phylogenetic dendrogram based on the concatenated amino acid sequences of 643 core genes was largely consistent with the phylogenetic tree obtained by 16S ribosomal RNA (rRNA) genes, but it provided a more robust phylogenetic resolution than the 16S rRNA gene-based analysis.

CONCLUSIONS: Comparative genomics indicated that species in the genus Lactococcus had high degrees of diversity in genome size, gene content, and carbohydrate metabolism. This may be important for the specific adaptations that allow different Lactococcus species to survive in different environments. These results provide a quantitative basis for understanding the genomic and metabolic diversity within the genus Lactococcus, laying the foundation for future studies on taxonomy and functional genomics.}, } @article {pmid29079617, year = {2018}, author = {Pantůček, R and Sedláček, I and Indráková, A and Vrbovská, V and Mašlaňová, I and Kovařovic, V and Švec, P and Králová, S and Krištofová, L and Kekláková, J and Petráš, P and Doškař, J}, title = {Staphylococcus edaphicus sp. nov., Isolated in Antarctica, Harbors the mecC Gene and Genomic Islands with a Suspected Role in Adaptation to Extreme Environments.}, journal = {Applied and environmental microbiology}, volume = {84}, number = {2}, pages = {}, pmid = {29079617}, issn = {1098-5336}, mesh = {Adaptation, Biological/*genetics ; Antarctic Regions ; *Extreme Cold Weather ; *Extreme Environments ; Genes, Bacterial/*physiology ; Genomic Islands/*physiology ; Staphylococcus/*classification/genetics/physiology ; }, abstract = {Two Gram-stain-positive, coagulase-negative staphylococcal strains were isolated from abiotic sources comprising stone fragments and sandy soil in James Ross Island, Antarctica. Here, we describe properties of a novel species of the genus Staphylococcus that has a 16S rRNA gene sequence nearly identical to that of Staphylococcus saprophyticus. However, compared to S. saprophyticus and the next closest relatives, the new species demonstrates considerable phylogenetic distance at the whole-genome level, with an average nucleotide identity of <85% and inferred DNA-DNA hybridization of <30%. It forms a separate branch in the S. 
saprophyticus phylogenetic clade as confirmed by multilocus sequence analysis of six housekeeping genes, rpoB, hsp60, tuf, dnaJ, gap, and sod. Matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS) and key biochemical characteristics allowed these bacteria to be distinguished from their nearest phylogenetic neighbors. In contrast to S. saprophyticus subsp. saprophyticus, the novel strains are pyrrolidonyl arylamidase and β-glucuronidase positive and β-galactosidase negative, nitrate is reduced, and acid produced aerobically from d-mannose. Whole-genome sequencing of the 2.69-Mb large chromosome revealed the presence of a number of mobile genetic elements, including the 27-kb pseudo-staphylococcus cassette chromosome mec of strain P5085\textsuperscript{T} (ψSCCmecP5085), harboring the mecC gene, two composite phage-inducible chromosomal islands probably essential to adaptation to extreme environments, and one complete and one defective prophage. Both strains are resistant to penicillin G, ampicillin, ceftazidime, methicillin, cefoxitin, and fosfomycin. We hypothesize that antibiotic resistance might represent an evolutionary advantage against beta-lactam producers, which are common in a polar environment. Based on these results, a novel species of the genus Staphylococcus is described and named Staphylococcus edaphicus sp. nov. The type strain is P5085\textsuperscript{T} (= CCM 8730\textsuperscript{T} = DSM 104441\textsuperscript{T}). IMPORTANCE: The description of Staphylococcus edaphicus sp. nov. enables the comparison of multidrug-resistant staphylococci from human and veterinary sources evolved in the globalized world to their geographically distant relative from the extreme Antarctic environment. Although this new species was not exposed to the pressure of antibiotic treatment in human or veterinary practice, mobile genetic elements carrying antimicrobial resistance genes were found in the genome. 
The genomic characteristics presented here elucidate the evolutionary relationships in the Staphylococcus genus with a special focus on antimicrobial resistance, pathogenicity, and survival traits. Genes encoded on mobile genetic elements were arranged in unique combinations but retained conserved locations for the integration of mobile genetic elements. These findings point to enormous plasticity of the staphylococcal pangenome, shaped by horizontal gene transfer. Thus, S. edaphicus can act not only as a reservoir of antibiotic resistance in a natural environment but also as a mediator for the spread and evolution of resistance genes.}, } @article {pmid29077859, year = {2018}, author = {Ding, W and Baumdicker, F and Neher, RA}, title = {panX: pan-genome analysis and exploration.}, journal = {Nucleic acids research}, volume = {46}, number = {1}, pages = {e5}, pmid = {29077859}, issn = {1362-4962}, mesh = {*Algorithms ; Bacteria/classification/*genetics ; Computational Biology/*methods ; Evolution, Molecular ; Genome, Bacterial/*genetics ; Genomics/*methods ; Multigene Family ; Phylogeny ; Reproducibility of Results ; *Software ; }, abstract = {Horizontal transfer, gene loss, and duplication result in dynamic bacterial genomes shaped by a complex mixture of different modes of evolution. Closely related strains can differ in the presence or absence of many genes, and the total number of distinct genes found in a set of related isolates---the pan-genome---is often many times larger than the genome of individual isolates. We have developed a pipeline that efficiently identifies orthologous gene clusters in the pan-genome. This pipeline is coupled to a powerful yet easy-to-use web-based visualization for interactive exploration of the pan-genome. The visualization consists of connected components that allow rapid filtering and searching of genes and inspection of their evolutionary history. 
For each gene cluster, panX displays an alignment, a phylogenetic tree, maps mutations within that cluster to the branches of the tree and infers gain and loss of genes on the core-genome phylogeny. PanX is available at pangenome.de. Custom pan-genomes can be visualized either using a web server or by serving panX locally as a browser-based application.}, } @article {pmid29068466, year = {2017}, author = {Martin, WF}, title = {Too Much Eukaryote {LGT}.}, journal = {BioEssays : news and reviews in molecular, cellular and developmental biology}, volume = {39}, number = {12}, doi = {10.1002/bies.201700115}, pmid = {29068466}, issn = {1521-1878}, mesh = {Animals ; Eukaryota/classification/*genetics ; Eukaryotic Cells/cytology/metabolism ; *Evolution, Molecular ; *Gene Transfer, Horizontal ; *Genome ; Mitochondria/genetics ; Phylogeny ; Plants/classification/genetics ; Plastids/genetics ; Prokaryotic Cells/cytology/metabolism ; Symbiosis/*genetics ; }, abstract = {The realization that prokaryotes naturally and frequently disperse genes across steep taxonomic boundaries via lateral gene transfer (LGT) gave wings to the idea that eukaryotes might do the same. Eukaryotes do acquire genes from mitochondria and plastids and they do transfer genes during the process of secondary endosymbiosis, the spread of plastids via eukaryotic algal endosymbionts. From those observations it, however, does not follow that eukaryotes transfer genes either in the same ways as prokaryotes do, or to a quantitatively similar degree. An important illustration of the difference is that eukaryotes do not exhibit pangenomes, though prokaryotes do. Eukaryotes reveal no detectable cumulative effects of LGT, though prokaryotes do. 
A critical analysis suggests that something is deeply amiss with eukaryote LGT theories.}, } @article {pmid29067021, year = {2017}, author = {Chen, X and Hitchings, MD and Mendoza, JE and Balanza, V and Facey, PD and Dyson, PJ and Bielza, P and Del Sol, R}, title = {Comparative Genomics of Facultative Bacterial Symbionts Isolated from {European} {Orius} Species Reveals an Ancestral Symbiotic Association.}, journal = {Frontiers in microbiology}, volume = {8}, pages = {1969}, pmid = {29067021}, issn = {1664-302X}, support = {BB/G024154/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Pest control in agriculture employs diverse strategies, among which the use of predatory insects has steadily increased. The use of several species within the genus Orius in pest control is widely spread, particularly in Mediterranean Europe. Commercial mass rearing of predatory insects is costly, and research efforts have concentrated on diet manipulation and selective breeding to reduce costs and improve efficacy. The characterisation and contribution of microbial symbionts to Orius sp. fitness, behaviour, and potential impact on human health has been neglected. This paper provides the first genome sequence level description of the predominant culturable facultative bacterial symbionts associated with five Orius species (O. laevigatus, O. niger, O. pallidicornis, O. majusculus, and O. albidipennis) from several geographical locations. Two types of symbionts were broadly classified as members of the genera Serratia and Leucobacter, while a third constitutes a new genus within the Erwiniaceae. These symbionts were found to colonise all the insect specimens tested, which evidenced an ancestral symbiotic association between these bacteria and the genus Orius. Pangenome analyses of the Serratia sp. 
isolates offered clues linking Type VI secretion system effector-immunity proteins from the Tai4 sub-family to the symbiotic lifestyle.}, } @article {pmid29061896, year = {2017}, author = {Hall, JPJ and Brockhurst, MA and Harrison, E}, title = {Sampling the mobile gene pool: innovation via horizontal gene transfer in bacteria.}, journal = {Philosophical transactions of the Royal Society of London. Series B, Biological sciences}, volume = {372}, number = {1735}, pmid = {29061896}, issn = {1471-2970}, support = {311490/ERC_/European Research Council/International ; }, mesh = {Bacteria/*genetics ; *Biological Evolution ; *Gene Transfer, Horizontal ; }, abstract = {In biological systems, evolutionary innovations can spread not only from parent to offspring (i.e. vertical transmission), but also 'horizontally' between individuals, who may or may not be related. Nowhere is this more apparent than in bacteria, where novel ecological traits can spread rapidly within and between species through horizontal gene transfer (HGT). This important evolutionary process is predominantly a by-product of the infectious spread of mobile genetic elements (MGEs). 
We will discuss the ecological conditions that favour the spread of traits by HGT, the evolutionary and social consequences of sharing traits, and how HGT is shaped by inherent conflicts between bacteria and MGEs. This article is part of the themed issue 'Process and pattern in innovations from cells to societies'.}, } @article {pmid29059217, year = {2017}, author = {Lee, JY and Han, GG and Lee, HB and Lee, SM and Kang, SK and Jin, GD and Park, J and Chae, BJ and Choi, YH and Kim, EB and Choi, YJ}, title = {Prohibition of antibiotic growth promoters has affected the genomic profiles of {Lactobacillus} salivarius inhabiting the swine intestine.}, journal = {PloS one}, volume = {12}, number = {10}, pages = {e0186671}, pmid = {29059217}, issn = {1932-6203}, mesh = {Animals ; Biofilms ; Drug Resistance, Microbial ; Genome, Bacterial ; Intestines/*microbiology ; Ligilactobacillus salivarius/*drug effects/genetics ; Probiotics/*pharmacology ; Swine ; }, abstract = {After the introduction of a ban on the use of antibiotic growth promoters (AGPs) for livestock, the feeding environment, including the composition of animal intestinal microbiota, has changed rapidly. We hypothesized that the microbial genomes have also been affected by this legal prohibition, and investigated an important member of the swine gut microbiota, Lactobacillus salivarius, with a pan-genomic approach. Here, we isolated 21 L. salivarius strains composed of 6 strains isolated before the AGP prohibition (SBPs) and 15 strains isolated after the AGP prohibition (SAPs) at an interval of a decade, and the draft genomes were generated de novo. Several genomic differences between SBPs and SAPs were identified, although the number and function of antibiotic resistance genes were not different. SBPs showed larger genome size and a higher number of orthologs, as well as lower genetic diversity, than SAPs. SBPs had genes associated with the utilization of L-rhamnose and D-tagatose for energy production. 
Because these sugars are also used in exopolysaccharide (EPS) synthesis, we tried to identify differences in biofilm formation-associated genes. The genes for the production of EPSs and extracellular proteins were different in terms of amino acid sequences. Indeed, SAPs formed dense biofilm and survived better than SBPs in the swine intestinal environment. These results suggest that SAPs have evolved and adapted to protect themselves from new selection pressure of the swine intestinal microenvironment by forming dense biofilms, adopting a distinct antibiotic resistance strategy. This finding is particularly important to understand the evolutionary changes in host-microbe interaction and provide detailed insight for the development of effective probiotics for livestock.}, } @article {pmid29058585, year = {2017}, author = {Zhang, DF and Zhi, XY and Zhang, J and Paoli, GC and Cui, Y and Shi, C and Shi, X}, title = {Preliminary comparative genomics revealed pathogenic potential and international spread of {Staphylococcus} argenteus.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {808}, pmid = {29058585}, issn = {1471-2164}, mesh = {Amino Acid Sequence ; *Genome, Bacterial ; Genomics/*methods ; Humans ; International Agencies ; Phylogeny ; Sequence Analysis, DNA/*methods ; Sequence Homology ; Staphylococcal Infections/genetics/*microbiology ; Staphylococcus/classification/genetics/isolation & purification/*pathogenicity ; Virulence ; Virulence Factors/*genetics ; }, abstract = {BACKGROUND: Staphylococcus argenteus and S. schweitzeri, were recently proposed as novel species within S. aureus complex (SAC). S. argenteus has been reported in many countries and can threaten human health. S. schweitzeri has not been associated with human infections, but has been isolated from non-human primates. Questions regarding the evolution of pathogenicity of these two species will remain elusive until an exploratory evolutionary framework is established.

RESULTS: We present genomic comparison analysis among members of SAC based on a pan-genome definition, which included 15 S. argenteus genomes (five newly sequenced), six S. schweitzeri genomes and 30 divergent S. aureus genomes. The three species had divergent core genomes and rare interspecific recombination was observed among the core genes. However, some subtypes of staphylococcal cassette chromosome mec (SCCmec) elements and prophages were present in different species. Of 111 tested virulence genes of S. aureus, 85 and 86 homologous genes were found in S. argenteus and S. schweitzeri, respectively. There was no difference in virulence gene content among the three species, but the sequence of most core virulence genes was divergent. Analysis of the agr locus and the genes in the capsular polysaccharides biosynthetic operon revealed that they both diverged before the speciation of SAC members. Furthermore, the widespread geographic distribution of S. argenteus, sequence type 2250, showed ambiguous biogeographical structure among geographically isolated populations, demonstrating an international spread of this pathogen.

CONCLUSIONS: S. argenteus has spread among several countries, and invasive infections and persistent carriage may be not limited to currently reported regions. S. argenteus probably had undergone a recent host adaption and can cause human infections with a similar pathogenic potential.}, } @article {pmid29049350, year = {2017}, author = {Jamal, SB and Hassan, SS and Tiwari, S and Viana, MV and Benevides, LJ and Ullah, A and Turjanski, AG and Barh, D and Ghosh, P and Costa, DA and Silva, A and Röttger, R and Baumbach, J and Azevedo, VAC}, title = {An integrative in-silico approach for therapeutic target identification in the human pathogen {Corynebacterium} diphtheriae.}, journal = {PloS one}, volume = {12}, number = {10}, pages = {e0186401}, pmid = {29049350}, issn = {1932-6203}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/metabolism ; Bacterial Vaccines/pharmacology ; Computer Simulation ; Corynebacterium diphtheriae/drug effects/genetics/metabolism/*pathogenicity ; Genome, Bacterial ; Humans ; Ligands ; Models, Biological ; Molecular Docking Simulation ; }, abstract = {Corynebacterium diphtheriae (Cd) is a Gram-positive human pathogen responsible for diphtheria infection and once regarded for high mortalities worldwide. The fatality gradually decreased with improved living standards and further alleviated when many immunization programs were introduced. However, numerous drug-resistant strains emerged recently that consequently decreased the efficacy of current therapeutics and vaccines, thereby obliging the scientific community to start investigating new therapeutic targets in pathogenic microorganisms. In this study, our contributions include the prediction of modelome of 13 C. diphtheriae strains, using the MHOLline workflow. A set of 463 conserved proteins were identified by combining the results of pangenomics based core-genome and core-modelome analyses. 
Further, using subtractive proteomics and modelomics approaches for target identification, a set of 23 proteins was selected as essential for the bacteria. Considering human as a host, eight of these proteins (glpX, nusB, rpsH, hisE, smpB, bioB, DIP1084, and DIP0983) were considered as essential and non-host homologs, and have been subjected to virtual screening using four different compound libraries (extracted from the ZINC database, plant-derived natural compounds and Di-terpenoid Iso-steviol derivatives). The proposed ligand molecules showed favorable interactions, lowered energy values and high complementarity with the predicted targets. Our proposed approach expedites the selection of C. diphtheriae putative proteins for broad-spectrum development of novel drugs and vaccines, owing to the fact that some of these targets have already been identified and validated in other organisms.}, } @article {pmid29048510, year = {2017}, author = {Smibert, OC and Wilson, HL and Sohail, A and Narayanasamy, S and Schultz, MB and Ballard, SA and Kwong, JC and de Boer, J and Morrissey, CO and Peleg, AY and Snell, GI and Paraskeva, MA and Jenney, AWJ}, title = {Donor-Derived {Mycoplasma} hominis and an Apparent Cluster of {M.} hominis Cases in Solid Organ Transplant Recipients.}, journal = {Clinical infectious diseases : an official publication of the Infectious Diseases Society of America}, volume = {65}, number = {9}, pages = {1504--1508}, doi = {10.1093/cid/cix601}, pmid = {29048510}, issn = {1537-6591}, mesh = {Adult ; Aged ; Female ; Humans ; Lung Transplantation/*adverse effects ; Male ; Middle Aged ; Mycoplasma Infections/*etiology/*microbiology ; Mycoplasma hominis/*genetics ; Phylogeny ; Tissue Donors ; *Transplant Recipients ; }, abstract = {BACKGROUND: Invasive and disseminated Mycoplasma hominis infections are well recognized but uncommon complications in solid organ transplant recipients. In a single center, a cluster of M. 
hominis infections were identified in lung transplant recipients from the same thoracic intensive care unit (ICU). We sought to determine the source(s) of these infections.

METHODS: Medical records of the donor and infected transplant recipients were reviewed for clinical characteristics. Clinical specimens underwent routine processing with subculture on Mycoplasma-specific Hayflick agar. Mycoplasma hominis identification was confirmed using sequencing of the 16S ribosomal RNA gene. Mycoplasma hominis isolates were subjected to whole-genome sequencing on the Illumina NextSeq platform.

RESULTS: Three lung transplant recipients presented with invasive M. hominis infections at multiple sites characterized by purulent infections without organisms detected by Gram staining. Each patient had a separate donor; however, pretransplant bronchoalveolar lavage fluid was only available from the donor for patient 1, which subsequently grew M. hominis. Phylo- and pangenomic analyses indicated that the isolates from the donor and the corresponding recipient (patient 1) were closely related and formed a distinct single clade. In contrast, isolates from patients 2 and 3 were unrelated and divergent from one another.

CONCLUSIONS: Mycoplasma hominis should be considered a cause of donor-derived infection. Genomic data suggest donor-to-recipient transmission of M. hominis. Additional patients co-located in the ICU were found to have genetically unrelated M. hominis isolates, excluding patient-to-patient transmission.}, } @article {pmid29047329, year = {2017}, author = {Guo, X and Li, S and Zhang, J and Wu, F and Li, X and Wu, D and Zhang, M and Ou, Z and Jie, Z and Yan, Q and Li, P and Yi, J and Peng, Y}, title = {Genome sequencing of 39 {Akkermansia} muciniphila isolates reveals its population structure, genomic and functional diverisity, and global distribution in mammalian gut microbiotas.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {800}, pmid = {29047329}, issn = {1471-2164}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/genetics ; Evolution, Molecular ; Gastrointestinal Microbiome/*genetics ; Humans ; Mammals/*microbiology ; Mice ; Molecular Sequence Annotation ; Verrucomicrobia/drug effects/*genetics/*physiology ; *Whole Genome Sequencing ; }, abstract = {BACKGROUND: Akkermansia muciniphila is one of the most dominant bacteria that resides on the mucus layer of intestinal tract and plays key role in human health, however, little is known about its genomic content.

RESULTS: Herein, we for the first time characterized the genomic architecture of A. muciniphila based on whole-genome sequencing, assembling, and annotating of 39 isolates derived from human and mouse feces. We revealed a flexible open pangenome of A. muciniphila currently consisting of 5644 unique proteins. Phylogenetic analysis identified three species-level A. muciniphila phylogroups exhibiting distinct metabolic and functional features. Based on the comprehensive genome catalogue, we reconstructed 106 newly A. muciniphila metagenome assembled genomes (MAGs) from available metagenomic datasets of human, mouse and pig gut microbiomes, revealing a transcontinental distribution of A. muciniphila phylogroups across mammalian gut microbiotas. Accurate quantitative analysis of A. muciniphila phylogroups in human subjects further demonstrated its strong correlation with body mass index and anti-diabetic drug usage. Furthermore, we found that, during their mammalian gut evolution history, A. muciniphila acquired extra genes, especially antibiotic resistance genes, from symbiotic microbes via recent lateral gene transfer.

CONCLUSIONS: The genome repertoire of A. muciniphila provided insights into population structure, evolutionary and functional specificity of this significant bacterium.}, } @article {pmid29020009, year = {2017}, author = {Meyer, KA and Davis, TW and Watson, SB and Denef, VJ and Berry, MA and Dick, GJ}, title = {Genome sequences of lower {Great Lakes} {Microcystis} sp. reveal strain-specific genes that are present and expressed in western {Lake Erie} blooms.}, journal = {PloS one}, volume = {12}, number = {10}, pages = {e0183859}, pmid = {29020009}, issn = {1932-6203}, mesh = {Base Sequence ; CRISPR-Cas Systems/genetics ; *Eutrophication ; *Gene Expression Regulation, Bacterial ; *Genes, Bacterial ; Great Lakes Region ; Metagenome ; Microcystis/*genetics/isolation & purification ; Phylogeny ; Species Specificity ; }, abstract = {Blooms of the potentially toxic cyanobacterium Microcystis are increasing worldwide. In the Laurentian Great Lakes they pose major socioeconomic, ecological, and human health threats, particularly in western Lake Erie. However, the interpretation of "omics" data is constrained by the highly variable genome of Microcystis and the small number of reference genome sequences from strains isolated from the Great Lakes. To address this, we sequenced two Microcystis isolates from Lake Erie (Microcystis aeruginosa LE3 and M. wesenbergii LE013-01) and one from upstream Lake St. Clair (M. cf aeruginosa LSC13-02), and compared these data to the genomes of seventeen Microcystis spp. from across the globe as well as one metagenome and seven metatranscriptomes from a 2014 Lake Erie Microcystis bloom. For the publically available strains analyzed, the core genome is ~1900 genes, representing ~11% of total genes in the pan-genome and ~45% of each strain's genome. The flexible genome content was related to Microcystis subclades defined by phylogenetic analysis of both housekeeping genes and total core genes. 
To our knowledge this is the first evidence that the flexible genome is linked to the core genome of the Microcystis species complex. The majority of strain-specific genes were present and expressed in bloom communities in Lake Erie. Roughly 8% of these genes from the lower Great Lakes are involved in genome plasticity (rapid gain, loss, or rearrangement of genes) and resistance to foreign genetic elements (such as CRISPR-Cas systems). Intriguingly, strain-specific genes from Microcystis cultured from around the world were also present and expressed in the Lake Erie blooms, suggesting that the Microcystis pangenome is truly global. The presence and expression of flexible genes, including strain-specific genes, suggests that strain-level genomic diversity may be important in maintaining Microcystis abundance during bloom events.}, } @article {pmid29017450, year = {2017}, author = {Zhou, G and Peng, H and Wang, YS and Huang, XM and Xie, XB and Shi, QS}, title = {Complete genome sequence of {Citrobacter} werkmanii strain {BF-6} isolated from industrial putrefaction.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {765}, pmid = {29017450}, issn = {1471-2164}, mesh = {Biofilms ; Citrobacter/*genetics/physiology ; Genome, Bacterial/genetics ; *Genomics ; *Industry ; }, abstract = {BACKGROUND: In our previous study, Citrobacter werkmanii BF-6 was isolated from an industrial spoilage sample and demonstrated an excellent ability to form biofilms, which could be affected by various environmental factors. However, the genome sequence of this organism has not been reported so far.

RESULTS: We report the complete genome sequence of C. werkmanii BF-6 together with the description of the genome features and its annotation. The size of the complete chromosome is 4,929,789 bp with an average coverage of 137×. The chromosome exhibits an average G + C content of 52.0%, and encodes 4570 protein coding genes, 84 tRNA genes, 25 rRNA operons, 3 microsatellite sequences and 34 minisatellite sequences. A previously unknown circular plasmid designated as pCW001 was also found with a length of 212,549 bp and a G + C content of 48.2%. 73.5%, 75.6% and 92.6% of the protein coding genes could be assigned to GO Ontology, KEGG Pathway, and COG (Clusters of Orthologous Groups) categories respectively. C. werkmanii BF-6 and C. werkmanii NRBC 105721 exhibited the closest evolutionary relationships based on 16S ribosomal RNA and core-pan genome assay. Furthermore, C. werkmanii BF-6 exhibits typical bacterial biofilm formation and development. In the RT-PCR experiments, we found that a great number of biofilm related genes, such as bsmA, bssR, bssS, hmsP, tabA, csgA, csgB, csgC, csgD, csgE, and csgG, were involved in C. werkmanii BF-6 biofilm formation.

CONCLUSIONS: This is the first complete genome of C. werkmanii. Our work highlights the potential genetic mechanisms involved in biofilm formation and paves a way for further application of C. werkmanii in biofilms research.}, } @article {pmid28977764, year = {2017}, author = {Bergsveinson, J and Ziola, B}, title = {Comparative genomic and plasmid analysis of beer-spoiling and non-beer-spoiling {Lactobacillus} brevis isolates.}, journal = {Canadian journal of microbiology}, volume = {63}, number = {12}, pages = {970--983}, doi = {10.1139/cjm-2017-0405}, pmid = {28977764}, issn = {1480-3275}, mesh = {Beer/microbiology ; *Food Microbiology ; Genetic Variation ; Genome, Bacterial/*genetics ; Levilactobacillus brevis/*genetics/isolation & purification ; Plasmids/*genetics ; }, abstract = {Beer-spoilage-related lactic acid bacteria (BSR LAB) belong to multiple genera and species; however, beer-spoilage capacity is isolate-specific and partially acquired via horizontal gene transfer within the brewing environment. Thus, the extent to which genus-, species-, or environment- (i.e., brewery-) level genetic variability influences beer-spoilage phenotype is unknown. Publicly available Lactobacillus brevis genomes were analyzed via BlAst Diagnostic Gene findEr (BADGE) for BSR genes and assessed for pangenomic relationships. Also analyzed were functional coding capacities of plasmids of LAB inhabiting extreme niche environments. Considerable genetic variation was observed in L. brevis isolated from clinical samples, whereas 16 candidate genes distinguish BSR and non-BSR L. brevis genomes. These genes are related to nutrient scavenging of gluconate or pentoses, mannose, and metabolism of pectin. BSR L. brevis isolates also have higher average nucleotide identity and stronger pangenome association with one another, though isolation source (i.e., specific brewery) also appears to influence the plasmid coding capacity of BSR LAB. 
Finally, it is shown that niche-specific adaptation and phenotype are plasmid-encoded for both BSR and non-BSR LAB. The ultimate combination of plasmid-encoded genes dictates the ability of L. brevis to survive in the most extreme beer environment, namely, gassed (i.e., pressurized) beer.}, } @article {pmid28970823, year = {2017}, author = {Benevides, L and Burman, S and Martin, R and Robert, V and Thomas, M and Miquel, S and Chain, F and Sokol, H and Bermudez-Humaran, LG and Morrison, M and Langella, P and Azevedo, VA and Chatel, JM and Soares, S}, title = {New Insights into the Diversity of the Genus {Faecalibacterium}.}, journal = {Frontiers in microbiology}, volume = {8}, pages = {1790}, pmid = {28970823}, issn = {1664-302X}, abstract = {Faecalibacterium prausnitzii is a commensal bacterium, ubiquitous in the gastrointestinal tracts of animals and humans. This species is a functionally important member of the microbiota and studies suggest it has an impact on the physiology and health of the host. F. prausnitzii is the only identified species in the genus Faecalibacterium, but a recent study clustered strains of this species in two different phylogroups. Here, we propose the existence of distinct species in this genus through the use of comparative genomics. Briefly, we performed analyses of 16S rRNA gene phylogeny, phylogenomics, whole genome Multi-Locus Sequence Typing (wgMLST), Average Nucleotide Identity (ANI), gene synteny, and pangenome to better elucidate the phylogenetic relationships among strains of Faecalibacterium. For this, we used 12 newly sequenced, assembled, and curated genomes of F. prausnitzii, which were isolated from feces of healthy volunteers from France and Australia, and combined these with published data from 5 strains downloaded from public databases. 
The phylogenetic analysis of the 16S rRNA sequences, together with the wgMLST profiles and a phylogenomic tree based on comparisons of genome similarity, all supported the clustering of Faecalibacterium strains in different genospecies. Additionally, the global analysis of gene synteny among all strains showed a highly fragmented profile, whereas the intra-cluster analyses revealed larger and more conserved collinear blocks. Finally, ANI analysis substantiated the presence of three distinct clusters-A, B, and C-composed of five, four, and four strains, respectively. The pangenome analysis of each cluster corroborated the classification of these clusters into three distinct species, each containing less variability than that found within the global pangenome of all strains. Here, we propose that comparison of pangenome subsets and their associated α values may be used as an alternative approach, together with ANI, in the in silico classification of new species. Altogether, our results provide evidence not only for the reconsideration of the phylogenetic and genomic relatedness among strains currently assigned to F. 
prausnitzii, but also the need for lineage (strain-based) differentiation of this taxon to better define how specific members might be associated with positive or negative host interactions.}, } @article {pmid28968784, year = {2019}, author = {Médigue, C and Calteau, A and Cruveiller, S and Gachet, M and Gautreau, G and Josso, A and Lajus, A and Langlois, J and Pereira, H and Planel, R and Roche, D and Rollin, J and Rouy, Z and Vallenet, D}, title = {{MicroScope}-an integrated resource for community expertise of gene functions and comparative analysis of microbial genomic and metabolic data.}, journal = {Briefings in bioinformatics}, volume = {20}, number = {4}, pages = {1071--1084}, pmid = {28968784}, issn = {1477-4054}, mesh = {Computational Biology ; Computer Graphics ; Database Management Systems ; Databases, Chemical ; *Genome, Microbial ; Genomics/*methods/statistics & numerical data ; Internet ; Metabolic Networks and Pathways/genetics ; Microbiological Phenomena ; Molecular Sequence Annotation/*methods/statistics & numerical data ; *Software ; User-Computer Interface ; }, abstract = {The overwhelming list of new bacterial genomes becoming available on a daily basis makes accurate genome annotation an essential step that ultimately determines the relevance of thousands of genomes stored in public databanks. The MicroScope platform (http://www.genoscope.cns.fr/agc/microscope) is an integrative resource that supports systematic and efficient revision of microbial genome annotation, data management and comparative analysis. Starting from the results of our syntactic, functional and relational annotation pipelines, MicroScope provides an integrated environment for the expert annotation and comparative analysis of prokaryotic genomes. It combines tools and graphical interfaces to analyze genomes and to perform the manual curation of gene function in a comparative genomics and metabolic context. 
In this article, we describe the free-of-charge MicroScope services for the annotation and analysis of microbial (meta)genomes, transcriptomic and re-sequencing data. Then, the functionalities of the platform are presented in a way providing practical guidance and help to the nonspecialists in bioinformatics. Newly integrated analysis tools (i.e. prediction of virulence and resistance genes in bacterial genomes) and original method recently developed (the pan-genome graph representation) are also described. Integrated environments such as MicroScope clearly contribute, through the user community, to help maintaining accurate resources.}, } @article {pmid28959245, year = {2017}, author = {De Maayer, P and Aliyu, H and Vikram, S and Blom, J and Duffy, B and Cowan, DA and Smits, THM and Venter, SN and Coutinho, TA}, title = {Phylogenomic, Pan-genomic, Pathogenomic and Evolutionary Genomic Insights into the Agronomically Relevant Enterobacteria {Pantoea} ananatis and {Pantoea} stewartii.}, journal = {Frontiers in microbiology}, volume = {8}, pages = {1755}, pmid = {28959245}, issn = {1664-302X}, abstract = {Pantoea ananatis is ubiquitously found in the environment and causes disease on a wide range of plant hosts. By contrast, its sister species, Pantoea stewartii subsp. stewartii is the host-specific causative agent of the devastating maize disease Stewart's wilt. This pathogen has a restricted lifecycle, overwintering in an insect vector before being introduced into susceptible maize cultivars, causing disease and returning to overwinter in its vector. The other subspecies of P. stewartii subsp. indologenes, has been isolated from different plant hosts and is predicted to proliferate in different environmental niches. Here we have, by the use of comparative genomics and a comprehensive suite of bioinformatic tools, analyzed the genomes of ten P. stewartii and nineteen P. ananatis strains. 
Our phylogenomic analyses have revealed that there are two distinct clades within P. ananatis while far less phylogenetic diversity was observed among the P. stewartii subspecies. Pan-genome analyses revealed a large core genome comprising of 3,571 protein coding sequences is shared among the twenty-nine compared strains. Furthermore, we showed that an extensive accessory genome made up largely by a mobilome of plasmids, integrated prophages, integrative and conjugative elements and insertion elements has resulted in extensive diversification of P. stewartii and P. ananatis. While these organisms share many pathogenicity determinants, our comparative genomic analyses show that they differ in terms of the secretion systems they encode. The genomic differences identified in this study have allowed us to postulate on the divergent evolutionary histories of the analyzed P. ananatis and P. stewartii strains and on the molecular basis underlying their ecological success and host range.}, } @article {pmid28957508, year = {2017}, author = {Déraspe, M and Raymond, F and Boisvert, S and Culley, A and Roy, PH and Laviolette, F and Corbeil, J}, title = {Phenetic Comparison of Prokaryotic Genomes Using k-mers.}, journal = {Molecular biology and evolution}, volume = {34}, number = {10}, pages = {2716--2729}, pmid = {28957508}, issn = {1537-1719}, mesh = {Bacteria/genetics ; Biological Evolution ; Cluster Analysis ; Computational Biology/*methods ; Computer Simulation ; Evolution, Molecular ; Genome, Bacterial/*genetics ; Genomics/methods ; Metagenomics ; Phylogeny ; Prokaryotic Cells ; Sequence Analysis, DNA/*methods ; Software ; }, abstract = {Bacterial genomics studies are getting more extensive and complex, requiring new ways to envision analyses. Using the Ray Surveyor software, we demonstrate that comparison of genomes based on their k-mer content allows reconstruction of phenetic trees without the need of prior data curation, such as core genome alignment of a species. 
We validated the methodology using simulated genomes and previously published phylogenomic studies of Streptococcus pneumoniae and Pseudomonas aeruginosa. We also investigated the relationship of specific genetic determinants with bacterial population structures. By comparing clusters from the complete genomic content of a genome population with clusters from specific functional categories of genes, we can determine how the population structures are correlated. Indeed, the strain clustering based on a subset of k-mers allows determination of its similarity with the whole genome clusters. We also applied this methodology on 42 species of bacteria to determine the correlational significance of five important bacterial genomic characteristics. For example, intrinsic resistance is more important in P. aeruginosa than in S. pneumoniae, and the former has increased correlation of its population structure with antibiotic resistance genes. The global view of the pangenome of bacteria also demonstrated the taxa-dependent interaction of population structure with antibiotic resistance, bacteriophage, plasmid, and mobile element k-mer data sets.}, } @article {pmid28957464, year = {2017}, author = {Yano, H and Iwamoto, T and Nishiuchi, Y and Nakajima, C and Starkova, DA and Mokrousov, I and Narvskaya, O and Yoshida, S and Arikawa, K and Nakanishi, N and Osaki, K and Nakagawa, I and Ato, M and Suzuki, Y and Maruyama, F}, title = {Population Structure and Local Adaptation of MAC Lung Disease Agent Mycobacterium avium subsp. 
hominissuis.}, journal = {Genome biology and evolution}, volume = {9}, number = {9}, pages = {2403-2417}, pmid = {28957464}, issn = {1759-6653}, mesh = {*Adaptation, Physiological ; Alleles ; Animals ; *Evolution, Molecular ; Homologous Recombination ; Humans ; Lung/microbiology ; Mycobacterium Infections/microbiology ; Mycobacterium avium/*genetics/isolation & purification ; Operon ; Polymorphism, Genetic ; Swine ; Trehalose/genetics/metabolism ; }, abstract = {Mycobacterium avium subsp. hominissuis (MAH) is one of the most common nontuberculous mycobacterial species responsible for chronic lung disease in humans. Despite increasing worldwide incidence, little is known about the genetic mechanisms behind the population evolution of MAH. To elucidate the local adaptation mechanisms of MAH, we assessed genetic population structure, the mutual homologous recombination, and gene content for 36 global MAH isolates, including 12 Japanese isolates sequenced in the present study. We identified five major MAH lineages and found that extensive mutual homologous recombination occurs among them. Two lineages (MahEastAsia1 and MahEastAsia2) were predominant in the Japanese isolates. We identified alleles unique to these two East Asian lineages in the loci responsible for trehalose biosynthesis (treS and mak) and in one mammalian cell entry operon, which presumably originated from as yet undiscovered mycobacterial lineages. Several genes and alleles unique to East Asian strains were located in the fragments introduced via recombination between East Asian lineages, suggesting implication of recombination in local adaptation. 
These patterns of MAH genomes are consistent with the signature of distribution conjugative transfer, a mode of sexual reproduction reported for other mycobacterial species.}, } @article {pmid28945251, year = {2017}, author = {Eggertsson, HP and Jonsson, H and Kristmundsdottir, S and Hjartarson, E and Kehr, B and Masson, G and Zink, F and Hjorleifsson, KE and Jonasdottir, A and Jonasdottir, A and Jonsdottir, I and Gudbjartsson, DF and Melsted, P and Stefansson, K and Halldorsson, BV}, title = {Graphtyper enables population-scale genotyping using pangenome graphs.}, journal = {Nature genetics}, volume = {49}, number = {11}, pages = {1654-1660}, pmid = {28945251}, issn = {1546-1718}, mesh = {*Algorithms ; Alleles ; Base Sequence ; Computer Graphics ; *Genome, Human ; Genotyping Techniques/*instrumentation ; HLA Antigens/genetics ; Haplotypes ; High-Throughput Nucleotide Sequencing ; Humans ; *Polymorphism, Single Nucleotide ; Sequence Alignment ; Sequence Analysis, DNA/methods/*statistics & numerical data ; Software ; }, abstract = {A fundamental requirement for genetic studies is an accurate determination of sequence variation. While human genome sequence diversity is increasingly well characterized, there is a need for efficient ways to use this knowledge in sequence analysis. Here we present Graphtyper, a publicly available novel algorithm and software for discovering and genotyping sequence variants. Graphtyper realigns short-read sequence data to a pangenome, a variation-aware graph structure that encodes sequence variation within a population by representing possible haplotypes as graph paths. Our results show that Graphtyper is fast, highly scalable, and provides sensitive and accurate genotype calls. Graphtyper genotyped 89.4 million sequence variants in the whole genomes of 28,075 Icelanders using less than 100,000 CPU days, including detailed genotyping of six human leukocyte antigen (HLA) genes. 
We show that Graphtyper is a valuable tool in characterizing sequence variation in both small and population-scale sequencing studies.}, } @article {pmid28942844, year = {2017}, author = {Lee, JY and Han, GG and Kim, EB and Choi, YJ}, title = {Comparative genomics of Lactobacillus salivarius strains focusing on their host adaptation.}, journal = {Microbiological research}, volume = {205}, number = {}, pages = {48-58}, doi = {10.1016/j.micres.2017.08.008}, pmid = {28942844}, issn = {1618-0623}, mesh = {Acclimatization/*genetics ; Animals ; Bacterial Outer Membrane Proteins/genetics ; Base Sequence ; Carbohydrate Metabolism ; Cell Wall/chemistry/genetics ; Chickens ; DNA, Bacterial/genetics ; Drug Resistance, Bacterial/genetics ; Feces/microbiology ; Gastrointestinal Tract/microbiology ; Gene Order ; Gene Transfer, Horizontal ; Genes, Bacterial/genetics/physiology ; Genetic Structures ; *Genome, Bacterial ; Genomics ; Humans ; Ligilactobacillus salivarius/classification/*genetics/isolation & purification/*physiology ; Phylogeny ; Probiotics ; Species Specificity ; Swine ; Symbiosis/genetics/physiology ; }, abstract = {Lactobacillus salivarius is an important member of the animal gut microflora and is a promising probiotic bacterium. However, there is a lack of research on the genomic diversity of L. salivarius species. In this study, we generated 21 L. salivarius draft genomes, and investigated the pan-genome of L. salivarius strains isolated from humans, pigs and chickens using all available genomes, focusing on host adaptation. Phylogenetic clustering showed a distinct categorization of L. salivarius strains depending on their hosts. In the pan-genome, 15 host-specific genes and 16 dual-host-shared genes that only one host isolate did not possess were identified. 
Comparison of 56 extracellular protein encoding genes and 124 orthologs related to exopolysaccharide production in the pan-genome revealed that extracellular components of the assayed bacteria have been globally acquired and mutated under the selection pressure for host adaptation. We also found the three host-specific genes that are responsible for energy production in L. salivarius. These results showed that L. salivarius has evolved to adapt to host habitats in two ways, by gaining the abilities for niche adhesion and efficient utilization of nutrients. Our study offers a deeper understanding of the probiotic species L. salivarius, and provides a basis for future studies on L. salivarius and other mutualistic bacteria.}, } @article {pmid28924022, year = {2018}, author = {Noto, JM and Chopra, A and Loh, JT and Romero-Gallo, J and Piazuelo, MB and Watson, M and Leary, S and Beckett, AC and Wilson, KT and Cover, TL and Mallal, S and Israel, DA and Peek, RM}, title = {Pan-genomic analyses identify key Helicobacter pylori pathogenic loci modified by carcinogenic host microenvironments.}, journal = {Gut}, volume = {67}, number = {10}, pages = {1793-1804}, pmid = {28924022}, issn = {1468-3288}, support = {R01 DK053620/DK/NIDDK NIH HHS/United States ; P01 CA028842/CA/NCI NIH HHS/United States ; R01 CA077955/CA/NCI NIH HHS/United States ; R01 CA190612/CA/NCI NIH HHS/United States ; P30 DK058404/DK/NIDDK NIH HHS/United States ; P01 CA116087/CA/NCI NIH HHS/United States ; I01 BX001453/BX/BLRD VA/United States ; R01 AI118932/AI/NIAID NIH HHS/United States ; R01 AI039657/AI/NIAID NIH HHS/United States ; R01 DK058587/DK/NIDDK NIH HHS/United States ; }, mesh = {Bacterial Proteins/genetics ; *Carcinogenesis ; *Helicobacter Infections/pathology/physiopathology ; *Helicobacter pylori/genetics/pathogenicity ; Humans ; In Vitro Techniques/methods ; Polymorphism, Single Nucleotide/physiology ; *Stomach Neoplasms/microbiology/pathology/physiopathology ; }, abstract = {OBJECTIVE: 
Helicobacter pylori is the strongest risk factor for gastric cancer; however, the majority of infected individuals do not develop disease. Pathological outcomes are mediated by complex interactions among bacterial, host and environmental constituents, and two dietary factors linked with gastric cancer risk are iron deficiency and high salt. We hypothesised that prolonged adaptation of H. pylori to in vivo carcinogenic microenvironments results in genetic modification important for disease.

DESIGN: Whole genome sequencing of genetically related H. pylori strains that differ in virulence and targeted H. pylori sequencing following prolonged exposure of bacteria to in vitro carcinogenic conditions were performed.

RESULTS: A total of 180 unique single nucleotide polymorphisms (SNPs) were identified among the collective genomes when compared with a reference H. pylori genome. Importantly, common SNPs were identified in isolates harvested from iron-depleted and high salt carcinogenic microenvironments, including an SNP within fur (FurR88H). To investigate the direct role of low iron and/or high salt, H. pylori was continuously cultured in vitro under low iron or high salt conditions to assess fur genetic variation. Exposure to low iron or high salt selected for the FurR88H variant after only 5 days. To extend these results, fur was sequenced in 339 clinical H. pylori strains. Among the isolates examined, 17% (40/232) of strains isolated from patients with premalignant lesions harboured the FurR88H variant, compared with only 6% (6/107) of strains from patients with non-atrophic gastritis alone (p=0.0034).

CONCLUSION: These results indicate that specific genetic variation arises within H. pylori strains during in vivo adaptation to conditions conducive for gastric carcinogenesis.}, } @article {pmid28922838, year = {2017}, author = {Jeukens, J and Kukavica-Ibrulj, I and Emond-Rheault, JG and Freschi, L and Levesque, RC}, title = {Comparative genomics of a drug-resistant Pseudomonas aeruginosa panel and the challenges of antimicrobial resistance prediction from genomes.}, journal = {FEMS microbiology letters}, volume = {364}, number = {18}, pages = {}, doi = {10.1093/femsle/fnx161}, pmid = {28922838}, issn = {1574-6968}, mesh = {Drug Resistance, Multiple, Bacterial/*genetics ; Genes, MDR ; Genomics/*methods ; Genotype ; Humans ; Pseudomonas Infections/*microbiology ; Pseudomonas aeruginosa/*genetics ; Whole Genome Sequencing/methods ; }, abstract = {Antimicrobial resistance (AMR) is now recognized as a global threat to human health. The accessibility of microbial whole-genome sequencing offers an invaluable opportunity for resistance surveillance via the resistome, i.e. the genes and mutations underlying AMR. Unfortunately, AMR prediction from genomic data remains extremely challenging, especially for species with a large pan-genome. One such organism, for which multidrug-resistant (MDR) isolates are frequently encountered in the clinic, is Pseudomonas aeruginosa. This study focuses on a commercially available panel of seven MDR P. aeruginosa strains. The main goals were to sequence and compare these strains' genomes, attempt to predict AMR from whole genomes using two different methods and determine whether this panel could be an informative complement to the international P. aeruginosa reference panel. As expected, the results highlight the complexity of associating genotype and AMR phenotype in P. aeruginosa, mainly due to the intricate regulation of resistance mechanisms. 
Our results also urge caution in the interpretation of predicted resistomes regarding the occurrence of gene identity discrepancies between strains. We envision that, in addition to accounting for the genomic diversity of P. aeruginosa, future development of predictive tools will need to incorporate a transcriptomic, proteomic and/or metabolomic component.}, } @article {pmid28912444, year = {2017}, author = {Chun, BH and Kim, KH and Jeon, HH and Lee, SH and Jeon, CO}, title = {Pan-genomic and transcriptomic analyses of Leuconostoc mesenteroides provide insights into its genomic and metabolic features and roles in kimchi fermentation.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {11504}, pmid = {28912444}, issn = {2045-2322}, mesh = {Fermentation ; *Food Microbiology ; *Gene Expression Profiling ; Genetic Variation ; Genotype ; Leuconostoc mesenteroides/classification/*genetics/*metabolism ; Metabolic Networks and Pathways/*genetics ; Phylogeny ; }, abstract = {The genomic and metabolic features of Leuconostoc (Leu) mesenteroides were investigated through pan-genomic and transcriptomic analyses. Relatedness analysis of 17 Leu. mesenteroides strains available in GenBank based on 16S rRNA gene sequence, average nucleotide identity, in silico DNA-DNA hybridization, molecular phenotype, and core-genome indicated that Leu. mesenteroides has been separated into different phylogenetic lineages. Pan-genome of Leu. mesenteroides strains, consisting of 999 genes in core-genome, 1,432 genes in accessory-genome, and 754 genes in unique genome, and their COG and KEGG analyses showed that Leu. mesenteroides harbors strain-specifically diverse metabolisms, probably representing high evolutionary genome changes. The reconstruction of fermentative metabolic pathways for Leu. mesenteroides strains showed that Leu. 
mesenteroides produces various metabolites such as lactate, ethanol, acetate, CO2, mannitol, diacetyl, acetoin, and 2,3-butanediol through an obligate heterolactic fermentation from various carbohydrates. Fermentative metabolic features of Leu. mesenteroides during kimchi fermentation were investigated through transcriptional analyses for the KEGG pathways and reconstructed metabolic pathways of Leu. mesenteroides using kimchi metatranscriptomic data. This was the first study to investigate the genomic and metabolic features of Leu. mesenteroides through pan-genomic and metatranscriptomic analyses, and may provide insights into its genomic and metabolic features and a better understanding of kimchi fermentations by Leu. mesenteroides.}, } @article {pmid28911112, year = {2017}, author = {Cury, J and Touchon, M and Rocha, EPC}, title = {Integrative and conjugative elements and their hosts: composition, distribution and organization.}, journal = {Nucleic acids research}, volume = {45}, number = {15}, pages = {8943-8956}, pmid = {28911112}, issn = {1362-4962}, support = {281605/ERC_/European Research Council/International ; }, mesh = {Actinobacteria/classification/genetics/metabolism ; Archaea/classification/genetics/metabolism ; *Conjugation, Genetic ; DNA Replication ; *DNA Transposable Elements ; DNA, Bacterial/*genetics/metabolism ; Evolution, Molecular ; Firmicutes/classification/genetics/metabolism ; *Gene Transfer, Horizontal ; Genes, Bacterial ; Integrases/genetics/metabolism ; Lysogeny ; *Phylogeny ; Plasmids/*chemistry/metabolism ; Proteobacteria/classification/genetics/metabolism ; Recombinases/genetics/metabolism ; }, abstract = {Conjugation of single-stranded DNA drives horizontal gene transfer between bacteria and was widely studied in conjugative plasmids. The organization and function of integrative and conjugative elements (ICE), even if they are more abundant, was only studied in a few model systems. 
Comparative genomics of ICE has been precluded by the difficulty in finding and delimiting these elements. Here, we present the results of a method that circumvents these problems by requiring only the identification of the conjugation genes and the species' pan-genome. We delimited 200 ICEs and this allowed the first large-scale characterization of these elements. We quantified the presence in ICEs of a wide set of functions associated with the biology of mobile genetic elements, including some that are typically associated with plasmids, such as partition and replication. Protein sequence similarity networks and phylogenetic analyses revealed that ICEs are structured in functional modules. Integrases and conjugation systems have different evolutionary histories, even if the gene repertoires of ICEs can be grouped in function of conjugation types. Our characterization of the composition and organization of ICEs paves the way for future functional and evolutionary analyses of their cargo genes, composed of a majority of unknown function genes.}, } @article {pmid28894437, year = {2017}, author = {Esposito, A and Pompilio, A and Bettua, C and Crocetta, V and Giacobazzi, E and Fiscarelli, E and Jousson, O and Di Bonaventura, G}, title = {Evolution of Stenotrophomonas maltophilia in Cystic Fibrosis Lung over Chronic Infection: A Genomic and Phenotypic Population Study.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1590}, pmid = {28894437}, issn = {1664-302X}, abstract = {Stenotrophomonas maltophilia has been recognized as an emerging multi-drug resistant opportunistic pathogen in cystic fibrosis (CF) patients. We report a comparative genomic and phenotypic analysis of 91 S. maltophilia strains from 10 CF patients over a 12-year period. Draft genome analyses included in silico Multi-Locus Sequence Typing (MLST), Single-Nucleotide Polymorphisms (SNPs), and pangenome characterization. 
Growth rate, biofilm formation, motility, mutation frequency, in vivo virulence, and in vitro antibiotic susceptibility were determined and compared with population structure over time. The population consisted of 20 different sequence types (STs), 11 of which are new ones. Pangenome and SNPs data showed that this population is composed of three major phylogenetic lineages. All patients were colonized by multiple STs, although most of them were found in a single patient and showed persistence over years. Only few phenotypes showed some correlation with population phylogenetic structure. Our results show that S. maltophilia adaptation to CF lung is associated with consistent genotypic and phenotypic heterogeneity. Stenotrophomonas maltophilia infecting multiple hosts likely experiences different selection pressures depending on the host environment. The poor genotype-phenotype correlation suggests the existence of complex regulatory mechanisms that need to be explored in order to better design therapeutic strategies.}, } @article {pmid28890711, year = {2017}, author = {Bolotin, E and Hershberg, R}, title = {Horizontally Acquired Genes Are Often Shared between Closely Related Bacterial Species.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1536}, pmid = {28890711}, issn = {1664-302X}, abstract = {Horizontal gene transfer (HGT) serves as an important source of innovation for bacterial species. We used a pangenome-based approach to identify genes that were horizontally acquired by four closely related bacterial species, belonging to the Enterobacteriaceae family. This enabled us to examine the extent to which such closely related species tend to share horizontally acquired genes. We find that a high percent of horizontally acquired genes are shared among these closely related species. 
Furthermore, we demonstrate that the extent of sharing of horizontally acquired genes among these four closely related species is predictive of the extent to which these genes will be found in additional bacterial species. Finally, we show that acquired genes shared by more species tend to be better optimized for expression within the genomes of their new hosts. Combined, our results demonstrate the existence of a large pool of frequently horizontally acquired genes that have distinct characteristics from horizontally acquired genes that are less frequently shared between species.}, } @article {pmid28887527, year = {2017}, author = {Kim, Y and Koh, I and Young Lim, M and Chung, WH and Rho, M}, title = {Pan-genome analysis of Bacillus for microbiome profiling.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {10984}, pmid = {28887527}, issn = {2045-2322}, mesh = {Bacillus/classification/*genetics ; Evolution, Molecular ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics/methods ; Metagenome ; Metagenomics/methods ; *Microbiota ; Multilocus Sequence Typing ; Phylogeny ; }, abstract = {Recent advances in high-throughput sequencing technology allow for in-depth studies on microbial genomes and their communities. While multiple strains of the same species could display genomic variations with different gene contents in diverse habitats and hosts, the essential functions for a specific species are conserved as core genes that are shared among strains. We have comprehensively analyzed 238 strains of five different Bacillus species to identify the properties of core and strain-specific genes. Core and strain-specific genes in each Bacillus species show significant differences in their functions and genomic signatures. Using the core genes defined in this study, we have precisely identified the Bacillus species that exist in food microbiomes. 
Without resorting to culture-based whole genome sequencing, an unexpectedly large portion of the core genes, 98.22% of core genes in B. amyloliquefaciens and 97.77% of B. subtilis, were reconstructed from the microbiome. We have performed a pan-genome analysis on the core gene data of multiple Bacillus species to investigate the Bacillus species in food microbiome. Our findings provide a comprehensive genetic landscape of the Bacillus species, which is also consistent with previous studies on a limited number of strains and species. Analysis based on comprehensive core genes should thus serve as a powerful profiling tool to better understand major constituents in fermented food microbiomes.}, } @article {pmid28887423, year = {2017}, author = {Schmedes, SE and Woerner, AE and Budowle, B}, title = {Forensic Human Identification Using Skin Microbiomes.}, journal = {Applied and environmental microbiology}, volume = {83}, number = {22}, pages = {}, pmid = {28887423}, issn = {1098-5336}, abstract = {The human microbiome contributes significantly to the genetic content of the human body. Genetic and environmental factors help shape the microbiome, and as such, the microbiome can be unique to an individual. Previous studies have demonstrated the potential to use microbiome profiling for forensic applications; however, a method has yet to identify stable features of skin microbiomes that produce high classification accuracies for samples collected over reasonably long time intervals. A novel approach is described here to classify skin microbiomes to their donors by comparing two feature types: Propionibacterium acnes pangenome presence/absence features and nucleotide diversities of stable clade-specific markers. Supervised learning was used to attribute skin microbiomes from 14 skin body sites from 12 healthy individuals sampled at three time points over a >2.5-year period with accuracies of up to 100% for three body sites. 
Feature selection identified a reduced subset of markers from each body site that are highly individualizing, identifying 187 markers from 12 clades. Classification accuracies were compared in a formal model testing framework, and the results of this analysis indicate that learners trained on nucleotide diversity perform significantly better than those trained on presence/absence encodings. This study used supervised learning to identify individuals with high accuracy and associated stable features from skin microbiomes over a period of up to almost 3 years. These selected features provide a preliminary marker panel for future development of a robust and reproducible method for skin microbiome profiling for forensic human identification. IMPORTANCE: A novel approach is described to attribute skin microbiomes, collected over a period of >2.5 years, to their individual hosts with a high degree of accuracy. Nucleotide diversities of stable clade-specific markers with supervised learning were used to classify skin microbiomes from a particular individual with up to 100% classification accuracy for three body sites. Attribute selection was used to identify 187 genetic markers from 12 clades which provide the greatest differentiation of individual skin microbiomes from 14 skin sites. This study performs skin microbiome profiling from a supervised learning approach and obtains high classification accuracy for samples collected from individuals over a relatively long time period for potential application to forensic human identification.}, } @article {pmid28878862, year = {2017}, author = {Ktari, A and Nouioui, I and Furnholm, T and Swanson, E and Ghodhbane-Gtari, F and Tisa, LS and Gtari, M}, title = {Permanent draft genome sequence of Frankia sp. 
NRRL B-16219 reveals the presence of canonical nod genes, which are highly homologous to those detected in Candidatus Frankia Dg1 genome.}, journal = {Standards in genomic sciences}, volume = {12}, number = {}, pages = {51}, pmid = {28878862}, issn = {1944-3277}, abstract = {Frankia sp. NRRL B-16219 was directly isolated from a soil sample obtained from the rhizosphere of Ceanothus jepsonii growing in the USA. Its host plant range includes members of Elaeagnaceae species. Phylogenetically, strain NRRL B-16219 is closely related to "Frankia discariae" with a 16S rRNA gene similarity of 99.78%. Because of the lack of genetic tools for Frankia, our understanding of the bacterial signals involved during the plant infection process and the development of actinorhizal root nodules is very limited. Since the first three Frankia genomes were sequenced, additional genome sequences covering more diverse strains have helped provide insight into the depth of the pangenome and attempts to identify bacterial signaling molecules like the rhizobial canonical nod genes. The genome sequence of Frankia sp. strain NRRL B-16219 was generated and assembled into 289 contigs containing 8,032,739 bp with 71.7% GC content. Annotation of the genome identified 6211 protein-coding genes, 561 pseudogenes, 1758 hypothetical proteins and 53 RNA genes including 4 rRNA genes. The NRRL B-16219 draft genome contained genes homologous to the rhizobial common nodulation genes clustered in two areas. The first cluster contains nodACIJH genes whereas the second has nodAB and nodH genes in the upstream region. Phylogenetic analysis shows that Frankia nod genes are more deeply rooted than their sister groups from rhizobia. 
PCR-sequencing suggested the widespread occurrence of highly homologous nodA and nodB genes in microsymbionts of field collected Ceanothus americanus.}, } @article {pmid28861061, year = {2017}, author = {Lean, SS and Yeo, CC}, title = {Small, Enigmatic Plasmids of the Nosocomial Pathogen, Acinetobacter baumannii: Good, Bad, Who Knows?}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1547}, pmid = {28861061}, issn = {1664-302X}, abstract = {Acinetobacter baumannii is a Gram-negative nosocomial pathogen that has become a serious healthcare concern within a span of two decades due to its ability to rapidly acquire resistance to all classes of antimicrobial compounds. One of the key features of the A. baumannii genome is an open pan genome with a plethora of plasmids, transposons, integrons, and genomic islands, all of which play important roles in the evolution and success of this clinical pathogen, particularly in the acquisition of multidrug resistance determinants. An interesting genetic feature seen in majority of A. baumannii genomes analyzed is the presence of small plasmids that usually ranged from 2 to 10 kb in size, some of which harbor antibiotic resistance genes and homologs of plasmid mobilization genes. These plasmids are often overlooked when compared to their larger, conjugative counterparts that harbor multiple antibiotic resistance genes and transposable elements. In this mini-review, we will examine our current knowledge of these small A. baumannii plasmids and look into their genetic diversity and phylogenetic relationships. Some of these plasmids, such as the Rep-3 superfamily group and the pRAY-type, which has no recognizable replicase genes, are quite widespread among diverse A. baumannii clinical isolates worldwide, hinting at their usefulness to the lifestyle of this pathogen. 
Other small plasmids especially those from the Rep-1 superfamily are truly enigmatic, encoding only hypothetical proteins of unknown function, leading to the question of whether these small plasmids are "good" or "bad" to their host A. baumannii.}, } @article {pmid28857514, year = {2017}, author = {Zhou, Y and Jiang, T and Hu, S and Wang, M and Ming, D and Chen, S}, title = {Genomic insights of Pannonibacter phragmitetus strain 31801 isolated from a patient with a liver abscess.}, journal = {MicrobiologyOpen}, volume = {6}, number = {6}, pages = {}, pmid = {28857514}, issn = {2045-8827}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Infections/*microbiology ; Bacterial Proteins/genetics/metabolism ; Genome, Bacterial ; Genomics ; Humans ; Liver Abscess/*microbiology ; Phylogeny ; Rhodobacteraceae/classification/drug effects/genetics/*isolation & purification ; Virulence Factors/genetics/metabolism ; }, abstract = {Pannonibacter phragmitetus is a bioremediation reagent for the detoxification of heavy metals and polycyclic aromatic compounds (PAHs) while it rarely infects healthy populations. However, infection by the opportunistic pathogen P. phragmitetus complicates diagnosis and treatments, and poses a serious threat to immunocompromised patients owing to its multidrug resistance. Unfortunately, genome features, antimicrobial resistance, and virulence potentials in P. phragmitetus have not been reported before. A predominant colony (31801) was isolated from a liver abscess patient, indicating that it accounted for the infection. To investigate its infection mechanism(s) in depth, we sequenced this bacterial genome and tested its antimicrobial resistance. Average nucleotide identity (ANI) analysis assigned the bacterium to the species P. phragmitetus (ANI, >95%). Comparative genomics analyses among Pannonibacter spp. 
representing the different living niches were used to describe the Pannonibacter pan-genomes and to examine virulence factors, prophages, CRISPR arrays, and genomic islands. Pannonibacter phragmitetus 31801 consisted of one chromosome and one plasmid, while the plasmid was absent in other Pannonibacter isolates. Pannonibacter phragmitetus 31801 may have a great infection potential because a lot of genes encoding toxins, flagellum formation, iron uptake, and virulence factor secretion systems in its genome. Moreover, the genome has 24 genomic islands and 2 prophages. A combination of antimicrobial susceptibility tests and the detailed antibiotic resistance gene analysis provide useful information about the drug resistance mechanisms and therefore can be used to guide the treatment strategy for the bacterial infection.}, } @article {pmid28855980, year = {2017}, author = {Lesho, E and Snesrud, E and Kwak, Y and Ong, A and Maybank, R and Laguio-Vila, M and Falsey, AR and Hinkle, M}, title = {Pseudomonas Endocarditis with an unstable phenotype: the challenges of isolate characterization and Carbapenem stewardship with a partial review of the literature.}, journal = {Antimicrobial resistance and infection control}, volume = {6}, number = {}, pages = {87}, pmid = {28855980}, issn = {2047-2994}, abstract = {BACKGROUND: Pseudomonas endocarditis is exceedingly rare, especially in patients without predisposing risks. We present such a case that included unexpected switches in antibacterial resistance profiles in two Pseudomonas aeruginosa (PA) strains with the same whole-genome sequence. The case also involved diagnostic and treatment challenges, such as issues with automated testing platforms, choosing the optimal aminoglycoside, minimizing unnecessary carbapenem exposure, and the need for faster, more informative laboratory tests.

CASE PRESENTATION: On hospital day one (HD-1) a cefepime and piperacillin-tazobactam (FEP-TZP)-susceptible P. aeruginosa was isolated from the bloodstream of a 62-year-old man admitted for evaluation of possible endocarditis and treated with gentamicin and cefepime. On HD-2, his antibiotic regimen was changed to tobramycin and cefepime. On HD-11, he underwent aortic valve replacement, and P. aeruginosa was isolated from the explanted valve. Unexpectedly, it was FEP-TZP-resistant, so cefepime was switched to meropenem. On HD-14, in preparation for whole-genome sequencing (WGS), valve and blood isolates were removed from cryo-storage, re-cultured, and simultaneously tested with the same platforms, reagents, and inoculations previously used. Curiously, the valve isolate was now FEP-TZP-susceptible. WGS revealed that both isolates were phylogenetically identical, differing by a single nucleotide in a chemotaxis-encoding gene. They also contained the same resistance genes (blaADC35, aph(3')-II, blaOXA-50, catB7, fosA).

CONCLUSION: Repeated testing on alternate platforms and WGS did not definitively determine the resistance mechanism(s), which in this case, is most likely unstable de-repression of a chromosomal AmpC β-lactamase, porin alterations, or efflux upregulation, with reversion to baseline (non-efflux) transcription. Although sub-culture on specialized media to select for less fit (more resistant) colonies, followed by transcriptome analysis, and multiple sequence alignment, might have revealed the mechanism and better informed the optimal choice of β-lactam, such approaches are neither rapid, nor feasible for hospital laboratories. In this era of escalating drug resistance and dwindling antibiotics, use of the most potent anti-pseudomonals must be balanced with stewardship. Clinicians need access to validated genomic correlates of resistance, and faster, more informative diagnostics. Therefore, we placed these isolates and their sequences in the public domain for inclusion in the Pseudomonas pan-genome and database projects for further countermeasure development.}, } @article {pmid28852435, year = {2017}, author = {Hollensteiner, J and Poehlein, A and Spröer, C and Bunk, B and Sheppard, AE and Rosentstiel, P and Schulenburg, H and Liesegang, H}, title = {Complete Genome sequence of the nematicidal Bacillus thuringiensis MYBT18246.}, journal = {Standards in genomic sciences}, volume = {12}, number = {}, pages = {48}, pmid = {28852435}, issn = {1944-3277}, abstract = {Bacillus thuringiensis is a rod-shaped facultative anaerobic spore forming bacterium of the genus Bacillus. The defining feature of the species is the ability to produce parasporal crystal inclusion bodies, consisting of δ-endotoxins, encoded by cry-genes. Here we present the complete annotated genome sequence of the nematicidal B. thuringiensis strain MYBT18246. The genome comprises one 5,867,749 bp chromosome and 11 plasmids which vary in size from 6330 bp to 150,790 bp. 
The chromosome contains 6092 protein-coding and 150 RNA genes, including 36 rRNA genes. The plasmids encode 997 proteins and 4 t-RNA's. Analysis of the genome revealed a large number of mobile elements involved in genome plasticity including 11 plasmids and 16 chromosomal prophages. Three different nematicidal toxin genes were identified and classified according to the Cry toxin naming committee as cry13Aa2, cry13Ba1, and cry13Ab1. Strikingly, these genes are located on the chromosome in close proximity to three separate prophages. Moreover, four putative toxin genes of different toxin classes were identified on the plasmids p120510 (Vip-like toxin), p120416 (Cry-like toxin) and p109822 (two Bin-like toxins). A comparative genome analysis of B. thuringiensis MYBT18246 with three closely related B. thuringiensis strains enabled determination of the pan-genome of B. thuringiensis MYBT18246, revealing a large number of singletons, mostly represented by phage genes, morons and cryptic genes.}, } @article {pmid28846694, year = {2017}, author = {Skov, L and {Danish Pan Genome Consortium} and Schierup, MH}, title = {Analysis of 62 hybrid assembled human Y chromosomes exposes rapid structural changes and high rates of gene conversion.}, journal = {PLoS genetics}, volume = {13}, number = {8}, pages = {e1006834}, pmid = {28846694}, issn = {1553-7404}, mesh = {Chromosomes, Human, Y/*genetics ; Denmark ; *Evolution, Molecular ; Fathers ; Gene Conversion/genetics ; Heterochromatin/*genetics ; Humans ; INDEL Mutation/*genetics ; Infertility, Male/genetics/pathology ; Inverted Repeat Sequences/genetics ; Male ; Nuclear Family ; Phylogeny ; Polymorphism, Single Nucleotide ; }, abstract = {The human Y-chromosome does not recombine across its male-specific part and is therefore an excellent marker of human migrations. It also plays an important role in male fertility. 
However, its evolution is difficult to fully understand because of repetitive sequences, inverted repeats and the potentially large role of gene conversion. Here we perform an evolutionary analysis of 62 Y-chromosomes of Danish descent sequenced using a wide range of library insert sizes and high coverage, thus allowing large regions of these chromosomes to be well assembled. These include 17 father-son pairs, which we use to validate variation calling. Using a recent method that can integrate variants based on both mapping and de novo assembly, we genotype 10898 SNVs and 2903 indels (max length of 27241 bp) in our sample and show by father-son concordance and experimental validation that the non-recurrent SNP and indel variation on the Y chromosome tree is called very accurately. This includes variation called in a 0.9 Mb centromeric heterochromatic region, which is by far the most variable in the Y chromosome. Among the variation is also longer sequence-stretches not present in the reference genome but shared with the chimpanzee Y chromosome. We analyzed 2.7 Mb of large inverted repeats (palindromes) for variation patterns among the two palindrome arms and identified 603 mutation and 416 gene conversions events. We find clear evidence for GC-biased gene conversion in the palindromes (and a balancing AT mutation bias), but irrespective of this, also a strong bias towards gene conversion towards the ancestral state, suggesting that palindromic gene conversion may alleviate Muller's ratchet. 
Finally, we also find a large number of large-scale gene duplications and deletions in the palindromic regions (at least 24) and find that such events can consist of complex combinations of simultaneous insertions and deletions of long stretches of the Y chromosome.}, } @article {pmid28826610, year = {2017}, author = {Garinet, S and Néou, M and de La Villéon, B and Faillot, S and Sakat, J and Da Fonseca, JP and Jouinot, A and Le Tourneau, C and Kamal, M and Luscap-Rondof, W and Boeva, V and Gaujoux, S and Vidaud, M and Pasmant, E and Letourneur, F and Bertherat, J and Assié, G}, title = {Calling Chromosome Alterations, DNA Methylation Statuses, and Mutations in Tumors by Simple Targeted Next-Generation Sequencing: A Solution for Transferring Integrated Pangenomic Studies into Routine Practice?.}, journal = {The Journal of molecular diagnostics : JMD}, volume = {19}, number = {5}, pages = {776-787}, doi = {10.1016/j.jmoldx.2017.06.005}, pmid = {28826610}, issn = {1943-7811}, mesh = {Alleles ; *Biomarkers, Tumor ; *Chromosome Aberrations ; Computational Biology/methods ; CpG Islands ; DNA Copy Number Variations ; *DNA Methylation ; Diagnostic Tests, Routine/methods ; Gene Frequency ; Genomics/methods ; Genotype ; *High-Throughput Nucleotide Sequencing/methods ; Humans ; *Mutation ; Neoplasms/*diagnosis/*genetics ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; }, abstract = {Pangenomic studies identified distinct molecular classes for many cancers, with major clinical applications. However, routine use requires cost-effective assays. We assessed whether targeted next-generation sequencing (NGS) could call chromosomal alterations and DNA methylation status. A training set of 77 tumors and a validation set of 449 (43 tumor types) were analyzed by targeted NGS and single-nucleotide polymorphism (SNP) arrays. 
Thirty-two tumors were analyzed by NGS after bisulfite conversion, and compared to methylation array or methylation-specific multiplex ligation-dependent probe amplification. Considering allelic ratios, correlation was strong between targeted NGS and SNP arrays (r = 0.88). In contrast, considering DNA copy number, for variations of one DNA copy, correlation was weaker between read counts and SNP array (r = 0.49). Thus, we generated TARGOMICs, optimized for detecting chromosome alterations by combining allelic ratios and read counts generated by targeted NGS. Sensitivity for calling normal, lost, and gained chromosomes was 89%, 72%, and 31%, respectively. Specificity was 81%, 93%, and 98%, respectively. These results were confirmed in the validation set. Finally, TARGOMICs could efficiently align and compute proportions of methylated cytosines from bisulfite-converted DNA from targeted NGS. In conclusion, beyond calling mutations, targeted NGS efficiently calls chromosome alterations and methylation status in tumors. A single run and minor design/protocol adaptations are sufficient. Optimizing targeted NGS should expand translation of genomics to clinical routine.}, } @article {pmid28824598, year = {2017}, author = {Tsai, MH and Liu, YY and Soo, VW}, title = {PathoBacTyper: A Web Server for Pathogenic Bacteria Identification and Molecular Genotyping.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1474}, pmid = {28824598}, issn = {1664-302X}, abstract = {With the decline in the cost of whole-genome sequencing because of the introduction of next-generation sequencing (NGS) techniques, many public health and clinical laboratories have started to use bacterial whole genomes for epidemiological surveillance and clinical investigation. For epidemiological and clinical purposes in this "NGS era," whole-genome-scale single nucleotide polymorphism (wgSNP) analysis for genotyping is considered suitable. 
In this paper, we present an online service, PathoBacTyper (http://halst.nhri.org.tw/PathoBacTyper/), for pathogenic bacteria identification and genotyping based on wgSNP analysis. More than 400 pathogenic bacteria can be identified and genotyped through this service. Four data sets containing 59 Salmonella Heidelberg isolates from three outbreaks with the same pulsed-field gel electrophoresis pattern, 34 Salmonella Typhimurium isolates from six outbreaks, 103 isolates of hospital-associated vancomycin-resistant Enterococcus faecium and 15 Legionella pneumophila isolates from clinical and environmental samples in Israel were used for demonstrating the operation and testing the performance of the PathoBacTyper service. The test results reveal the applicability of this service for epidemiological typing and clinical investigation.}, } @article {pmid28824571, year = {2017}, author = {Belbahri, L and Chenari Bouket, A and Rekik, I and Alenezi, FN and Vallat, A and Luptakova, L and Petrovova, E and Oszako, T and Cherrad, S and Vacher, S and Rateb, ME}, title = {Comparative Genomics of Bacillus amyloliquefaciens Strains Reveals a Core Genome with Traits for Habitat Adaptation and a Secondary Metabolites Rich Accessory Genome.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1438}, pmid = {28824571}, issn = {1664-302X}, abstract = {The Gram positive, non-pathogenic endospore-forming soil inhabiting prokaryote Bacillus amyloliquefaciens is a plant growth-promoting rhizobacterium. Bacillus amyloliquefaciens processes wide biocontrol abilities and numerous strains have been reported to suppress diverse bacterial, fungal and fungal-like pathogens. Knowledge about strain level biocontrol abilities is warranted to translate this knowledge into developing more efficient biocontrol agents and bio-fertilizers. Ever-expanding genome studies of B. 
amyloliquefaciens are showing tremendous increase in strain-specific new secondary metabolite clusters which play key roles in the suppression of pathogens and plant growth promotion. In this report, we have used genome mining of all sequenced B. amyloliquefaciens genomes to highlight species boundaries, the diverse strategies used by different strains to promote plant growth and the diversity of their secondary metabolites. Genome composition of the targeted strains suggest regions of genomic plasticity that shape the structure and function of these genomes and govern strain adaptation to different niches. Our results indicated that B. amyloliquefaciens: (i) suffer taxonomic imprecision that blurs the debate over inter-strain genome diversity and dynamics, (ii) have diverse strategies to promote plant growth and development, (iii) have an unlocked, yet to be delimited impressive arsenal of secondary metabolites and products, (iv) have large number of so-called orphan gene clusters, i.e., biosynthetic clusters for which the corresponding metabolites are yet unknown, and (v) have a dynamic pan genome with a secondary metabolite rich accessory genome.}, } @article {pmid28824552, year = {2017}, author = {Laing, CR and Whiteside, MD and Gannon, VPJ}, title = {Pan-genome Analyses of the Species Salmonella enterica, and Identification of Genomic Markers Predictive for Species, Subspecies, and Serovar.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1345}, pmid = {28824552}, issn = {1664-302X}, abstract = {Food safety is a global concern, with upward of 2.2 million deaths due to enteric disease every year. Current whole-genome sequencing platforms allow routine sequencing of enteric pathogens for surveillance, and during outbreaks; however, a remaining challenge is the identification of genomic markers that are predictive of strain groups that pose the most significant health threats to humans, or that can persist in specific environments. 
We have previously developed the software program Panseq, which identifies the pan-genome among a group of sequences, and the SuperPhy platform, which utilizes this pan-genome information to identify biomarkers that are predictive of groups of bacterial strains. In this study, we examined the pan-genome of 4893 genomes of Salmonella enterica, an enteric pathogen responsible for the loss of more disability adjusted life years than any other enteric pathogen. We identified a pan-genome of 25.3 Mbp, a strict core of 1.5 Mbp present in all genomes, and a conserved core of 3.2 Mbp found in at least 96% of these genomes. We also identified 404 genomic regions of 1000 bp that were specific to the species S. enterica. These species-specific regions were found to encode mostly hypothetical proteins, effectors, and other proteins related to virulence. For each of the six S. enterica subspecies, markers unique to each were identified. No serovar had pan-genome regions that were present in all of its genomes and absent in all other serovars; however, each serovar did have genomic regions that were universally present among all constituent members, and statistically predictive of the serovar. The phylogeny based on SNPs within the conserved core genome was found to be highly concordant to that produced by a phylogeny using the presence/absence of 1000 bp regions of the entire pan-genome. Future studies could use these predictive regions as components of a vaccine to prevent salmonellosis, as well as in simple and rapid diagnostic tests for both in silico and wet-lab applications, with uses ranging from food safety to public health. 
Lastly, the tools and methods described in this study could be applied as a pan-genomics framework to other population genomic studies seeking to identify markers for other bacterial species and their sub-groups.}, } @article {pmid28821232, year = {2017}, author = {Oshone, R and Ngom, M and Chu, F and Mansour, S and Sy, MO and Champion, A and Tisa, LS}, title = {Genomic, transcriptomic, and proteomic approaches towards understanding the molecular mechanisms of salt tolerance in Frankia strains isolated from Casuarina trees.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {633}, pmid = {28821232}, issn = {1471-2164}, mesh = {Cell Membrane/metabolism ; Fagales/*microbiology ; Frankia/cytology/*genetics/metabolism/*physiology ; *Gene Expression Profiling ; Nitrogen/pharmacology ; Nucleotides/metabolism ; Osmotic Pressure ; Phenotype ; *Proteomics ; Salt Tolerance/*genetics ; Trees/*microbiology ; Up-Regulation ; }, abstract = {BACKGROUND: Soil salinization is a worldwide problem that is intensifying because of the effects of climate change. An effective method for the reclamation of salt-affected soils involves initiating plant succession using fast growing, nitrogen fixing actinorhizal trees such as the Casuarina. The salt tolerance of Casuarina is enhanced by the nitrogen-fixing symbiosis that they form with the actinobacterium Frankia. Identification and molecular characterization of salt-tolerant Casuarina species and associated Frankia is imperative for the successful utilization of Casuarina trees in saline soil reclamation efforts. In this study, salt-tolerant and salt-sensitive Casuarina associated Frankia strains were identified and comparative genomics, transcriptome profiling, and proteomics were employed to elucidate the molecular mechanisms of salt and osmotic stress tolerance.

RESULTS: Salt-tolerant Frankia strains (CcI6 and Allo2) that could withstand up to 1000 mM NaCl and a salt-sensitive Frankia strain (CcI3) which could withstand only up to 475 mM NaCl were identified. The remaining isolates had intermediate levels of salt tolerance with MIC values ranging from 650 mM to 750 mM. Comparative genomic analysis showed that all of the Frankia isolates from Casuarina belonged to the same species (Frankia casuarinae). Pangenome analysis revealed a high abundance of singletons among all Casuarina isolates. The two salt-tolerant strains contained 153 shared single copy genes (most of which code for hypothetical proteins) that were not found in the salt-sensitive (CcI3) and moderately salt-tolerant (CeD) strains. RNA-seq analysis of one of the two salt-tolerant strains (Frankia sp. strain CcI6) revealed hundreds of genes differentially expressed under salt and/or osmotic stress. Among the 153 genes, 7 and 7 were responsive to salt and osmotic stress, respectively. Proteomic profiling confirmed the transcriptome results and identified 19 and 8 salt and/or osmotic stress-responsive proteins in the salt-tolerant (CcI6) and the salt-sensitive (CcI3) strains, respectively.

CONCLUSION: Genetic differences between salt-tolerant and salt-sensitive Frankia strains isolated from Casuarina were identified. Transcriptome and proteome profiling of a salt-tolerant strain was used to determine molecular differences correlated with differential salt-tolerance and several candidate genes were identified. Mechanisms involving transcriptional and translational regulation, cell envelop remodeling, and previously uncharacterized proteins appear to be important for salt tolerance. Physiological and mutational analyses will further shed light on the molecular mechanism of salt tolerance in Casuarina associated Frankia isolates.}, } @article {pmid28818331, year = {2017}, author = {Hick, PM and Subramaniam, K and Thompson, PM and Waltzek, TB and Becker, JA and Whittington, RJ}, title = {Molecular epidemiology of Epizootic haematopoietic necrosis virus (EHNV).}, journal = {Virology}, volume = {511}, number = {}, pages = {320-329}, doi = {10.1016/j.virol.2017.07.029}, pmid = {28818331}, issn = {1096-0341}, mesh = {Animals ; DNA Virus Infections/veterinary/virology ; *Disease Outbreaks ; Endemic Diseases ; Fish Diseases/*epidemiology/*virology ; Fishes ; Genes, Viral ; *Genetic Variation ; Genome, Viral ; Iatrogenic Disease/epidemiology/veterinary ; *Molecular Epidemiology ; Ranavirus/*classification/*genetics/isolation & purification ; Sequence Analysis, DNA ; Sequence Homology ; Synteny ; }, abstract = {Low genetic diversity of Epizootic haematopoietic necrosis virus (EHNV) was determined for the complete genome of 16 isolates spanning the natural range of hosts, geography and time since the first outbreaks of disease. Genomes ranged from 125,591-127,487 nucleotides with 97.47% pairwise identity and 106-109 genes. All isolates shared 101 core genes with 121 potential genes predicted within the pan-genome of this collection. 
There was high conservation within 90,181 nucleotides of the core genes with isolates separated by average genetic distance of 3.43 × $10^{-4}$ substitutions per site. Evolutionary analysis of the core genome strongly supported historical epidemiological evidence of iatrogenic spread of EHNV to naïve hosts and establishment of endemic status in discrete ecological niches. There was no evidence of structural genome reorganization; however, the complement of non-core genes and variation in repeat elements enabled fine scale molecular epidemiological investigation of this unpredictable pathogen of fish.}, } @article {pmid28798737, year = {2017}, author = {Tripathi, C and Mishra, H and Khurana, H and Dwivedi, V and Kamra, K and Negi, RK and Lal, R}, title = {Complete Genome Analysis of Thermus parvatiensis and Comparative Genomics of Thermus spp. Provide Insights into Genetic Variability and Evolution of Natural Competence as Strategic Survival Attributes.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1410}, pmid = {28798737}, issn = {1664-302X}, abstract = {Thermophilic environments represent an interesting niche. Among thermophiles, the genus Thermus is among the most studied genera. In this study, we have sequenced the genome of Thermus parvatiensis strain RL, a thermophile isolated from Himalayan hot water springs (temperature >96°C) using PacBio RSII SMRT technique. The small genome (2.01 Mbp) comprises a chromosome (1.87 Mbp) and a plasmid (143 Kbp), designated in this study as pTP143. Annotation revealed a high number of repair genes, a squeezed genome but containing highly plastic plasmid with transposases, integrases, mobile elements and hypothetical proteins (44%). We performed a comparative genomic study of the group Thermus with an aim of analysing the phylogenetic relatedness as well as niche specific attributes prevalent among the group. 
We compared the reference genome RL with 16 Thermus genomes to assess their phylogenetic relationships based on 16S rRNA gene sequences, average nucleotide identity (ANI), conserved marker genes (31 and 400), pan genome and tetranucleotide frequency. The core genome of the analyzed genomes contained 1,177 core genes and many singleton genes were detected in individual genomes, reflecting a conserved core but adaptive pan repertoire. We demonstrated the presence of metagenomic islands (chromosome:5, plasmid:5) by recruiting raw metagenomic data (from the same niche) against the genomic replicons of T. parvatiensis. We also dissected the CRISPR loci wide all genomes and found widespread presence of this system across Thermus genomes. Additionally, we performed a comparative analysis of competence loci wide Thermus genomes and found evidence for recent horizontal acquisition of the locus and continued dispersal among members reflecting that natural competence is a beneficial survival trait among Thermus members and its acquisition depicts unending evolution in order to accomplish optimal fitness.}, } @article {pmid28774965, year = {2017}, author = {Jensen, JM and Villesen, P and Friborg, RM and {Danish Pan-Genome Consortium} and Mailund, T and Besenbacher, S and Schierup, MH}, title = {Assembly and analysis of 100 full MHC haplotypes from the Danish population.}, journal = {Genome research}, volume = {27}, number = {9}, pages = {1597-1607}, pmid = {28774965}, issn = {1549-5469}, mesh = {Alleles ; Chromosome Mapping ; Denmark ; Genetic Variation/*genetics ; *Genetics, Population ; Haplotypes/genetics ; Humans ; Linkage Disequilibrium/*genetics ; Major Histocompatibility Complex/*genetics ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {Genes in the major histocompatibility complex (MHC, also known as HLA) play a critical role in the immune response and variation within the extended 4-Mb region shows association with major risks of many diseases. 
Yet, deciphering the underlying causes of these associations is difficult because the MHC is the most polymorphic region of the genome with a complex linkage disequilibrium structure. Here, we reconstruct full MHC haplotypes from de novo assembled trios without relying on a reference genome and perform evolutionary analyses. We report 100 full MHC haplotypes and call a large set of structural variants in the regions for future use in imputation with GWAS data. We also present the first complete analysis of the recombination landscape in the entire region and show how balancing selection at classical genes have linked effects on the frequency of variants throughout the region.}, } @article {pmid28768476, year = {2017}, author = {Bazinet, AL}, title = {Pan-genome and phylogeny of Bacillus cereus sensu lato.}, journal = {BMC evolutionary biology}, volume = {17}, number = {1}, pages = {176}, pmid = {28768476}, issn = {1471-2148}, mesh = {Amino Acid Sequence ; Bacillus cereus/*classification/*genetics ; Bayes Theorem ; Cluster Analysis ; Codon/genetics ; DNA Transposable Elements/genetics ; Genes, Bacterial ; Genetic Association Studies ; *Genome, Bacterial ; Likelihood Functions ; *Phylogeny ; Quantitative Trait, Heritable ; Recombination, Genetic/genetics ; Sequence Alignment ; Species Specificity ; }, abstract = {BACKGROUND: Bacillus cereus sensu lato (s. l.) is an ecologically diverse bacterial group of medical and agricultural significance. In this study, I use publicly available genomes and novel bioinformatic workflows to characterize the B. cereus s. l. pan-genome and perform the largest phylogenetic and population genetic analyses of this group to date in terms of the number of genes and taxa included. With these fundamental data in hand, I identify genes associated with particular phenotypic traits (i.e., "pan-GWAS" analysis), and quantify the degree to which taxa sharing common attributes are phylogenetically clustered.

METHODS: A rapid k-mer based approach (Mash) was used to create reduced representations of selected Bacillus genomes, and a fast distance-based phylogenetic analysis of this data (FastME) was performed to determine which species should be included in B. cereus s. l. The complete genomes of eight B. cereus s. l. species were annotated de novo with Prokka, and these annotations were used by Roary to produce the B. cereus s. l. pan-genome. Scoary was used to associate gene presence and absence patterns with various phenotypes. The orthologous protein sequence clusters produced by Roary were filtered and used to build HaMStR databases of gene models that were used in turn to construct phylogenetic data matrices. Phylogenetic analyses used RAxML, DendroPy, ClonalFrameML, PAUP*, and SplitsTree. Bayesian model-based population genetic analysis assigned taxa to clusters using hierBAPS. The genealogical sorting index was used to quantify the phylogenetic clustering of taxa sharing common attributes.

RESULTS: The B. cereus s. l. pan-genome currently consists of ≈60,000 genes, ≈600 of which are "core" (common to at least 99% of taxa sampled). Pan-GWAS analysis revealed genes associated with phenotypes such as isolation source, oxygen requirement, and ability to cause diseases such as anthrax or food poisoning. Extensive phylogenetic analyses using an unprecedented amount of data produced phylogenies that were largely concordant with each other and with previous studies. Phylogenetic support as measured by bootstrap probabilities increased markedly when all suitable pan-genome data was included in phylogenetic analyses, as opposed to when only core genes were used. Bayesian population genetic analysis recommended subdividing the three major clades of B. cereus s. l. into nine clusters. Taxa sharing common traits and species designations exhibited varying degrees of phylogenetic clustering.

CONCLUSIONS: All phylogenetic analyses recapitulated two previously used classification systems, and taxa were consistently assigned to the same major clade and group. By including accessory genes from the pan-genome in the phylogenetic analyses, I produced an exceptionally well-supported phylogeny of 114 complete B. cereus s. l. genomes. The best-performing methods were used to produce a phylogeny of all 498 publicly available B. cereus s. l. genomes, which was in turn used to compare three different classification systems and to test the monophyly status of various B. cereus s. l. species. The majority of the methodology used in this study is generic and could be leveraged to produce pan-genome estimates and similarly robust phylogenetic hypotheses for other bacterial groups.}, } @article {pmid28764658, year = {2017}, author = {Lugli, GA and Milani, C and Turroni, F and Duranti, S and Mancabelli, L and Mangifesta, M and Ferrario, C and Modesto, M and Mattarelli, P and Jiří, K and van Sinderen, D and Ventura, M}, title = {Comparative genomic and phylogenomic analyses of the Bifidobacteriaceae family.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {568}, pmid = {28764658}, issn = {1471-2164}, mesh = {Bifidobacterium/*genetics ; Evolution, Molecular ; *Genomics ; *Phylogeny ; }, abstract = {BACKGROUND: Members of the Bifidobacteriaceae family represent both dominant microbial groups that colonize the gut of various animals, especially during the suckling stage of their life, while they also occur as pathogenic bacteria of the urogenital tract. The pan-genome of the genus Bifidobacterium has been explored in detail in recent years, though genomics of the Bifidobacteriaceae family has not yet received much attention. 
Here, a comparative genomic analyses of 67 Bifidobacteriaceae (sub) species including all currently recognized genera of this family, i.e., Aeriscardovia, Alloscardovia, Bifidobacterium, Bombiscardovia, Gardnerella, Neoscardovia, Parascardovia, Pseudoscardovia and Scardovia, was performed. Furthermore, in order to include a representative of each of the 67 (currently recognized) (sub) species belonging to the Bifidobacteriaceae family, we sequenced the genomes of an additional 11 species from this family, accomplishing the most extensive comparative genomic analysis performed within this family so far.

RESULTS: Phylogenomics-based analyses revealed the deduced evolutionary pathway followed by each member of the Bifidobacteriaceae family, highlighting Aeriscardovia aeriphila LMG 21773 as the deepest branch in the evolutionary tree of this family. Furthermore, functional analyses based on genome content unveil connections between a given member of the family, its carbohydrate utilization abilities and its corresponding host. In this context, bifidobacterial (sub) species isolated from humans and monkeys possess the highest relative number of acquired glycosyl hydrolase-encoding genes, probably in order to enhance their metabolic ability to utilize different carbon sources consumed by the host.

CONCLUSIONS: Within the Bifidobacteriaceae family, genomics of the genus Bifidobacterium has been extensively investigated. In contrast, very little is known about the genomics of members of the other eight genera of this family. In this study, we decoded the genome sequences of each member of the Bifidobacteriaceae family. Thanks to subsequent comparative genomic and phylogenetic analyses, the deduced pan-genome of this family, as well as the predicted evolutionary development of each taxon belonging to this family was assessed.}, } @article {pmid28756981, year = {2017}, author = {Alfaro-Cervello, C and Andrade-Gamarra, V and Nieto, G and Navarro, L and Martín-Vañó, S and García de la Torre, JP and Bengoa Caamaño, M and García Mauriño, ML and Noguera, R and Navarro, S}, title = {Congenital undifferentiated sarcoma associated to BCOR-CCNB3 gene fusion.}, journal = {Pathology, research and practice}, volume = {213}, number = {11}, pages = {1435-1439}, doi = {10.1016/j.prp.2017.07.012}, pmid = {28756981}, issn = {1618-0631}, mesh = {Adult ; Biomarkers, Tumor/analysis ; Cyclin B/*genetics ; Diagnosis, Differential ; Female ; Gene Fusion/genetics ; Humans ; Oncogene Proteins, Fusion/genetics ; Proto-Oncogene Proteins/*genetics ; Repressor Proteins/*genetics ; Sarcoma/diagnosis/*genetics/pathology ; Soft Tissue Neoplasms/*genetics/pathology ; }, abstract = {Small round cell sarcomas are aggressive bone and soft tissue tumors that predominantly affect children and young adults. A new group of sarcomas with a recurrent BCOR-CCNB3 gene fusion has been recently identified in previously unclassifiable small round cell sarcomas. BCOR-CCNB3 sarcomas share clinical and pathologic similarities with Ewing sarcoma, but show a stronger male predilection and less aggressiveness, as well as distinct gene expression profiling and pangenomic SNP array analyses. 
We report the unusual case of a congenital BCOR-CCNB3 retroperitoneal sarcoma in a female born at the 34th gestational week, which was diagnosed in necropsy after 21 hours of life. Immunohistochemical analysis showed diffuse expression of CD99 and CCNB3. SNPa showed two focal segmentary deletions at 5q34 and 22q11.23, the latter harboring among others the SMARCB1/INI1 tumor suppressor gene. Immunohistochemistry confirmed loss of INI1 in tumor cells, which has not been previously reported in this type of undifferentiated sarcomas.}, } @article {pmid28744269, year = {2017}, author = {Yu, Z and Ma, Y and Zhong, W and Qiu, J and Li, J}, title = {Comparative Genomics of Methanopyrus sp. SNP6 and KOL6 Revealing Genomic Regions of Plasticity Implicated in Extremely Thermophilic Profiles.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1278}, pmid = {28744269}, issn = {1664-302X}, abstract = {Methanopyrus spp. are usually isolated from harsh niches, such as high osmotic pressure and extreme temperature. However, the molecular mechanisms for their environmental adaption are poorly understood. Archaeal species is commonly considered as primitive organism. The evolutional placement of archaea is a fundamental and intriguing scientific question. We sequenced the genomes of Methanopyrus strains SNP6 and KOL6 isolated from the Atlantic and Iceland, respectively. Comparative genomic analysis revealed genetic diversity and instability implicated in niche adaption, including a number of transporter- and integrase/transposase-related genes. Pan-genome analysis also defined the gene pool of Methanopyrus spp., in addition of ~120-Kb genomic region of plasticity impacting cognate genomic architecture. 
We believe that Methanopyrus genomics could facilitate efficient investigation/recognition of archaeal phylogenetic diverse patterns, as well as improve understanding of biological roles and significance of these versatile microbes.}, } @article {pmid28742023, year = {2017}, author = {Lees, JA and Croucher, NJ and Goldblatt, D and Nosten, F and Parkhill, J and Turner, C and Turner, P and Bentley, SD}, title = {Genome-wide identification of lineage and locus specific variation associated with pneumococcal carriage duration.}, journal = {eLife}, volume = {6}, number = {}, pages = {}, pmid = {28742023}, issn = {2050-084X}, support = {MR/R003076/1/MRC_/Medical Research Council/United Kingdom ; 098051/WT_/Wellcome Trust/United Kingdom ; 083735/Z/07/Z/WT_/Wellcome Trust/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; 104169/Z/14/Z/WT_/Wellcome Trust/United Kingdom ; 1365620/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Age Factors ; Anti-Bacterial Agents/pharmacology ; Carrier State ; Child, Preschool ; Drug Resistance, Multiple, Bacterial/*genetics ; Female ; *Genetic Variation ; *Genome, Bacterial ; Genome-Wide Association Study ; Genotype ; *Host-Pathogen Interactions ; Humans ; Infant ; Infant, Newborn ; Longitudinal Studies ; Male ; Markov Chains ; Models, Genetic ; Nasopharynx/immunology/*microbiology ; Phylogeny ; Serogroup ; Streptococcus pneumoniae/classification/drug effects/*genetics/isolation & purification ; Time Factors ; }, abstract = {Streptococcus pneumoniae is a leading cause of invasive disease in infants, especially in low-income settings. Asymptomatic carriage in the nasopharynx is a prerequisite for disease, but variability in its duration is currently only understood at the serotype level. Here we developed a model to calculate the duration of carriage episodes from longitudinal swab data, and combined these results with whole genome sequence data. 
We estimated that pneumococcal genomic variation accounted for 63% of the phenotype variation, whereas the host traits considered here (age and previous carriage) accounted for less than 5%. We further partitioned this heritability into both lineage and locus effects, and quantified the amount attributable to the largest sources of variation in carriage duration: serotype (17%), drug-resistance (9%) and other significant locus effects (7%). A pan-genome-wide association study identified prophage sequences as being associated with decreased carriage duration independent of serotype, potentially by disruption of the competence mechanism. These findings support theoretical models of pneumococcal competition and antibiotic resistance.}, } @article {pmid28736702, year = {2017}, author = {Solorzano Zambrano, L and Usai, G and Vangelisti, A and Mascagni, F and Giordani, T and Bernardi, R and Cavallini, A and Gucci, R and Caruso, G and D'Onofrio, C and Quartacci, MF and Picciarelli, P and Conti, B and Lucchi, A and Natali, L}, title = {Cultivar-specific transcriptome prediction and annotation in Ficus carica L.}, journal = {Genomics data}, volume = {13}, number = {}, pages = {64-66}, pmid = {28736702}, issn = {2213-5960}, abstract = {The availability of transcriptomic data sequence is a key step for functional genomics studies. Recently, a repertoire of predicted genes of a Japanese cultivar of fig (Ficus carica L.) was released. Because of the great phenotypic variability that can be found in this species, we decided to study another fig genotype, the Italian cv. Dottato, in order to perform comparative studies between the two cultivars and extend the pan genome of this species. We isolated, sequenced and assembled fig genomic DNA from young fruits of cv. Dottato. Then, putative gene sequences were predicted and annotated. Finally, a comparison was performed between cvs. Dottato and Horaishi predicted transcriptomes. 
Our data provide a resource (available at the Sequence Read Archive database under SRP109082) to be used for functional genomics of fig, in order to fill the gap of knowledge still existing in this species concerning plant development, defense and adaptation to the environment.}, } @article {pmid28720902, year = {2017}, author = {Xu, H and Qin, S and Lan, Y and Liu, M and Cao, X and Qiao, D and Cao, Y and Cao, Y}, title = {Comparative genomic analysis of Paenibacillus sp. SSG-1 and its closely related strains reveals the effect of glycometabolism on environmental adaptation.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {5720}, pmid = {28720902}, issn = {2045-2322}, mesh = {*Adaptation, Biological ; Agar/metabolism ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genomics ; Hydrolysis ; Metabolic Networks and Pathways/*genetics ; Paenibacillus/*genetics/*metabolism ; Polysaccharides/*metabolism ; }, abstract = {The extensive environmental adaptability of the genus Paenibacillus is related to the enormous diversity of its gene repertoires. Paenibacillus sp. SSG-1 has previously been reported, and its agar-degradation trait has attracted our attention. Here, the genome sequence of Paenibacillus sp. SSG-1, together with 76 previously sequenced strains, was comparatively studied. The results show that the pan-genome of Paenibacillus is open and indicate that the current taxonomy of this genus is incorrect. The incessant flux of gene repertoires resulting from the processes of gain and loss largely contributed to the difference in genomic content and genome size in Paenibacillus. Furthermore, a large number of genes gained are associated with carbohydrate transport and metabolism. It indicates that the evolution of glycometabolism is a key factor for the environmental adaptability of Paenibacillus species. Interestingly, through horizontal gene transfer, Paenibacillus sp. 
SSG-1 acquired an approximately 150 kb DNA fragment and shows an agar-degrading characteristic distinct from most other non-marine bacteria. This region may be transported in bacteria as a complete unit responsible for agar degradation. Taken together, these results provide insights into the evolutionary pattern of Paenibacillus and have implications for studies on the taxonomy and functional genomics of this genus.}, } @article {pmid28717847, year = {2017}, author = {Meng, P and Lu, C and Zhang, Q and Lin, J and Chen, F}, title = {Exploring the Genomic Diversity and Cariogenic Differences of Streptococcus mutans Strains Through Pan-Genome and Comparative Genome Analysis.}, journal = {Current microbiology}, volume = {74}, number = {10}, pages = {1200-1209}, pmid = {28717847}, issn = {1432-0991}, support = {PKUSS20130210//Peking University School of Stomatology/ ; }, mesh = {Bacterial Typing Techniques ; Computational Biology/methods ; Databases, Genetic ; *Genetic Variation ; *Genome, Bacterial ; *Genomics/methods ; Molecular Sequence Annotation ; Streptococcus mutans/*classification/*genetics ; Virulence Factors/genetics ; }, abstract = {Pan-genome refers to the sum of genes that can be found in a given bacterial species, including the core-genome and the dispensable genome. In this study, the genomes from 183 Streptococcus mutans (S. mutans) isolates were analyzed from the pan-genome perspective. This analysis revealed that S. mutans has an "open" pan-genome, implying that there are plenty of new genes to be found as more genomes are sequenced. Additionally, S. mutans has a limited core-genome, which is composed of genes related to vital activities within the bacterium, such as metabolism and hereditary information storage or processing, occupying 35.6 and 26.6% of the core genes, respectively. We estimate the theoretical core-genome size to be about 1083 genes, which are fewer than other Streptococcus species. 
In addition, core genes suffer larger selection pressures in comparison to those that are less widely distributed. Not surprisingly, the distribution of putative virulence genes in S. mutans strains does not correlate with caries status, indicating that other factors are also responsible for cariogenesis. These results contribute to a more understanding of the evolutionary characteristics and dynamic changes within the genome components of the species. This also helps to form a new theoretical foundation for preventing dental caries. Furthermore, this study sets an example for analyzing large genomic datasets of pathogens from the pan-genome perspective.}, } @article {pmid28710456, year = {2017}, author = {Jeong, DW and Heo, S and Ryu, S and Blom, J and Lee, JH}, title = {Genomic insights into the virulence and salt tolerance of Staphylococcus equorum.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {5383}, pmid = {28710456}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/*genetics/metabolism ; Conserved Sequence ; Databases, Genetic ; Drug Resistance, Multiple, Bacterial ; Gene Expression Profiling ; *Gene Expression Regulation, Neoplastic ; *Genome, Bacterial ; Potassium Channels, Voltage-Gated/genetics/metabolism ; Salt Tolerance/*genetics ; Sodium Chloride/pharmacology ; Staphylococcus/classification/drug effects/*genetics/*pathogenicity ; Virulence ; }, abstract = {To shed light on the genetic background behind the virulence and salt tolerance of Staphylococcus equorum, we performed comparative genome analysis of six S. equorum strains. Data on four previously published genome sequences were obtained from the NCBI database, while those on strain KM1031 displaying resistance to multiple antibiotics and strain C2014 causing haemolysis were determined in this study. Examination of the pan-genome of five of the six S. 
equorum strains showed that the conserved core genome retained the genes for general physiological processes and survival of the species. In this comparative genomic analysis, the factors that distinguish the strains from each other, including acquired genomic factors in mobile elements, were identified. Additionally, the high salt tolerance of strains enabling growth at a NaCl concentration of 25% (w/v) was attributed to the genes encoding potassium voltage-gated channels. Among the six strains, KS1039 does not possess any of the functional virulence determinants expressed in the other strains.}, } @article {pmid28706512, year = {2017}, author = {Machado, H and Gram, L}, title = {Comparative Genomics Reveals High Genomic Diversity in the Genus Photobacterium.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {1204}, pmid = {28706512}, issn = {1664-302X}, abstract = {Vibrionaceae is a large marine bacterial family, which can constitute up to 50% of the prokaryotic population in marine waters. Photobacterium is the second largest genus in the family and we used comparative genomics on 35 strains representing 16 of the 28 species described so far, to understand the genomic diversity present in the Photobacterium genus. Such understanding is important for ecophysiology studies of the genus. We used whole genome sequences to evaluate phylogenetic relationships using several analyses (16S rRNA, MLSA, fur, amino-acid usage, ANI), which allowed us to identify two misidentified strains. Genome analyses also revealed occurrence of higher and lower GC content clades, correlating with phylogenetic clusters. Pan- and core-genome analysis revealed the conservation of 25% of the genome throughout the genus, with a large and open pan-genome. The major source of genomic diversity could be traced to the smaller chromosome and plasmids. Several of the physiological traits studied in the genus did not correlate with phylogenetic data. 
Since horizontal gene transfer (HGT) is often suggested as a source of genetic diversity and a potential driver of genomic evolution in bacterial species, we looked into evidence of such in Photobacterium genomes. Genomic islands were the source of genomic differences between strains of the same species. Also, we found transposase genes and CRISPR arrays that suggest multiple encounters with foreign DNA. Presence of genomic exchange traits was widespread and abundant in the genus, suggesting a role in genomic evolution. The high genetic variability and indications of genetic exchange make it difficult to elucidate genome evolutionary paths and raise the awareness of the roles of foreign DNA in the genomic evolution of environmental organisms.}, } @article {pmid28705964, year = {2017}, author = {Rothen, J and Schindler, T and Pothier, JF and Younan, M and Certa, U and Daubenberger, C and Pflüger, V and Jores, J}, title = {Draft Genome Sequences of Seven Streptococcus agalactiae Strains Isolated from Camelus dromedarius at the Horn of Africa.}, journal = {Genome announcements}, volume = {5}, number = {28}, pages = {}, pmid = {28705964}, issn = {2169-8287}, abstract = {We present draft whole-genome sequences of seven Streptococcus agalactiae strains isolated from Camelus dromedarius in Kenya and Somalia. 
These data are an extension to the group B Streptococcus (GBS) pangenome and might provide more insight into the underlying mechanisms of pathogenicity and antibiotic resistance of camel GBS.}, } @article {pmid28698717, year = {2017}, author = {Girard, SD and Virard, I and Lacassagne, E and Paumier, JM and Lahlou, H and Jabes, F and Molino, Y and Stephan, D and Baranger, K and Belghazi, M and Deveze, A and Khrestchatisky, M and Nivet, E and Roman, FS and Féron, F}, title = {From Blood to Lesioned Brain: An In Vitro Study on Migration Mechanisms of Human Nasal Olfactory Stem Cells.}, journal = {Stem cells international}, volume = {2017}, number = {}, pages = {1478606}, pmid = {28698717}, issn = {1687-966X}, abstract = {Stem cell-based therapies critically rely on selective cell migration toward pathological or injured areas. We previously demonstrated that human olfactory ectomesenchymal stem cells (OE-MSCs), derived from an adult olfactory lamina propria, migrate specifically toward an injured mouse hippocampus after transplantation in the cerebrospinal fluid and promote functional recoveries. However, the mechanisms controlling their recruitment and homing remain elusive. Using an in vitro model of blood-brain barrier (BBB) and secretome analysis, we observed that OE-MSCs produce numerous proteins allowing them to cross the endothelial wall. Then, pan-genomic DNA microarrays identified signaling molecules that lesioned mouse hippocampus overexpressed. Among the most upregulated cytokines, both recombinant SPP1/osteopontin and CCL2/MCP-1 stimulate OE-MSC migration whereas only CCL2 exerts a chemotactic effect. Additionally, OE-MSCs express SPP1 receptors but not the CCL2 cognate receptor, suggesting a CCR2-independent pathway through other CCR receptors. 
These results confirm that OE-MSCs can be attracted by chemotactic cytokines overexpressed in inflamed areas and demonstrate that CCL2 is an important factor that could promote OE-MSC engraftment, suggesting improvement for future clinical trials.}, } @article {pmid28682230, year = {2017}, author = {Olié, E and Courtet, P}, title = {[Genetics and epigenetics of suicidal behaviors].}, journal = {Biologie aujourd'hui}, volume = {211}, number = {1}, pages = {93-96}, doi = {10.1051/jbio/2017013}, pmid = {28682230}, issn = {2105-0686}, mesh = {Biomedical Research/trends ; Epigenesis, Genetic/*physiology ; *Genetic Predisposition to Disease ; Humans ; Mental Disorders/*genetics ; *Suicide ; }, abstract = {With more than 10 000 deaths each year in France, suicide is a major public health problem. Despite many known risk factors, suicide prediction remains extremely difficult in daily clinical practice. The identification of biomarkers, including genetic and epigenetic factors, is needed in suicidology in order to better understand pathophysiology of such behaviors and to improve the screening of individuals at high suicidal risk. Numerous studies have reported candidate genes involved in serotonergic system, HPA axis and neurotrophic system. Moreover, an interaction between genetic factors and environment is now admitted, facilitating emergence of a psychobiological vulnerability leading to the suicidal act. For instance, polymorphism of the serotonin transporter (5-HTTLPR) as well as its interaction with early life events (childhood abuse) are involved in suicidality in adulthood. Unfortunately pangenomic studies are not conclusive yet. Beyond serotonergic genes, the SKA-2 gene, which is involved in stress response, deserves more attention. SKA-2 genic expression and methylation level have been associated with the suicidal act. 
In the future, the combination of clinical, biological and genetic risk factors will probably improve detection of suicide risk.}, } @article {pmid28674418, year = {2017}, author = {Proença, JT and Barral, DC and Gordo, I}, title = {Commensal-to-pathogen transition: One-single transposon insertion results in two pathoadaptive traits in Escherichia coli-macrophage interaction.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {4504}, pmid = {28674418}, issn = {2045-2322}, mesh = {Animals ; *DNA Transposable Elements ; Escherichia coli/*physiology ; Escherichia coli Infections/immunology/metabolism/*microbiology ; Genome, Bacterial ; *Host-Pathogen Interactions/immunology ; Macrophages/*immunology/metabolism/*microbiology ; Mice ; Microbial Viability/immunology ; Mutagenesis, Insertional ; Mutation ; Phagocytosis ; Phagosomes/immunology/microbiology ; RAW 264.7 Cells ; }, abstract = {Escherichia coli is both a harmless commensal in the intestines of many mammals, as well as a dangerous pathogen. The evolutionary paths taken by strains of this species in the commensal-to-pathogen transition are complex and can involve changes both in the core genome, as well as in the pan-genome. One way to understand the likely paths that a commensal strain of E. coli takes when evolving pathogenicity is through experimentally evolving the strain under the selective pressures that it will have to withstand as a pathogen. Here, we report that a commensal strain, under continuous pressure from macrophages, recurrently acquired a transposable element insertion, which resulted in two key phenotypic changes: increased intracellular survival, through the delay of phagosome maturation and increased ability to escape macrophages. We further show that the acquisition of the pathoadaptive traits was accompanied by small but significant changes in the transcriptome of macrophages upon infection. 
These results show that under constant pressures from a key component of the host immune system, namely macrophage phagocytosis, commensal E. coli rapidly acquires pathoadaptive mutations that cause transcriptome changes associated to the host-microbe duet.}, } @article {pmid28664513, year = {2017}, author = {Yang, JA and Yang, SH and Kim, J and Kwon, KK and Oh, HM}, title = {Comparative genome analysis of the Flavobacteriales bacterium strain UJ101, isolated from the gut of Atergatis reticulatus.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {55}, number = {7}, pages = {583-591}, pmid = {28664513}, issn = {1976-3794}, mesh = {Animals ; Base Composition ; Biotin/metabolism ; Brachyura/*microbiology ; Flavobacteriaceae/*genetics/isolation & purification ; Gastrointestinal Microbiome ; *Genome, Bacterial ; Intestines/microbiology ; Metabolic Networks and Pathways/genetics ; Molecular Sequence Annotation ; Nitrogen/metabolism ; Pentose Phosphate Pathway/genetics ; Phylogeny ; Republic of Korea ; Sequence Analysis, DNA ; Shellfish/*microbiology ; Thiamine/metabolism ; }, abstract = {Here we report the comparative genomic analysis of strain UJ101 with 15 strains from the family Flavobacteriaceae, using the CGExplorer program. Flavobacteriales bacterium strain UJ101 was isolated from a xanthid crab, Atergatis reticulatus, from the East Sea near Korea. The complete genome of strain UJ101 is a 3,074,209 bp, single, circular chromosome with 30.74% GC content. While the UJ101 genome contains a number of annotated genes for many metabolic pathways, such as the Embden-Meyerhof pathway, the pentose phosphate pathway, the tricarboxylic acid (TCA) cycle, and the glyoxylate cycle, genes for the Entner-Douddoroff pathway are not found in the UJ101 genome. Overall, carbon fixation processes were absent but nitrate reduction and denitrification pathways were conserved. 
The UJ101 genome was compared to genomes from other marine animals (three invertebrate strains and 5 fish strains) and other marine animal-derived genera. Notable results by genome comparisons showed that UJ101 is capable of denitrification and nitrate reduction, and that biotin-thiamine pathway participation varies among marine bacteria; fish-dwelling bacteria, free-living bacteria, invertebrate-dwelling bacteria, and strain UJ101. Pan-genome analysis of the 16 strains in this study included 7,220 non-redundant genes that covered 62% of the pan-genome. A core-genome of 994 genes was present and consisted of 8% of the genes from the pan-genome. Strain UJ101 is a symbiotic hetero-organotroph isolated from xanthid crab, and is a metabolic generalist with nitrate-reducing abilities but without the ability to synthesize biotin. There is a general tendency of UJ101 and some fish pathogens to prefer thiamine-dependent glycolysis to gluconeogenesis. Biotin and thiamine auxotrophy or prototrophy may be used as important markers in microbial community studies.}, } @article {pmid28645159, year = {2017}, author = {Jimenez-Infante, F and Ngugi, DK and Vinu, M and Blom, J and Alam, I and Bajic, VB and Stingl, U}, title = {Genomic characterization of two novel SAR11 isolates from the Red Sea, including the first strain of the SAR11 Ib clade.}, journal = {FEMS microbiology ecology}, volume = {93}, number = {7}, pages = {}, doi = {10.1093/femsec/fix083}, pmid = {28645159}, issn = {1574-6941}, mesh = {*Alphaproteobacteria/classification/genetics/isolation & purification ; Aquatic Organisms/*classification/*genetics ; Bacterial Adhesion/physiology ; Genome, Bacterial/*genetics ; Indian Ocean ; Phylogeny ; Seawater/microbiology ; }, abstract = {The SAR11 clade (Pelagibacterales) is a diverse group that forms a monophyletic clade within the Alphaproteobacteria, and constitutes up to one third of all prokaryotic cells in the photic zone of most oceans. 
Pelagibacterales are very abundant in the warm and highly saline surface waters of the Red Sea, raising the question of adaptive traits of SAR11 populations in this water body and warmer oceans throughout the world. In this study, two pure cultures were successfully obtained from surface waters of the Red Sea: one isolate of subgroup Ia and one of the previously uncultured SAR11 Ib lineage. The novel genomes were very similar to each other and to genomes of isolates of SAR11 subgroup Ia (Ia pan-genome), both in terms of gene content and synteny. Among the genes that were not present in the Ia pan-genome, 108 (RS39, Ia) and 151 genes (RS40, Ib) were strain specific. Detailed analyses showed that only 51 (RS39, Ia) and 55 (RS40, Ib) of these strain-specific genes had not been reported before on genome fragments of Pelagibacterales. Further analyses revealed the potential production of phosphonates by some SAR11 members and possible adaptations for oligotrophic life, including pentose sugar utilization and adhesion to marine particulate matter.}, } @article {pmid28642379, year = {2017}, author = {Veseli, IA and Tang, C and Pombert, JF}, title = {Complete Genome Sequence of Staphylococcus lutrae ATCC 700373, a Potential Pathogen Isolated from Deceased Otters.}, journal = {Genome announcements}, volume = {5}, number = {25}, pages = {}, pmid = {28642379}, issn = {2169-8287}, abstract = {Despite their relevance to human health, not all staphylococcal species have been characterized. As such, the potential zoonotic threats posed by uninvestigated species and their contribution to the staphylococcal pangenome are unclear. 
Here, we report the complete genome sequence of Staphylococcus lutrae ATCC 700373, a coagulase-positive species isolated from deceased otters.}, } @article {pmid28637301, year = {2017}, author = {Münch, PC and Stecher, B and McHardy, AC}, title = {EDEN: evolutionary dynamics within environments.}, journal = {Bioinformatics (Oxford, England)}, volume = {33}, number = {20}, pages = {3292-3295}, pmid = {28637301}, issn = {1367-4811}, mesh = {Bacteria/genetics ; *Biological Evolution ; Metagenomics/*methods ; Phenotype ; *Phylogeny ; Sequence Analysis, DNA/*methods ; *Software ; }, abstract = {SUMMARY: Metagenomics revolutionized the field of microbial ecology, giving access to Gb-sized datasets of microbial communities under natural conditions. This enables fine-grained analyses of the functions of community members, studies of their association with phenotypes and environments, as well as of their microevolution and adaptation to changing environmental conditions. However, phylogenetic methods for studying adaptation and evolutionary dynamics are not able to cope with big data. EDEN is the first software for the rapid detection of protein families and regions under positive selection, as well as their associated biological processes, from meta- and pangenome data. It provides an interactive result visualization for detailed comparative analyses.

EDEN is available as a Docker installation under the GPL 3.0 license, allowing its use on common operating systems, at http://www.github.com/hzi-bifo/eden.

CONTACT: alice.mchardy@helmholtz-hzi.de.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid28633403, year = {2017}, author = {Anderson, RE and Kouris, A and Seward, CH and Campbell, KM and Whitaker, RJ}, title = {Structured Populations of Sulfolobus acidocaldarius with Susceptibility to Mobile Genetic Elements.}, journal = {Genome biology and evolution}, volume = {9}, number = {6}, pages = {1699-1710}, pmid = {28633403}, issn = {1759-6653}, mesh = {*DNA Transposable Elements ; Evolution, Molecular ; Gene Deletion ; Genome, Archaeal ; Hot Springs/chemistry/microbiology ; Phylogeny ; Plasmids/genetics ; Sulfolobus acidocaldarius/classification/*genetics/isolation & purification ; }, abstract = {The impact of a structured environment on genome evolution can be determined through comparative population genomics of species that live in the same habitat. Recent work comparing three genome sequences of Sulfolobus acidocaldarius suggested that highly structured, extreme, hot spring environments do not limit dispersal of this thermoacidophile, in contrast to other co-occurring Sulfolobus species. Instead, a high level of conservation among these three S. acidocaldarius genomes was hypothesized to result from rapid, global-scale dispersal promoted by low susceptibility to viruses that sets S. acidocaldarius apart from its sister Sulfolobus species. To test this hypothesis, we conducted a comparative analysis of 47 genomes of S. acidocaldarius from spatial and temporal sampling of two hot springs in Yellowstone National Park. While we confirm the low diversity in the core genome, we observe differentiation among S. acidocaldarius populations, likely resulting from low migration among hot spring "islands" in Yellowstone National Park. Patterns of genomic variation indicate that differing geological contexts result in the elimination or preservation of diversity among differentiated populations. 
We observe multiple deletions associated with a large genomic island rich in glycosyltransferases, differential integrations of the Sulfolobus turreted icosahedral virus, as well as two different plasmid elements. These data demonstrate that neither rapid dispersal nor lack of mobile genetic elements result in low diversity in the S. acidocaldarius genomes. We suggest instead that significant differences in the recent evolutionary history, or the intrinsic evolutionary rates, of sister Sulfolobus species result in the relatively low diversity of the S. acidocaldarius genome.}, } @article {pmid28629190, year = {2017}, author = {Borchert, E and Knobloch, S and Dwyer, E and Flynn, S and Jackson, SA and Jóhannsson, R and Marteinsson, VT and O'Gara, F and Dobson, ADW}, title = {Biotechnological Potential of Cold Adapted Pseudoalteromonas spp. Isolated from 'Deep Sea' Sponges.}, journal = {Marine drugs}, volume = {15}, number = {6}, pages = {}, pmid = {28629190}, issn = {1660-3397}, mesh = {Animals ; *Biotechnology ; Cold Temperature ; Genome, Bacterial ; Porifera/*microbiology ; Pseudoalteromonas/enzymology/*genetics ; Recombinant Proteins/biosynthesis ; beta-Galactosidase/genetics ; beta-Glucosidase/genetics ; }, abstract = {The marine genus Pseudoalteromonas is known for its versatile biotechnological potential with respect to the production of antimicrobials and enzymes of industrial interest. We have sequenced the genomes of three Pseudoalteromonas sp. strains isolated from different deep sea sponges on the Illumina MiSeq platform. The isolates have been screened for various industrially important enzymes and comparative genomics has been applied to investigate potential relationships between the isolates and their host organisms, while comparing them to free-living Pseudoalteromonas spp. from shallow and deep sea environments. 
The genomes of the sponge associated Pseudoalteromonas strains contained much lower levels of potential eukaryotic-like proteins which are known to be enriched in symbiotic sponge associated microorganisms, than might be expected for true sponge symbionts. While all the Pseudoalteromonas shared a large distinct subset of genes, nonetheless the number of unique and accessory genes is quite large and defines the pan-genome as open. Enzymatic screens indicate that a vast array of enzyme activities is expressed by the isolates, including β-galactosidase, β-glucosidase, and protease activities. A β-glucosidase gene from one of the Pseudoalteromonas isolates, strain EB27 was heterologously expressed in Escherichia coli and, following biochemical characterization, the recombinant enzyme was found to be cold-adapted, thermolabile, halotolerant, and alkaline active.}, } @article {pmid28625995, year = {2017}, author = {Lau, CH and van Engelen, K and Gordon, S and Renaud, J and Topp, E}, title = {Novel Antibiotic Resistance Determinants from Agricultural Soil Exposed to Antibiotics Widely Used in Human Medicine and Animal Farming.}, journal = {Applied and environmental microbiology}, volume = {83}, number = {16}, pages = {}, pmid = {28625995}, issn = {1098-5336}, abstract = {Antibiotic resistance has emerged globally as one of the biggest threats to human and animal health. Although the excessive use of antibiotics is recognized as accelerating the selection for resistance, there is a growing body of evidence suggesting that natural environments are "hot spots" for the development of both ancient and contemporary resistance mechanisms. Given that pharmaceuticals can be entrained onto agricultural land through anthropogenic activities, this could be a potential driver for the emergence and dissemination of resistance in soil bacteria. 
Using functional metagenomics, we interrogated the "resistome" of bacterial communities found in a collection of Canadian agricultural soil, some of which had been receiving antibiotics widely used in human medicine (macrolides) or food animal production (sulfamethazine, chlortetracycline, and tylosin) for up to 16 years. Of the 34 new antibiotic resistance genes (ARGs) recovered, the majority were predicted to encode (multi)drug efflux systems, while a few share little to no homology with established resistance determinants. We characterized several novel gene products, including putative enzymes that can confer high-level resistance against aminoglycosides, sulfonamides, and broad range of beta-lactams, with respect to their resistance mechanisms and clinical significance. By coupling high-resolution proteomics analysis with functional metagenomics, we discovered an unusual peptide, PPP[AZI 4], encoded within an alternative open reading frame not predicted by bioinformatics tools. Expression of the proline-rich PPP[AZI 4] can promote resistance against different macrolides but not other ribosome-targeting antibiotics, implicating a new macrolide-specific resistance mechanism that could be fundamentally linked to the evolutionary design of this peptide. IMPORTANCE Antibiotic resistance is a clinical phenomenon with an evolutionary link to the microbial pangenome. Genes and protogenes encoding specialized and potential resistance mechanisms are abundant in natural environments, but understanding of their identity and genomic context remains limited. Our discovery of several previously unknown antibiotic resistance genes from uncultured soil microorganisms indicates that soil is a significant reservoir of resistance determinants, which, once acquired and "repurposed" by pathogenic bacteria, can have serious impacts on therapeutic outcomes. This study provides valuable insights into the diversity and identity of resistance within the soil microbiome. 
The finding of a novel peptide-mediated resistance mechanism involving an unpredicted gene product also highlights the usefulness of integrating proteomics analysis into metagenomics-driven gene discovery.}, } @article {pmid28624550, year = {2017}, author = {Kayansamruaj, P and Dong, HT and Hirono, I and Kondo, H and Senapin, S and Rodkhum, C}, title = {Comparative genome analysis of fish pathogen Flavobacterium columnare reveals extensive sequence diversity within the species.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {54}, number = {}, pages = {7-17}, doi = {10.1016/j.meegid.2017.06.012}, pmid = {28624550}, issn = {1567-7257}, mesh = {Animals ; Fish Diseases/*microbiology ; Fishes/microbiology ; Flavobacteriaceae Infections/*microbiology/*veterinary ; Flavobacterium/classification/*genetics ; Genome, Bacterial/*genetics ; Polymorphism, Single Nucleotide/*genetics ; Sequence Analysis, DNA ; }, abstract = {Flavobacterium columnare is one of the deadliest fish pathogens causing devastating mortality in various freshwater fish species globally. To gain an insight into bacterial genomic contents and structures, comparative genome analyses were performed using the reference and newly sequenced genomes of F. columnare including genomovar I, II and I/II strains isolated from Thailand, Europe and the USA. Bacterial genomes varied in size from 3.09 to 3.39Mb (2714 to 3101 CDSs). The pan-genome analysis revealed open pan-genome nature of F. columnare strains, which possessed at least 4953 genes and tended to increase progressively with the addition of a new genome. Genomic islands (GIs) present in bacterial genomes were diverse, in which 65% (39 out of 60) of possible GIs were strain-specific. A CRISPR/cas investigation indicated at least two different CRISPR systems with varied spacer profiles. 
On the other hand, putative virulence genes, including those related to gliding motility, type IX secretion system (T9SS), outer membrane proteins (Omp), were equally distributed among F. columnare strains. The MLSA scheme categorized bacterial strains into nine different sequence types (ST 9-17). Phylogenetic analyses based on either 16S rRNA, MLSA and concatenated SNPs of core genome revealed the diversity of F. columnare strains. DNA homology analysis indicated that the estimated digital DNA-DNA hybridization (dDDH) between strains of genomovar I and II can be as low as 42.6%, while the three uniquely tilapia-originated strains from Thailand (1214, NK01 and 1215) were clearly dissimilar to other F. columnare strains as the dDDH values were only 27.7-30.4%. Collectively, this extensive diversity among bacterial strains suggested that species designation of F. columnare would potentially require re-emendation.}, } @article {pmid28620358, year = {2017}, author = {Soberón-Chávez, G and Alcaraz, LD and Morales, E and Ponce-Soto, GY and Servín-González, L}, title = {The Transcriptional Regulators of the CRP Family Regulate Different Essential Bacterial Functions and Can Be Inherited Vertically and Horizontally.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {959}, pmid = {28620358}, issn = {1664-302X}, abstract = {One of the best-studied transcriptional regulatory proteins in bacteria is the Escherichia coli catabolite repressor protein (CRP) that when complexed with 3'-5'-cyclic AMP (cAMP) changes its conformation and interacts with specific DNA-sequences. CRP DNA-binding can result in positive or negative regulation of gene expression depending on the position of its interaction with respect to RNA polymerase binding site. 
The aim of this work is to review the biological role and phylogenetic relations that some members of the CRP family of transcriptional regulators (also known as cAMP receptor protein family) have in different bacterial species. This work is not intended to give an exhaustive revision of bacterial CRP-orthologs, but to provide examples of the role that these proteins play in the expression of genes that are fundamental for the life style of some bacterial species. We highlight the conservation of their structural characteristics and of their binding to conserved-DNA sequences, in contrast to their very diverse repertoire of gene activation. CRP activates a wide variety of fundamental genes for the biological characteristic of each bacterial species, which in several instances form part of their core-genome (defined as the gene sequences present in all members of a bacterial species). We present evidence that support the fact that some of the transcriptional regulators that belong to the CRP family in different bacterial species, and some of the genes that are regulated by them, can be inherited by horizontal gene transfer. These data are discussed in the framework of bacterial evolution models.}, } @article {pmid28615683, year = {2017}, author = {Collins, FWJ and O'Connor, PM and O'Sullivan, O and Gómez-Sala, B and Rea, MC and Hill, C and Ross, RP}, title = {Bacteriocin Gene-Trait matching across the complete Lactobacillus Pan-genome.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {3481}, pmid = {28615683}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/*isolation & purification ; Bacteriocins/*genetics ; Lactobacillus/*genetics ; Operon ; }, abstract = {Lactobacilli constitute a large genus of Gram-positive lactic acid bacteria which have widespread roles ranging from gut commensals to starters in fermented foods. 
A combination of in silico and laboratory-based screening allowed us to determine the overall bacteriocin producing potential of representative strains of each species of the genus. The genomes of 175 lactobacilli and 38 associated species were screened for the presence of antimicrobial producing genes and combined with screening for antimicrobial activity against a range of indicators. There also appears to be a link between the strains' environment and bacteriocin production, with those from the animal and human microbiota encoding over twice as many bacteriocins as those from other sources. Five novel bacteriocins were identified belonging to differing bacteriocin classes, including two-peptide bacteriocins (muricidin and acidocin X) and circular bacteriocins (paracyclicin). In addition, there was a clear clustering of helveticin type bacteriolysins in the Lactobacillus acidophilus group of species. This combined in silico and in vitro approach to screening has demonstrated the true diversity and complexity of bacteriocins across the genus. 
It also highlights their biological importance in terms of communication and competition between closely related strains in diverse complex microbial environments.}, } @article {pmid28615466, year = {2017}, author = {Howell, KJ and Weinert, LA and Peters, SE and Wang, J and Hernandez-Garcia, J and Chaudhuri, RR and Luan, SL and Angen, Ø and Aragon, V and Williamson, SM and Langford, PR and Rycroft, AN and Wren, BW and Maskell, DJ and Tucker, AW}, title = {"Pathotyping" Multiplex PCR Assay for Haemophilus parasuis: a Tool for Prediction of Virulence.}, journal = {Journal of clinical microbiology}, volume = {55}, number = {9}, pages = {2617-2628}, pmid = {28615466}, issn = {1098-660X}, support = {BB/G019177/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 109385/Z/15/Z//Wellcome Trust/United Kingdom ; BB/G018553/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; //Wellcome Trust/United Kingdom ; BB/G019274/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/G020744/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Genome/genetics ; Haemophilus Infections/*diagnosis/microbiology/*veterinary ; Haemophilus parasuis/*genetics/isolation & purification/*pathogenicity ; Molecular Diagnostic Techniques/*methods ; Multiplex Polymerase Chain Reaction ; Swine ; Swine Diseases/*diagnosis/microbiology ; Virulence/genetics ; }, abstract = {Haemophilus parasuis is a diverse bacterial species that is found in the upper respiratory tracts of pigs and can also cause Glässer's disease and pneumonia. A previous pangenome study of H. parasuis identified 48 genes that were associated with clinical disease. Here, we describe the development of a generalized linear model (termed a pathotyping model) to predict the potential virulence of isolates of H. parasuis based on a subset of 10 genes from the pangenome. 
A multiplex PCR (mPCR) was constructed based on these genes, the results of which were entered into the pathotyping model to yield a prediction of virulence. This new diagnostic mPCR was tested on 143 field isolates of H. parasuis that had previously been whole-genome sequenced and a further 84 isolates from the United Kingdom from cases of H. parasuis-related disease in pigs collected between 2013 and 2014. The combination of the mPCR and the pathotyping model predicted the virulence of an isolate with 78% accuracy for the original isolate collection and 90% for the additional isolate collection, providing an overall accuracy of 83% (81% sensitivity and 93% specificity) compared with that of the "current standard" of detailed clinical metadata. This new pathotyping assay has the potential to aid surveillance and disease control in addition to serotyping data.}, } @article {pmid28611758, year = {2017}, author = {Cao, P and Guo, D and Liu, J and Jiang, Q and Xu, Z and Qu, L}, title = {Genome-Wide Analyses Reveal Genes Subject to Positive Selection in Pasteurella multocida.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {961}, pmid = {28611758}, issn = {1664-302X}, abstract = {Pasteurella multocida, a Gram-negative opportunistic pathogen, has led to a broad range of diseases in mammals and birds, including fowl cholera in poultry, pneumonia and atrophic rhinitis in swine and rabbit, hemorrhagic septicemia in cattle, and bite infections in humans. In order to better interpret the genetic diversity and adaptation evolution of this pathogen, seven genomes of P. multocida strains isolated from fowls, rabbit and pigs were determined by using high-throughput sequencing approach. Together with publicly available P. multocida genomes, evolutionary features were systematically analyzed in this study. Clustering of 70,565 protein-coding genes showed that the pangenome of 33 P. 
multocida strains was composed of 1,602 core genes, 1,364 dispensable genes, and 1,070 strain-specific genes. Of these, we identified a full spectrum of genes related to virulence factors and revealed genetic diversity of these potential virulence markers across P. multocida strains, e.g., bcbAB, fcbC, lipA, bexDCA, ctrCD, lgtA, lgtC, lic2A involved in biogenesis of surface polysaccharides, hsf encoding autotransporter adhesin, and fhaB encoding filamentous haemagglutinin. Furthermore, based on genome-wide positive selection scanning, a total of 35 genes were subject to strong selection pressure. Extensive analyses of protein subcellular location indicated that membrane-associated genes were highly abundant among all positively selected genes. The detected amino acid sites undergoing adaptive selection were preferably located in extracellular space, perhaps associated with bacterial evasion of host immune responses. Our findings shed more light on conservation and distribution of virulence-associated genes across P. multocida strains. Meanwhile, this study provides a genetic context for future researches on the mechanism of adaptive evolution in P. 
multocida.}, } @article {pmid28599670, year = {2017}, author = {Kong, LC and Guo, X and Wang, Z and Gao, YH and Jia, BY and Liu, SM and Ma, HX}, title = {Whole genome sequencing of an ExPEC that caused fatal pneumonia at a pig farm in Changchun, China.}, journal = {BMC veterinary research}, volume = {13}, number = {1}, pages = {169}, pmid = {28599670}, issn = {1746-6148}, mesh = {Animals ; Bacterial Capsules/metabolism ; China ; DNA, Bacterial ; Escherichia coli Infections/microbiology/pathology/*veterinary ; Extraintestinal Pathogenic Escherichia coli/classification/drug effects/*genetics/isolation & purification ; Farms ; *Genome, Bacterial ; Lung/microbiology/pathology ; Mice ; Microbial Sensitivity Tests ; Multigene Family ; Phylogeny ; Pneumonia, Bacterial/microbiology/*veterinary ; Polysaccharides, Bacterial/biosynthesis ; Sequence Analysis, DNA/veterinary ; Serotyping ; Swine ; Swine Diseases/*microbiology/pathology ; Virulence/genetics ; }, abstract = {BACKGROUND: In recent years, highly frequent swine respiratory diseases have been caused by extraintestinal pathogenic Escherichia coli (ExPEC) in China. Due to this increase in ExPECs, this bacterial pathogen has become a threat to the development of the Chinese swine industry. To investigate ExPEC pathogenesis, we isolated a strain (named SLPE) from lesioned porcine lungs from Changchun in China, reported the draft genome and performed comparative genomic analyses.

RESULTS: Based on the gross post-mortem examination, bacterial isolation, animal regression test and 16S rRNA gene sequence analysis, the pathogenic bacterium was identified as an ExPEC. The SLPE draft genome was 4.9 Mb with a G + C content of 51.7%. The phylogenomic comparison indicated that the SLPE strain belongs to the B1 monophyletic phylogroups and that its closest relative is Avian Pathogenic Escherichia coli (APEC) O78. However, the distribution diagram of the pan-genome virulence genes demonstrated significant differences between SLPE and APEC O78. We also identified a capsular polysaccharide synthesis gene cluster (CPS) in the SLPE strain genomes using blastp.

CONCLUSIONS: We isolated the ExPEC (SLPE) from swine lungs in China, performed the whole genome sequencing and compared the sequence with other Escherichia coli (E. coli). The comparative genomic analysis revealed several genes including several virulence factors that are ExPEC strain-specific, such as fimbrial adhesins (papG II), ireA, pgtP, hlyF, the pix gene cluster and fecR for their further study. We found a CPS in the SLPE strain genomes for the first time, and this CPS is closely related to the CPS from Klebsiella pneumoniae.}, } @article {pmid28594829, year = {2017}, author = {G C C L Cardenas, R and D Linhares, N and L Ferreira, R and Pena, SDJ}, title = {Mendel,MD: A user-friendly open-source web tool for analyzing WES and WGS in the diagnosis of patients with Mendelian disorders.}, journal = {PLoS computational biology}, volume = {13}, number = {6}, pages = {e1005520}, pmid = {28594829}, issn = {1553-7358}, mesh = {*Databases, Genetic ; *Genetic Diseases, Inborn/diagnosis/genetics ; Genome ; Genomics/*methods ; High-Throughput Nucleotide Sequencing ; Humans ; *Internet ; *Software ; }, abstract = {Whole exome and whole genome sequencing have both become widely adopted methods for investigating and diagnosing human Mendelian disorders. As pangenomic agnostic tests, they are capable of more accurate and agile diagnosis compared to traditional sequencing methods. This article describes new software called Mendel,MD, which combines multiple types of filter options and makes use of regularly updated databases to facilitate exome and genome annotation, the filtering process and the selection of candidate genes and variants for experimental validation and possible diagnosis. This tool offers a user-friendly interface, and leads clinicians through simple steps by limiting the number of candidates to achieve a final diagnosis of a medical genetics case. 
A useful innovation is the "1-click" method, which enables listing all the relevant variants in genes present at OMIM for perusal by clinicians. Mendel,MD was experimentally validated using clinical cases from the literature and was tested by students at the Universidade Federal de Minas Gerais, at GENE-Núcleo de Genética Médica in Brazil and at the Children's University Hospital in Dublin, Ireland. We show in this article how it can simplify and increase the speed of identifying the culprit mutation in each of the clinical cases that were received for further investigation. Mendel,MD proved to be a reliable web-based tool, being open-source and time efficient for identifying the culprit mutation in different clinical cases of patients with Mendelian Disorders. It is also freely accessible for academic users on the following URL: https://mendelmd.org.}, } @article {pmid28593194, year = {2017}, author = {Gordon, DM and Geyik, S and Clermont, O and O'Brien, CL and Huang, S and Abayasekara, C and Rajesh, A and Kennedy, K and Collignon, P and Pavli, P and Rodriguez, C and Johnston, BD and Johnson, JR and Decousser, JW and Denamur, E}, title = {Fine-Scale Structure Analysis Shows Epidemic Patterns of Clonal Complex 95, a Cosmopolitan Escherichia coli Lineage Responsible for Extraintestinal Infection.}, journal = {mSphere}, volume = {2}, number = {3}, pages = {}, pmid = {28593194}, issn = {2379-5042}, abstract = {The Escherichia coli lineage known as clonal complex 95 (CC95) is a cosmopolitan human-associated lineage responsible for a significant fraction of extraintestinal infections of humans. Whole-genome sequence data of 200 CC95 strains from various origins enabled determination of the CC95 pangenome. The pangenome analysis revealed that strains of the complex could be assigned to one of five subgroups that vary in their serotype, extraintestinal virulence, virulence gene content, and antibiotic resistance gene profile. 
A total of 511 CC95 strains isolated from humans living in France, Australia, and the United States were screened for their subgroup membership using a PCR-based method. The CC95 subgroups are nonrandomly distributed with respect to their geographic origin. The relative frequency of the subgroups was shown to change through time, although the nature of the changes varies with continent. Strains of the subgroups are also nonrandomly distributed with respect to source of isolation (blood, urine, or feces) and host sex. Collectively, the evidence indicates that although strains belonging to CC95 may be cosmopolitan, human movement patterns have been insufficient to homogenize the distribution of the CC95 subgroups. Rather, the manner in which CC95 strains evolve appears to vary both spatially and temporally. Although CC95 strains appeared globally as pandemic, fine-scale structure analysis shows epidemic patterns of the CC95 subgroups. Furthermore, the observation that the relative frequency of CC95 subgroups at a single locality has changed over time indicates that the relative fitness of the subgroups has changed. IMPORTANCE Escherichia coli clonal complex 95 represents a cosmopolitan, genetically diverse lineage, and the extensive substructure observed in this lineage is epidemiologically and clinically relevant. The frequency with which CC95 strains are responsible for extraintestinal infection appears to have been stable over the past 15 years. However, the different subgroups identified within this lineage have an epidemic structure depending on the host, sample, continent, and time. 
Thus, the evolution and spread of strains belonging to CC95 are very different from those of another cosmopolitan human-associated clonal complex, CC131, which has increased significantly in frequency as a cause of extraintestinal infection over the past 15 years due to the evolution and spread of two very closely related, nearly monomorphic lineages.}, } @article {pmid28592797, year = {2017}, author = {Zheng, W and Tan, MF and Old, LA and Paterson, IC and Jakubovics, NS and Choo, SW}, title = {Distinct Biological Potential of Streptococcus gordonii and Streptococcus sanguinis Revealed by Comparative Genome Analysis.}, journal = {Scientific reports}, volume = {7}, number = {1}, pages = {2949}, pmid = {28592797}, issn = {2045-2322}, mesh = {Base Composition ; Comparative Genomic Hybridization ; Computational Biology/methods ; Genome Size ; *Genome, Bacterial ; *Genomics/methods ; Molecular Sequence Annotation ; Phylogeny ; Prophages/genetics ; Streptococcal Infections/*microbiology ; Streptococcus gordonii/*physiology/virology ; Streptococcus sanguis/*physiology/virology ; Virulence ; Virulence Factors/genetics ; }, abstract = {Streptococcus gordonii and Streptococcus sanguinis are pioneer colonizers of dental plaque and important agents of bacterial infective endocarditis (IE). To gain a greater understanding of these two closely related species, we performed comparative analyses on 14 new S. gordonii and 5 S. sanguinis strains using various bioinformatics approaches. We revealed S. gordonii and S. sanguinis harbor open pan-genomes and share generally high sequence homology and number of core genes including virulence genes. However, we observed subtle differences in genomic islands and prophages between the species. Comparative pathogenomics analysis identified S. sanguinis strains have genes encoding IgA proteases, mitogenic factor deoxyribonucleases, nickel/cobalt uptake and cobalamin biosynthesis. On the contrary, genomic islands of S. 
gordonii strains contain additional copies of comCDE quorum-sensing system components involved in genetic competence. Two distinct polysaccharide locus architectures were identified, one of which was exclusively present in S. gordonii strains. The first evidence of genes encoding the CylA and CylB system by the α-haemolytic S. gordonii is presented. This study provides new insights into the genetic distinctions between S. gordonii and S. sanguinis, which yields understanding of tooth surfaces colonization and contributions to dental plaque formation, as well as their potential roles in the pathogenesis of IE.}, } @article {pmid28567447, year = {2017}, author = {Kumar, R and Verma, H and Haider, S and Bajaj, A and Sood, U and Ponnusamy, K and Nagar, S and Shakarad, MN and Negi, RK and Singh, Y and Khurana, JP and Gilbert, JA and Lal, R}, title = {Comparative Genomic Analysis Reveals Habitat-Specific Genes and Regulatory Hubs within the Genus Novosphingobium.}, journal = {mSystems}, volume = {2}, number = {3}, pages = {}, pmid = {28567447}, issn = {2379-5077}, abstract = {Species belonging to the genus Novosphingobium are found in many different habitats and have been identified as metabolically versatile. Through comparative genomic analysis, we identified habitat-specific genes and regulatory hubs that could determine habitat selection for Novosphingobium spp. Genomes from 27 Novosphingobium strains isolated from diverse habitats such as rhizosphere soil, plant surfaces, heavily contaminated soils, and marine and freshwater environments were analyzed. Genome size and coding potential were widely variable, differing significantly between habitats. Phylogenetic relationships between strains were less likely to describe functional genotype similarity than the habitat from which they were isolated. 
In this study, strains (19 out of 27) with a recorded habitat of isolation, and at least 3 representative strains per habitat, comprised four ecological groups—rhizosphere, contaminated soil, marine, and freshwater. Sulfur acquisition and metabolism were the only core genomic traits to differ significantly in proportion between these ecological groups; for example, alkane sulfonate (ssuABCD) assimilation was found exclusively in all of the rhizospheric isolates. When we examined osmolytic regulation in Novosphingobium spp. through ectoine biosynthesis, which was assumed to be marine habitat specific, we found that it was also present in isolates from contaminated soil, suggesting its relevance beyond the marine system. Novosphingobium strains were also found to harbor a wide variety of mono- and dioxygenases, responsible for the metabolism of several aromatic compounds, suggesting their potential to act as degraders of a variety of xenobiotic compounds. Protein-protein interaction analysis revealed β-barrel outer membrane proteins as habitat-specific hubs in each of the four habitats—freshwater (Saro_1868), marine water (PP1Y_AT17644), rhizosphere (PMI02_00367), and soil (V474_17210). These outer membrane proteins could play a key role in habitat demarcation and extend our understanding of the metabolic versatility of the Novosphingobium species. IMPORTANCE This study highlights the significant role of a microorganism's genetic repertoire in structuring the similarity between Novosphingobium strains. The results suggest that the phylogenetic relationships were mostly influenced by metabolic trait enrichment, which is possibly governed by the microenvironment of each microbe's respective niche. Using core genome analysis, the enrichment of a certain set of genes specific to a particular habitat was determined, which provided insights on the influence of habitat on the distribution of metabolic traits in Novosphingobium strains. 
We also identified habitat-specific protein hubs, which suggested delineation of Novosphingobium strains based on their habitat. Examining the available genomes of ecologically diverse bacterial species and analyzing the habitat-specific genes are useful for understanding the distribution and evolution of functional and phylogenetic diversity in the genus Novosphingobium.}, } @article {pmid28567052, year = {2017}, author = {Minio, A and Lin, J and Gaut, BS and Cantu, D}, title = {How Single Molecule Real-Time Sequencing and Haplotype Phasing Have Enabled Reference-Grade Diploid Genome Assembly of Wine Grapes.}, journal = {Frontiers in plant science}, volume = {8}, number = {}, pages = {826}, pmid = {28567052}, issn = {1664-462X}, } @article {pmid28560114, year = {2017}, author = {Lawley, B and Munro, K and Hughes, A and Hodgkinson, AJ and Prosser, CG and Lowry, D and Zhou, SJ and Makrides, M and Gibson, RA and Lay, C and Chew, C and Lee, PS and Wong, KH and Tannock, GW}, title = {Differentiation of Bifidobacterium longum subspecies longum and infantis by quantitative PCR using functional gene targets.}, journal = {PeerJ}, volume = {5}, number = {}, pages = {e3375}, pmid = {28560114}, issn = {2167-8359}, abstract = {BACKGROUND: Members of the genus Bifidobacterium are abundant in the feces of babies during the exclusively-milk-diet period of life. Bifidobacterium longum is reported to be a common member of the infant fecal microbiota. However, B. longum is composed of three subspecies, two of which are represented in the bowel microbiota (B. longum subsp. longum; B. longum subsp. infantis). B. longum subspecies are not differentiated in many studies, so that their prevalence and relative abundances are not accurately known. This may largely be due to difficulty in assigning subspecies identity using DNA sequences of 16S rRNA or tuf genes that are commonly used in bacterial taxonomy.

METHODS: We developed a qPCR method targeting the sialidase gene (subsp. infantis) and sugar kinase gene (subsp. longum) to differentiate the subspecies using specific primers and probes. Specificity of the primers/probes was tested by in silico, pangenomic search, and using DNA from standard cultures of bifidobacterial species. The utility of the method was further examined using DNA from feces that had been collected from infants inhabiting various geographical regions.

RESULTS: A pangenomic search of the NCBI genomic database showed that the PCR primers/probes targeted only the respective genes of the two subspecies. The primers/probes showed total specificity when tested against DNA extracted from the gold standard strains (type cultures) of bifidobacterial species detected in infant feces. Use of the qPCR method with DNA extracted from the feces of infants of different ages, delivery method and nutrition, showed that subsp. infantis was detectable (0-32.4% prevalence) in the feces of Australian (n = 90), South-East Asian (n = 24), and Chinese babies (n = 91), but in all cases at low abundance (<0.01-4.6%) compared to subsp. longum (0.1-33.7% abundance; 21.4-100% prevalence).

DISCUSSION: Our qPCR method differentiates B. longum subspecies longum and infantis using characteristic functional genes. It can be used as an identification aid for isolates of bifidobacteria, as well as in determining prevalence and abundance of the subspecies in feces. The method should thus be useful in ecological studies of the infant gut microbiota during early life where an understanding of the ecology of bifidobacterial species may be important in developing interventions to promote infant health.}, } @article {pmid28542514, year = {2017}, author = {Pantoja, Y and Pinheiro, K and Veras, A and Araújo, F and Lopes de Sousa, A and Guimarães, LC and Silva, A and Ramos, RTJ}, title = {PanWeb: A web interface for pan-genomic analysis.}, journal = {PloS one}, volume = {12}, number = {5}, pages = {e0178154}, pmid = {28542514}, issn = {1932-6203}, mesh = {Algorithms ; Computational Biology ; Computer Graphics ; Databases, Genetic ; Escherichia coli/classification/genetics ; Genome, Bacterial ; *Genomics ; High-Throughput Nucleotide Sequencing ; Internet ; Phylogeny ; Programming Languages ; *Software ; *User-Computer Interface ; }, abstract = {With increased production of genomic data since the advent of next-generation sequencing (NGS), there has been a need to develop new bioinformatics tools and areas, such as comparative genomics. In comparative genomics, the genetic material of an organism is directly compared to that of another organism to better understand biological species. Moreover, the exponentially growing number of deposited prokaryote genomes has enabled the investigation of several genomic characteristics that are intrinsic to certain species. Thus, a new approach to comparative genomics, termed pan-genomics, was developed. In pan-genomics, various organisms of the same species or genus are compared. 
Currently, there are many tools that can perform pan-genomic analyses, such as PGAP (Pan-Genome Analysis Pipeline), Panseq (Pan-Genome Sequence Analysis Program) and PGAT (Prokaryotic Genome Analysis Tool). Among these software tools, PGAP was developed in the Perl scripting language and its reliance on UNIX platform terminals and its requirement for an extensive parameterized command line can become a problem for users without previous computational knowledge. Thus, the aim of this study was to develop a web application, known as PanWeb, that serves as a graphical interface for PGAP. In addition, using the output files of the PGAP pipeline, the application generates graphics using custom-developed scripts in the R programming language. PanWeb is freely available at http://www.computationalbiology.ufpa.br/panweb.}, } @article {pmid28534829, year = {2017}, author = {Finke, JF and Winget, DM and Chan, AM and Suttle, CA}, title = {Variation in the Genetic Repertoire of Viruses Infecting Micromonas pusilla Reflects Horizontal Gene Transfer and Links to Their Environmental Distribution.}, journal = {Viruses}, volume = {9}, number = {5}, pages = {}, pmid = {28534829}, issn = {1999-4915}, mesh = {Base Sequence ; Chlorophyta/classification/*genetics/*virology ; DNA Viruses/*genetics ; DNA-Directed DNA Polymerase/genetics ; Environment ; Gene Transfer, Horizontal/*genetics ; Genes, Viral ; Genetic Variation ; Genome, Viral/*genetics ; Marine Biology ; Phycodnaviridae/classification/*genetics/isolation & purification/pathogenicity ; Phylogeny ; Phytoplankton/virology ; }, abstract = {Prasinophytes, a group of eukaryotic phytoplankton, has a global distribution and is infected by large double-stranded DNA viruses (prasinoviruses) in the family Phycodnaviridae. This study examines the genetic repertoire, phylogeny, and environmental distribution of phycodnaviruses infecting Micromonas pusilla, other prasinophytes and chlorophytes. 
Based on comparisons among the genomes of viruses infecting M. pusilla and other phycodnaviruses, as well as the genome from a host isolate of M. pusilla, viruses infecting M. pusilla (MpVs) share a limited set of core genes, but vary strongly in their flexible pan-genome that includes numerous metabolic genes, such as those associated with amino acid synthesis and sugar manipulation. Surprisingly, few of these presumably host-derived genes are shared with M. pusilla, but rather have their closest non-viral homologue in bacteria and other eukaryotes, indicating horizontal gene transfer. A comparative analysis of full-length DNA polymerase (DNApol) genes from prasinoviruses with their overall gene content, demonstrated that the phylogeny of DNApol gene fragments reflects the gene content of the viruses; hence, environmental DNApol gene sequences from prasinoviruses can be used to infer their overall genetic repertoire. Thus, the distribution of virus ecotypes across environmental samples based on DNApol sequences implies substantial underlying differences in gene content that reflect local environmental conditions. 
Moreover, the high diversity observed in the genetic repertoire of prasinoviruses has been driven by horizontal gene transfer throughout their evolutionary history, resulting in a broad suite of functional capabilities and a high diversity of prasinovirus ecotypes.}, } @article {pmid28521770, year = {2017}, author = {Rand, KD and Grytten, I and Nederbragt, AJ and Storvik, GO and Glad, IK and Sandve, GK}, title = {Coordinates and intervals in graph-based reference genomes.}, journal = {BMC bioinformatics}, volume = {18}, number = {1}, pages = {263}, pmid = {28521770}, issn = {1471-2105}, mesh = {Algorithms ; *Computer Graphics ; Genetic Loci ; *Genome, Human ; Genomics/*methods ; Humans ; Internet ; RNA, Messenger/genetics/metabolism ; Sequence Analysis, DNA ; Software ; }, abstract = {BACKGROUND: It has been proposed that future reference genomes should be graph structures in order to better represent the sequence diversity present in a species. However, there is currently no standard method to represent genomic intervals, such as the positions of genes or transcription factor binding sites, on graph-based reference genomes.

RESULTS: We formalize offset-based coordinate systems on graph-based reference genomes and introduce methods for representing intervals on these reference structures. We show the advantage of our methods by representing genes on a graph-based representation of the newest assembly of the human genome (GRCh38) and its alternative loci for regions that are highly variable.

CONCLUSION: More complex reference genomes, containing alternative loci, require methods to represent genomic data on these structures. Our proposed notation for genomic intervals makes it possible to fully utilize the alternative loci of the GRCh38 assembly and potential future graph-based reference genomes. We have made a Python package for representing such intervals on offset-based coordinate systems, available at https://github.com/uio-cels/offsetbasedgraph . An interactive web-tool using this Python package to visualize genes on a graph created from GRCh38 is available at https://github.com/uio-cels/genomicgraphcoords .}, } @article {pmid28490487, year = {2017}, author = {Esaiassen, E and Hjerde, E and Cavanagh, JP and Simonsen, GS and Klingenberg, C}, title = {Bifidobacterium Bacteremia: Clinical Characteristics and a Genomic Approach To Assess Pathogenicity.}, journal = {Journal of clinical microbiology}, volume = {55}, number = {7}, pages = {2234-2248}, pmid = {28490487}, issn = {1098-660X}, mesh = {Adult ; Aged ; Aged, 80 and over ; Anti-Bacterial Agents/pharmacology ; Bacteremia/*microbiology ; Bifidobacterium/classification/*genetics/isolation & purification/*pathogenicity ; Female ; Genomics ; Gram-Positive Bacterial Infections/*microbiology ; Humans ; Infant ; Infant, Newborn ; Male ; Microbial Sensitivity Tests ; Middle Aged ; Norway ; Retrospective Studies ; Virulence Factors/genetics ; *Whole Genome Sequencing ; }, abstract = {Bifidobacteria are commensals that colonize the orogastrointestinal tract and rarely cause invasive human infections. However, an increasing number of bifidobacterial blood culture isolates has lately been observed in Norway. In order to investigate the pathogenicity of the Bifidobacterium species responsible for bacteremia, we studied Bifidobacterium isolates from 15 patients for whom cultures of blood obtained from 2013 to 2015 were positive. 
We collected clinical data and analyzed phenotypic and genotypic antibiotic susceptibility. All isolates (11 Bifidobacterium longum, 2 B. breve, and 2 B. animalis isolates) were subjected to whole-genome sequencing. The 15 patients were predominantly in the extreme lower or upper age spectrum, many were severely immunocompromised, and 11 of 15 had gastrointestinal tract-related conditions. In two elderly patients, the Bifidobacterium bacteremia caused a sepsis-like picture, interpreted as the cause of death. Most bifidobacterial isolates had low MICs (≤0.5 mg/liter) to beta-lactam antibiotics, vancomycin, and clindamycin and relatively high MICs to ciprofloxacin and metronidazole. We performed a pangenomic comparison of invasive and noninvasive B. longum isolates based on 65 sequences available from GenBank and the sequences of 11 blood culture isolates from this study. Functional annotation identified unique genes among both invasive and noninvasive isolates of Bifidobacterium. Phylogenetic clusters of invasive isolates were identified for a subset of the B. longum subsp. longum isolates. However, there was no difference in the number of putative virulence genes between invasive and noninvasive isolates. In conclusion, Bifidobacterium has an invasive potential in the immunocompromised host and may cause a sepsis-like picture. 
Using comparative genomics, we could not delineate specific pathogenicity traits characterizing invasive isolates.}, } @article {pmid28475226, year = {2017}, author = {Jourdy, Y and Chatron, N and Fretigny, M and Carage, ML and Chambost, H and Claeyssens-Donadel, S and Roussel-Robert, V and Negrier, C and Sanlaville, D and Vinciguerra, C}, title = {Molecular cytogenetic characterization of five F8 complex rearrangements: utility for haemophilia A genetic counselling.}, journal = {Haemophilia : the official journal of the World Federation of Hemophilia}, volume = {23}, number = {4}, pages = {e316-e323}, doi = {10.1111/hae.13218}, pmid = {28475226}, issn = {1365-2516}, mesh = {*Cytogenetic Analysis ; Factor VIII/*genetics ; Female ; *Gene Rearrangement ; *Genetic Counseling ; Hemophilia A/diagnosis/*genetics ; Humans ; Introns/genetics ; Male ; Oligonucleotide Array Sequence Analysis ; }, abstract = {BACKGROUND: Genomic inversions are usually balanced, but unusual patterns have been described in haemophilia A (HA) patients for intron 22 (Inv22) and intron 1 (Inv1) inversions leading to the hypothesis of more complex rearrangements involving deletions or duplications.

AIM: To characterize five abnormal patterns either in Southern blot and long-range PCR for Inv22 or in PCR for Inv1.

MATERIALS AND METHODS: All patients were studied using cytogenetic microarray analysis (CMA).

RESULTS: In all cases, CMA analysis found that each inversion was associated with complex Xq28 rearrangement. In three patients, CMA analysis showed large duplication ranging from 230 to 1302 kb and encompassing a various number of contiguous genes among which RAB39B. RAB39B duplication is a strong candidate gene for X-linked intellectual disability (XLID). Surprisingly, none of the severe HA patients with RAB39B duplication reported in this study or in the literature exhibited XLID. We hypothesise that F8 complex rearrangement down regulated RAB39B expression. In the two remaining patients, CMA analysis found Xq28 large deletion (from 285 to 522 kb). Moyamoya syndrome was strongly suspected in one of them who carried BRCC3 deletion.

CONCLUSION: Because several F8 neighbouring genes are associated with other pathologies such as XLID and cardiovascular disease, all HA patients where complex Xq28 rearrangement was suspected should be referred to a geneticist for possible utility of a pangenomic study. Such investigation should be carefully considered in genetic counselling in female carriers to assess the risk of transmitting severe HA with a "contiguous gene syndrome".}, } @article {pmid28450852, year = {2017}, author = {Garita-Cambronero, J and Palacio-Bielsa, A and López, MM and Cubero, J}, title = {Pan-Genomic Analysis Permits Differentiation of Virulent and Non-virulent Strains of Xanthomonas arboricola That Cohabit Prunus spp. and Elucidate Bacterial Virulence Factors.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {573}, pmid = {28450852}, issn = {1664-302X}, abstract = {Xanthomonas arboricola is a plant-associated bacterial species that causes diseases on several plant hosts. One of the most virulent pathovars within this species is X. arboricola pv. pruni (Xap), the causal agent of bacterial spot disease of stone fruit trees and almond. Recently, a non-virulent Xap-look-a-like strain isolated from Prunus was characterized and its genome compared to pathogenic strains of Xap, revealing differences in the profile of virulence factors, such as the genes related to the type III secretion system (T3SS) and type III effectors (T3Es). The existence of this atypical strain arouses several questions associated with the abundance, the pathogenicity, and the evolutionary context of X. arboricola on Prunus hosts. After an initial characterization of a collection of Xanthomonas strains isolated from Prunus bacterial spot outbreaks in Spain during the past decade, six Xap-look-a-like strains, that did not cluster with the pathogenic strains of Xap according to a multi locus sequence analysis, were identified. 
Pathogenicity of these strains was analyzed and the genome sequences of two Xap-look-a-like strains, CITA 14 and CITA 124, non-virulent to Prunus spp., were obtained and compared to those available genomes of X. arboricola associated with this host plant. Differences were found among the genomes of the virulent and the Prunus non-virulent strains in several characters related to the pathogenesis process. Additionally, a pan-genomic analysis that included the available genomes of X. arboricola, revealed that the atypical strains associated with Prunus were related to a group of non-virulent or low virulent strains isolated from a wide host range. The repertoire of the genes related to T3SS and T3Es varied among the strains of this cluster and those strains related to the most virulent pathovars of the species, corylina, juglandis, and pruni. This variability provides information about the potential evolutionary process associated to the acquisition of pathogenicity and host specificity in X. arboricola. Finally, based in the genomic differences observed between the virulent and the non-virulent strains isolated from Prunus, a sensitive and specific real-time PCR protocol was designed to detect and identify Xap strains. This method avoids miss-identifications due to atypical strains of X. arboricola that can cohabit Prunus.}, } @article {pmid28450142, year = {2017}, author = {Bhardwaj, T and Somvanshi, P}, title = {Pan-genome analysis of Clostridium botulinum reveals unique targets for drug development.}, journal = {Gene}, volume = {623}, number = {}, pages = {48-62}, doi = {10.1016/j.gene.2017.04.019}, pmid = {28450142}, issn = {1879-0038}, mesh = {Clostridium botulinum/classification/drug effects/*genetics/pathogenicity ; Drug Resistance, Bacterial/*genetics ; *Genome, Bacterial ; Phylogeny ; Virulence/genetics ; }, abstract = {Clostridium botulinum, a formidable pathogen is responsible for the emerging cause of food poisoning cases on the global canvas. 
The endemicity of bacterium Clostridium botulinum is reflected by the sudden hospital outbreaks and increased resistance towards multiple drugs. Therefore, a combined approach of in-silico comparative genomic analysis with statistical analysis was applied to overcome the limitation of bench-top technologies. Owing to the paucity of genomic data available by the advent of third generation sequencing technologies, several 'omics' technologies were applied to understand the underlying evolutionary pattern and lifestyle of the bacterial pathogen using phylogenomics. The calculation of pan-genome, core genome and singletons provides view of genetic repertoire of the bacterial pathogen lineage at the successive level, orthology shared and specific gene subsets. In addition, assessment of pathogenomic potential, resistome, toxin/antitoxin family in successive pathogenic strains of Clostridium botulinum aids in revealing more specific targets for drug design and development.}, } @article {pmid28447607, year = {2017}, author = {Shivaraj, SM and Deshmukh, RK and Rai, R and Bélanger, R and Agrawal, PK and Dash, PK}, title = {Genome-wide identification, characterization, and expression profile of aquaporin gene family in flax (Linum usitatissimum).}, journal = {Scientific reports}, volume = {7}, number = {}, pages = {46137}, pmid = {28447607}, issn = {2045-2322}, mesh = {Amino Acid Sequence ; Aquaporins/chemistry/*genetics/metabolism ; Conserved Sequence ; Evolution, Molecular ; Exons/genetics ; Flax/drug effects/*genetics ; *Gene Expression Profiling ; *Gene Expression Regulation, Plant/drug effects ; Genes, Plant ; *Genome, Plant ; Introns/genetics ; *Multigene Family ; Phylogeny ; Plant Proteins/chemistry/*genetics/metabolism ; Protein Domains ; Protein Structure, Tertiary ; Sequence Alignment ; Silicon/pharmacology ; Subcellular Fractions/metabolism ; }, abstract = {Membrane intrinsic proteins (MIPs) form transmembrane channels and facilitate transport of myriad substrates 
across the cell membrane in many organisms. Majority of plant MIPs have water transporting ability and are commonly referred as aquaporins (AQPs). In the present study, we identified aquaporin coding genes in flax by genome-wide analysis, their structure, function and expression pattern by pan-genome exploration. Cross-genera phylogenetic analysis with known aquaporins from rice, arabidopsis, and poplar showed five subgroups of flax aquaporins representing 16 plasma membrane intrinsic proteins (PIPs), 17 tonoplast intrinsic proteins (TIPs), 13 NOD26-like intrinsic proteins (NIPs), 2 small basic intrinsic proteins (SIPs), and 3 uncharacterized intrinsic proteins (XIPs). Amongst aquaporins, PIPs contained hydrophilic aromatic arginine (ar/R) selective filter but TIP, NIP, SIP and XIP subfamilies mostly contained hydrophobic ar/R selective filter. Analysis of RNA-seq and microarray data revealed high expression of PIPs in multiple tissues, low expression of NIPs, and seed specific expression of TIP3 in flax. Exploration of aquaporin homologs in three closely related Linum species bienne, grandiflorum and leonii revealed presence of 49, 39 and 19 AQPs, respectively. 
The genome-wide identification of aquaporins, first in flax, provides insight to elucidate their physiological and developmental roles in flax.}, } @article {pmid28439658, year = {2017}, author = {Lee, JY and Han, GG and Choi, J and Jin, GD and Kang, SK and Chae, BJ and Kim, EB and Choi, YJ}, title = {Pan-Genomic Approaches in Lactobacillus reuteri as a Porcine Probiotic: Investigation of Host Adaptation and Antipathogenic Activity.}, journal = {Microbial ecology}, volume = {74}, number = {3}, pages = {709-721}, pmid = {28439658}, issn = {1432-184X}, support = {914005-04//Strategic Initiative for Microbiomes in Agriculture and Food, Ministry of Agriculture, Food and Rural Affairs, Republic of Korea/ ; 520160476//2016 Research Grant from Kangwon National University/ ; }, mesh = {Animals ; Feces/microbiology ; *Genome, Bacterial ; Genomics/methods ; Limosilactobacillus reuteri/chemistry/*genetics ; Probiotics/*analysis/metabolism ; *Sus scrofa/genetics/metabolism/microbiology ; }, abstract = {After the introduction of a ban on the use of antibiotic growth promoters (AGPs) for livestock, reuterin-producing Lactobacillus reuteri is getting attention as an alternative to AGPs. In this study, we investigated genetic features of L. reuteri associated with host specificity and antipathogenic effect. We isolated 104 L. reuteri strains from porcine feces, and 16 strains, composed of eight strains exhibiting the higher antipathogenic effect (group HS) and eight strains exhibiting the lower effect (group LS), were selected for genomic comparison. We generated draft genomes of the 16 isolates and investigated their pan-genome together with the 26 National Center for Biotechnology Information-registered genomes. L. reuteri genomes organized six clades with multi-locus sequence analysis, and the clade IV includes the 16 isolates. First, we identified six L. reuteri clade IV-specific genes including three hypothetical protein-coding genes. 
The three annotated genes encode transposases and cell surface proteins, indicating that these genes are the result of adaptation to the host gastrointestinal epithelia and that these host-specific traits were acquired by horizontal gene transfer. We also identified differences between groups HS and LS in the pdu-cbi-cob-hem gene cluster, which is essential for reuterin and cobalamin synthesis, and six genes specific to group HS are revealed. While the strains of group HS possessed all genes of this cluster, LS strains have lost many genes of the cluster. This study provides a deeper understanding of the relationship between probiotic properties and genomic features of L. reuteri.}, } @article {pmid28427330, year = {2017}, author = {Karlsen, C and Hjerde, E and Klemetsen, T and Willassen, NP}, title = {Pan genome and CRISPR analyses of the bacterial fish pathogen Moritella viscosa.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {313}, pmid = {28427330}, issn = {1471-2164}, mesh = {Animals ; CRISPR-Cas Systems/*genetics ; Evolution, Molecular ; Fishes/*microbiology ; *Genomics ; Moritella/*genetics/physiology/virology ; Plasmids/genetics ; Prophages/physiology ; }, abstract = {BACKGROUND: Winter-ulcer Moritella viscosa infections continue to be a significant burden in Atlantic salmon (Salmo salar L.) farming. M. viscosa comprises two main clusters that differ in genetic variation and phenotypes including virulence. Horizontal gene transfer through acquisition and loss of mobile genetic elements (MGEs) is a major driving force of bacterial diversification. To gain insight into genomic traits that could affect sublineage evolution within this bacterium we examined the genome sequences of twelve M. viscosa strains. Matches between M. viscosa clustered, regularly interspaced, short palindromic, repeats and associated cas genes (CRISPR-Cas) were analysed to correlate CRISPR-Cas with adaptive immunity against MGEs.

RESULTS: The comparative genomic analysis of M. viscosa isolates from across the North Atlantic region and from different fish species support delineation of M. viscosa into four phylogenetic lineages. The results showed that M. viscosa carries two distinct variants of the CRISPR-Cas subtype I-F systems and that CRISPR features follow the phylogenetic lineages. A subset of the spacer content match prophage and plasmid genes dispersed among the M. viscosa strains. Further analysis revealed that prophage and plasmid-like element distribution were reflected in the content of the CRISPR-spacer profiles.

CONCLUSIONS: Our data suggests that CRISPR-Cas mediated interactions with MGEs impact genome properties among M. viscosa, and that patterns in spacer and MGE distributions are linked to strain relationships.}, } @article {pmid28414739, year = {2017}, author = {Kant, R and Palva, A and von Ossowski, I}, title = {An in silico pan-genomic probe for the molecular traits behind Lactobacillus ruminis gut autochthony.}, journal = {PloS one}, volume = {12}, number = {4}, pages = {e0175541}, pmid = {28414739}, issn = {1932-6203}, mesh = {Animals ; Cattle ; Computer Simulation ; Flagella/microbiology ; Genome, Bacterial/*genetics ; Genomics/methods ; Horses/microbiology ; Humans ; Intestines/*microbiology ; Lactobacillus/*genetics/*isolation & purification ; Phenotype ; Swine/microbiology ; }, abstract = {As an ecological niche, the mammalian intestine provides the ideal habitat for a variety of bacterial microorganisms. Purportedly, some commensal genera and species offer a beneficial mix of metabolic, protective, and structural processes that help sustain the natural digestive health of the host. Among these sort of gut inhabitants is the Gram-positive lactic acid bacterium Lactobacillus ruminis, a strict anaerobe with both pili and flagella on its cell surface, but also known for being autochthonous (indigenous) to the intestinal environment. Given that the molecular basis of gut autochthony for this species is largely unexplored and unknown, we undertook a study at the genome level to pinpoint some of the adaptive traits behind its colonization behavior. In our pan-genomic probe of L. ruminis, the genomes of nine different strains isolated from human, bovine, porcine, and equine host guts were compiled and compared for in silico analysis. For this, we conducted a geno-phenotypic assessment of protein-coding genes, with an emphasis on those products involved with cell-surface morphology and anaerobic fermentation and respiration. 
We also categorized and examined the core and accessory genes that define the L. ruminis species and its strains. Here, we made an attempt to identify those genes having ecologically relevant phenotypes that might support or bring about intestinal indigenousness.}, } @article {pmid28414633, year = {2017}, author = {Giampetruzzi, A and Saponari, M and Loconsole, G and Boscia, D and Savino, VN and Almeida, RPP and Zicca, S and Landa, BB and Chacón-Diaz, C and Saldarelli, P}, title = {Genome-Wide Analysis Provides Evidence on the Genetic Relatedness of the Emergent Xylella fastidiosa Genotype in Italy to Isolates from Central America.}, journal = {Phytopathology}, volume = {107}, number = {7}, pages = {816-827}, doi = {10.1094/PHYTO-12-16-0420-R}, pmid = {28414633}, issn = {0031-949X}, mesh = {Costa Rica ; DNA, Bacterial/genetics ; Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; *Genome-Wide Association Study ; *Genotype ; Italy ; Phylogeny ; Polymorphism, Single Nucleotide ; Xylella/*genetics ; }, abstract = {Xylella fastidiosa is a plant-pathogenic bacterium recently introduced in Europe that is causing decline in olive trees in the South of Italy. Genetic studies have consistently shown that the bacterial genotype recovered from infected olive trees belongs to the sequence type ST53 within subspecies pauca. This genotype, ST53, has also been reported to occur in Costa Rica. The ancestry of ST53 was recently clarified, showing it contains alleles that are monophyletic with those of subsp. pauca in South America. To more robustly determine the phylogenetic placement of ST53 within X. fastidiosa, we performed a comparative analysis based on single nucleotide polymorphisms (SNPs) and the study of the pan-genome of the 27 currently public available whole genome sequences of X. fastidiosa. The resulting maximum-parsimony and maximum likelihood trees constructed using the SNPs and the pan-genome analysis are consistent with previously described X. 
fastidiosa taxonomy, distinguishing the subsp. fastidiosa, multiplex, pauca, sandyi, and morus. Within the subsp. pauca, the Italian and three Costa Rican isolates, all belonging to ST53, formed a compact phylotype in a clade divergent from the South American pauca isolates, also distinct from the recently described coffee isolate CFBP8072 imported into Europe from Ecuador. These findings were also supported by the gene characterization of a conjugative plasmid shared by all the four ST53 isolates. Furthermore, isolates of the ST53 clade possess an exclusive locus encoding a putative ATP-binding protein belonging to the family of histidine kinase-like ATPase gene, which is not present in isolates from the subspecies multiplex, sandyi, and pauca, but was detected in ST21 isolates of the subspecies fastidiosa from Costa Rica. The clustering and distinctiveness of the ST53 isolates supports the hypothesis of their common origin, and the limited genetic diversity among these isolates suggests this is an emerging clade within subsp. 
pauca.}, } @article {pmid28403535, year = {2017}, author = {Bayer, PE and Hurgobin, B and Golicz, AA and Chan, CK and Yuan, Y and Lee, H and Renton, M and Meng, J and Li, R and Long, Y and Zou, J and Bancroft, I and Chalhoub, B and King, GJ and Batley, J and Edwards, D}, title = {Assembly and comparison of two closely related Brassica napus genomes.}, journal = {Plant biotechnology journal}, volume = {15}, number = {12}, pages = {1602-1610}, pmid = {28403535}, issn = {1467-7652}, support = {BB/E017363/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/E017797/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Brassica napus/genetics ; Expressed Sequence Tags ; Genes, Plant ; *Genome, Plant ; Molecular Sequence Annotation ; Repetitive Sequences, Nucleic Acid ; }, abstract = {As an increasing number of plant genome sequences become available, it is clear that gene content varies between individuals, and the challenge arises to predict the gene content of a species. However, genome comparison is often confounded by variation in assembly and annotation. Differentiating between true gene absence and variation in assembly or annotation is essential for the accurate identification of conserved and variable genes in a species. Here, we present the de novo assembly of the B. napus cultivar Tapidor and comparison with an improved assembly of the Brassica napus cultivar Darmor-bzh. Both cultivars were annotated using the same method to allow comparison of gene content. We identified genes unique to each cultivar and differentiate these from artefacts due to variation in the assembly and annotation. We demonstrate that using a common annotation pipeline can result in different gene predictions, even for closely related cultivars, and repeat regions which collapse during assembly impact whole genome comparison. 
After accounting for differences in assembly and annotation, we demonstrate that the genome of Darmor-bzh contains a greater number of genes than the genome of Tapidor. Our results are the first step towards comparison of the true differences between B. napus genomes and highlight the potential sources of error in future production of a B. napus pangenome.}, } @article {pmid28383665, year = {2017}, author = {Jeukens, J and Freschi, L and Vincent, AT and Emond-Rheault, JG and Kukavica-Ibrulj, I and Charette, SJ and Levesque, RC}, title = {A Pan-Genomic Approach to Understand the Basis of Host Adaptation in Achromobacter.}, journal = {Genome biology and evolution}, volume = {9}, number = {4}, pages = {1030-1046}, pmid = {28383665}, issn = {1759-6653}, abstract = {Over the past decade, there has been a rising interest in Achromobacter sp., an emerging opportunistic pathogen responsible for nosocomial and cystic fibrosis lung infections. Species of this genus are ubiquitous in the environment, can outcompete resident microbiota, and are resistant to commonly used disinfectants as well as antibiotics. Nevertheless, the Achromobacter genus suffers from difficulties in diagnosis, unresolved taxonomy and limited understanding of how it adapts to the cystic fibrosis lung, not to mention other host environments. The goals of this first genus-wide comparative genomics study were to clarify the taxonomy of this genus and identify genomic features associated with pathogenicity and host adaptation. This was done with a widely applicable approach based on pan-genome analysis. First, using all publicly available genomes, a combination of phylogenetic analysis based on 1,780 conserved genes with average nucleotide identity and accessory genome composition allowed the identification of a largely clinical lineage composed of Achromobacter xylosoxidans, Achromobacter insuavis, Achromobacter dolens, and Achromobacter ruhlandii. 
Within this lineage, we identified 35 positively selected genes involved in metabolism, regulation and efflux-mediated antibiotic resistance. Second, resistome analysis showed that this clinical lineage carried additional antibiotic resistance genes compared with other isolates. Finally, we identified putative mobile elements that contribute 53% of the genus's resistome and support horizontal gene transfer between Achromobacter and other ecologically similar genera. This study provides strong phylogenetic and pan-genomic bases to motivate further research on Achromobacter, and contributes to the understanding of opportunistic pathogen evolution.}, } @article {pmid28379509, year = {2017}, author = {Okura, M and Nozawa, T and Watanabe, T and Murase, K and Nakagawa, I and Takamatsu, D and Osaki, M and Sekizaki, T and Gottschalk, M and Hamada, S and Maruyama, F}, title = {A Locus Encoding Variable Defense Systems against Invading DNA Identified in Streptococcus suis.}, journal = {Genome biology and evolution}, volume = {9}, number = {4}, pages = {1000-1012}, pmid = {28379509}, issn = {1759-6653}, abstract = {Streptococcus suis, an important zoonotic pathogen, is known to have an open pan-genome and to develop a competent state. In S. suis, limited genetic lineages are suggested to be associated with zoonosis. However, little is known about the evolution of diversified lineages and their respective phenotypic or ecological characteristics. In this study, we performed comparative genome analyses of S. suis, with a focus on the competence genes, mobile genetic elements, and genetic elements related to various defense systems against exogenous DNAs (defense elements) that are associated with gene gain/loss/exchange mediated by horizontal DNA movements and their restrictions. 
Our genome analyses revealed a conserved competence-inducing peptide type (pherotype) of the competence system and large-scale genome rearrangements in certain clusters based on the genome phylogeny of 58 S. suis strains. Moreover, the profiles of the defense elements were similar or identical to each other among the strains belonging to the same genomic clusters. Our findings suggest that these genetic characteristics of each cluster might exert specific effects on the phenotypic or ecological differences between the clusters. We also found certain loci that shift several types of defense elements in S. suis. Of note, one of these loci is a previously unrecognized variable region in bacteria, at which strains of distinct clusters code for different and various defense elements. This locus might represent a novel defense mechanism that has evolved through an arms race between bacteria and invading DNAs, mediated by mobile genetic elements and genetic competence.}, } @article {pmid28377746, year = {2017}, author = {Bhattacharyya, C and Bakshi, U and Mallick, I and Mukherji, S and Bera, B and Ghosh, A}, title = {Genome-Guided Insights into the Plant Growth Promotion Capabilities of the Physiologically Versatile Bacillus aryabhattai Strain AB211.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {411}, pmid = {28377746}, issn = {1664-302X}, abstract = {Bacillus aryabhattai AB211 is a plant growth promoting, Gram-positive firmicute, isolated from the rhizosphere of tea (Camellia sinensis), one of the oldest perennial crops and a major non-alcoholic beverage widely consumed all over the world. The whole genome of B. aryabhattai AB211 was sequenced, annotated and evaluated with special focus on genomic elements related to plant microbe interaction. Its genome sequence reveals the presence of a 5,403,026 bp chromosome. 
A total of 5226 putative protein-coding sequences, 16 rRNA, 120 tRNA, 8 ncRNAs, 58 non-protein coding genes, and 11 prophage regions were identified. Genome sequence comparisons between strain AB211 and other related environmental strains of B. aryabhattai, identified about 3558 genes conserved among all B. aryabhattai genomes analyzed. Most of the common genes involved in plant growth promotion activities were found to be present within core genes of all the genomes used for comparison, illustrating possible common plant growth promoting traits shared among all the strains of B. aryabhattai. Besides the core genes, some genes were exclusively identified in the genome of strain AB211. Functional annotation of the genes predicted in the strain AB211 revealed the presence of genes responsible for mineral phosphate solubilization, siderophores, acetoin, butanediol, exopolysaccharides, flagella biosynthesis, surface attachment/biofilm formation, and indole acetic acid production, most of which were experimentally verified in the present study. Genome analysis and experimental evidence suggested that AB211 has robust central carbohydrate metabolism implying that this bacterium can efficiently utilize the root exudates and other organic materials as an energy source. Genes for the production of peroxidases, catalases, and superoxide dismutases, that confer resistance to oxidative stresses in plants were identified in AB211 genome. Besides these, genes for heat shock tolerance, cold shock tolerance, glycine-betaine production, and antibiotic/heavy metal resistance that enable bacteria to survive biotic/abiotic stress were also identified. 
Based on the genome sequence information and experimental evidence as presented in this study, strain AB211 appears to be metabolically diverse and exhibits tremendous potential as a plant growth promoting bacterium.}, } @article {pmid28369371, year = {2017}, author = {Hu, Z and Sun, C and Lu, KC and Chu, X and Zhao, Y and Lu, J and Shi, J and Wei, C}, title = {EUPAN enables pan-genome studies of a large number of eukaryotic genomes.}, journal = {Bioinformatics (Oxford, England)}, volume = {33}, number = {15}, pages = {2408-2409}, doi = {10.1093/bioinformatics/btx170}, pmid = {28369371}, issn = {1367-4811}, mesh = {Eukaryota/*genetics ; Genetics, Population/*methods ; *Genome ; Genomics/methods ; Genotyping Techniques/methods ; High-Throughput Nucleotide Sequencing ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA/*methods ; *Software ; }, abstract = {SUMMARY: Pan-genome analyses are routinely carried out for bacteria to interpret the within-species gene presence/absence variations (PAVs). However, pan-genome analyses are rare for eukaryotes due to the large sizes and higher complexities of their genomes. Here we proposed EUPAN, a eukaryotic pan-genome analysis toolkit, enabling automatic large-scale eukaryotic pan-genome analyses and detection of gene PAVs at a relatively low sequencing depth. In the previous studies, we demonstrated the effectiveness and high accuracy of EUPAN in the pan-genome analysis of 453 rice genomes, in which we also revealed widespread gene PAVs among individual rice genomes. Moreover, EUPAN can be directly applied to the current re-sequencing projects primarily focusing on single nucleotide polymorphisms.

EUPAN is implemented in Perl, R and C++. It is supported under Linux and preferred for a computer cluster with LSF and SLURM job scheduling systems. EUPAN together with its standard operating procedure (SOP) is freely available for non-commercial use (CC BY-NC 4.0) at http://cgm.sjtu.edu.cn/eupan/index.html .

CONTACT: ccwei@sjtu.edu.cn or jianxin.shi@sjtu.edu.cn.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid28369330, year = {2017}, author = {MacArthur, I and Anastasi, E and Alvarez, S and Scortti, M and Vázquez-Boland, JA}, title = {Comparative Genomics of Rhodococcus equi Virulence Plasmids Indicates Host-Driven Evolution of the vap Pathogenicity Island.}, journal = {Genome biology and evolution}, volume = {9}, number = {5}, pages = {1241-1247}, pmid = {28369330}, issn = {1759-6653}, support = {BB/J004227/1//Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Actinomycetales Infections/microbiology/veterinary ; Animals ; Cattle ; *Evolution, Molecular ; *Genomic Islands ; Host-Pathogen Interactions ; Phylogeny ; Plasmids ; Rhodococcus equi/*genetics/isolation & purification/pathogenicity ; }, abstract = {The conjugative virulence plasmid is a key component of the Rhodococcus equi accessory genome essential for pathogenesis. Three host-associated virulence plasmid types have been identified: the equine pVAPA and porcine pVAPB circular variants, and the linear pVAPN found in bovine (ruminant) isolates. We recently characterized the R. equi pangenome (Anastasi E, et al. 2016. Pangenome and phylogenomic analysis of the pathogenic actinobacterium Rhodococcus equi. Genome Biol Evol. 8:3140-3148.) and we report here the comparative analysis of the virulence plasmid genomes. Plasmids within each host-associated type were highly similar despite their diverse origins. Variation was accounted for by scattered single nucleotide polymorphisms and short nucleotide indels, while larger indels—mostly in the plasticity region near the vap pathogenicity island (PAI)—defined plasmid genomic subtypes. Only one of the plasmids analyzed, of pVAPN type, was exceptionally divergent due to accumulation of indels in the housekeeping backbone. 
Each host-associated plasmid type carried a unique PAI differing in vap gene complement, suggesting animal host-specific evolution of the vap multigene family. Complete conservation of the vap PAI was observed within each host-associated plasmid type. Both diversity of host-associated plasmid types and clonality of specific chromosomal-plasmid genomic type combinations were observed within the same R. equi phylogenomic subclade. Our data indicate that the overall strong conservation of the R. equi host-associated virulence plasmids is the combined result of host-driven selection, lateral transfer between strains, and geographical spread due to international livestock exchanges.}, } @article {pmid28368424, year = {2017}, author = {Batisse-Lignier, M and Sahut-Barnola, I and Tissier, F and Dumontet, T and Mathieu, M and Drelon, C and Pointud, JC and Damon-Soubeyrand, C and Marceau, G and Kemeny, JL and Bertherat, J and Tauveron, I and Val, P and Martinez, A and Lefrançois-Martinez, AM}, title = {P53/Rb inhibition induces metastatic adrenocortical carcinomas in a preclinical transgenic model.}, journal = {Oncogene}, volume = {36}, number = {31}, pages = {4445-4456}, pmid = {28368424}, issn = {1476-5594}, mesh = {Adrenocortical Carcinoma/*pathology ; Animals ; Antigens, Polyomavirus Transforming/*genetics ; Humans ; Mechanistic Target of Rapamycin Complex 1 ; Mice ; Mice, Transgenic ; Multiprotein Complexes/physiology ; Neoplasm Metastasis ; Retinoblastoma Protein/antagonists & inhibitors/*physiology ; Sirolimus/pharmacology ; TOR Serine-Threonine Kinases/physiology ; Tumor Suppressor Protein p53/antagonists & inhibitors/*physiology ; Wnt Signaling Pathway/physiology ; beta Catenin/physiology ; }, abstract = {Adrenocortical carcinoma (ACC) is a rare cancer with poor prognosis. Pan-genomic analyses identified p53/Rb and WNT/β-catenin signaling pathways as main contributors to the disease. 
However, isolated β-catenin constitutive activation failed to induce malignant progression in mouse adrenocortical tumors. Therefore, there still was a need for a relevant animal model to study ACC pathogenesis and to test new therapeutic approaches. Here, we have developed a transgenic mice model with adrenocortical specific expression of SV40 large T-antigen (AdTAg mice), to test the oncogenic potential of p53/Rb inhibition in the adrenal gland. All AdTAg mice develop large adrenal carcinomas that eventually metastasize to the liver and lungs, resulting in decreased overall survival. Consistent with ACC in patients, adrenal tumors in AdTAg mice autonomously produce large amounts of glucocorticoids and spontaneously activate WNT/β-catenin signaling pathway during malignant progression. We show that this activation is associated with downregulation of secreted frizzled related proteins (Sfrp) and Znrf3 that act as inhibitors of the WNT signaling. We also show that mTORC1 pathway activation is an early event during neoplasia expansion and further demonstrate that mTORC1 pathway is activated in ACC patients. Preclinical inhibition of mTORC1 activity induces a marked reduction in tumor size, associated with induction of apoptosis and inhibition of proliferation that results in normalization of corticosterone plasma levels in AdTAg mice. 
Altogether, these data establish AdTAg mice as the first preclinical model for metastatic ACC.}, } @article {pmid28356072, year = {2017}, author = {Kelleher, P and Bottacini, F and Mahony, J and Kilcawley, KN and van Sinderen, D}, title = {Comparative and functional genomics of the Lactococcus lactis taxon; insights into evolution and niche adaptation.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {267}, pmid = {28356072}, issn = {1471-2164}, mesh = {Amino Acid Transport Systems/genetics ; Carbohydrate Metabolism/genetics ; Carrier Proteins/genetics ; Cluster Analysis ; Comparative Genomic Hybridization ; *Evolution, Molecular ; *Genome, Bacterial ; Lactococcus lactis/classification/*genetics ; Lipid Metabolism/genetics ; Phylogeny ; RNA, Ribosomal, 16S/chemistry/classification/metabolism ; }, abstract = {BACKGROUND: Lactococcus lactis is among the most widely studied lactic acid bacterial species due to its long history of safe use and economic importance to the dairy industry, where it is exploited as a starter culture in cheese production.

RESULTS: In the current study, we report on the complete sequencing of 16 L. lactis subsp. lactis and L. lactis subsp. cremoris genomes. The chromosomal features of these 16 L. lactis strains in conjunction with 14 completely sequenced, publicly available lactococcal chromosomes were assessed with particular emphasis on discerning the L. lactis subspecies division, evolution and niche adaptation. The deduced pan-genome of L. lactis was found to be closed, indicating that the representative data sets employed for this analysis are sufficient to fully describe the genetic diversity of the taxon.

CONCLUSIONS: Niche adaptation appears to play a significant role in governing the genetic content of each L. lactis subspecies, while (differential) genome decay and redundancy in the dairy niche is also highlighted.}, } @article {pmid28356070, year = {2017}, author = {Wang, D and Li, S and Guo, F and Ning, K and Wang, L}, title = {Core-genome scaffold comparison reveals the prevalence that inversion events are associated with pairs of inverted repeats.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {268}, pmid = {28356070}, issn = {1471-2164}, mesh = {Comparative Genomic Hybridization ; Escherichia coli/*genetics ; Gene Rearrangement ; *Genome, Bacterial ; Pseudomonas aeruginosa/*genetics ; Sequence Inversion/*genetics ; }, abstract = {BACKGROUND: Genome rearrangement describes gross changes of chromosomal regions, plays an important role in evolutionary biology and has profound impacts on phenotype in organisms ranging from microbes to humans. With more and more complete genomes accomplished, lots of genomic comparisons have been conducted in order to find genome rearrangements and the mechanisms which underlie the rearrangement events. In our opinion, genomic comparison of different individuals/strains within the same species (pan-genome) is more helpful to reveal the mechanisms for genome rearrangements since genomes of the same species are much closer to each other.

RESULTS: We study the mechanism for inversion events via core-genome scaffold comparison of different strains within the same species. We focus on two kinds of bacteria, Pseudomonas aeruginosa and Escherichia coli, and investigate the inversion events among different strains of the same species. We find an interesting phenomenon that long (larger than 10,000 bp) inversion regions are flanked by a pair of Inverted Repeats (IRs). This mechanism can also explain why breakpoint reuse happens for inversion events. We study the prevalence of the phenomenon and find that it is a major mechanism for inversions. The other observation is that for different rearrangement events such as transposition and inverted block interchange, the two ends of the swapped regions are also associated with repeats so that after the rearrangement operations the two ends of the swapped regions remain unchanged. To our knowledge, this is the first time such a phenomenon is reported for transposition events.

CONCLUSIONS: In both Pseudomonas aeruginosa and Escherichia coli strains, IRs were found at the two ends of long sequence inversions. The two ends of the inversion remained unchanged before and after the inversion event. The existence of IRs can explain the breakpoint reuse phenomenon. We also observed that other rearrangement operations such as transposition, inverted transposition, and inverted block interchange, had repeats (not necessarily inverted) at the ends of each segment, where the ends remained unchanged before and after the rearrangement operations. This suggests that the conservation of ends could possibly be a popular phenomenon in many types of chromosome rearrangement events.}, } @article {pmid28352257, year = {2017}, author = {Zeng, L and Wang, D and Hu, N and Zhu, Q and Chen, K and Dong, K and Zhang, Y and Yao, Y and Guo, X and Chang, YF and Zhu, Y}, title = {A Novel Pan-Genome Reverse Vaccinology Approach Employing a Negative-Selection Strategy for Screening Surface-Exposed Antigens against leptospirosis.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {396}, pmid = {28352257}, issn = {1664-302X}, abstract = {Reverse vaccinology (RV) has been widely used for screening of surface-exposed proteins (PSEs) of important pathogens, including outer membrane proteins (OMPs), and extracellular proteins (ECPs) as potential vaccine candidates. In this study, we applied a novel RV negative strategy and a pan-genome analysis for screening of PSEs from 17 L. interrogans strains covering 11 predominately epidemic serovars and 17 multilocus typing (MLST) sequence types (STs) worldwide. Our results showed, for instance, out of a total of 633 predicted PSEs in strain 56601, 92.8% were OMPs or ECPs (588/633). Among the 17 strains, 190 core PSEs, 913 dispensable PSEs and 861 unique PSEs were identified. 
Of the 190 PSEs, 121 were further predicted to be highly antigenic and thus may serve as potential vaccine candidates against leptospirosis. With the exception of LipL45, OmpL1, and LigB, the majority of the 121 PSEs were newly identified antigens. For example, hypothetical proteins BatC, LipL71, and the OmpA family proteins sharing many common features, such as surface-exposed localization, universal conservation, and eliciting strong antibody responses in patients, are regarded as the most promising vaccine antigens. Additionally, a wide array of potential virulence factors among the predicted PSEs including TonB-dependent receptor, sphingomyelinase 2, leucine-rich repeat protein, and 4 neighboring hypothetical proteins were identified as potentially antigenic and deserve further investigation. Our results can contribute to the prediction of suitable antigens as potential vaccine candidates against leptospirosis and also provide further insights into mechanisms of leptospiral pathogenicity. 
In addition, our novel negative-screening strategy combined with pan-genome analysis can be a routine RV method applied to numerous other pathogens.}, } @article {pmid28350002, year = {2017}, author = {McInerney, JO and McNally, A and O'Connell, MJ}, title = {Why prokaryotes have pangenomes.}, journal = {Nature microbiology}, volume = {2}, number = {}, pages = {17040}, doi = {10.1038/nmicrobiol.2017.40}, pmid = {28350002}, issn = {2058-5276}, mesh = {Archaea/*classification/*genetics ; Bacteria/*classification/*genetics ; Evolution, Molecular ; Gene Transfer, Horizontal ; Genetic Fitness ; *Genetic Variation ; Selection, Genetic ; }, } @article {pmid28348877, year = {2017}, author = {Lees, JA and Kremer, PHC and Manso, AS and Croucher, NJ and Ferwerda, B and Serón, MV and Oggioni, MR and Parkhill, J and Brouwer, MC and van der Ende, A and van de Beek, D and Bentley, SD}, title = {Large scale genomic analysis shows no evidence for pathogen adaptation between the blood and cerebrospinal fluid niches during bacterial meningitis.}, journal = {Microbial genomics}, volume = {3}, number = {1}, pages = {e000103}, pmid = {28348877}, issn = {2057-5858}, support = {104169/Z/14/Z/WT_/Wellcome Trust/United Kingdom ; MR/M003078/1/MRC_/Medical Research Council/United Kingdom ; 281156/ERC_/European Research Council/International ; 098051/WT_/Wellcome Trust/United Kingdom ; 1365620/MRC_/Medical Research Council/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Adaptation, Biological/*genetics ; Blood-Brain Barrier/*microbiology ; Carrier State/microbiology ; DNA, Bacterial ; Genetic Variation ; Humans ; Meningitis, Meningococcal/blood/cerebrospinal fluid/*microbiology ; Meningitis, Pneumococcal/blood/cerebrospinal fluid/*microbiology ; Nasopharynx/microbiology ; Neisseria meningitidis/genetics/*pathogenicity ; Streptococcus pneumoniae/genetics/*pathogenicity ; Whole Genome Sequencing ; }, abstract = {Recent studies have provided evidence for rapid pathogen genome 
diversification, some of which could potentially affect the course of disease. We have previously described such variation seen between isolates infecting the blood and cerebrospinal fluid (CSF) of a single patient during a case of bacterial meningitis. Here, we performed whole-genome sequencing of paired isolates from the blood and CSF of 869 meningitis patients to determine whether such variation frequently occurs between these two niches in cases of bacterial meningitis. Using a combination of reference-free variant calling approaches, we show that no genetic adaptation occurs in either invaded niche during bacterial meningitis for two major pathogen species, Streptococcus pneumoniae and Neisseria meningitidis. This study therefore shows that the bacteria capable of causing meningitis are already able to do this upon entering the blood, and no further sequence change is necessary to cross the blood-brain barrier. Our findings place the focus back on bacterial evolution between nasopharyngeal carriage and invasion, or diversity of the host, as likely mechanisms for determining invasiveness.}, } @article {pmid28348852, year = {2016}, author = {Jamrozy, DM and Harris, SR and Mohamed, N and Peacock, SJ and Tan, CY and Parkhill, J and Anderson, AS and Holden, MTG}, title = {Pan-genomic perspective on the evolution of the Staphylococcus aureus USA300 epidemic.}, journal = {Microbial genomics}, volume = {2}, number = {5}, pages = {e000058}, pmid = {28348852}, issn = {2057-5858}, support = {098051//Wellcome Trust/United Kingdom ; }, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Child ; Child, Preschool ; Community-Acquired Infections/*epidemiology/*microbiology ; Evolution, Molecular ; Genome, Bacterial/*genetics ; Humans ; Infant ; Methicillin-Resistant Staphylococcus aureus/classification/*genetics ; Middle Aged ; *Molecular Epidemiology ; Phylogeny ; Staphylococcal Infections/*epidemiology/*microbiology ; United States/epidemiology ; Whole Genome Sequencing 
; Young Adult ; }, abstract = {Staphylococcus aureus USA300 represents the dominant community-associated methicillin-resistant S. aureus lineage in the USA, where it is a major cause of skin and soft tissue infections. Previous comparative genomic studies have described the population structure and evolution of USA300 based on geographically restricted isolate collections. Here, we investigated the USA300 population by sequencing genomes of a geographically distributed panel of 191 clinical S. aureus isolates belonging to clonal complex 8 (CC8), derived from the Tigecycline Evaluation and Surveillance Trial program. Isolates were collected at 12 healthcare centres across nine USA states in 2004, 2009 or 2010. Reconstruction of evolutionary relationships revealed that CC8 was dominated by USA300 isolates (154/191, 81 %), which were heterogeneous and demonstrated limited phylogeographic clustering. Analysis of the USA300 core genomes revealed an increase in median pairwise SNP distance from 62 to 98 between 2004 and 2010, with a stable pattern of above average dN/dS ratios. The phylogeny of the USA300 population indicated that early diversification events led to the formation of nested clades, which arose through cumulative acquisition of predominantly non-synonymous SNPs in various coding sequences. The accessory genome of USA300 was largely homogenous and consisted of elements previously associated with this lineage. We observed an emergence of SCCmec negative and ACME negative USA300 isolates amongst more recent samples, and an increase in the prevalence of ϕSa5 prophage. Together, the analysed S. 
aureus USA300 collection revealed an evolving pan-genome through increased core genome heterogeneity and temporal variation in the frequency of certain accessory elements.}, } @article {pmid28348841, year = {2016}, author = {O Sheridan, P and Martin, JC and Lawley, TD and Browne, HP and Harris, HMB and Bernalier-Donadille, A and Duncan, SH and O'Toole, PW and P Scott, K and J Flint, H}, title = {Polysaccharide utilization loci and nutritional specialization in a dominant group of butyrate-producing human colonic Firmicutes.}, journal = {Microbial genomics}, volume = {2}, number = {2}, pages = {e000043}, pmid = {28348841}, issn = {2057-5858}, mesh = {Bacterial Proteins/*genetics/metabolism ; Butyrates/metabolism ; Colon/*microbiology ; Dietary Carbohydrates/metabolism ; Enzymes/*genetics/metabolism ; Firmicutes/*enzymology/*genetics/metabolism ; Genome, Bacterial ; Humans ; Polysaccharides/*metabolism ; Species Specificity ; }, abstract = {Firmicutes and Bacteroidetes are the predominant bacterial phyla colonizing the healthy human large intestine. Whilst both ferment dietary fibre, genes responsible for this important activity have been analysed only in the Bacteroidetes, with very little known about the Firmicutes. This work investigates the carbohydrate-active enzymes (CAZymes) in a group of Firmicutes, Roseburia spp. and Eubacterium rectale, which play an important role in producing butyrate from dietary carbohydrates and in health maintenance. Genome sequences of 11 strains representing E. rectale and four Roseburia spp. were analysed for carbohydrate-active genes. Following assembly into a pan-genome, core, variable and unique genes were identified. The 1840 CAZyme genes identified in the pan-genome were assigned to 538 orthologous groups, of which only 26 were present in all strains, indicating considerable inter-strain variability. 
This analysis was used to categorize the 11 strains into four carbohydrate utilization ecotypes (CUEs), which were shown to correspond to utilization of different carbohydrates for growth. Many glycoside hydrolase genes were found linked to genes encoding oligosaccharide transporters and regulatory elements in the genomes of Roseburia spp. and E. rectale, forming distinct polysaccharide utilization loci (PULs). Whilst PULs are also a common feature in Bacteroidetes, key differences were noted in these Firmicutes, including the absence of close homologues of Bacteroides polysaccharide utilization genes, hence we refer to Gram-positive PULs (gpPULs). Most CAZyme genes in the Roseburia/E. rectale group are organized into gpPULs. Variation in gpPULs can explain the high degree of nutritional specialization at the species level within this group.}, } @article {pmid28347275, year = {2017}, author = {Zhou, P and Silverstein, KA and Ramaraj, T and Guhlin, J and Denny, R and Liu, J and Farmer, AD and Steele, KP and Stupar, RM and Miller, JR and Tiffin, P and Mudge, J and Young, ND}, title = {Exploring structural variation and gene family architecture with De Novo assemblies of 15 Medicago genomes.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {261}, pmid = {28347275}, issn = {1471-2164}, mesh = {Comparative Genomic Hybridization ; DNA Copy Number Variations/*genetics ; *Genome, Plant ; Heat-Shock Proteins/genetics ; High-Throughput Nucleotide Sequencing ; Leucine-Rich Repeat Proteins ; Medicago truncatula/*genetics ; Plant Proteins/genetics ; Proteins/genetics ; RNA, Plant/chemistry/isolation & purification/metabolism ; Sequence Alignment ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Previous studies exploring sequence variation in the model legume, Medicago truncatula, relied on mapping short reads to a single reference. 
However, read-mapping approaches are inadequate to examine large, diverse gene families or to probe variation in repeat-rich or highly divergent genome regions. De novo sequencing and assembly of M. truncatula genomes enables near-comprehensive discovery of structural variants (SVs), analysis of rapidly evolving gene families, and ultimately, construction of a pan-genome.

RESULTS: Genome-wide synteny based on 15 de novo M. truncatula assemblies effectively detected different types of SVs indicating that as much as 22% of the genome is involved in large structural changes, altogether affecting 28% of gene models. A total of 63 million base pairs (Mbp) of novel sequence was discovered, expanding the reference genome space for Medicago by 16%. Pan-genome analysis revealed that 42% (180 Mbp) of genomic sequences is missing in one or more accession, while examination of de novo annotated genes identified 67% (50,700) of all ortholog groups as dispensable - estimates comparable to recent studies in rice, maize and soybean. Rapidly evolving gene families typically associated with biotic interactions and stress response were found to be enriched in the accession-specific gene pool. The nucleotide-binding site leucine-rich repeat (NBS-LRR) family, in particular, harbors the highest level of nucleotide diversity, large effect single nucleotide change, protein diversity, and presence/absence variation. However, the leucine-rich repeat (LRR) and heat shock gene families are disproportionately affected by large effect single nucleotide changes and even higher levels of copy number variation.

CONCLUSIONS: Analysis of multiple M. truncatula genomes illustrates the value of de novo assemblies to discover and describe structural variation, something that is often under-estimated when using read-mapping approaches. Comparisons among the de novo assemblies also indicate that different large gene families differ in the architecture of their structural variation.}, } @article {pmid28337203, year = {2017}, author = {Grassmann, AA and Souza, JD and McBride, AJ}, title = {A Universal Vaccine against Leptospirosis: Are We Going in the Right Direction?}, journal = {Frontiers in immunology}, volume = {8}, number = {}, pages = {256}, pmid = {28337203}, issn = {1664-3224}, abstract = {Leptospirosis is the most widespread zoonosis in the world and a neglected tropical disease estimated to cause severe infection in more than one million people worldwide every year that can be combated by effective immunization. However, no significant progress has been made on the leptospirosis vaccine since the advent of bacterins over 100 years ago. Although protective against lethal infection, particularly in animals, bacterin-induced immunity is considered short term, serovar restricted, and the vaccine can cause serious side effects. The urgent need for a new vaccine has motivated several research groups to evaluate the protective immune response induced by recombinant vaccines. Significant protection has been reported with several promising outer membrane proteins, including LipL32 and the leptospiral immunoglobulin-like proteins. However, efficacy was variable and failed to induce a cross-protective response or sterile immunity among vaccinated animals. As hundreds of draft genomes of all known Leptospira species are now available, this should aid novel target discovery through reverse vaccinology (RV) and pangenomic studies. The identification of surface-exposed vaccine candidates that are highly conserved among infectious Leptospira spp. 
is a requirement for the development of a cross-protective universal vaccine. However, the lack of immune correlates is a major drawback to the application of RV to Leptospira genomes. In addition, as the protective immune response against leptospirosis is not fully understood, the rational use of adjuvants tends to be a process of trial and error. In this perspective, we discuss current advances, the pitfalls, and possible solutions for the development of a universal leptospirosis vaccine.}, } @article {pmid28333270, year = {2017}, author = {Hughes, D and Andersson, DI}, title = {Environmental and genetic modulation of the phenotypic expression of antibiotic resistance.}, journal = {FEMS microbiology reviews}, volume = {41}, number = {3}, pages = {374-391}, pmid = {28333270}, issn = {1574-6976}, mesh = {Adaptation, Physiological/*physiology ; Anti-Bacterial Agents/*pharmacology ; *Bacteria/drug effects/genetics/growth & development ; Biofilms/*drug effects/growth & development ; Drug Resistance, Multiple, Bacterial/*genetics ; Environment ; Gene Transfer, Horizontal/genetics ; Genotype ; Humans ; Mutation ; Phenotype ; }, abstract = {Antibiotic resistance can be acquired by mutation or horizontal transfer of a resistance gene, and generally an acquired mechanism results in a predictable increase in phenotypic resistance. However, recent findings suggest that the environment and/or the genetic context can modify the phenotypic expression of specific resistance genes/mutations. An important implication from these findings is that a given genotype does not always result in the expected phenotype. This dissociation of genotype and phenotype has important consequences for clinical bacteriology and for our ability to predict resistance phenotypes from genetics and DNA sequences. 
A related problem concerns the degree to which the genes/mutations currently identified in vitro can fully explain the in vivo resistance phenotype, or whether there is a significant additional amount of presently unknown mutations/genes (genetic 'dark matter') that could contribute to resistance in clinical isolates. Finally, a very important question is whether/how we can identify the genetic features that contribute to making a successful pathogen, and predict why some resistant clones are very successful and spread globally? In this review, we describe different environmental and genetic factors that influence phenotypic expression of antibiotic resistance genes/mutations and how this information is needed to understand why particular resistant clones spread worldwide and to what extent we can use DNA sequences to predict evolutionary success.}, } @article {pmid28327996, year = {2017}, author = {Xie, W and Chen, C and Yang, Z and Guo, L and Yang, X and Wang, D and Chen, M and Huang, J and Wen, Y and Zeng, Y and Liu, Y and Xia, J and Tian, L and Cui, H and Wu, Q and Wang, S and Xu, B and Li, X and Tan, X and Ghanim, M and Qiu, B and Pan, H and Chu, D and Delatte, H and Maruthi, MN and Ge, F and Zhou, X and Wang, X and Wan, F and Du, Y and Luo, C and Yan, F and Preisser, EL and Jiao, X and Coates, BS and Zhao, J and Gao, Q and Xia, J and Yin, Y and Liu, Y and Brown, JK and Zhou, XJ and Zhang, Y}, title = {Genome sequencing of the sweetpotato whitefly Bemisia tabaci MED/Q.}, journal = {GigaScience}, volume = {6}, number = {5}, pages = {1-7}, pmid = {28327996}, issn = {2047-217X}, mesh = {Animals ; Female ; Gene Library ; *Genome, Insect ; Hemiptera/*genetics ; Male ; Sequence Analysis, DNA ; }, abstract = {The sweetpotato whitefly Bemisia tabaci is a highly destructive agricultural and ornamental crop pest. It damages host plants through both phloem feeding and vectoring plant pathogens. Introductions of B. 
tabaci are difficult to quarantine and eradicate because of its high reproductive rates, broad host plant range, and insecticide resistance. A total of 791 Gb of raw DNA sequence from whole genome shotgun sequencing, and 13 BAC pooling libraries were generated by Illumina sequencing using different combinations of mate-pair and pair-end libraries. Assembly gave a final genome with a scaffold N50 of 437 kb, and a total length of 658 Mb. Annotation of repetitive elements and coding regions resulted in 265.0 Mb TEs (40.3%) and 20 786 protein-coding genes with putative gene family expansions, respectively. Phylogenetic analysis based on orthologs across 14 arthropod taxa suggested that MED/Q is clustered into a hemipteran clade containing A. pisum and is a sister lineage to a clade containing both R. prolixus and N. lugens. Genome completeness, as estimated using the CEGMA and Benchmarking Universal Single-Copy Orthologs pipelines, reached 96% and 79%. These MED/Q genomic resources lay a foundation for future 'pan-genomic' comparisons of invasive vs. noninvasive, invasive vs. invasive, and native vs. 
exotic Bemisia, which, in return, will open up new avenues of investigation into whitefly biology, evolution, and management.}, } @article {pmid28325850, year = {2017}, author = {Lee, AH and Flibotte, S and Sinha, S and Paiero, A and Ehrlich, RL and Balashov, S and Ehrlich, GD and Zlosnik, JE and Mell, JC and Nislow, C}, title = {Phenotypic diversity and genotypic flexibility of Burkholderia cenocepacia during long-term chronic infection of cystic fibrosis lungs.}, journal = {Genome research}, volume = {27}, number = {4}, pages = {650-662}, pmid = {28325850}, issn = {1549-5469}, support = {R01 DC002148/DC/NIDCD NIH HHS/United States ; }, mesh = {Adolescent ; Animals ; Biofilms ; Burkholderia Infections/complications/*microbiology ; Burkholderia cenocepacia/*genetics/isolation & purification/pathogenicity/physiology ; Child ; Child, Preschool ; Cystic Fibrosis/complications/*microbiology ; Genotype ; Humans ; Lung/microbiology ; Moths/microbiology ; *Phenotype ; *Polymorphism, Genetic ; Virulence ; Young Adult ; }, abstract = {Chronic bacterial infections of the lung are the leading cause of morbidity and mortality in cystic fibrosis patients. Tracking bacterial evolution during chronic infections can provide insights into how host selection pressures-including immune responses and therapeutic interventions-shape bacterial genomes. We carried out genomic and phenotypic analyses of 215 serially collected Burkholderia cenocepacia isolates from 16 cystic fibrosis patients, spanning a period of 2-20 yr and a broad range of epidemic lineages. Systematic phenotypic tests identified longitudinal bacterial series that manifested progressive changes in liquid media growth, motility, biofilm formation, and acute insect virulence, but not in mucoidy. The results suggest that distinct lineages follow distinct evolutionary trajectories during lung infection. 
Pan-genome analysis identified 10,110 homologous gene clusters present only in a subset of strains, including genes restricted to different molecular types. Our phylogenetic analysis based on 2148 orthologous gene clusters from all isolates is consistent with patient-specific clades. This suggests that initial colonization of patients was likely by individual strains, followed by subsequent diversification. Evidence of clonal lineages shared by some patients was observed, suggesting inter-patient transmission. We observed recurrent gene losses in multiple independent longitudinal series, including complete loss of Chromosome III and deletions on other chromosomes. Recurrently observed loss-of-function mutations were associated with decreases in motility and biofilm formation. Together, our study provides the first comprehensive genome-phenome analyses of B. cenocepacia infection in cystic fibrosis lungs and serves as a valuable resource for understanding the genomic and phenotypic underpinnings of bacterial evolution.}, } @article {pmid28321969, year = {2017}, author = {Udaondo, Z and Duque, E and Ramos, JL}, title = {The pangenome of the genus Clostridium.}, journal = {Environmental microbiology}, volume = {19}, number = {7}, pages = {2588-2603}, doi = {10.1111/1462-2920.13732}, pmid = {28321969}, issn = {1462-2920}, mesh = {Bacterial Proteins/genetics/metabolism ; Clostridium/classification/*genetics/metabolism ; *Genome, Bacterial ; Metabolic Networks and Pathways ; Multilocus Sequence Typing ; Phylogeny ; Protein Serine-Threonine Kinases/genetics/metabolism ; S-Adenosylmethionine/metabolism ; }, abstract = {The pangenome for the genus Clostridium sensu stricto, which was obtained using highly curated and annotated genomes from 16 species is presented; some of these cause disease, while others are used for the production of added-value chemicals. 
Multilocus sequencing analysis revealed that species of this genus group into at least two clades that include non-pathogenic and pathogenic strains, suggesting that pathogenicity is dispersed across the phylogenetic tree. The core genome of the genus includes 546 protein families, which mainly comprise those involved in protein translation and DNA repair. The GS-GOGAT may represent the central pathway for generating organic nitrogen from inorganic nitrogen sources. Glycerol and glucose metabolism genes are well represented in the core genome together with a set of energy conservation systems. A metabolic network comprising proteins/enzymes, RNAs and metabolites, whose topological structure is a non-random and scale-free network with hierarchically structured modules was built. These modules shed light on the interactions between RNAs, proteins and metabolites, revealing biological features of transcription and translation, cell wall biosynthesis, C1 metabolism and N metabolism. Network analysis identified four nodes that function as hubs and bottlenecks, namely, coenzyme A, HPr kinases, S-adenosylmethionine and the ribonuclease P-protein, suggesting pivotal roles for them in Clostridium.}, } @article {pmid28314726, year = {2017}, author = {Farag, IF and Youssef, NH and Elshahed, MS}, title = {Global Distribution Patterns and Pangenomic Diversity of the Candidate Phylum "Latescibacteria" (WS3).}, journal = {Applied and environmental microbiology}, volume = {83}, number = {10}, pages = {}, pmid = {28314726}, issn = {1098-5336}, mesh = {Bacteria/classification/*genetics/*isolation & purification ; Bacterial Proteins/genetics/metabolism ; Cellulosomes/genetics/metabolism ; DNA, Bacterial/genetics ; Ecosystem ; Fresh Water/*microbiology ; *Genetic Variation ; Genomics ; Groundwater/*microbiology ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {We investigated the global distribution patterns and pangenomic diversity of the candidate phylum "Latescibacteria" 
(WS3) in 16S rRNA gene as well as metagenomic data sets. We document distinct distribution patterns for various "Latescibacteria" orders in 16S rRNA gene data sets, with prevalence of orders sediment_1 in terrestrial, PBSIII_9 in groundwater and temperate freshwater, and GN03 in pelagic marine, saline-hypersaline, and wastewater habitats. Using a fragment recruitment approach, we identified 68.9 Mb of "Latescibacteria"-affiliated contigs in publicly available metagenomic data sets comprising 73,079 proteins. Metabolic reconstruction suggests a prevalent saprophytic lifestyle in all "Latescibacteria" orders, with marked capacities for the degradation of proteins, lipids, and polysaccharides predominant in plant, bacterial, fungal/crustacean, and eukaryotic algal cell walls. As well, extensive transport and central metabolic pathways for the metabolism of imported monomers were identified. Interestingly, genes and domains suggestive of the production of a cellulosome-e.g., protein-coding genes harboring dockerin I domains attached to a glycosyl hydrolase and scaffoldin-encoding genes harboring cohesin I and CBM37 domains-were identified in order PBSIII_9, GN03, and MSB-4E2 fragments recovered from four anoxic aquatic habitats; hence extending the cellulosomal production capabilities in Bacteria beyond the Gram-positive Firmicutes. In addition to fermentative pathways, a complete electron transport chain with terminal cytochrome c oxidases Caa3 (for operation under high oxygen tension) and Cbb3 (for operation under low oxygen tension) were identified in PBSIII_9 and GN03 fragments recovered from oxygenated and partially/seasonally oxygenated aquatic habitats. 
Our metagenomic recruitment effort hence represents a comprehensive pangenomic view of this yet-uncultured phylum and provides insights broader than and complementary to those gained from genome recovery initiatives focusing on a single or few sampled environments. IMPORTANCE Our understanding of the phylogenetic diversity, metabolic capabilities, and ecological roles of yet-uncultured microorganisms is rapidly expanding. However, recent efforts mainly have been focused on recovering genomes of novel microbial lineages from a specific sampling site, rather than from a wide range of environmental habitats. To comprehensively evaluate the genomic landscape, putative metabolic capabilities, and ecological roles of yet-uncultured candidate phyla, efforts that focus on the recovery of genomic fragments from a wide range of habitats and that adequately sample the intraphylum diversity within a specific target lineage are needed. Here, we investigated the global distribution patterns and pangenomic diversity of the candidate phylum "Latescibacteria." Our results document the preference of specific "Latescibacteria" orders to specific habitats, the prevalence of plant polysaccharide degradation abilities within all "Latescibacteria" orders, the occurrence of all genes/domains necessary for the production of cellulosomes within three "Latescibacteria" orders (GN03, PBSIII_9, and MSB-4E2) in data sets recovered from anaerobic locations, and the identification of the components of an aerobic respiratory chain, as well as occurrence of multiple O2-dependent metabolic reactions in "Latescibacteria" orders GN03 and PBSIII_9 recovered from oxygenated habitats. 
The results demonstrate the value of phylocentric pangenomic surveys for understanding the global ecological distribution and panmetabolic abilities of yet-uncultured microbial lineages since they provide broader and more complementary insights than those gained from single-cell genomic and/or metagenomic-enabled genome recovery efforts focusing on a single sampling site.}, } @article {pmid28293680, year = {2017}, author = {Castillo, D and Alvise, PD and Xu, R and Zhang, F and Middelboe, M and Gram, L}, title = {Comparative Genome Analyses of Vibrio anguillarum Strains Reveal a Link with Pathogenicity Traits.}, journal = {mSystems}, volume = {2}, number = {1}, pages = {}, pmid = {28293680}, issn = {2379-5077}, abstract = {Vibrio anguillarum is a marine bacterium that can cause vibriosis in many fish and shellfish species, leading to high mortalities and economic losses in aquaculture. Although putative virulence factors have been identified, the mechanism of pathogenesis of V. anguillarum is not fully understood. Here, we analyzed whole-genome sequences of a collection of V. anguillarum strains and compared them to virulence of the strains as determined in larval challenge assays. Previously identified virulence factors were globally distributed among the strains, with some genetic diversity. However, the pan-genome revealed that six out of nine high-virulence strains possessed a unique accessory genome that was attributed to pathogenic genomic islands, prophage-like elements, virulence factors, and a new set of gene clusters involved in biosynthesis, modification, and transport of polysaccharides. In contrast, V. anguillarum strains that were medium to nonvirulent had a high degree of genomic homogeneity. Finally, we found that a phylogeny based on the core genomes clustered the strains with moderate to no virulence, while six out of nine high-virulence strains represented phylogenetically separate clusters. 
Hence, we suggest a link between genotype and virulence characteristics of Vibrio anguillarum, which can be used to unravel the molecular evolution of V. anguillarum and can also be important from survey and diagnostic perspectives. IMPORTANCE Comparative genome analysis of strains of a pathogenic bacterial species can be a powerful tool to discover acquisition of mobile genetic elements related to virulence. Here, we compared 28 V. anguillarum strains that differed in virulence in fish larval models. By pan-genome analyses, we found that six of nine highly virulent strains had a unique core and accessory genome. In contrast, V. anguillarum strains that were medium to nonvirulent had low genomic diversity. Integration of genomic and phenotypic features provides insights into the evolution of V. anguillarum and can also be important for survey and diagnostic purposes.}, } @article {pmid28288685, year = {2017}, author = {Hur, J and Özgür, A and He, Y}, title = {Ontology-based literature mining of E. coli vaccine-associated gene interaction networks.}, journal = {Journal of biomedical semantics}, volume = {8}, number = {1}, pages = {12}, pmid = {28288685}, issn = {2041-1480}, mesh = {Bacterial Vaccines/*immunology ; *Biological Ontologies ; Data Mining/*methods ; Escherichia coli/*genetics/*immunology ; *Gene Regulatory Networks ; Genomics ; }, abstract = {BACKGROUND: Pathogenic Escherichia coli infections cause various diseases in humans and many animal species. However, with extensive E. coli vaccine research, we are still unable to fully protect ourselves against E. coli infections. To more rational development of effective and safe E. coli vaccine, it is important to better understand E. coli vaccine-associated gene interaction networks.

METHODS: In this study, we first extended the Vaccine Ontology (VO) to semantically represent various E. coli vaccines and genes used in the vaccine development. We also normalized E. coli gene names compiled from the annotations of various E. coli strains using a pan-genome-based annotation strategy. The Interaction Network Ontology (INO) includes a hierarchy of various interaction-related keywords useful for literature mining. Using VO, INO, and normalized E. coli gene names, we applied an ontology-based SciMiner literature mining strategy to mine all PubMed abstracts and retrieve E. coli vaccine-associated E. coli gene interactions. Four centrality metrics (i.e., degree, eigenvector, closeness, and betweenness) were calculated for identifying highly ranked genes and interaction types.

RESULTS: Using vaccine-related PubMed abstracts, our study identified 11,350 sentences that contain 88 unique INO interactions types and 1,781 unique E. coli genes. Each sentence contained at least one interaction type and two unique E. coli genes. An E. coli gene interaction network of genes and INO interaction types was created. From this big network, a sub-network consisting of 5 E. coli vaccine genes, including carA, carB, fimH, fepA, and vat, and 62 other E. coli genes, and 25 INO interaction types was identified. While many interaction types represent direct interactions between two indicated genes, our study has also shown that many of these retrieved interaction types are indirect in that the two genes participated in the specified interaction process in a required but indirect process. Our centrality analysis of these gene interaction networks identified top ranked E. coli genes and 6 INO interaction types (e.g., regulation and gene expression).

CONCLUSIONS: Vaccine-related E. coli gene-gene interaction network was constructed using ontology-based literature mining strategy, which identified important E. coli vaccine genes and their interactions with other genes through specific interaction types.}, } @article {pmid28287462, year = {2017}, author = {Hurgobin, B and Edwards, D}, title = {SNP Discovery Using a Pangenome: Has the Single Reference Approach Become Obsolete?}, journal = {Biology}, volume = {6}, number = {1}, pages = {}, pmid = {28287462}, issn = {2079-7737}, abstract = {Increasing evidence suggests that a single individual is insufficient to capture the genetic diversity within a species due to gene presence absence variation. In order to understand the extent to which genomic variation occurs in a species, the construction of its pangenome is necessary. The pangenome represents the complete set of genes of a species; it is composed of core genes, which are present in all individuals, and variable genes, which are present only in some individuals. Aside from variations at the gene level, single nucleotide polymorphisms (SNPs) are also an important form of genetic variation. The advent of next-generation sequencing (NGS) coupled with the heritability of SNPs make them ideal markers for genetic analysis of human, animal, and microbial data. SNPs have also been extensively used in crop genetics for association mapping, quantitative trait loci (QTL) analysis, analysis of genetic diversity, and phylogenetic analysis. This review focuses on the use of pangenomes for SNP discovery. It highlights the advantages of using a pangenome rather than a single reference for this purpose. 
This review also demonstrates how extra information not captured in a single reference alone can be used to provide additional support for linking genotypic data to phenotypic data.}, } @article {pmid28283403, year = {2017}, author = {Choudoir, MJ and Panke-Buisse, K and Andam, CP and Buckley, DH}, title = {Genome Surfing As Driver of Microbial Genomic Diversity.}, journal = {Trends in microbiology}, volume = {25}, number = {8}, pages = {624-636}, doi = {10.1016/j.tim.2017.02.006}, pmid = {28283403}, issn = {1878-4380}, mesh = {*Gene Transfer, Horizontal ; *Genetic Variation ; *Genome, Microbial ; Genomics ; Models, Genetic ; Phylogeny ; Streptomyces/genetics ; }, abstract = {Historical changes in population size, such as those caused by demographic range expansions, can produce nonadaptive changes in genomic diversity through mechanisms such as gene surfing. We propose that demographic range expansion of a microbial population capable of horizontal gene exchange can result in genome surfing, a mechanism that can cause widespread increase in the pan-genome frequency of genes acquired by horizontal gene exchange. We explain that patterns of genetic diversity within Streptomyces are consistent with genome surfing, and we describe several predictions for testing this hypothesis both in Streptomyces and in other microorganisms.}, } @article {pmid28273858, year = {2017}, author = {Tall, BD and Gangiredla, J and Grim, CJ and Patel, IR and Jackson, SA and Mammel, MK and Kothary, MH and Sathyamoorthy, V and Carter, L and Fanning, S and Iversen, C and Pagotto, F and Stephan, R and Lehner, A and Farber, J and Yan, QQ and Gopinath, GR}, title = {Use of a Pan-Genomic DNA Microarray in Determination of the Phylogenetic Relatedness among Cronobacter spp. 
and Its Use as a Data Mining Tool to Understand Cronobacter Biology.}, journal = {Microarrays (Basel, Switzerland)}, volume = {6}, number = {1}, pages = {}, pmid = {28273858}, issn = {2076-3905}, abstract = {Cronobacter (previously known as Enterobacter sakazakii) is a genus of Gram-negative, facultatively anaerobic, oxidase-negative, catalase-positive, rod-shaped bacteria of the family Enterobacteriaceae. These organisms cause a variety of illnesses such as meningitis, necrotizing enterocolitis, and septicemia in neonates and infants, and urinary tract, wound, abscesses or surgical site infections, septicemia, and pneumonia in adults. The total gene content of 379 strains of Cronobacter spp. and taxonomically-related isolates was determined using a recently reported DNA microarray. The Cronobacter microarray as a genotyping tool gives the global food safety community a rapid method to identify and capture the total genomic content of outbreak isolates for food safety, environmental, and clinical surveillance purposes. It was able to differentiate the seven Cronobacter species from one another and from non-Cronobacter species. The microarray was also able to cluster strains within each species into well-defined subgroups. These results also support previous studies on the phylogenic separation of species members of the genus and clearly highlight the evolutionary sequence divergence among each species of the genus compared to phylogenetically-related species. This review extends these studies and illustrates how the microarray can also be used as an investigational tool to mine genomic data sets from strains. 
Three case studies describing the use of the microarray are shown and include: (1) the determination of allelic differences among Cronobacter sakazakii strains possessing the virulence plasmid pESA3; (2) mining of malonate and myo-inositol alleles among subspecies of Cronobacter dublinensis strains to determine subspecies identity; and (3) lastly using the microarray to demonstrate sequence divergence and phylogenetic relatedness trends for 13 outer-membrane protein alleles among 240 Cronobacter and phylogenetically-related strains. The goal of this review is to describe microarrays as a robust tool for genomics research of this assorted and important genus, a criterion toward the development of future preventative measures to eliminate this foodborne pathogen from the global food supply.}, } @article {pmid28261241, year = {2017}, author = {Contreras-Moreira, B and Cantalapiedra, CP and García-Pereira, MJ and Gordon, SP and Vogel, JP and Igartua, E and Casas, AM and Vinuesa, P}, title = {Analysis of Plant Pan-Genomes and Transcriptomes with GET_HOMOLOGUES-EST, a Clustering Solution for Sequences of the Same Species.}, journal = {Frontiers in plant science}, volume = {8}, number = {}, pages = {184}, pmid = {28261241}, issn = {1664-462X}, abstract = {The pan-genome of a species is defined as the union of all the genes and non-coding sequences found in all its individuals. However, constructing a pan-genome for plants with large genomes is daunting both in sequencing cost and the scale of the required computational analysis. A more affordable alternative is to focus on the genic repertoire by using transcriptomic data. Here, the software GET_HOMOLOGUES-EST was benchmarked with genomic and RNA-seq data of 19 Arabidopsis thaliana ecotypes and then applied to the analysis of transcripts from 16 Hordeum vulgare genotypes. The goal was to sample their pan-genomes and classify sequences as core, if detected in all accessions, or accessory, when absent in some of them. 
The resulting sequence clusters were used to simulate pan-genome growth, and to compile Average Nucleotide Identity matrices that summarize intra-species variation. Although transcripts were found to under-estimate pan-genome size by at least 10%, we concluded that clusters of expressed sequences can recapitulate phylogeny and reproduce two properties observed in A. thaliana gene models: accessory loci show lower expression and higher non-synonymous substitution rates than core genes. Finally, accessory sequences were observed to preferentially encode transposon components in both species, plus disease resistance genes in cultivated barleys, and a variety of protein domains from other families that appear frequently associated with presence/absence variation in the literature. These results demonstrate that pan-genome analyses are useful to explore germplasm diversity.}, } @article {pmid28258140, year = {2017}, author = {Bansal, K and Midha, S and Kumar, S and Patil, PB}, title = {Ecological and Evolutionary Insights into Xanthomonas citri Pathovar Diversity.}, journal = {Applied and environmental microbiology}, volume = {83}, number = {9}, pages = {}, pmid = {28258140}, issn = {1098-5336}, mesh = {Citrus/*microbiology ; Cluster Analysis ; DNA, Bacterial/chemistry/genetics ; *Genetic Variation ; Genome, Bacterial ; Genotype ; India ; Phylogeny ; Plant Diseases/*microbiology ; Sequence Analysis, DNA ; Sequence Homology ; Xanthomonas/*classification/*genetics/isolation & purification ; }, abstract = {Citrus canker, caused by Xanthomonas citri pv. citri, is a serious disease of citrus plants worldwide. Earlier phylogenetic studies using housekeeping genes revealed that X. citri pv. citri is related to many other pathovars, which can be collectively referred as Xanthomonas citri pathovars (XCPs). From the present study, we report the genome sequences of 18 XCPs and compared them with four XCPs available in the public domain. 
In a tree based on phylogenomic marker genes, all the XCPs form a monophyletic cluster, suggesting their origin from a common ancestor. Phylogenomic analysis using the type strain further established that all the XCPs belong to one species. Clonal analysis of the core genome revealed the presence of two major lineages within this monophyletic cluster consisting of some clonal variants. Incidentally, the majority of these XCPs were first noticed in India, corroborating their clonal relationship and their common origin. Comparative analysis revealed an open pan-genome and the role of interstrain genomic flux of these XCPs since their diversification from a common ancestor. Even though there are wide variations in type III gene effectomes, we identified three core effectors which can be valuable in resistance-breeding programs. Overall, genomic examination of ecological relatives allowed us to dissect the tremendous genomic potential of X. citri species to rapidly evolve into specialized strains infecting diverse crop plants. IMPORTANCE Host specialization is one of the characteristic features of highly evolved pathogens such as the Xanthomonas group of phytopathogenic bacteria. Since the hosts involve staple crops and economically important fruits such as citrus, detailed understanding of the diversity and evolution of such strains infecting diverse plants is important for quarantine purposes. In the present study, we carried out genomic investigation of members of a phylogenetically and ecologically defined group of Xanthomonas strains pathogenic to diverse plants, including citrus. This group includes the oldest Xanthomonas pathovars and also recently emerged pathovars in a particular country where they are endemic. Our high-throughput genomic study has provided novel insights into the evolution of a unique lineage consisting of serious pathogens and their ecological relatives, suggesting the nature, scope, and pattern of rapid and recent diversification. 
Further, from the level of species to that of clonal variants, the study revealed interesting genomic patterns in diversification of a Xanthomonas lineage and perhaps will inspire careful study of the host range of the included pathovars.}, } @article {pmid28240182, year = {2017}, author = {Liao, W and Zhang, TT and Gao, L and Lee, SS and Xu, J and Zhang, H and Yang, Z and Liu, Z and Li, W}, title = {Integration of Novel Materials and Advanced Genomic Technologies into New Vaccine Design.}, journal = {Current topics in medicinal chemistry}, volume = {17}, number = {20}, pages = {2286-2301}, doi = {10.2174/1568026617666170224122117}, pmid = {28240182}, issn = {1873-4294}, mesh = {*Drug Design ; *Genetic Techniques ; Humans ; Vaccines/chemistry/*genetics/pharmacology ; }, abstract = {Designing new vaccines is one of the most challenging tasks for public health to prevent both infectious and chronic diseases. Even though many research scientists have spent great efforts in improving the specificity, sensitivity and safety of current available vaccines, there are still much space on how to effectively combine different biomaterials and technologies to design universal or personalized vaccines. Traditionally, vaccines were made based on empirical approaches designed to mimic immunity induced by natural infection. Either live attenuated or killed whole microorganisms were used as vaccines. With the development of biomaterial science, DNA/RNA, recombinant vector, adjuvant and nanoparticles greatly expand the category of vaccines. More importantly, with the tremendous advances of new technologies including genomics, proteomics and immunomics, the paradigm of vaccine design has shifted from microbiological to sequence-based approaches. 
This ever-growing large amount of genomic data and new genomic approaches such as comparative genomics, reverse vaccinology and pan-genomics, will play critical roles in novel vaccine design and enable development of more effective vaccines to cure and control both chronic and infectious diseases. In this review, we summarize current various vaccine materials, advanced technologies and combinational strategies to integrate biomaterials and advanced technologies for vaccine design, which we hope will provide some very useful guidelines and perspectives for the vaccine design.}, } @article {pmid28236274, year = {2017}, author = {Ibrahim, M and Subramanian, A and Anishetty, S}, title = {Comparative pan genome analysis of oral Prevotella species implicated in periodontitis.}, journal = {Functional & integrative genomics}, volume = {17}, number = {5}, pages = {513-536}, pmid = {28236274}, issn = {1438-7948}, mesh = {Bacterial Proteins/chemistry/genetics ; Bacterial Secretion Systems/*genetics ; Cysteine Proteases/chemistry/genetics ; *Genome, Bacterial ; Periodontitis/microbiology ; Phylogeny ; Prevotella/classification/*genetics/pathogenicity ; Protein Domains ; Sequence Alignment ; Sequence Homology ; }, abstract = {Prevotella is part of the oral bacterial community implicated in periodontitis. Pan genome analyses of eight oral Prevotella species, P. dentalis, P. enoeca, P. fusca, P. melaninogenica, P. denticola, P. intermedia 17, P. intermedia 17-2 and P. sp. oral taxon 299 are presented in this study. Analysis of the Prevotella pan genome revealed features such as secretion systems, resistance to oxidative stress and clustered regularly interspaced short palindromic repeat (CRISPR)-Cas systems that enable the bacteria to adapt to the oral environment. We identified the presence of type VI secretion system (T6SS) in P. fusca and P. intermedia strains. 
For some VgrG and Hcp proteins which were not part of the core T6SS loci, we used gene neighborhood analysis and identified putative effector proteins and putative polyimmunity loci in P. fusca and polymorphic toxin systems in P. intermedia strains. Earlier studies have identified the presence of Por secretion system (PorSS) in P. gingivalis, P. melaninogenica and P. intermedia. We noted the presence of their homologs in six other oral Prevotella studied here. We suggest that in Prevotella, PorSS is used to secrete cysteine proteases such as interpain and C-terminal domain containing proteins with a "Por_secre_tail" domain. We identified subtype I-B CRISPR-Cas system in P. enoeca. Putative CRISPR-Cas system subtypes for 37 oral Prevotella and 30 non-oral Prevotella species were also predicted. Further, we performed a BLASTp search of the Prevotella proteins which are also conserved in the red-complex pathogens, against the human proteome to identify potential broad-spectrum drug targets. 
In summary, the use of a pan genome approach enabled identification of secretion systems and defense mechanisms in Prevotella that confer adaptation to the oral cavity.}, } @article {pmid28231383, year = {2017}, author = {Montenegro, JD and Golicz, AA and Bayer, PE and Hurgobin, B and Lee, H and Chan, CK and Visendi, P and Lai, K and Doležel, J and Batley, J and Edwards, D}, title = {The pangenome of hexaploid bread wheat.}, journal = {The Plant journal : for cell and molecular biology}, volume = {90}, number = {5}, pages = {1007-1013}, doi = {10.1111/tpj.13515}, pmid = {28231383}, issn = {1365-313X}, mesh = {Chromosomes, Plant/genetics ; Genetic Variation/genetics ; Genome, Plant/*genetics ; Polymorphism, Single Nucleotide/genetics ; Triticum/*genetics ; }, abstract = {There is an increasing understanding that variation in gene presence-absence plays an important role in the heritability of agronomic traits; however, there have been relatively few studies on variation in gene presence-absence in crop species. Hexaploid wheat is one of the most important food crops in the world and intensive breeding has reduced the genetic diversity of elite cultivars. Major efforts have produced draft genome assemblies for the cultivar Chinese Spring, but it is unknown how well this represents the genome diversity found in current modern elite cultivars. In this study we build an improved reference for Chinese Spring and explore gene diversity across 18 wheat cultivars. We predict a pangenome size of 140 500 ± 102 genes, a core genome of 81 070 ± 1631 genes and an average of 128 656 genes in each cultivar. Functional annotation of the variable gene set suggests that it is enriched for genes that may be associated with important agronomic traits. In addition to variation in gene presence, more than 36 million intervarietal single nucleotide polymorphisms were identified across the pangenome. 
This study of the wheat pangenome provides insight into genome diversity in elite wheat as a basis for genomics-based improvement of this important crop. A wheat pangenome, GBrowse, is available at http://appliedbioinformatics.com.au/cgi-bin/gb2/gbrowse/WheatPan/, and data are available to download from http://wheatgenome.info/wheat_genome_databases.php.}, } @article {pmid28217118, year = {2017}, author = {Frantzen, CA and Kot, W and Pedersen, TB and Ardö, YM and Broadbent, JR and Neve, H and Hansen, LH and Dal Bello, F and Østlie, HM and Kleppen, HP and Vogensen, FK and Holo, H}, title = {Genomic Characterization of Dairy Associated Leuconostoc Species and Diversity of Leuconostocs in Undefined Mixed Mesophilic Starter Cultures.}, journal = {Frontiers in microbiology}, volume = {8}, number = {}, pages = {132}, pmid = {28217118}, issn = {1664-302X}, abstract = {Undefined mesophilic mixed (DL-type) starter cultures are composed of predominantly Lactococcus lactis subspecies and 1-10% Leuconostoc spp. The composition of the Leuconostoc population in the starter culture ultimately affects the characteristics and the quality of the final product. The scientific basis for the taxonomy of dairy relevant leuconostocs can be traced back 50 years, and no documentation on the genomic diversity of leuconostocs in starter cultures exists. We present data on the Leuconostoc population in five DL-type starter cultures commonly used by the dairy industry. The analyses were performed using traditional cultivation methods, and further augmented by next-generation DNA sequencing methods. Bacterial counts for starter cultures cultivated on two different media, MRS and MPCA, revealed large differences in the relative abundance of leuconostocs. Most of the leuconostocs in two of the starter cultures were unable to grow on MRS, emphasizing the limitations of culture-based methods and the importance of careful media selection or use of culture independent methods. 
Pan-genomic analysis of 59 Leuconostoc genomes enabled differentiation into twelve robust lineages. The genomic analyses show that the dairy-associated leuconostocs are highly adapted to their environment, characterized by the acquisition of genotype traits, such as the ability to metabolize citrate. In particular, Leuconostoc mesenteroides subsp. cremoris display telltale signs of a degenerative evolution, likely resulting from a long period of growth in milk in association with lactococci. Great differences in the metabolic potential between Leuconostoc species and subspecies were revealed. Using targeted amplicon sequencing, the composition of the Leuconostoc population in the five commercial starter cultures was shown to be significantly different. Three of the cultures were dominated by Ln. mesenteroides subspecies cremoris. Leuconostoc pseudomesenteroides dominated in two of the cultures while Leuconostoc lactis, reported to be a major constituent in fermented dairy products, was only present in low amounts in one of the cultures. This is the first in-depth study of Leuconostoc genomics and diversity in dairy starter cultures. The results and the techniques presented may be of great value for the dairy industry.}, } @article {pmid28198668, year = {2017}, author = {Yang, J and Yang, S}, title = {Comparative analysis of Corynebacterium glutamicum genomes: a new perspective for the industrial production of amino acids.}, journal = {BMC genomics}, volume = {18}, number = {Suppl 1}, pages = {940}, pmid = {28198668}, issn = {1471-2164}, mesh = {Alleles ; Amino Acids/*biosynthesis ; Corynebacterium glutamicum/classification/*genetics/metabolism ; *Genome, Bacterial ; *Genomics/methods ; High-Throughput Nucleotide Sequencing ; Multilocus Sequence Typing ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {BACKGROUND: Corynebacterium glutamicum is a non-pathogenic bacterium widely used in industrial amino acid production and metabolic engineering research. 
Although the genome sequences of some C. glutamicum strains are available, comprehensive comparative genome analyses of these species have not been done. Six wild type C. glutamicum strains were sequenced using next-generation sequencing technology in our study. Together with 20 previously reported strains, we present a comprehensive comparative analysis of C. glutamicum genomes.

RESULTS: By average nucleotide identity (ANI) analysis, we show that 10 strains, which were previously classified either in the genus Brevibacterium, or as some other species within the genus Corynebacterium, should be reclassified as members of the species C. glutamicum. C. glutamicum has an open pan-genome with 2359 core genes. An additional NAD[+]/NADP[+] specific glutamate dehydrogenase (GDH) gene (gdh) was identified in the glutamate synthesis pathway of some C. glutamicum strains. For analyzing variations related to amino acid production, we have developed an efficient pipeline that includes three major steps: multi locus sequence typing (MLST), phylogenomic analysis based on single nucleotide polymorphisms (SNPs), and a thorough comparison of all genomic variation amongst ancestral or closely related wild type strains. This combined approach can provide new perspectives on the industrial use of C. glutamicum.

CONCLUSIONS: This is the first comprehensive comparative analysis of C. glutamicum genomes at the pan-genomic level. Whole genome comparison provides definitive evidence for classifying the members of this species. Identifying an additional gdh gene in some C. glutamicum strains may accelerate further research on glutamate synthesis. Our proposed pipeline can provide a clear perspective, including the presumed ancestor, the strain breeding trajectory, and the genomic variations necessary to increase amino acid production in C. glutamicum.}, } @article {pmid28194158, year = {2017}, author = {Checcucci, A and Azzarello, E and Bazzicalupo, M and De Carlo, A and Emiliani, G and Mancuso, S and Spini, G and Viti, C and Mengoni, A}, title = {Role and Regulation of ACC Deaminase Gene in Sinorhizobium meliloti: Is It a Symbiotic, Rhizospheric or Endophytic Gene?.}, journal = {Frontiers in genetics}, volume = {8}, number = {}, pages = {6}, pmid = {28194158}, issn = {1664-8021}, abstract = {Plant-associated bacteria exhibit a number of different strategies and specific genes allow bacteria to communicate and metabolically interact with plant tissues. Among the genes found in the genomes of plant-associated bacteria, the gene encoding the enzyme 1-aminocyclopropane-1-carboxylate (ACC) deaminase (acdS) is one of the most diffused. This gene is supposed to be involved in the cleaving of plant-produced ACC, the precursor of the plant stress-hormone ethylene toning down the plant response to infection. However, few reports are present on the actual role in rhizobia, one of the most investigated groups of plant-associated bacteria. In particular, still unclear is the origin and the role of acdS in symbiotic competitiveness and on the selective benefit it may confer to plant symbiotic rhizobia. Here we present a phylogenetic and functional analysis of acdS orthologs in the rhizobium model-species Sinorhizobium meliloti. Results showed that acdS orthologs present in S. 
meliloti pangenome have polyphyletic origin and likely spread through horizontal gene transfer, mediated by mobile genetic elements. When acdS ortholog from AK83 strain was cloned and assayed in S. meliloti 1021 (lacking acdS), no modulation of plant ethylene levels was detected, as well as no increase in fitness for nodule occupancy was found in the acdS-derivative strain compared to the parental one. Surprisingly, AcdS was shown to confer the ability to utilize formamide and some dipeptides as sole nitrogen source. Finally, acdS was shown to be negatively regulated by a putative leucine-responsive regulator (LrpL) located upstream to acdS sequence (acdR). acdS expression was induced by root exudates of both legumes and non-leguminous plants. We conclude that acdS in S. meliloti is not directly related to symbiotic interaction, but it could likely be involved in the rhizospheric colonization or in the endophytic behavior.}, } @article {pmid28187427, year = {2016}, author = {Soares, SC and Oliveira, LC and Jaiswal, AK and Azevedo, V}, title = {Genomic Islands: an overview of current software and future improvements.}, journal = {Journal of integrative bioinformatics}, volume = {13}, number = {1}, pages = {301}, doi = {10.2390/biecoll-jib-2016-301}, pmid = {28187427}, issn = {1613-4516}, mesh = {Bacteria/*genetics ; *Genome, Bacterial ; *Genomic Islands ; Sequence Analysis, DNA/*methods ; *Software ; }, abstract = {Microbes are highly diverse and widely distributed organisms. They account for ~60% of Earth’s biomass and new predictions point for the existence of $10^{11}$ to $10^{12}$ species, which are constantly sharing genes through several different mechanisms. Genomic Islands (GI) are critical in this context, as they are large regions acquired through horizontal gene transfer. Also, they present common features like genomic signature deviation, transposase genes, flanking tRNAs and insertion sequences. 
GIs carry large numbers of genes related to specific lifestyle and are commonly classified in Pathogenicity, Resistance, Metabolic or Symbiotic Islands. With the advent of the next-generation sequencing technologies and the deluge of genomic data, many software tools have been developed that aim to tackle the problem of GI prediction and they are all based on the prediction of GI common features. However, there is still room for the development of new software tools that implements new approaches, such as, machine learning and pangenomics based analyses. Finally, GIs will always hold a potential application in every newly invented genomic approach as they are directly responsible for much of the genomic plasticity of bacteria.}, } @article {pmid28173009, year = {2017}, author = {Monat, C and Pera, B and Ndjiondjop, MN and Sow, M and Tranchant-Dubreuil, C and Bastianelli, L and Ghesquière, A and Sabot, F}, title = {De Novo Assemblies of Three Oryza glaberrima Accessions Provide First Insights about Pan-Genome of African Rices.}, journal = {Genome biology and evolution}, volume = {9}, number = {1}, pages = {1-6}, pmid = {28173009}, issn = {1759-6653}, mesh = {Africa ; Genetic Variation ; *Genome, Plant ; Molecular Sequence Annotation ; Oryza/*classification/*genetics ; Sequence Analysis, DNA ; }, abstract = {Oryza glaberrima is one of the two cultivated species of rice, and harbors various interesting agronomic traits, especially in biotic and abiotic resistance, compared with its Asian cousin O. sativa. A previous reference genome was published but newer studies highlighted some missing parts. Moreover, global species diversity is known nowadays to be represented by more than one single individual. For that purpose, we sequenced, assembled and annotated de novo three different cultivars from O. glaberrima. 
After validating our assemblies, we were able to better solve complex regions than the previous assembly and to provide a first insight in pan-genomic divergence between individuals. The three assemblies shown large common regions, but almost 25% of the genome present collinearity breakpoints or are even individual specific.}, } @article {pmid28158295, year = {2017}, author = {Wang, J and Haapalainen, M and Schott, T and Thompson, SM and Smith, GR and Nissinen, AI and Pirhonen, M}, title = {Genomic sequence of 'Candidatus Liberibacter solanacearum' haplotype C and its comparison with haplotype A and B genomes.}, journal = {PloS one}, volume = {12}, number = {2}, pages = {e0171531}, pmid = {28158295}, issn = {1932-6203}, mesh = {Animals ; DNA, Plant ; Daucus carota/microbiology/parasitology ; Genome, Bacterial ; *Haplotypes ; Hemiptera/microbiology ; Operon ; Phylogeny ; Prophages/genetics ; RNA, Plant ; RNA, Ribosomal ; Rhizobiaceae/classification/*genetics/isolation & purification ; Sequence Analysis, DNA ; }, abstract = {Haplotypes A and B of 'Candidatus Liberibacter solanacearum' (CLso) are associated with diseases of solanaceous plants, especially Zebra chip disease of potato, and haplotypes C, D and E are associated with symptoms on apiaceous plants. To date, one complete genome of haplotype B and two high quality draft genomes of haplotype A have been obtained for these unculturable bacteria using metagenomics from the psyllid vector Bactericera cockerelli. Here, we present the first genomic sequences obtained for the carrot-associated CLso. These two genomic sequences of haplotype C, FIN114 (1.24 Mbp) and FIN111 (1.20 Mbp), were obtained from carrot psyllids (Trioza apicalis) harboring CLso. Genomic comparisons between the haplotypes A, B and C revealed that the genome organization differs between these haplotypes, due to large inversions and other recombinations. 
Comparison of protein-coding genes indicated that the core genome of CLso consists of 885 ortholog groups, with the pan-genome consisting of 1327 ortholog groups. Twenty-seven ortholog groups are unique to CLso haplotype C, whilst 11 ortholog groups shared by the haplotypes A and B, are not found in the haplotype C. Some of these ortholog groups that are not part of the core genome may encode functions related to interactions with the different host plant and psyllid species.}, } @article {pmid28138100, year = {2017}, author = {Sansevere, EA and Luo, X and Park, JY and Yoon, S and Seo, KS and Robinson, DA}, title = {Transposase-Mediated Excision, Conjugative Transfer, and Diversity of ICE6013 Elements in Staphylococcus aureus.}, journal = {Journal of bacteriology}, volume = {199}, number = {8}, pages = {}, pmid = {28138100}, issn = {1098-5530}, support = {P20 GM103646/GM/NIGMS NIH HHS/United States ; R01 GM080602/GM/NIGMS NIH HHS/United States ; }, mesh = {Bacterial Proteins/genetics/*metabolism ; Conjugation, Genetic/*physiology ; Gene Expression Regulation, Bacterial/physiology ; Genetic Variation ; Protein Domains ; Staphylococcus aureus/*enzymology/genetics/*physiology ; Transposases/*metabolism ; }, abstract = {ICE6013 represents one of two families of integrative conjugative elements (ICEs) identified in the pan-genome of the human and animal pathogen Staphylococcus aureus. Here we investigated the excision and conjugation functions of ICE6013 and further characterized the diversity of this element. ICE6013 excision was not significantly affected by growth, temperature, pH, or UV exposure and did not depend on recA. The IS30-like DDE transposase (Tpase; encoded by orf1 and orf2) of ICE6013 must be uninterrupted for excision to occur, whereas disrupting three of the other open reading frames (ORFs) on the element significantly affects the level of excision. We demonstrate that ICE6013 conjugatively transfers to different S. 
aureus backgrounds at frequencies approaching that of the conjugative plasmid pGO1. We found that excision is required for conjugation, that not all S. aureus backgrounds are successful recipients, and that transconjugants acquire the ability to transfer ICE6013. Sequencing of chromosomal integration sites in serially passaged transconjugants revealed a significant integration site preference for a 15-bp AT-rich palindromic consensus sequence, which surrounds the 3-bp target site that is duplicated upon integration. A sequence analysis of ICE6013 from different host strains of S. aureus and from eight other species of staphylococci identified seven divergent subfamilies of ICE6013 that include sequences previously classified as a transposon, a plasmid, and various ICEs. In summary, these results indicate that the IS30-like Tpase functions as the ICE6013 recombinase and that ICE6013 represents a diverse family of mobile genetic elements that mediate conjugation in staphylococci. IMPORTANCE: Integrative conjugative elements (ICEs) encode the abilities to integrate into and excise from bacterial chromosomes and plasmids and mediate conjugation between bacteria. As agents of horizontal gene transfer, ICEs may affect bacterial evolution. ICE6013 represents one of two known families of ICEs in the pathogen Staphylococcus aureus, but its core functions of excision and conjugation are not well studied. 
Here, we show that ICE6013 depends on its IS30-like DDE transposase for excision, which is unique among ICEs, and we demonstrate the conjugative transfer and integration site preference of ICE6013. A sequence analysis revealed that ICE6013 has diverged into seven subfamilies that are dispersed among staphylococci.}, } @article {pmid28130242, year = {2017}, author = {Pedersen, TL}, title = {Hierarchical sets: analyzing pangenome structure through scalable set visualizations.}, journal = {Bioinformatics (Oxford, England)}, volume = {33}, number = {11}, pages = {1604-1612}, pmid = {28130242}, issn = {1367-4811}, mesh = {Algorithms ; Bacteria/*genetics ; Cluster Analysis ; Escherichia/genetics ; *Genome, Bacterial ; Genomics/*methods ; Sequence Analysis, DNA/*methods ; Shigella/genetics ; *Software ; }, abstract = {MOTIVATION: The increase in available microbial genome sequences has resulted in an increase in the size of the pangenomes being analyzed. Current pangenome visualizations are not intended for the pangenome sizes possible today and new approaches are necessary in order to convert the increase in available information to increase in knowledge. As the pangenome data structure is essentially a collection of sets we explore the potential for scalable set visualization as a tool for pangenome analysis.

RESULTS: We present a new hierarchical clustering algorithm based on set arithmetics that optimizes the intersection sizes along the branches. The intersection and union sizes along the hierarchy are visualized using a composite dendrogram and icicle plot, which, in pangenome context, shows the evolution of pangenome and core size along the evolutionary hierarchy. Outlying elements, i.e. elements whose presence pattern do not correspond with the hierarchy, can be visualized using hierarchical edge bundles. When applied to pangenome data this plot shows putative horizontal gene transfers between the genomes and can highlight relationships between genomes that is not represented by the hierarchy. We illustrate the utility of hierarchical sets by applying it to a pangenome based on 113 Escherichia and Shigella genomes and find it provides a powerful addition to pangenome analysis.

The described clustering algorithm and visualizations are implemented in the hierarchicalSets R package available from CRAN (https://cran.r-project.org/web/packages/hierarchicalSets).

CONTACT: thomasp85@gmail.com.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid28125655, year = {2017}, author = {Baraúna, RA and Ramos, RT and Veras, AA and Pinheiro, KC and Benevides, LJ and Viana, MV and Guimarães, LC and Edman, JM and Spier, SJ and Azevedo, V and Silva, A}, title = {Assessing the Genotypic Differences between Strains of Corynebacterium pseudotuberculosis biovar equi through Comparative Genomics.}, journal = {PloS one}, volume = {12}, number = {1}, pages = {e0170676}, pmid = {28125655}, issn = {1932-6203}, mesh = {Animals ; Corynebacterium Infections/*genetics/microbiology ; Corynebacterium pseudotuberculosis/*genetics/pathogenicity ; Genome, Bacterial/*genetics ; Genotype ; High-Throughput Nucleotide Sequencing ; Horse Diseases/*genetics/microbiology ; Horses/microbiology ; Phylogeny ; Polymorphism, Single Nucleotide/genetics ; Rhodococcus equi/*genetics/pathogenicity ; }, abstract = {Seven genomes of Corynebacterium pseudotuberculosis biovar equi were sequenced on the Ion Torrent PGM platform, generating high-quality scaffolds over 2.35 Mbp. This bacterium is the causative agent of disease known as "pigeon fever" which commonly affects horses worldwide. The pangenome of biovar equi was calculated and two phylogenomic approaches were used to identify clustering patterns within Corynebacterium genus. Furthermore, other comparative analyses were performed including the prediction of genomic islands and prophages, and SNP-based phylogeny. In the phylogenomic tree, C. pseudotuberculosis was divided into two distinct clades, one formed by nitrate non-reducing species (biovar ovis) and another formed by nitrate-reducing species (biovar equi). In the latter group, the strains isolated from California were more related to each other, while the strains CIP 52.97 and 1/06-A formed the outermost clade of the biovar equi. A total of 1,355 core genes were identified, corresponding to 42.5% of the pangenome. 
This pangenome has one of the smallest core genomes described in the literature, suggesting a high genetic variability of biovar equi of C. pseudotuberculosis. The analysis of the similarity between the resistance islands identified a higher proximity between the strains that caused more severe infectious conditions (infection in the internal organs). Pathogenicity islands were largely conserved between strains. Several genes that modulate the pathogenicity of C. pseudotuberculosis were described including peptidases, recombination enzymes, micoside synthesis enzymes, bacteriocins with antimicrobial activity and several others. Finally, no genotypic differences were observed between the strains that caused the three different types of infection (external abscess formation, infection with abscess formation in the internal organs, and ulcerative lymphangitis). Instead, it was noted that there is a higher phenetic correlation between strains isolated at California compared to the other strains. Additionally, high variability of resistance islands suggests gene acquisition through several events of horizontal gene transfer.}, } @article {pmid28123380, year = {2016}, author = {Knight, DR and Squire, MM and Collins, DA and Riley, TV}, title = {Genome Analysis of Clostridium difficile PCR Ribotype 014 Lineage in Australian Pigs and Humans Reveals a Diverse Genetic Repertoire and Signatures of Long-Range Interspecies Transmission.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {2138}, pmid = {28123380}, issn = {1664-302X}, abstract = {Clostridium difficile PCR ribotype (RT) 014 is well-established in both human and porcine populations in Australia, raising the possibility that C. difficile infection (CDI) may have a zoonotic or foodborne etiology. Here, whole genome sequencing and high-resolution core genome phylogenetics were performed on a contemporaneous collection of 40 Australian RT014 isolates of human and porcine origin. 
Phylogenies based on MLST (7 loci, STs 2, 13, and 49) and core orthologous genes (1260 loci) showed clustering of human and porcine strains indicative of very recent shared ancestry. Core genome single nucleotide variant (SNV) analysis found 42% of human strains showed a clonal relationship (separated by ≤2 SNVs in their core genome) with one or more porcine strains, consistent with recent inter-host transmission. Clones were spread over a vast geographic area with 50% of the human cases occurring without recent healthcare exposure. These findings suggest a persistent community reservoir with long-range dissemination, potentially due to agricultural recycling of piggery effluent. We also provide the first pan-genome analysis for this lineage, characterizing its resistome, prophage content, and in silico virulence potential. The RT014 is defined by a large "open" pan-genome (7587 genes) comprising a core genome of 2296 genes (30.3% of the total gene repertoire) and an accessory genome of 5291 genes. Antimicrobial resistance genotypes and phenotypes varied across host populations and ST lineages and were characterized by resistance to tetracycline [tetM, tetA(P), tetB(P) and tetW], clindamycin/erythromycin (ermB), and aminoglycosides (aph3-III-Sat4A-ant6-Ia). Resistance was mediated by clinically important mobile genetic elements, most notably Tn6194 (harboring ermB) and a novel variant of Tn5397 (harboring tetM). Numerous clinically important prophages (Siphoviridae and Myoviridae) were identified as well as an uncommon accessory gene regulator locus (agr3). Conservation in the pathogenicity locus and S-layer correlated with ST affiliation, further extending the concept of clonal C. difficile lineages. This study provides novel insights on the genetic variability and strain relatedness of C. difficile RT014, a lineage of emerging One Health importance. 
Ongoing molecular and genomic surveillance of strains in humans, animals, food, and the environment is imperative to identify opportunities to reduce the overall CDI burden.}, } @article {pmid28117406, year = {2017}, author = {Pancrace, C and Barny, MA and Ueoka, R and Calteau, A and Scalvenzi, T and Pédron, J and Barbe, V and Piel, J and Humbert, JF and Gugger, M}, title = {Insights into the Planktothrix genus: Genomic and metabolic comparison of benthic and planktic strains.}, journal = {Scientific reports}, volume = {7}, number = {}, pages = {41181}, pmid = {28117406}, issn = {2045-2322}, mesh = {*Genetic Variation ; Genome ; Genome, Bacterial ; Genomics ; Oscillatoria/*genetics/*metabolism ; Phylogeny ; }, abstract = {Planktothrix is a dominant cyanobacterial genus forming toxic blooms in temperate freshwater ecosystems. We sequenced the genome of planktic and non planktic Planktothrix strains to better represent this genus diversity and life style at the genomic level. Benthic and biphasic strains are rooting the Planktothrix phylogenetic tree and widely expand the pangenome of this genus. We further investigated in silico the genetic potential dedicated to gas vesicles production, nitrogen fixation as well as natural product synthesis and conducted complementary experimental tests by cell culture, microscopy and mass spectrometry. Significant differences for the investigated features could be evidenced between strains of different life styles. The benthic Planktothrix strains showed unexpected characteristics such as buoyancy, nitrogen fixation capacity and unique natural product features. In comparison with Microcystis, another dominant toxic bloom-forming genus in freshwater ecosystem, different evolutionary strategies were highlighted notably as Planktothrix exhibits an overall greater genetic diversity but a smaller genomic plasticity than Microcystis. 
Our results are shedding light on Planktothrix evolution, phylogeny and physiology in the frame of their diverse life styles.}, } @article {pmid28115376, year = {2017}, author = {Thépault, A and Méric, G and Rivoal, K and Pascoe, B and Mageiros, L and Touzain, F and Rose, V and Béven, V and Chemaly, M and Sheppard, SK}, title = {Genome-Wide Identification of Host-Segregating Epidemiological Markers for Source Attribution in Campylobacter jejuni.}, journal = {Applied and environmental microbiology}, volume = {83}, number = {7}, pages = {}, pmid = {28115376}, issn = {1098-5336}, support = {/WT_/Wellcome Trust/United Kingdom ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; 088786/C/09/Z/WT_/Wellcome Trust/United Kingdom ; BB/I02464X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Bacterial Typing Techniques ; Campylobacter/isolation & purification ; Campylobacter Infections/*epidemiology/*microbiology/transmission ; Campylobacter jejuni/classification/*genetics ; Chickens/microbiology ; Disease Reservoirs/microbiology ; *Food Microbiology ; France/epidemiology ; Genetic Markers ; *Genome, Bacterial ; Genomics ; Humans ; Multilocus Sequence Typing ; Ruminants/microbiology ; United Kingdom/epidemiology ; }, abstract = {Campylobacter is among the most common worldwide causes of bacterial gastroenteritis. This organism is part of the commensal microbiota of numerous host species, including livestock, and these animals constitute potential sources of human infection. Molecular typing approaches, especially multilocus sequence typing (MLST), have been used to attribute the source of human campylobacteriosis by quantifying the relative abundance of alleles at seven MLST loci among isolates from animal reservoirs and human infection, implicating chicken as a major infection source. 
The increasing availability of bacterial genomes provides data on allelic variation at loci across the genome, providing the potential to improve the discriminatory power of data for source attribution. Here we present a source attribution approach based on the identification of novel epidemiological markers among a reference pan-genome list of 1,810 genes identified by gene-by-gene comparison of 884 genomes of Campylobacter jejuni isolates from animal reservoirs, the environment, and clinical cases. Fifteen loci involved in metabolic activities, protein modification, signal transduction, and stress response or coding for hypothetical proteins were selected as host-segregating markers and used to attribute the source of 42 French and 281 United Kingdom clinical C. jejuni isolates. Consistent with previous studies of British campylobacteriosis, analyses performed using STRUCTURE software attributed 56.8% of British clinical cases to chicken, emphasizing the importance of this host reservoir as an infection source in the United Kingdom. However, among French clinical isolates, approximately equal proportions of isolates were attributed to chicken and ruminant reservoirs, suggesting possible differences in the relative importance of animal host reservoirs and indicating a benefit for further national-scale attribution modeling to account for differences in production, behavior, and food consumption. IMPORTANCE: Accurately quantifying the relative contribution of different host reservoirs to human Campylobacter infection is an ongoing challenge. This study, based on the development of a novel source attribution approach, provides the first results of source attribution in Campylobacter jejuni in France. A systematic analysis using gene-by-gene comparison of 884 genomes of C. jejuni isolates, with a pan-genome list of genes, identified 15 novel epidemiological markers for source attribution. 
The different proportions of French and United Kingdom clinical isolates attributed to each host reservoir illustrate a potential role for local/national variations in C. jejuni transmission dynamics.}, } @article {pmid28105943, year = {2016}, author = {Sultanov, RI and Arapidi, GP and Vinogradova, SV and Govorun, VM and Luster, DG and Ignatov, AN}, title = {Comprehensive analysis of draft genomes of two closely related Pseudomonas syringae phylogroup 2b strains infecting mono- and dicotyledon host plants.}, journal = {BMC genomics}, volume = {17}, number = {Suppl 14}, pages = {1010}, pmid = {28105943}, issn = {1471-2164}, mesh = {Bacterial Secretion Systems/genetics ; Clustered Regularly Interspaced Short Palindromic Repeats ; Computational Biology/methods ; DNA Transposable Elements ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics/methods ; High-Throughput Nucleotide Sequencing ; Molecular Sequence Annotation ; Multigene Family ; Phylogeny ; Plant Diseases/*microbiology ; Pseudomonas syringae/classification/*genetics/pathogenicity ; Quorum Sensing/genetics ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: In recent years, the damage caused by bacterial pathogens to major crops has been increasing worldwide. Pseudomonas syringae is a widespread bacterial species that infects almost all major crops. Different P. syringae strains use a wide range of biochemical mechanisms, including phytotoxins and effectors of the type III and type IV secretion systems, which determine the specific nature of the pathogen virulence.

RESULTS: Strains 1845 (isolated from dicots) and 2507 (isolated from monocots) were selected for sequencing because they specialize on different groups of plants. We compared virulence factors in these and other available genomes of phylogroup 2 to find genes responsible for the specialization of bacteria. We showed that strain 1845 belongs to the clonal group that has been infecting monocots in Russia and USA for a long time (at least 50 years). Strain 1845 has relatively recently changed its host plant to dicots.

CONCLUSIONS: The results obtained by comparing the strain 1845 genome with the genomes of bacteria infecting monocots can help to identify the genes that define specific nature of the virulence of P. syringae strains.}, } @article {pmid28095778, year = {2017}, author = {Bosi, E and Fondi, M and Orlandini, V and Perrin, E and Maida, I and de Pascale, D and Tutino, ML and Parrilli, E and Lo Giudice, A and Filloux, A and Fani, R}, title = {The pangenome of (Antarctic) Pseudoalteromonas bacteria: evolutionary and functional insights.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {93}, pmid = {28095778}, issn = {1471-2164}, support = {MR/J006874/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Antarctic Regions ; Anti-Bacterial Agents/metabolism ; Bacterial Proteins/genetics/metabolism ; Cold Temperature ; Databases, Genetic ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Membrane Transport Proteins/genetics/metabolism ; Phylogeny ; Pseudoalteromonas/classification/*genetics ; Secondary Metabolism/genetics ; }, abstract = {BACKGROUND: Pseudoalteromonas is a genus of ubiquitous marine bacteria used as model organisms to study the biological mechanisms involved in the adaptation to cold conditions. A remarkable feature shared by these bacteria is their ability to produce secondary metabolites with a strong antimicrobial and antitumor activity. Despite their biotechnological relevance, representatives of this genus are still lacking (with few exceptions) an extensive genomic characterization, including features involved in the evolution of secondary metabolites production. Indeed, biotechnological applications would greatly benefit from such analysis.

RESULTS: Here, we analyzed the genomes of 38 strains belonging to different Pseudoalteromonas species and isolated from diverse ecological niches, including extreme ones (i.e. Antarctica). These sequences were used to reconstruct the largest Pseudoalteromonas pangenome computed so far, including also the two main groups of Pseudoalteromonas strains (pigmented and not pigmented strains). The downstream analyses were conducted to describe the genomic diversity, both at genus and group levels. This allowed highlighting a remarkable genomic heterogeneity, even for closely related strains. We drafted all the main evolutionary steps that led to the current structure and gene content of Pseudoalteromonas representatives. These, most likely, included an extensive genome reduction and a strong contribution of Horizontal Gene Transfer (HGT), which affected biotechnologically relevant gene sets and occurred in a strain-specific fashion. Furthermore, this study also identified the genomic determinants related to some of the most interesting features of the Pseudoalteromonas representatives, such as the production of secondary metabolites, the adaptation to cold temperatures and the resistance to abiotic compounds.

CONCLUSIONS: This study poses the bases for a comprehensive understanding of the evolutionary trajectories followed in time by this peculiar bacterial genus and for a focused exploitation of their biotechnological potential.}, } @article {pmid28078297, year = {2016}, author = {Cao, DM and Lu, QF and Li, SB and Wang, JP and Chen, YL and Huang, YQ and Bi, HK}, title = {Comparative Genomics of H. pylori and Non-Pylori Helicobacter Species to Identify New Regions Associated with Its Pathogenicity and Adaptability.}, journal = {BioMed research international}, volume = {2016}, number = {}, pages = {6106029}, pmid = {28078297}, issn = {2314-6141}, mesh = {Bacterial Proteins/*genetics ; Genome, Bacterial ; *Genomics ; Helicobacter Infections/*genetics/microbiology ; Helicobacter pylori/*genetics/pathogenicity ; Humans ; Molecular Sequence Annotation ; }, abstract = {The genus Helicobacter is a group of Gram-negative, helical-shaped pathogens consisting of at least 36 bacterial species. Helicobacter pylori (H. pylori), infecting more than 50% of the human population, is considered as the major cause of gastritis, peptic ulcer, and gastric cancer. However, the genetic underpinnings of H. pylori that are responsible for its large scale epidemic and gastrointestinal environment adaption within human beings remain unclear. Core-pan genome analysis was performed among 75 representative H. pylori and 24 non-pylori Helicobacter genomes. There were 1173 conserved protein families of H. pylori and 673 of all 99 Helicobacter genus strains. We found 79 genome unique regions, a total of 202,359 bp, shared by at least 80% of the H. pylori but lacked in non-pylori Helicobacter species. The operons, genes, and sRNAs within the H. pylori unique regions were considered as potential ones associated with its pathogenicity and adaptability, and the relativity among them has been partially confirmed by functional annotation analysis. 
However, functions of at least 54 genes and 10 sRNAs were still unclear. Our analysis of protein-protein interaction showed that 30 genes within them may have the cooperation relationship.}, } @article {pmid28057934, year = {2017}, author = {Zeng, H and Zhang, J and Li, C and Xie, T and Ling, N and Wu, Q and Ye, Y}, title = {The driving force of prophages and CRISPR-Cas system in the evolution of Cronobacter sakazakii.}, journal = {Scientific reports}, volume = {7}, number = {}, pages = {40206}, pmid = {28057934}, issn = {2045-2322}, mesh = {*CRISPR-Cas Systems ; Clustered Regularly Interspaced Short Palindromic Repeats ; Cronobacter sakazakii/*genetics/pathogenicity ; *Evolution, Molecular ; Genome ; Genome, Bacterial ; Prophages/*genetics ; }, abstract = {Cronobacter sakazakii is an important foodborne pathogens causing rare but life-threatening diseases in neonates and infants. CRISPR-Cas system is a new prokaryotic defense system that provides adaptive immunity against phages, latter play an vital role on the evolution and pathogenicity of host bacteria. In this study, we found that genome sizes of C. sakazakii strains had a significant positive correlation with total genome sizes of prophages. Prophages contributed to 16.57% of the genetic diversity (pan genome) of C. sakazakii, some of which maybe the potential virulence factors. Subtype I-E CRISPR-Cas system and five types of CRISPR arrays were found in the conserved site of C. sakazakii strains. CRISPR1 and CRISPR2 loci with high variable spacers were active and showed potential protection against phage attacks. The number of spacers from two active CRISPR loci in clinical strains was significant less than that of foodborne strains, it maybe a reason why clinical strains were found to have more prophages than foodborne strains. The frequently gain/loss of prophages and spacers in CRISPR loci is likely to drive the quick evolution of C. sakazakii. 
Our study provides a new insight into the co-evolution of phages and C. sakazakii.}, } @article {pmid28057677, year = {2017}, author = {Pedersen, TL and Nookaew, I and Wayne Ussery, D and Månsson, M}, title = {PanViz: interactive visualization of the structure of functionally annotated pangenomes.}, journal = {Bioinformatics (Oxford, England)}, volume = {33}, number = {7}, pages = {1081-1082}, pmid = {28057677}, issn = {1367-4811}, mesh = {Computer Graphics ; Gene Ontology ; *Genome ; Genomics ; Molecular Sequence Annotation ; *Software ; }, abstract = {SUMMARY: PanViz is a novel, interactive, visualization tool for pangenome analysis. PanViz allows visualization of changes in gene group (groups of similar genes across genomes) classification as different subsets of pangenomes are selected, as well as comparisons of individual genomes to pangenomes with gene ontology based navigation of gene groups. Furthermore it allows for rich and complex visual querying of gene groups in the pangenome. PanViz visualizations require no external programs and are easily sharable, allowing for rapid pangenome analyses.

PanViz is written entirely in JavaScript and is available on https://github.com/thomasp85/PanViz . A companion R package that facilitates the creation of PanViz visualizations from a range of data formats is released through Bioconductor and is available at https://bioconductor.org/packages/PanVizGenerator .

CONTACT: thomasp85@gmail.com.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid28056800, year = {2017}, author = {López-Pérez, M and Ramon-Marco, N and Rodriguez-Valera, F}, title = {Networking in microbes: conjugative elements and plasmids in the genus Alteromonas.}, journal = {BMC genomics}, volume = {18}, number = {1}, pages = {36}, pmid = {28056800}, issn = {1471-2164}, mesh = {Alteromonas/classification/*genetics/metabolism ; Base Composition ; Computational Biology/methods ; *Conjugation, Genetic ; *Genome, Bacterial ; *Genomics/methods ; Open Reading Frames ; Phylogeny ; Plasmids/*genetics ; Polymorphism, Single Nucleotide ; Protein Interaction Mapping ; Protein Interaction Maps ; Proteome ; Proteomics/methods ; }, abstract = {BACKGROUND: To develop evolutionary models for the free living bacterium Alteromonas the genome sequences of isolates of the genus have been extensively analyzed. However, the main genetic exchange drivers in these microbes, conjugative elements (CEs), have not been considered in detail thus far. In this work, CEs have been searched in several complete Alteromonas genomes and their sequence studied to understand their role in the evolution of this genus. Six genomes are reported here for the first time.

RESULTS: We have found nine different plasmids of sizes ranging from 85 to 600 Kb, most of them were found in a single strain. Networks of gene similarity could be established among six of the plasmids that were also connected with another cluster of plasmids found in Shewanella strains. The cargo genes found in these plasmids included cassettes found before in chromosome flexible genomic islands of Alteromonas strains. We describe also the plasmids pAMCP48-600 and pAMCP49-600, the largest found in Alteromonas thus far (ca. 600 Kb) and containing all the hallmarks to be classified as chromids. We found in them some housekeeping genes and a cluster that code for an exocellular polysaccharide. They could represent the transport vectors for the previously described replacement flexible genomic islands. Integrative and conjugative elements (ICEs) were more common than plasmids and showed similar patterns of variation with cargo genes coding for components of additive flexible genomic islands. A nearly identical ICE was found in A. mediterranea MED64 and Vibrio cholera AHV1003 isolated from a human pathogen, indicating the potential exchange of these genes across phylogenetic distances exceeding the family threshold.

CONCLUSION: We have seen evidence of how CEs can be vectors to transfer gene cassettes acquired in the chromosomal flexible genomic islands, both of the additive and replacement kind. These CEs showed evidence of how genetic material is exchanged among members of the same species but also (albeit less frequently) across genus and family barriers. These gradients of exchange frequency are probably one of the main drivers of species origin and maintenance in prokaryotes and also provide these taxa with large genetic diversity.}, } @article {pmid28053228, year = {2017}, author = {Croucher, NJ and Campo, JJ and Le, TQ and Liang, X and Bentley, SD and Hanage, WP and Lipsitch, M}, title = {Diverse evolutionary patterns of pneumococcal antigens identified by pangenome-wide immunological screening.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {114}, number = {3}, pages = {E357-E366}, pmid = {28053228}, issn = {1091-6490}, support = {104169/Z/14/Z/WT_/Wellcome Trust/United Kingdom ; R01 AI048935/AI/NIAID NIH HHS/United States ; 098051/WT_/Wellcome Trust/United Kingdom ; R01 AI066304/AI/NIAID NIH HHS/United States ; U54 GM088558/GM/NIGMS NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Adhesins, Bacterial/immunology ; Adult ; Antibodies, Bacterial/immunology ; Antibody Formation/immunology ; Antigens, Bacterial/*immunology ; Bacterial Proteins/immunology ; Epitopes/immunology ; Humans ; Immunoglobulin G/immunology ; Membrane Proteins/immunology ; Membrane Transport Proteins/immunology ; Pneumococcal Infections/immunology ; Pneumococcal Vaccines/immunology ; Streptococcus pneumoniae/*immunology ; }, abstract = {Characterizing the immune response to pneumococcal proteins is critical in understanding this bacterium's epidemiology and vaccinology. 
Probing a custom-designed proteome microarray with sera from 35 healthy US adults revealed a continuous distribution of IgG affinities for 2,190 potential antigens from the species-wide pangenome. Reproducibly elevated IgG binding was elicited by 208 "antibody binding targets" (ABTs), which included 109 variants of the diverse pneumococcal surface proteins A and C (PspA and PspC) and zinc metalloprotease A and B (ZmpA and ZmpB) proteins. Functional analysis found ABTs were enriched in motifs for secretion and cell surface association, with extensive representation of cell wall synthesis machinery, adhesins, transporter solute-binding proteins, and degradative enzymes. ABTs were associated with stronger evidence for evolving under positive selection, although this varied between functional categories, as did rates of diversification through recombination. Particularly rapid variation was observed at some immunogenic accessory loci, including a phage protein and a phase-variable glycosyltransferase ubiquitous among the diverse set of genomic islands encoding the serine-rich PsrP glycoprotein. Nevertheless, many antigens were conserved in the core genome, and strains' antigenic profiles were generally stable. No strong evidence was found for any epistasis between antigens driving population dynamics, or redundancy between functionally similar accessory ABTs, or age stratification of antigen profiles. These results highlight the paradox of why substantial variation is observed in only a subset of epitopes. 
This result may indicate only some interactions between immunoglobulins and ABTs clear pneumococcal colonization or that acquired immunity to pneumococci is an accumulation of individually weak responses to ABTs evolving under different levels of functional constraint.}, } @article {pmid28045086, year = {2017}, author = {Uchiya, KI and Tomida, S and Nakagawa, T and Asahi, S and Nikai, T and Ogawa, K}, title = {Comparative genome analyses of Mycobacterium avium reveal genomic features of its subspecies and strains that cause progression of pulmonary disease.}, journal = {Scientific reports}, volume = {7}, number = {}, pages = {39750}, pmid = {28045086}, issn = {2045-2322}, mesh = {Animals ; Ecosystem ; Gene Transfer, Horizontal ; Genetic Speciation ; Genetics, Population ; Genome/*genetics ; Humans ; Japan ; Multigene Family ; Mycobacterium avium/*physiology ; Phylogeny ; Polymorphism, Single Nucleotide ; Species Specificity ; Tuberculosis, Pulmonary/genetics/*microbiology ; Whole Genome Sequencing ; }, abstract = {Pulmonary disease caused by nontuberculous mycobacteria (NTM) is increasing worldwide. Mycobacterium avium is the most clinically significant NTM species in humans and animals, and comprises four subspecies: M. avium subsp. avium (MAA), M. avium subsp. silvaticum (MAS), M. avium subsp. paratuberculosis (MAP), and M. avium subsp. hominissuis (MAH). To improve our understanding of the genetic landscape and diversity of M. avium and its role in disease, we performed a comparative genome analysis of 79 M. avium strains. Our analysis demonstrated that MAH is an open pan-genome species. Phylogenetic analysis based on single nucleotide variants showed that MAH had the highest degree of sequence variability among the subspecies, and MAH strains isolated in Japan and those isolated abroad possessed distinct phylogenetic features. 
Furthermore, MAP strains, MAS and MAA strains isolated from birds, and many MAH strains that cause the progression of pulmonary disease were grouped in each specific cluster. Comparative genome analysis revealed the presence of genetic elements specific to each lineage, which are thought to be acquired via horizontal gene transfer during the evolutionary process, and identified potential genetic determinants accounting for the pathogenic and host range characteristics of M. avium.}, } @article {pmid28043631, year = {2016}, author = {Otchere, ID and Harris, SR and Busso, SL and Asante-Poku, A and Osei-Wusu, S and Koram, K and Parkhill, J and Gagneux, S and Yeboah-Manu, D}, title = {The First population structure and comparative genomics analysis of Mycobacterium africanum strains from Ghana reveals higher diversity of Lineage 5.}, journal = {International journal of mycobacteriology}, volume = {5 Suppl 1}, number = {}, pages = {S80-S81}, doi = {10.1016/j.ijmyco.2016.09.051}, pmid = {28043631}, issn = {2212-554X}, abstract = {OBJECTIVE/BACKGROUND: Mycobacterium africanum (MAF) remains an important TB causing pathogen in West Africa; however, little is known about its population structure and actual diversity which may have implications for diagnostics and vaccines. We carried out comparative genomics analysis of candidate Mycobacterium tuberculosis (MTB) and MAF using whole genome sequencing.

METHODS: Clinical MTB complex strains (n=187) comprising L4 (n=22), L5 (n=126), and L6 (n=39) isolated over 8 years from Ghana were whole genome sequenced. The reads were mapped onto a reference genome for phylogenetic and functional genomics analysis. A maximum likelihood tree with 100 bootstraps was constructed from the single nucleotide polymorphisms (SNPs) found using RAxML and clustered with hierBAPS. A total of 147 (18 L4, 36 L6, and 93 L5) of the genomes were de novo assembled and annotated for comparative pangenome analysis using Roary.

RESULTS: The population structure of MAF revealed at least five clusters of L5 as compared to three for L6. We also identified a group of three multi-drug-resistants (MDRs) within a single cluster of L5 strains from Southern Ghana isolated in 2013. Among the global collection of MTB complex, there were four Ghana-specific L5 clusters of which one (L5.1.1) had traits of clonal expansion. From the 5947 pan genes extracted from the collection, 3215 (54.1%) were core to all the 147 genomes whereas 719 (12.1%) were found in single genomes. Most of the variable genes were PE-PGRS/PPE (1,281) duplicates of other genes (431). The genome degradation was more pronounced in Lineages 4 and 6 as compared to Lineage 5. We identified the absence of some unique genes among specific lineages and/or clades with possible clinical implications. For example, mpt64 and mlaD encoding respectively an immunogenic protein and a mammalian cell entry protein were missing from all L6 genomes. In addition, all L5 strains had an amino acid substitution I43N within the mpt64 gene. Analysis of SNPs within some genes encoding proteins for substrate metabolism, ion transport and secretory systems showed higher proportion of SNPs among L6 compared to L5 and L4. We also identified a number of lineage/sublineage specific SNPs and indels that may be utilized in rapid PCR based genotyping of MTB complex.

CONCLUSION: This work emphasizes on the possibility that the mpt64-based rapid diagnostic kit would not be effective in MAF endemic settings. More mutations in ESAT-6 secretory system of MAF compared to MTB sensu stricto can affect efficacy of ESAT-6-based vaccines in the future.}, } @article {pmid28018331, year = {2016}, author = {Liu, YY and Chen, CC and Chiou, CS}, title = {Construction of a Pan-Genome Allele Database of Salmonella enterica Serovar Enteritidis for Molecular Subtyping and Disease Cluster Identification.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {2010}, pmid = {28018331}, issn = {1664-302X}, abstract = {We built a pan-genome allele database with 395 genomes of Salmonella enterica serovar Enteritidis and developed computer tools for analysis of whole genome sequencing (WGS) data of bacterial isolates for disease cluster identification. A web server (http://wgmlst.imst.nsysu.edu.tw) was set up with the database and the tools, allowing users to upload WGS data to generate whole genome multilocus sequence typing (wgMLST) profiles and to perform cluster analysis of wgMLST profiles. The usefulness of the database in disease cluster identification was demonstrated by analyzing a panel of genomes from 55 epidemiologically well-defined S. Enteritidis isolates provided by the Minnesota Department of Health. The wgMLST-based cluster analysis revealed distinct clades that were concordant with the epidemiologically defined outbreaks. 
Thus, using a common pan-genome allele database, wgMLST can be a promising WGS-based subtyping approach for disease surveillance and outbreak investigation across laboratories.}, } @article {pmid28011701, year = {2017}, author = {van Vliet, AH}, title = {Use of pan-genome analysis for the identification of lineage-specific genes of Helicobacter pylori.}, journal = {FEMS microbiology letters}, volume = {364}, number = {2}, pages = {}, doi = {10.1093/femsle/fnw296}, pmid = {28011701}, issn = {1574-6968}, mesh = {*Genes, Bacterial ; *Genetic Variation ; *Genome, Bacterial ; Genomic Islands ; Helicobacter Infections/microbiology ; Helicobacter pylori/classification/*genetics/isolation & purification ; Humans ; Metabolic Networks and Pathways/genetics ; Multilocus Sequence Typing ; Virulence Factors/genetics ; }, abstract = {The human bacterial pathogen Helicobacter pylori has a highly variable genome, with significant allelic and sequence diversity between isolates and even within well-characterised strains, hampering comparative genomics of H. pylori. In this study, pan-genome analysis has been used to identify lineage-specific genes of H. pylori. A total of 346 H. pylori genomes spanning the hpAfrica1, hpAfrica2, hpAsia2, hpEurope, hspAmerind and hspEAsia multilocus sequence typing (MLST) lineages were searched for genes specifically overrepresented or underrepresented in MLST lineages or associated with the cag pathogenicity island. The only genes overrepresented in cag-positive genomes were the cag pathogenicity island genes themselves. In contrast, a total of 125 genes were either overrepresented or underrepresented in one or more MLST lineages. Of these 125 genes, alcohol/aldehyde-reducing enzymes linked with acid resistance and production of toxic aldehydes were found to be overrepresented in African lineages. Conversely, the FecA2 ferric citrate receptor was missing from hspAmerind genomes, but present in all other lineages. 
This work shows the applicability of pan-genome analysis for identification of lineage-specific genes of H. pylori, facilitating further investigation to allow linkage of differential distribution of genes with disease outcome or virulence of H. pylori.}, } @article {pmid27993146, year = {2016}, author = {Brynildsrud, O and Bohlin, J and Scheffer, L and Eldholm, V}, title = {Erratum to: Rapid scoring of genes in microbial pan-genome-wide association studies with Scoary.}, journal = {Genome biology}, volume = {17}, number = {1}, pages = {262}, pmid = {27993146}, issn = {1474-760X}, } @article {pmid27988045, year = {2017}, author = {Benamar, S and Cassir, N and Merhej, V and Jardot, P and Robert, C and Raoult, D and La Scola, B}, title = {Multi-spacer typing as an effective method to distinguish the clonal lineage of Clostridium butyricum strains isolated from stool samples during a series of necrotizing enterocolitis cases.}, journal = {The Journal of hospital infection}, volume = {95}, number = {3}, pages = {300-305}, doi = {10.1016/j.jhin.2016.10.026}, pmid = {27988045}, issn = {1532-2939}, mesh = {Adult ; Aged ; Child ; Child, Preschool ; Clostridium Infections/epidemiology/*microbiology ; Clostridium butyricum/*classification/genetics/isolation & purification ; Cluster Analysis ; Enterocolitis, Necrotizing/epidemiology/*microbiology ; Feces/microbiology ; *Genotype ; Humans ; Infant ; Infant, Newborn ; Molecular Epidemiology ; Molecular Typing/*methods ; }, abstract = {BACKGROUND: Necrotizing enterocolitis (NEC) is a devastating gastrointestinal disease with high morbidity and mortality that predominantly affects preterm neonates during outbreaks. In a previous study, the present authors identified 15 Clostridium butyricum isolates from stool samples during a series of NEC cases involving four neonatal intensive care units. A clonal lineage of these strains was observed by in-silico multi-locus sequence typing.

AIM: To confirm the previous findings by sequencing a larger number of C. butyricum genomes and using other genotyping approaches.

METHODS: The previously isolated 15 C. butyricum strains were characterized and compared with 17 other commensal and environmental C. butyricum strains using whole-genome sequencing (WGS). In addition, the clustering was analysed using multi-spacer sequence typing (MST).

FINDINGS: The core genome of C. butyricum was composed of 1251 genes, and its pan-genome consisted of 12,628 genes with high variability between strains. It was possible to distinguish the clonal lineage of strains from a series of NEC cases, forming three clades with geographical clustering. The results obtained using WGS and MST approaches were congruent.

CONCLUSION: MST is a fast, cheap and effective genotyping method for investigating NEC outbreaks associated with C. butyricum.}, } @article {pmid27940610, year = {2017}, author = {Sun, C and Hu, Z and Zheng, T and Lu, K and Zhao, Y and Wang, W and Shi, J and Wang, C and Lu, J and Zhang, D and Li, Z and Wei, C}, title = {RPAN: rice pan-genome browser for ∼3000 rice genomes.}, journal = {Nucleic acids research}, volume = {45}, number = {2}, pages = {597-605}, pmid = {27940610}, issn = {1362-4962}, mesh = {Computational Biology/methods ; Databases, Genetic ; *Genome, Plant ; *Genomics/methods ; Molecular Sequence Annotation ; Oryza/*genetics ; *Software ; Web Browser ; }, abstract = {A pan-genome is the union of the gene sets of all the individuals of a clade or a species and it provides a new dimension of genome complexity with the presence/absence variations (PAVs) of genes among these genomes. With the progress of sequencing technologies, pan-genome study is becoming affordable for eukaryotes with large-sized genomes. The Asian cultivated rice, Oryza sativa L., is one of the major food sources for the world and a model organism in plant biology. Recently, the 3000 Rice Genome Project (3K RGP) sequenced more than 3000 rice genomes with a mean sequencing depth of 14.3×, which provided a tremendous resource for rice research. In this paper, we present a genome browser, Rice Pan-genome Browser (RPAN), as a tool to search and visualize the rice pan-genome derived from 3K RGP. RPAN contains a database of the basic information of 3010 rice accessions, including genomic sequences, gene annotations, PAV information and gene expression data of the rice pan-genome. At least 12 000 novel genes absent in the reference genome were included. RPAN also provides multiple search and visualization functions. RPAN can be a rich resource for rice biology and rice breeding. 
It is available at http://cgm.sjtu.edu.cn/3kricedb/ or http://www.rmbreeding.cn/pan3k.}, } @article {pmid27938326, year = {2016}, author = {Walkowiak, S and Rowland, O and Rodrigue, N and Subramaniam, R}, title = {Whole genome sequencing and comparative genomics of closely related Fusarium Head Blight fungi: Fusarium graminearum, F. meridionale and F. asiaticum.}, journal = {BMC genomics}, volume = {17}, number = {1}, pages = {1014}, pmid = {27938326}, issn = {1471-2164}, mesh = {Alleles ; Computational Biology/methods ; Fusarium/*classification/*genetics/metabolism ; Genes, Fungal ; Genetic Variation ; *Genome, Fungal ; *Genomics/methods ; *High-Throughput Nucleotide Sequencing ; INDEL Mutation ; Polymorphism, Single Nucleotide ; Pseudogenes ; Secondary Metabolism ; Selection, Genetic ; }, abstract = {BACKGROUND: The Fusarium graminearum species complex is composed of many distinct fungal species that cause several diseases in economically important crops, including Fusarium Head Blight of wheat. Despite being closely related, these species and individuals within species have distinct phenotypic differences in toxin production and pathogenicity, with some isolates reported as non-pathogenic on certain hosts. In this report, we compare genomes and gene content of six new isolates from the species complex, including the first available genomes of F. asiaticum and F. meridionale, with four other genomes reported in previous studies.

RESULTS: A comparison of genome structure and gene content revealed a 93-99% overlap across all ten genomes. We identified more than 700 k base pairs (kb) of single nucleotide polymorphisms (SNPs), insertions, and deletions (indels) within common regions of the genome, which validated the species and genetic populations reported within species. We constructed a non-redundant pan gene list containing 15,297 genes from the ten genomes and among them 1827 genes or 12% were absent in at least one genome. These genes were co-localized in telomeric regions and select regions within chromosomes with a corresponding increase in SNPs and indels. Many are also predicted to encode for proteins involved in secondary metabolism and other functions associated with disease. Genes that were common between isolates contained high levels of nucleotide variation and may be pseudogenes, allelic, or under diversifying selection.

CONCLUSIONS: The genomic resources we have contributed will be useful for the identification of genes that contribute to the phenotypic variation and niche specialization that have been reported among members of the F. graminearum species complex.}, } @article {pmid27933096, year = {2016}, author = {Beller, T and Ohlebusch, E}, title = {Erratum to: A representation of a compressed de Bruijn graph for pan-genome analysis that enables search.}, journal = {Algorithms for molecular biology : AMB}, volume = {11}, number = {}, pages = {28}, pmid = {27933096}, issn = {1748-7188}, abstract = {[This corrects the article DOI: 10.1186/s13015-016-0083-7.].}, } @article {pmid27924951, year = {2016}, author = {Bakshi, U and Sarkar, M and Paul, S and Dutta, C}, title = {Assessment of virulence potential of uncharacterized Enterococcus faecalis strains using pan genomic approach - Identification of pathogen-specific and habitat-specific genes.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {38648}, pmid = {27924951}, issn = {2045-2322}, mesh = {Base Composition ; Enterococcus faecalis/classification/pathogenicity/*physiology ; Genes, Bacterial ; Genetic Variation ; Genome Size ; *Genome, Bacterial ; *Genomic Islands ; *Genomics ; Gram-Positive Bacterial Infections/microbiology ; Humans ; Open Reading Frames ; Phylogeny ; Plasmids ; Virulence/*genetics ; Virulence Factors/genetics ; }, abstract = {Enterococcus faecalis, a leading nosocomial pathogen and yet a prominent member of gut microbiome, lacks clear demarcation between pathogenic and non-pathogenic strains at genome level. Here we present the comparative genome analysis of 36 E. faecalis strains with different pathogenic features and from different body-habitats. This study begins by addressing the genome dynamics, which shows that the pan-genome of E. faecalis is still open, though the core genome is nearly saturated. 
We identified eight uncharacterized strains as potential pathogens on the basis of their co-segregation with reported pathogens in gene presence-absence matrix and Pathogenicity Island (PAI) distribution. A ~7.4 kb genomic-cassette, which is itself a part of PAI, is found to exist in all reported and potential pathogens, but not in commensals and other uncharacterized strains. This region encodes four genes and among them, products of two hypothetical genes are predicted to be intrinsically disordered that may serve as novel targets for therapeutic measures. Exclusive existence of 215, 129, 4 and 1 genes in the blood, gastrointestinal tract, urogenital tract, oral cavity and lymph node derived E. faecalis genomes respectively suggests possible employment of distinct habitat-specific genetic strategies in the adaptation of E. faecalis in human host.}, } @article {pmid27924153, year = {2016}, author = {Kim, JY and Song, HS and Kim, YB and Kwon, J and Choi, JS and Cho, YJ and Kim, BY and Rhee, JK and Myoung, J and Nam, YD and Roh, SW}, title = {Genome sequence of a commensal bacterium, Enterococcus faecalis CBA7120, isolated from a Korean fecal sample.}, journal = {Gut pathogens}, volume = {8}, number = {}, pages = {62}, pmid = {27924153}, issn = {1757-4749}, abstract = {BACKGROUND: Enterococcus faecalis, the type strain of the genus Enterococcus, is not only a commensal bacterium in the gastrointestinal tract in vertebrates and invertebrates, but also causes serious disease as an opportunistic pathogen. To date, genome sequences have been published for over four hundred E. faecalis strains; however, pathogenicity of these microbes remains complicated. To increase our knowledge of E. faecalis virulence factors, we isolated strain CBA7120 from the feces of an 81-year-old female from the Republic of Korea and performed a comparative genomic analysis.

RESULTS: The genome sequence of E. faecalis CBA7120 is 3,134,087 bp in length, with a G + C content of 37.35 mol%, and is comprised of four contigs with an N50 value of 2,922,046 bp. The genome showed high similarity with other strains of E. faecalis, including OG1RF, T13, 12107 and T20, based on OrthoANI values. Strain CBA7120 contains 374 pan-genome orthologous groups (POGs) as singletons, including "Phages, Prophages, Transposable elements, Plasmids," "Carbohydrates," "DNA metabolism," and "Virulence, Disease and Defense" subsystems. Genes related to multidrug resistance efflux pumps were annotated in the genome.

CONCLUSIONS: The comparative genomic analysis of E. faecalis strains presented in this study was performed using a variety of analysis methods and will facilitate future identification of hypothetical proteins.}, } @article {pmid27922098, year = {2016}, author = {Koehorst, JJ and van Dam, JC and van Heck, RG and Saccenti, E and Dos Santos, VA and Suarez-Diez, M and Schaap, PJ}, title = {Comparison of 432 Pseudomonas strains through integration of genomic, functional, metabolic and expression data.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {38699}, pmid = {27922098}, issn = {2045-2322}, mesh = {*Computational Biology/methods ; *Energy Metabolism ; *Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; *Genomics/methods ; Humans ; Molecular Sequence Annotation ; Phylogeny ; Pseudomonas/classification/*genetics/*metabolism ; Workflow ; }, abstract = {Pseudomonas is a highly versatile genus containing species that can be harmful to humans and plants while others are widely used for bioengineering and bioremediation. We analysed 432 sequenced Pseudomonas strains by integrating results from a large scale functional comparison using protein domains with data from six metabolic models, nearly a thousand transcriptome measurements and four large scale transposon mutagenesis experiments. Through heterogeneous data integration we linked gene essentiality, persistence and expression variability. The pan-genome of Pseudomonas is closed indicating a limited role of horizontal gene transfer in the evolutionary history of this genus. A large fraction of essential genes are highly persistent, still non essential genes represent a considerable fraction of the core-genome. 
Our results emphasize the power of integrating large scale comparative functional genomics with heterogeneous data for exploring bacterial diversity and versatility.}, } @article {pmid27919552, year = {2017}, author = {Carter, MQ}, title = {Decoding the Ecological Function of Accessory Genome.}, journal = {Trends in microbiology}, volume = {25}, number = {1}, pages = {6-8}, doi = {10.1016/j.tim.2016.11.012}, pmid = {27919552}, issn = {1878-4380}, mesh = {Animals ; Cattle ; Escherichia coli Infections/microbiology/*transmission ; Escherichia coli O157/*genetics/isolation & purification/metabolism/*pathogenicity ; Escherichia coli Proteins/*genetics ; Food Microbiology ; Genome, Bacterial/genetics ; Genomic Islands/genetics ; Humans ; Phosphoproteins/*genetics ; Shiga Toxin/*metabolism ; Virulence Factors/genetics ; }, abstract = {Shiga toxin-producing Escherichia coli O157:H7 primarily resides in cattle asymptomatically, and can be transmitted to humans through food. A study by Lupolova et al. 
applied a machine-learning approach to complex pan-genome information and predicted that only a small subset of bovine isolates have the potential to cause diseases in humans.}, } @article {pmid27899624, year = {2017}, author = {Vallenet, D and Calteau, A and Cruveiller, S and Gachet, M and Lajus, A and Josso, A and Mercier, J and Renaux, A and Rollin, J and Rouy, Z and Roche, D and Scarpelli, C and Médigue, C}, title = {MicroScope in 2017: an expanding and evolving integrated resource for community expertise of microbial genomes.}, journal = {Nucleic acids research}, volume = {45}, number = {D1}, pages = {D517-D528}, pmid = {27899624}, issn = {1362-4962}, mesh = {Computational Biology/methods ; *Databases, Genetic ; Evolution, Molecular ; Metabolome ; Metabolomics/methods ; *Metagenome ; Metagenomics/*methods ; Microbiota/*genetics ; Multigene Family ; Polymorphism, Single Nucleotide ; Software ; }, abstract = {The annotation of genomes from NGS platforms needs to be automated and fully integrated. However, maintaining consistency and accuracy in genome annotation is a challenging problem because millions of protein database entries are not assigned reliable functions. This shortcoming limits the knowledge that can be extracted from genomes and metabolic models. Launched in 2005, the MicroScope platform (http://www.genoscope.cns.fr/agc/microscope) is an integrative resource that supports systematic and efficient revision of microbial genome annotation, data management and comparative analysis. Effective comparative analysis requires a consistent and complete view of biological data, and therefore, support for reviewing the quality of functional annotation is critical. MicroScope allows users to analyze microbial (meta)genomes together with post-genomic experiment results if any (i.e. transcriptomics, re-sequencing of evolved strains, mutant collections, phenotype data). 
It combines tools and graphical interfaces to analyze genomes and to perform the expert curation of gene functions in a comparative context. Starting with a short overview of the MicroScope system, this paper focuses on some major improvements of the Web interface, mainly for the submission of genomic data and on original tools and pipelines that have been developed and integrated in the platform: computation of pan-genomes and prediction of biosynthetic gene clusters. Today the resource contains data for more than 6000 microbial genomes, and among the 2700 personal accounts (65% of which are now from foreign countries), 14% of the users are performing expert annotations, on at least a weekly basis, contributing to improve the quality of microbial genome annotations.}, } @article {pmid27887642, year = {2016}, author = {Brynildsrud, O and Bohlin, J and Scheffer, L and Eldholm, V}, title = {Rapid scoring of genes in microbial pan-genome-wide association studies with Scoary.}, journal = {Genome biology}, volume = {17}, number = {1}, pages = {238}, pmid = {27887642}, issn = {1474-760X}, abstract = {Genome-wide association studies (GWAS) have become indispensable in human medicine and genomics, but very few have been carried out on bacteria. Here we introduce Scoary, an ultra-fast, easy-to-use, and widely applicable software tool that scores the components of the pan-genome for associations to observed phenotypic traits while accounting for population stratification, with minimal assumptions about evolutionary processes. We call our approach pan-GWAS to distinguish it from traditional, single nucleotide polymorphism (SNP)-based GWAS. 
Scoary is implemented in Python and is available under an open source GPLv3 license at https://github.com/AdmiralenOla/Scoary .}, } @article {pmid27881098, year = {2016}, author = {Jironkin, A and Brown, RJ and Underwood, A and Chalker, VJ and Spiller, OB}, title = {Genomic determination of minimum multi-locus sequence typing schemas to represent the genomic phylogeny of Mycoplasma hominis.}, journal = {BMC genomics}, volume = {17}, number = {1}, pages = {964}, pmid = {27881098}, issn = {1471-2164}, mesh = {Alleles ; Cluster Analysis ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics/methods ; Genotype ; Humans ; Multilocus Sequence Typing ; Mycoplasma hominis/*classification/*genetics/isolation & purification ; Phylogeny ; Polymorphism, Single Nucleotide ; Recombination, Genetic ; }, abstract = {BACKGROUND: Mycoplasma hominis is an opportunistic human pathogen, associated with clinically diverse disease. Currently, there is no standardised method for typing M. hominis, which would aid in understanding pathogen epidemiology and transmission. Due to availability and costs of whole genome sequencing and the challenges in obtaining adequate M. hominis DNA, the use of whole genome sequence analysis to provide clinical guidance is unpractical for this bacterial species as well as other fastidious organisms.

RESULTS: This study identified a pan-genome set of 700 genes found to be present in four published reference genomes. A subset of 417 genes was identified to be the core genome for 18 isolates and 1 reference. Leave-one-out analysis of the core genes highlighted a set of 48 genes that are required to recapture the original phylogenetic relationships observed using whole genome SNP analysis. Three 7-locus MLST schemas with high diversity index (97%) and low dN/dS ratios (0.1, 0.13, and 0.11) were derived that could be used to confer good discrimination between strains and could be of practical use in future studies directly on clinical specimens.

CONCLUSIONS: The genes proposed in this study could be utilised to design a cost-effective and rapid PCR-based MLST assay that could be applied directly to clinical isolates, without prior isolation. This study includes additional genomic analysis revealing high levels of genetic heterogeneity among this species. This provides a novel and evidence-based approach for the development of MLST schemas that accurately represent genomic phylogeny for use in epidemiology and transmission studies.}, } @article {pmid27880983, year = {2017}, author = {Weller-Stuart, T and De Maayer, P and Coutinho, T}, title = {Pantoea ananatis: genomic insights into a versatile pathogen.}, journal = {Molecular plant pathology}, volume = {18}, number = {9}, pages = {1191-1198}, pmid = {27880983}, issn = {1364-3703}, mesh = {Biotechnology/methods ; Genomics/methods ; Pantoea/*genetics ; Quorum Sensing/physiology ; Type VI Secretion Systems/metabolism ; }, abstract = {Pantoea ananatis, a bacterium that is well known for its phytopathogenic characteristics, has been isolated from a myriad of ecological niches and hosts. Infection of agronomic crops, such as maize and rice, can result in substantial economic losses. In the last few years, much of the research performed on P. ananatis has been based on the sequencing and analysis of the genomes of strains isolated from different environments and with different lifestyles. In this review, we summarize the advances made in terms of pathogenicity determinants of phytopathogenic strains of P. ananatis and how this bacterium is able to adapt and survive in such a wide variety of habitats. The diversity and adaptability of P. ananatis can largely be attributed to the plasticity of its genome and the integration of mobile genetic elements on both the chromosome and plasmid. Furthermore, we discuss the recent interest in this species in various biotechnological applications.

TAXONOMY: Domain Bacteria; Class Gammaproteobacteria; Family Enterobacteriaceae; genus Pantoea; species ananatis.

DISEASE SYMPTOMS: Pantoea ananatis causes disease on a wide range of plants, and symptoms can range from dieback and stunted growth in Eucalyptus seedlings to chlorosis and bulb rotting in onions.

DISEASE CONTROL: Currently, the only methods of control of P. ananatis on most plant hosts are the use of resistant clones and cultivars or the eradication of infected plant material. The use of lytic bacteriophages on certain host plants, such as rice, has also achieved a measure of success.}, } @article {pmid27864136, year = {2017}, author = {Zhi, XY and Jiang, Z and Yang, LL and Huang, Y}, title = {The underlying mechanisms of genetic innovation and speciation in the family Corynebacteriaceae: A phylogenomics approach.}, journal = {Molecular phylogenetics and evolution}, volume = {107}, number = {}, pages = {246-255}, doi = {10.1016/j.ympev.2016.11.009}, pmid = {27864136}, issn = {1095-9513}, mesh = {Actinomycetales/*classification/*genetics ; Animals ; Gene Transfer, Horizontal ; *Genetic Speciation ; Genetic Variation ; *Genome, Bacterial ; *Genomics ; Metabolic Networks and Pathways/genetics ; *Phylogeny ; }, abstract = {The pangenome of a bacterial species population is formed by genetic reduction and genetic expansion over the long course of evolution. Gene loss is a pervasive source of genetic reduction, and (exogenous and endogenous) gene gain is the main driver of genetic expansion. To understand the genetic innovation and speciation of the family Corynebacteriaceae, which cause a wide range of serious infections in humans and animals, we analyzed the pangenome of this family, and reconstructed its phylogeny using a phylogenomics approach. Genetic variations have occurred throughout the whole evolutionary history of the Corynebacteriaceae. Gene loss has been the primary force causing genetic changes, not only in terms of the number of protein families affected, but also because of its continuity on the time series. The variation in metabolism caused by these genetic changes mainly occurred for membrane transporters, two-component systems, and metabolism related to amino acids and carbohydrates. 
Interestingly, horizontal gene transfer (HGT) not only caused changes related to pathogenicity, but also triggered the acquisition of antimicrobial resistance. The Darwinian theory of evolution did not adequately explain the effects of dispersive HGT and/or gene loss in the evolution of the Corynebacteriaceae. These findings provide new insight into the evolution and speciation of Corynebacteriaceae and advance our understanding of the genetic innovation in microbial populations.}, } @article {pmid27861551, year = {2016}, author = {Malki, K and Shapiro, JW and Price, TK and Hilt, EE and Thomas-White, K and Sircar, T and Rosenfeld, AB and Kuffel, G and Zilliox, MJ and Wolfe, AJ and Putonti, C}, title = {Genomes of Gardnerella Strains Reveal an Abundance of Prophages within the Bladder Microbiome.}, journal = {PloS one}, volume = {11}, number = {11}, pages = {e0166757}, pmid = {27861551}, issn = {1932-6203}, support = {R01 DK104718/DK/NIDDK NIH HHS/United States ; R21 DK097435/DK/NIDDK NIH HHS/United States ; R56 DK104718/DK/NIDDK NIH HHS/United States ; }, mesh = {Adult ; Computational Biology/methods ; DNA Transposable Elements ; Female ; Gardnerella/*genetics/virology ; Genes, Viral ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Humans ; *Microbiota ; Open Reading Frames ; Phylogeny ; Prophages/*genetics ; Urinary Bladder/*microbiology ; }, abstract = {Bacterial surveys of the vaginal and bladder human microbiota have revealed an abundance of many similar bacterial taxa. As the bladder was once thought to be sterile, the complex interactions between microbes within the bladder have yet to be characterized. To initiate this process, we have begun sequencing isolates, including the clinically relevant genus Gardnerella. Herein, we present the genomic sequences of four Gardnerella strains isolated from the bladders of women with symptoms of urgency urinary incontinence; these are the first Gardnerella genomes produced from this niche. 
Congruent to genomic characterization of Gardnerella isolates from the reproductive tract, isolates from the bladder reveal a large pangenome, as well as evidence of high frequency horizontal gene transfer. Prophage gene sequences were found to be abundant amongst the strains isolated from the bladder, as well as amongst publicly available Gardnerella genomes from the vagina and endometrium, motivating an in depth examination of these sequences. Amongst the 39 Gardnerella strains examined here, there were more than 400 annotated prophage gene sequences that we could cluster into 95 homologous groups; 49 of these groups were unique to a single strain. While many of these prophages exhibited no sequence similarity to any lytic phage genome, estimation of the rate of phage acquisition suggests both vertical and horizontal acquisition. Furthermore, bioinformatic evidence indicates that prophage acquisition is ongoing within both vaginal and bladder Gardnerella populations. The abundance of prophage sequences within the strains examined here suggests that phages could play an important role in the species' evolutionary history and in its interactions within the complex communities found in the female urinary and reproductive tracts.}, } @article {pmid27851981, year = {2017}, author = {Yuvaraj, I and Sridhar, J and Michael, D and Sekar, K}, title = {PanGeT: Pan-genomics tool.}, journal = {Gene}, volume = {600}, number = {}, pages = {77-84}, doi = {10.1016/j.gene.2016.11.025}, pmid = {27851981}, issn = {1879-0038}, mesh = {Evolution, Molecular ; Genome, Bacterial ; Genomics/*statistics & numerical data ; Mycobacterium/classification/genetics ; Proteome/genetics ; Salmonella enterica/classification/genetics ; *Software ; Species Specificity ; }, abstract = {A decade after the concept of Pan-genome was first introduced; research in this field has spread its tentacles to areas such as pathogenesis of diseases, bacterial evolutionary studies and drug resistance. 
Gene content-based differentiation of virulent and avirulent strains of bacteria and identification of pathogen specific genes is imperative to understand their physiology and gain insights into the mechanism of genome evolution. Subsequently, this will aid in identifying diagnostic targets and in developing and selecting vaccines. The root of pan-genomic studies, however, is to identify the core genes, dispensable genes and strain specific genes across the genomes belonging to a clade. To this end, we have developed a tool, "PanGeT - Pan-genomics Tool" to compute the 'pan-genome' based on comparisons at the genome as well as the proteome levels. This automated tool is implemented using LaTeX libraries for effective visualization of overall pan-genome through graphical plots. Links to retrieve sequence information and functional annotations have also been provided. PanGeT can be downloaded from http://pranag.physics.iisc.ernet.in/PanGeT/ or https://github.com/PanGeTv1/PanGeT.}, } @article {pmid27834372, year = {2016}, author = {Golicz, AA and Bayer, PE and Barker, GC and Edger, PP and Kim, H and Martinez, PA and Chan, CK and Severn-Ellis, A and McCombie, WR and Parkin, IA and Paterson, AH and Pires, JC and Sharpe, AG and Tang, H and Teakle, GR and Town, CD and Batley, J and Edwards, D}, title = {The pangenome of an agronomically important crop plant Brassica oleracea.}, journal = {Nature communications}, volume = {7}, number = {}, pages = {13390}, pmid = {27834372}, issn = {2041-1723}, support = {BB/E017479/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Brassica/*genetics ; Chromosome Mapping ; Chromosomes, Plant ; Crops, Agricultural/*genetics ; Gene Expression Regulation, Plant ; Genetic Variation ; *Genome, Plant ; Phylogeny ; Plant Proteins/genetics/metabolism ; Species Specificity ; }, abstract = {There is an increasing awareness that as a result of structural variation, a reference sequence representing a genome of a 
single individual is unable to capture all of the gene repertoire found in the species. A large number of genes affected by presence/absence and copy number variation suggest that it may contribute to phenotypic and agronomic trait diversity. Here we show by analysis of the Brassica oleracea pangenome that nearly 20% of genes are affected by presence/absence variation. Several genes displaying presence/absence variation are annotated with functions related to major agronomic traits, including disease resistance, flowering time, glucosinolate metabolism and vitamin biosynthesis.}, } @article {pmid27833590, year = {2016}, author = {Haack, FS and Poehlein, A and Kröger, C and Voigt, CA and Piepenbring, M and Bode, HB and Daniel, R and Schäfer, W and Streit, WR}, title = {Molecular Keys to the Janthinobacterium and Duganella spp. Interaction with the Plant Pathogen Fusarium graminearum.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {1668}, pmid = {27833590}, issn = {1664-302X}, abstract = {Janthinobacterium and Duganella are well-known for their antifungal effects. Surprisingly, almost nothing is known on molecular aspects involved in the close bacterium-fungus interaction. To better understand this interaction, we established the genomes of 11 Janthinobacterium and Duganella isolates in combination with phylogenetic and functional analyses of all publicly available genomes. Thereby, we identified a core and pan genome of 1058 and 23,628 genes. All strains encoded secondary metabolite gene clusters and chitinases, both possibly involved in fungal growth suppression. All but one strain carried a single gene cluster involved in the biosynthesis of alpha-hydroxyketone-like autoinducer molecules, designated JAI-1. Genome-wide RNA-seq studies employing the background of two isolates and the corresponding JAI-1 deficient strains identified a set of 45 QS-regulated genes in both isolates. 
Most regulated genes are characterized by a conserved sequence motif within the promoter region. Among the most strongly regulated genes were secondary metabolite and type VI secretion system gene clusters. Most intriguing, co-incubation studies of J. sp. HH102 or its corresponding JAI-1 synthase deletion mutant with the plant pathogen Fusarium graminearum provided first evidence of a QS-dependent interaction with this pathogen.}, } @article {pmid27829029, year = {2016}, author = {Mansfeldt, CB and Heavner, GW and Rowe, AR and Hayete, B and Church, BW and Richardson, RE}, title = {Inferring Gene Networks for Strains of Dehalococcoides Highlights Conserved Relationships between Genes Encoding Core Catabolic and Cell-Wall Structural Proteins.}, journal = {PloS one}, volume = {11}, number = {11}, pages = {e0166234}, pmid = {27829029}, issn = {1932-6203}, mesh = {Cell Wall/*genetics ; Cell Wall Skeleton/*genetics ; Chloroflexi/*genetics/metabolism ; Consensus Sequence/genetics ; Gene Regulatory Networks/*genetics ; Metabolism/*genetics ; Oligonucleotide Array Sequence Analysis ; }, abstract = {The interpretation of high-throughput gene expression data for non-model microorganisms remains obscured because of the high fraction of hypothetical genes and the limited number of methods for the robust inference of gene networks. Therefore, to elucidate gene-gene and gene-condition linkages in the bioremediation-important genus Dehalococcoides, we applied a Bayesian inference strategy called Reverse Engineering/Forward Simulation (REFS™) on transcriptomic data collected from two organohalide-respiring communities containing different Dehalococcoides mccartyi strains: the Cornell University mixed community D2 and the commercially available KB-1® bioaugmentation culture. In total, 49 and 24 microarray datasets were included in the REFS™ analysis to generate an ensemble of 1,000 networks for the Dehalococcoides population in the Cornell D2 and KB-1® culture, respectively. 
Considering only linkages that appeared in the consensus network for each culture (exceeding the determined frequency cutoff of ≥ 60%), the resulting Cornell D2 and KB-1® consensus networks maintained 1,105 nodes (genes or conditions) with 974 edges and 1,714 nodes with 1,455 edges, respectively. These consensus networks captured multiple strong and biologically informative relationships. One of the main highlighted relationships shared between these two cultures was a direct edge between the transcript encoding for the major reductive dehalogenase (tceA (D2) or vcrA (KB-1®)) and the transcript for the putative S-layer cell wall protein (DET1407 (D2) or KB1_1396 (KB-1®)). Additionally, transcripts for two key oxidoreductases (a [Ni Fe] hydrogenase, Hup, and a protein with similarity to a formate dehydrogenase, "Fdh") were strongly linked, generalizing a strong relationship noted previously for Dehalococcoides mccartyi strain 195 to multiple strains of Dehalococcoides. Notably, the pangenome array utilized when monitoring the KB-1® culture was capable of resolving signals from multiple strains, and the network inference engine was able to reconstruct gene networks in the distinct strain populations.}, } @article {pmid27826029, year = {2016}, author = {Sváb, D and Bálint, B and Maróti, G and Tóth, I}, title = {Cytolethal distending toxin producing Escherichia coli O157:H43 strain T22 represents a novel evolutionary lineage within the O157 serogroup.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {46}, number = {}, pages = {110-117}, doi = {10.1016/j.meegid.2016.11.003}, pmid = {27826029}, issn = {1567-7257}, mesh = {Animals ; Bacterial Toxins/*genetics ; Cattle ; Escherichia coli O157/*classification/*genetics ; Evolution, Molecular ; Genome, Bacterial/genetics ; Genomics ; Milk/microbiology ; Phylogeny ; }, abstract = {Enterohemorrhagic Escherichia coli (EHEC) O157:H7/NM 
strains are significant foodborne pathogens intensively studied, while other sero- and pathotypes of the O157 serogroup only began to receive more attention. Here we report the first genome sequence of a cytolethal distending toxin (CDT-V) producing E. coli O157:H43 strain (T22) isolated from cattle. The genome consists of a 4.9Mb chromosome assembled into three contigs and one plasmid of 82.4kb. Comparative genomic investigations conducted with the core genomes of representative E. coli strains in GenBank (n=62) confirmed the separation of T22 from the EHEC and enteropathogenic (EPEC) O157 lineages. Gene content based pangenome analysis revealed as many as 261 T22-specific coding sequences without orthologs in EDL933 EHEC O157 prototypic and two phylogenetically related commensal E. coli strains. The genome sequence revealed 10 prophage-like regions which harbor several virulence-associated genes including cdt and heat-labile enterotoxin (LT-II) encoding operons. Our results indicate that the evolutionary path of T22 is largely independent from that of EHEC and EPEC O157:H7/NM strains. Thus, the CDT-producing T22 E. coli O157:H43 strain represents a unique lineage of E. coli O157.}, } @article {pmid27824078, year = {2016}, author = {Liu, YY and Chiou, CS and Chen, CC}, title = {PGAdb-builder: A web service tool for creating pan-genome allele database for molecular fine typing.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {36213}, pmid = {27824078}, issn = {2045-2322}, mesh = {Alleles ; Bacterial Typing Techniques/methods ; Databases, Genetic ; Genome, Bacterial ; Internet ; Multilocus Sequence Typing/*methods ; Salmonella typhimurium/*genetics ; Whole Genome Sequencing/*methods ; }, abstract = {With the advance of next generation sequencing techniques, whole genome sequencing (WGS) is expected to become the optimal method for molecular subtyping of bacterial isolates. 
To use WGS as a general subtyping method for disease outbreak investigation and surveillance, the layout of WGS-based typing must be comparable among laboratories. Whole genome multilocus sequence typing (wgMLST) is an approach that achieves this requirement. To apply wgMLST as a standard subtyping approach, a pan-genome allele database (PGAdb) for the population of a bacterial organism must first be established. We present a free web service tool, PGAdb-builder (http://wgmlstdb.imst.nsysu.edu.tw), for the construction of bacterial PGAdb. The effectiveness of PGAdb-builder was tested by constructing a pan-genome allele database for Salmonella enterica serovar Typhimurium, with the database being applied to create a wgMLST tree for a panel of epidemiologically well-characterized S. Typhimurium isolates. The performance of the wgMLST-based approach was as high as that of the SNP-based approach in Leekitcharoenphon's study used for discerning among epidemiologically related and non-related isolates.}, } @article {pmid27819663, year = {2016}, author = {Wolf, YI and Makarova, KS and Lobkovsky, AE and Koonin, EV}, title = {Two fundamentally different classes of microbial genes.}, journal = {Nature microbiology}, volume = {2}, number = {}, pages = {16208}, doi = {10.1038/nmicrobiol.2016.208}, pmid = {27819663}, issn = {2058-5276}, mesh = {Archaea/*genetics ; Bacteria/*genetics ; *Evolution, Molecular ; *Genes, Microbial ; Genetic Variation ; Synteny ; }, abstract = {The evolution of bacterial and archaeal genomes is highly dynamic and involves extensive horizontal gene transfer and gene loss[1-4]. Furthermore, many microbial species appear to have open pangenomes, where each newly sequenced genome contains more than 10% ORFans, that is, genes without detectable homologues in other species[5,6]. 
Here, we report a quantitative analysis of microbial genome evolution by fitting the parameters of a simple, steady-state evolutionary model to the comparative genomic data on the gene content and gene order similarity between archaeal genomes. The results reveal two sharply distinct classes of microbial genes, one of which is characterized by effectively instantaneous gene replacement, and the other consists of genes with finite, distributed replacement rates. These findings imply a conservative estimate of the size of the prokaryotic genomic universe, which appears to consist of at least a billion distinct genes. Furthermore, the same distribution of constraints is shown to govern the evolution of gene complement and gene order, without the need to invoke long-range conservation or the selfish operon concept[7].}, } @article {pmid27815276, year = {2017}, author = {Qumar, S and Majid, M and Kumar, N and Tiwari, SK and Semmler, T and Devi, S and Baddam, R and Hussain, A and Shaik, S and Ahmed, N}, title = {Genome Dynamics and Molecular Infection Epidemiology of Multidrug-Resistant Helicobacter pullorum Isolates Obtained from Broiler and Free-Range Chickens in India.}, journal = {Applied and environmental microbiology}, volume = {83}, number = {1}, pages = {}, pmid = {27815276}, issn = {1098-5336}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Cephalosporins/pharmacology ; Chickens/*microbiology ; DNA, Bacterial/*genetics ; *Drug Resistance, Multiple, Bacterial ; Fluoroquinolones/pharmacology ; Food Microbiology ; *Genome, Bacterial ; Genomic Islands ; Helicobacter/drug effects/*genetics/isolation & purification ; Helicobacter Infections/epidemiology/microbiology/*veterinary ; High-Throughput Nucleotide Sequencing ; Humans ; India/epidemiology ; Microbial Sensitivity Tests ; Molecular Epidemiology ; Phylogeny ; Poultry Diseases/epidemiology/*microbiology ; Prophages/genetics/isolation & purification ; beta-Lactamases/biosynthesis/genetics ; }, abstract = 
{Some life-threatening, foodborne, and zoonotic infections are transmitted through poultry birds. Inappropriate and indiscriminate use of antimicrobials in the livestock industry has led to an increased prevalence of multidrug-resistant bacteria with epidemic potential. Here, we present a functional molecular epidemiological analysis entailing the phenotypic and whole-genome sequence-based characterization of 11 H. pullorum isolates from broiler and free-range chickens sampled from retail wet markets in Hyderabad City, India. Antimicrobial susceptibility tests revealed all of the isolates to be resistant to multiple antibiotic classes such as fluoroquinolones, cephalosporins, sulfonamides, and macrolides. The isolates were also found to be extended-spectrum β-lactamase producers and were even resistant to clavulanic acid. Whole-genome sequencing and comparative genomic analysis of these isolates revealed the presence of five or six well-characterized antimicrobial resistance genes, including those encoding a resistance-nodulation-division efflux pump(s). Phylogenetic analysis combined with pan-genome analysis revealed a remarkable degree of genetic diversity among the isolates from free-range chickens; in contrast, a high degree of genetic similarity was observed among broiler chicken isolates. Comparative genomic analysis of all publicly available H. pullorum genomes, including our isolates (n = 16), together with the genomes of 17 other Helicobacter species, revealed a high number (8,560) of H. pullorum-specific protein-encoding genes, with an average of 535 such genes per isolate. In silico virulence screening identified 182 important virulence genes and also revealed high strain-specific gene content in isolates from free-range chickens (average, 34) compared to broiler chicken isolates. 
A significant prevalence of prophages (ranging from 1 to 9) and a significant presence of genomic islands (0 to 4) were observed in free-range and broiler chicken isolates. Taken together, these observations provide significant baseline data for functional molecular infection epidemiology of nonpyloric Helicobacter species such as H. pullorum by unraveling their evolution in chickens and their possible zoonotic transmission to humans.

IMPORTANCE: Globally, the poultry industry is expanding with an ever-growing consumer base for chicken meat. Given this, food-associated transmission of multidrug-resistant bacteria represents an important health care issue. Our study involves a critical baseline approach directed at genome sequence-based epidemiology and transmission dynamics of H. pullorum, a poultry pathogen having established zoonotic potential. We believe our studies would facilitate the development of surveillance systems that ensure the safety of food for humans and guide public health policies related to the use of antibiotics in animal feed in countries such as India. We sequenced 11 new genomes of H. pullorum as a part of this study. These genomes would provide much value in addition to the ongoing comparative genomic studies of helicobacters.}, } @article {pmid27806993, year = {2017}, author = {Shippy, DC and Lemke, JJ and Berry, A and Nelson, K and Hines, ME and Talaat, AM}, title = {Superior Protection from Live-Attenuated Vaccines Directed against Johne's Disease.}, journal = {Clinical and vaccine immunology : CVI}, volume = {24}, number = {1}, pages = {}, pmid = {27806993}, issn = {1556-679X}, mesh = {Adaptive Immunity ; Animals ; Bacterial Load ; Bacterial Shedding ; Bacterial Vaccines/administration & dosage/*immunology ; Female ; Genes, Bacterial ; Goat Diseases/*prevention & control ; Goats ; Histocytochemistry ; Immunity, Cellular ; Male ; Mutation ; Mycobacterium avium subsp. paratuberculosis/*immunology/isolation & purification ; Paratuberculosis/microbiology/pathology/*prevention & control ; Treatment Outcome ; Vaccines, Attenuated/administration & dosage/immunology ; Virulence Factors/genetics ; }, abstract = {Mycobacterium avium subsp. paratuberculosis (M. paratuberculosis) is the etiological agent of Johne's disease in ruminants. Johne's disease is an important enteric infection causing large economic losses associated with infected herds. 
In an attempt to fight this infection, we created two novel live-attenuated vaccine candidates with mutations in sigH and lipN (pgsH and pgsN, respectively). Earlier reports in mice suggested these vaccines are promising candidates to fight Johne's disease in ruminants. In this study, we tested the performances of the two constructs as vaccine candidates using the goat model of Johne's disease. Both vaccines appeared to provide significant immunity to goats against challenge from wild-type M. paratuberculosis. The pgsH and pgsN constructs showed a significant reduction in histopathological lesions and tissue colonization compared to nonvaccinated goats and those vaccinated with an inactivated vaccine. Unlike the inactivated vaccine, the pgsN construct was able to eliminate fecal shedding from challenged animals, a feature that is highly desirable to control Johne's disease in infected herds. Furthermore, strong initial cell-mediated immune responses were elicited in goats vaccinated with pgsN that were not demonstrated in other vaccine groups. Overall, the results indicate the potential use of live-attenuated vaccines to control intracellular pathogens, including M. 
paratuberculosis, and warrant further testing in cattle, the main target for Johne's disease control programs.}, } @article {pmid27773726, year = {2017}, author = {He, EM and Chen, CW and Guo, Y and Hsu, MH and Zhang, L and Chen, HL and Zhao, GP and Chiu, CH and Zhou, Y}, title = {The genome of serotype VI Streptococcus agalactiae serotype VI and comparative analysis.}, journal = {Gene}, volume = {597}, number = {}, pages = {59-65}, doi = {10.1016/j.gene.2016.10.030}, pmid = {27773726}, issn = {1879-0038}, mesh = {Animals ; Bacterial Proteins/genetics/metabolism ; Cattle ; Dogs ; *Genome, Bacterial ; Genomic Islands/genetics ; Humans ; Phylogeny ; Sequence Analysis, DNA ; Streptococcus agalactiae/*genetics/isolation & purification/*pathogenicity ; }, abstract = {Streptococcus agalactiae (GBS) causes serious infections in humans and other species. A total of 25 complete GBS genomes, including the first sequenced serotype VI genome (GBS-M002), were compared in this study. The power law model suggested that the pan-genome of GBS is open, with approximately 1300 genes in the core genome of GBS, accounting for approximately 60% of the average genome content. GBS-M002 has 73 specific genes and is one of the five strains containing >60 specific genes. Based on COG (Cluster of Orthologous Groups of proteins) functional classification, 24% of the genes related to defense mechanisms are specific among the strains. 
A phylogenetic tree shows that GBS-M002 is closely related to certain strains of serotypes III and V from humans and to isolates of unknown serotype from dog and bovine hosts, suggesting the universal infection potential of GBS from humans to other mammal and fish hosts.}, } @article {pmid27799067, year = {2016}, author = {Loviglio, MN and Beck, CR and White, JJ and Leleu, M and Harel, T and Guex, N and Niknejad, A and Bi, W and Chen, ES and Crespo, I and Yan, J and Charng, WL and Gu, S and Fang, P and Coban-Akdemir, Z and Shaw, CA and Jhangiani, SN and Muzny, DM and Gibbs, RA and Rougemont, J and Xenarios, I and Lupski, JR and Reymond, A}, title = {Identification of a RAI1-associated disease network through integration of exome sequencing, transcriptomics, and 3D genomics.}, journal = {Genome medicine}, volume = {8}, number = {1}, pages = {105}, pmid = {27799067}, issn = {1756-994X}, support = {U54 HG006542/HG/NHGRI NIH HHS/United States ; T32 GM007526/GM/NIGMS NIH HHS/United States ; UM1 HG006542/HG/NHGRI NIH HHS/United States ; K99 GM120453/GM/NIGMS NIH HHS/United States ; R01 NS058529/NS/NINDS NIH HHS/United States ; T32 GM008307/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; Embryo, Mammalian/cytology/metabolism ; Exome/*genetics ; Female ; *Gene Regulatory Networks ; Genomics/*methods ; Humans ; Male ; Mice ; Mice, Inbred C57BL ; Mice, Knockout ; Mutation/*genetics ; Phenotype ; RNA, Messenger/genetics ; Real-Time Polymerase Chain Reaction ; Reverse Transcriptase Polymerase Chain Reaction ; Smith-Magenis Syndrome/*genetics ; Trans-Activators ; Transcription Factors/*physiology ; *Transcriptome ; }, abstract = {BACKGROUND: Smith-Magenis syndrome (SMS) is a developmental disability/multiple congenital anomaly disorder resulting from haploinsufficiency of RAI1. It is characterized by distinctive facial features, brachydactyly, sleep disturbances, and stereotypic behaviors.

METHODS: We investigated a cohort of 15 individuals with a clinical suspicion of SMS who showed neither deletion in the SMS critical region nor damaging variants in RAI1 using whole exome sequencing. A combination of network analysis (co-expression and biomedical text mining), transcriptomics, and circularized chromatin conformation capture (4C-seq) was applied to verify whether modified genes are part of the same disease network as known SMS-causing genes.

RESULTS: Potentially deleterious variants were identified in nine of these individuals using whole-exome sequencing. Eight of these changes affect KMT2D, ZEB2, MAP2K2, GLDC, CASK, MECP2, KDM5C, and POGZ, known to be associated with Kabuki syndrome 1, Mowat-Wilson syndrome, cardiofaciocutaneous syndrome, glycine encephalopathy, mental retardation and microcephaly with pontine and cerebellar hypoplasia, X-linked mental retardation 13, X-linked mental retardation Claes-Jensen type, and White-Sutton syndrome, respectively. The ninth individual carries a de novo variant in JAKMIP1, a regulator of neuronal translation that was recently found deleted in a patient with autism spectrum disorder. Analyses of co-expression and biomedical text mining suggest that these pathologies and SMS are part of the same disease network. Further support for this hypothesis was obtained from transcriptome profiling that showed that the expression levels of both Zeb2 and Map2k2 are perturbed in Rai1 [-/-] mice. As an orthogonal approach to potentially contributory disease gene variants, we used chromatin conformation capture to reveal chromatin contacts between RAI1 and the loci flanking ZEB2 and GLDC, as well as between RAI1 and human orthologs of the genes that show perturbed expression in our Rai1 [-/-] mouse model.

CONCLUSIONS: These holistic studies of RAI1 and its interactions allow insights into SMS and other disorders associated with intellectual disability and behavioral abnormalities. Our findings support a pan-genomic approach to the molecular diagnosis of a distinctive disorder.}, } @article {pmid27795210, year = {2016}, author = {Sánchez-Martín, J and Steuernagel, B and Ghosh, S and Herren, G and Hurni, S and Adamski, N and Vrána, J and Kubaláková, M and Krattinger, SG and Wicker, T and Doležel, J and Keller, B and Wulff, BB}, title = {Rapid gene isolation in barley and wheat by mutant chromosome sequencing.}, journal = {Genome biology}, volume = {17}, number = {1}, pages = {221}, pmid = {27795210}, issn = {1474-760X}, mesh = {*Chromosomes, Plant ; *Cloning, Molecular ; *Genes, Plant ; Hordeum/*genetics ; *Mutation ; Phenotype ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; Triticum/*genetics ; }, abstract = {Identification of causal mutations in barley and wheat is hampered by their large genomes and suppressed recombination. To overcome these obstacles, we have developed MutChromSeq, a complexity reduction approach based on flow sorting and sequencing of mutant chromosomes, to identify induced mutations by comparison to parental chromosomes. We apply MutChromSeq to six mutants each of the barley Eceriferum-q gene and the wheat Pm2 genes. This approach unambiguously identified single candidate genes that were verified by Sanger sequencing of additional mutants. 
MutChromSeq enables reference-free forward genetics in barley and wheat, thus opening up their pan-genomes to functional genomics.}, } @article {pmid27638249, year = {2016}, author = {Anastasi, E and MacArthur, I and Scortti, M and Alvarez, S and Giguère, S and Vázquez-Boland, JA}, title = {Pangenome and Phylogenomic Analysis of the Pathogenic Actinobacterium Rhodococcus equi.}, journal = {Genome biology and evolution}, volume = {8}, number = {10}, pages = {3140-3148}, pmid = {27638249}, issn = {1759-6653}, support = {BB/J004227/1//Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Bacterial Proteins/genetics ; Carbohydrate Metabolism/genetics ; *Evolution, Molecular ; *Genome, Bacterial ; Membrane Transport Proteins/genetics ; *Phylogeny ; Polymorphism, Genetic ; Rhodococcus equi/classification/*genetics ; }, abstract = {We report a comparative study of 29 representative genomes of the animal pathogen Rhodococcus equi. The analyses showed that R. equi is genetically homogeneous and clonal, with a large core genome accounting for ≈80% of an isolate's gene content. An open pangenome, even distribution of accessory genes among the isolates, and absence of significant core-genome recombination, indicated that gene gain/loss is a main driver of R. equi genome evolution. Traits previously predicted to be important in R. equi physiology, virulence and niche adaptation were part of the core genome. This included the lack of a phosphoenolpyruvate:carbohydrate transport system (PTS), unique among the rhodococci except for the closely related Rhodococcus defluvii, reflecting selective PTS gene loss in the R. equi-R. defluvii sublineage. Thought to be asaccharolytic, rbsCB and glcP non-PTS sugar permease homologues were identified in the core genome and, albeit inefficiently, R. equi utilized their putative substrates, ribose and (irregularly) glucose. There was no correlation between R. 
equi whole-genome phylogeny and host or geographical source, with evidence of global spread of genomovars. The distribution of host-associated virulence plasmid types was consistent with the exchange of the plasmids (and corresponding host shifts) across the R. equi population, and human infection being zoonotically acquired. Phylogenomic analyses demonstrated that R. equi occupies a central position in the Rhodococcus phylogeny, not supporting the recently proposed transfer of the species to a new genus.}, } @article {pmid27477789, year = {2016}, author = {Jourdy, Y and Chatron, N and Carage, ML and Fretigny, M and Meunier, S and Zawadzki, C and Gay, V and Negrier, C and Sanlaville, D and Vinciguerra, C}, title = {Study of six patients with complete F9 deletion characterized by cytogenetic microarray: role of the SOX3 gene in intellectual disability.}, journal = {Journal of thrombosis and haemostasis : JTH}, volume = {14}, number = {10}, pages = {1988-1993}, doi = {10.1111/jth.13430}, pmid = {27477789}, issn = {1538-7836}, mesh = {Adult ; Alleles ; Chromosome Mapping ; Cytogenetics ; Factor IX/*genetics ; Female ; *Gene Deletion ; Genetic Association Studies ; Genomics ; Hemophilia B/complications/*genetics ; Heterozygote ; Humans ; Intellectual Disability/complications/*genetics ; Male ; Middle Aged ; Mutation ; Oligonucleotide Array Sequence Analysis/*methods ; Phenotype ; Prothrombin Time ; SOXB1 Transcription Factors/*genetics ; Sequence Deletion ; Young Adult ; }, abstract = {UNLABELLED: Essentials: Some hemophilia B (HB) patients with complete F9 deletion present with intellectual disability (ID). We delineate six F9 complete deletions and investigate genotype/phenotype correlation. We identify SOX3 as a candidate gene for ID, acting through haploinsufficiency, in HB patients. All complete F9 deletions in ID patients should be explored with cytogenetic microarrays.

SUMMARY: Background: Large deletions encompassing both the complete F9 gene and contiguous genes have been detected in patients with severe hemophilia B (HB). Some of these patients present other clinical features, such as intellectual disability (ID). Objectives/Methods: In this study, we characterized six unrelated large deletions encompassing F9, by cytogenetic microarray analysis (CMA), to investigate genotype/phenotype correlation. Results: Five of the six patients included in this study presented with ID associated with HB. CMA showed that the six large deletions, ranging in size from approximately 933 kb to 9.19 Mb, were located within the Xq26.3 to Xq28 bands. In all cases, the complete deletion of F9 was associated with the loss of various neighboring genes (5-28 other genes). The smallest region of overlap for ID was a 1.26-Mb region encompassing seven OMIM genes (LOC389895, SOX3, LINC00632, CDR1, SPANXF1, LDOC1, SPANXC). SOX3, our candidate gene for ID, encodes an early transcription factor involved in pituitary development. All of the patients studied who had both HB and ID had deletion of the SOX3 gene. Conclusions: All HB patients with an atypical phenotype, especially if complete deletion of F9 is suspected, should be referred to a geneticist for possible pangenomic assessment, because haploinsufficiency of genes flanking F9, such as SOX3 in particular, may result in a broader phenotype, including ID. 
Such assessment would be of particular value for the genetic counseling of female carriers with F9 deletions, as it would facilitate analysis of the risk of transmitting HB associated with ID.}, } @article {pmid27339259, year = {2016}, author = {Coutinho, FH and Dutilh, BE and Thompson, CC and Thompson, FL}, title = {Proposal of fifteen new species of Parasynechococcus based on genomic, physiological and ecological features.}, journal = {Archives of microbiology}, volume = {198}, number = {10}, pages = {973-986}, doi = {10.1007/s00203-016-1256-y}, pmid = {27339259}, issn = {1432-072X}, mesh = {Cyanobacteria/*classification/genetics/physiology ; Genome, Bacterial/genetics ; Genomics ; Nucleic Acid Hybridization ; Oceans and Seas ; Phylogeny ; *Water Microbiology ; }, abstract = {Members of the recently proposed genus Parasynechococcus (Cyanobacteria) are extremely abundant throughout the global ocean and contribute significantly to global primary productivity. However, the taxonomy of these organisms remains poorly characterized. The aim of this study was to propose a new taxonomic framework for Parasynechococcus based on a genomic taxonomy approach that incorporates genomic, physiological and ecological data. Through in silico DNA-DNA hybridization, average amino acid identity, dinucleotide signatures and phylogenetic reconstruction, a total of 15 species of Parasynechococcus could be delineated. 
Each species was then described on the basis of their gene content, light and nutrient utilization strategies, geographical distribution patterns throughout the oceans and response to environmental parameters.}, } @article {pmid27769991, year = {2018}, author = {{Computational Pan-Genomics Consortium}}, title = {Computational pan-genomics: status, promises and challenges.}, journal = {Briefings in bioinformatics}, volume = {19}, number = {1}, pages = {118-135}, pmid = {27769991}, issn = {1477-4054}, mesh = {*Algorithms ; Computational Biology/methods ; *Genome, Human ; Genomics/*methods ; Humans ; *Software ; }, abstract = {Many disciplines, from human genetics and oncology to plant breeding, microbiology and virology, commonly face the challenge of analyzing rapidly increasing numbers of genomes. In case of Homo sapiens, the number of sequenced genomes will approach hundreds of thousands in the next few years. Simply scaling up established bioinformatics pipelines will not be sufficient for leveraging the full potential of such rich genomic data sets. Instead, novel, qualitatively different computational methods and paradigms are needed. We will witness the rapid extension of computational pan-genomics, a new sub-area of research in computational biology. In this article, we generalize existing definitions and understand a pan-genome as any collection of genomic sequences to be analyzed jointly or to be used as a reference. We examine already available approaches to construct and use pan-genomes, discuss the potential benefits of future technologies and methodologies and review open challenges from the vantage point of the above-mentioned biological disciplines. As a prominent example for a computational paradigm shift, we particularly highlight the transition from the representation of reference genomes as strings to representations as graphs. 
We outline how this and other challenges from different application domains translate into common computational problems, point out relevant bioinformatics techniques and identify open problems in computer science. With this review, we aim to increase awareness that a joint approach to computational pan-genomics can help address many of the problems currently faced in various domains.}, } @article {pmid27768818, year = {2017}, author = {Molina-Santiago, C and Udaondo, Z and Gómez-Lozano, M and Molin, S and Ramos, JL}, title = {Global transcriptional response of solvent-sensitive and solvent-tolerant Pseudomonas putida strains exposed to toluene.}, journal = {Environmental microbiology}, volume = {19}, number = {2}, pages = {645-658}, doi = {10.1111/1462-2920.13585}, pmid = {27768818}, issn = {1462-2920}, mesh = {Bacterial Proteins/genetics/metabolism ; Biodegradation, Environmental ; Gene Expression Regulation, Bacterial/*physiology ; Pseudomonas putida/*drug effects/genetics/*metabolism ; Solvents/*pharmacology ; Toluene/*pharmacology ; *Transcriptome ; }, abstract = {Pseudomonas putida strains are generally recognized as solvent tolerant, exhibiting varied sensitivity to organic solvents. Pan-genome analysis has revealed that 30% of genes belong to the core-genome of Pseudomonas. Accessory and unique genes confer high degree of adaptability and capabilities for the degradation and synthesis of a wide range of chemicals. For the use of these microbes in bioremediation and biocatalysis, it is critical to understand the mechanisms underlying these phenotypic differences. In this study, RNA-seq analysis compared the short- and long-term responses of the toluene-sensitive KT2440 strain and the highly tolerant DOT-T1E strain. The sensitive strain activates a larger number of genes in a higher magnitude than DOT-T1E. This is expected because KT2440 bears one toluene tolerant pump, while DOT-T1E encodes three of these pumps. 
Both strains activate membrane modifications to reduce toluene membrane permeability. The KT2440 strain activates the TCA cycle to generate energy, while avoiding energy-intensive processes such as flagellar biosynthesis. This suggests that KT2440 responds to toluene by focusing on survival mechanisms. The DOT-T1E strain activates toluene degradation pathways, using toluene as source of energy. Among the unique genes encoded by DOT-T1E is a 70 kb island composed of genes of unknown function induced in response to toluene.}, } @article {pmid27765811, year = {2016}, author = {Li, G and Shen, M and Le, S and Tan, Y and Li, M and Zhao, X and Shen, W and Yang, Y and Wang, J and Zhu, H and Li, S and Rao, X and Hu, F and Lu, S}, title = {Genomic analyses of multidrug resistant Pseudomonas aeruginosa PA1 resequenced by single-molecule real-time sequencing.}, journal = {Bioscience reports}, volume = {36}, number = {6}, pages = {}, pmid = {27765811}, issn = {1573-4935}, mesh = {Bacterial Proteins/*genetics ; Drug Resistance, Multiple, Bacterial/*genetics ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Genomics/methods ; Phylogeny ; Pseudomonas aeruginosa/*genetics ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {As a third-generation sequencing (TGS) method, single-molecule real-time (SMRT) technology provides long read length, and it is well suited for resequencing projects and de novo assembly. In the present study, Pseudomonas aeruginosa PA1 was characterized and resequenced using SMRT technology. PA1 was also subjected to genomic, comparative and pan-genomic analyses. The multidrug resistant strain PA1 possesses a 6,498,072 bp genome and a sequence type of ST-782. The genome of PA1 was also visualized, and the results revealed the details of general genome annotations, virulence factors, regulatory proteins (RPs), secretion system proteins, type II toxin-antitoxin (T-A) pairs and genomic islands. 
Whole genome comparison analysis suggested that PA1 exhibits similarity to other P. aeruginosa strains but differs in terms of horizontal gene transfer (HGT) regions, such as prophages and genomic islands. Phylogenetic analyses based on 16S rRNA sequences demonstrated that PA1 is closely related to PAO1, and P. aeruginosa strains can be divided into two main groups. The pan-genome of P. aeruginosa consists of a core genome of approximately 4,000 genes and an accessory genome of at least 6,600 genes. The present study presented a detailed, visualized and comparative analysis of the PA1 genome, to enhance our understanding of this notorious pathogen.}, } @article {pmid27765012, year = {2016}, author = {Sternes, PR and Borneman, AR}, title = {Erratum to: 'Consensus pan-genome assembly of the specialised wine bacterium Oenococcus oeni'.}, journal = {BMC genomics}, volume = {17}, number = {1}, pages = {813}, pmid = {27765012}, issn = {1471-2164}, } @article {pmid27734920, year = {2016}, author = {Bolotin, E and Hershberg, R}, title = {Bacterial intra-species gene loss occurs in a largely clocklike manner mostly within a pool of less conserved and constrained genes.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {35168}, pmid = {27734920}, issn = {2045-2322}, mesh = {Bacteria/*genetics ; Bacterial Proteins/genetics ; Evolution, Molecular ; Genes, Bacterial/*genetics ; Genome, Bacterial/*genetics ; Phylogeny ; }, abstract = {Gene loss is a major contributor to the evolution of bacterial gene content. Gene loss may occur as a result of shifts in environment leading to changes in the intensity and/or directionality of selection applied for the maintenance of specific genes. Gene loss may also occur in a more neutral manner, when gene functions are lost that were not subject to strong selection to be maintained, irrespective of changes to environment. 
Here, we used a pangenome-based approach to investigate patterns of gene loss across 15 bacterial species. We demonstrate that gene loss tends to occur mostly within a pool of genes that are less constrained within species, even in those strains from which they are not lost, and less conserved across bacterial species. Our results indicate that shifts in selection, resulting from shifts in environment are not required to explain the majority of gene loss events occurring within a diverse collection of bacterial species. Caution should therefore be taken when attributing differences in gene content to differences in environment.}, } @article {pmid27733845, year = {2016}, author = {Shin, J and Song, Y and Jeong, Y and Cho, BK}, title = {Analysis of the Core Genome and Pan-Genome of Autotrophic Acetogenic Bacteria.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {1531}, pmid = {27733845}, issn = {1664-302X}, abstract = {Acetogens are obligate anaerobic bacteria capable of reducing carbon dioxide (CO2) to multicarbon compounds coupled to the oxidation of inorganic substrates, such as hydrogen (H2) or carbon monoxide (CO), via the Wood-Ljungdahl pathway. Owing to the metabolic capability of CO2 fixation, much attention has been focused on understanding the unique pathways associated with acetogens, particularly their metabolic coupling of CO2 fixation to energy conservation. Most known acetogens are phylogenetically and metabolically diverse bacteria present in 23 different bacterial genera. With the increased volume of available genome information, acetogenic bacterial genomes can be analyzed by comparative genome analysis. Even with the genetic diversity that exists among acetogens, the Wood-Ljungdahl pathway, a central metabolic pathway, and cofactor biosynthetic pathways are highly conserved for autotrophic growth. 
Additionally, comparative genome analysis revealed that most genes in the acetogen-specific core genome were associated with the Wood-Ljungdahl pathway. The conserved enzymes and those predicted as missing can provide insight into biological differences between acetogens and allow for the discovery of promising candidates for industrial applications.}, } @article {pmid27722863, year = {2016}, author = {Cui, Y and Song, Y}, title = {Genome and Evolution of Yersinia pestis.}, journal = {Advances in experimental medicine and biology}, volume = {918}, number = {}, pages = {171-192}, doi = {10.1007/978-94-024-0890-4_6}, pmid = {27722863}, issn = {0065-2598}, mesh = {*Evolution, Molecular ; Gene Silencing ; Genome, Bacterial/*genetics ; Mutation Rate ; *Pandemics ; Plague/epidemiology/*microbiology ; Yersinia pestis/classification/*genetics/pathogenicity/physiology ; }, abstract = {This chapter summarizes researches on genome and evolution features of Yersinia pestis, the young pathogen that evolved from Y. pseudotuberculosis at least 5000 years ago. Y. pestis is a highly clonal bacterial species with closed pan-genome. Comparative genomic analysis revealed that genome of Y. pestis experienced highly frequent rearrangement and genome decay events during the evolution. The genealogy of Y. pestis includes five major branches, and four of them seemed raised from a "big bang" node that is associated with the Black Death. Although whole genome-wide variation of Y. pestis reflected a neutral evolutionary process, the branch length in the genealogical tree revealed over dispersion, which was supposedly caused by varied historical molecular clock that is associated with demographical effect by alternate cycles of enzootic disease and epizootic disease in sylvatic plague foci. In recent years, palaeomicrobiology researches on victims of the Black Death, and Justinian's plague verified that two historical pandemics were indeed caused by Y. 
pestis, but the etiological lineages might be extinct today.}, } @article {pmid27712915, year = {2016}, author = {Zhang, X and She, S and Dong, W and Niu, J and Xiao, Y and Liang, Y and Liu, X and Zhang, X and Fan, F and Yin, H}, title = {Comparative genomics unravels metabolic differences at the species and/or strain level and extremely acidic environmental adaptation of ten bacteria belonging to the genus Acidithiobacillus.}, journal = {Systematic and applied microbiology}, volume = {39}, number = {8}, pages = {493-502}, doi = {10.1016/j.syapm.2016.08.007}, pmid = {27712915}, issn = {1618-0984}, mesh = {Acidithiobacillus/classification/*genetics/*metabolism ; Adaptation, Physiological/*genetics ; Carbon/*metabolism ; Genome, Bacterial/genetics ; Genomics ; Hydrogen-Ion Concentration ; Metabolic Networks and Pathways/*genetics ; Metals, Heavy/metabolism ; Nitrogen/*metabolism ; Sulfur/*metabolism ; }, abstract = {Members of the Acidithiobacillus genus are widely found in extreme environments characterized by low pH and high concentrations of toxic substances, thus it is necessary to identify the cellular mechanisms needed to cope with these harsh conditions. Pan-genome analysis of ten bacteria belonging to the genus Acidithiobacillus suggested the existence of core genome, most of which were assigned to the metabolism-associated genes. Additionally, the unique genes of Acidithiobacillus ferrooxidans were much less than those of other species. A large proportion of Acidithiobacillus ferrivorans-specific genes were mapped especially to metabolism-related genes, indicating that diverse metabolic pathways might confer an advantage for adaptation to local environmental conditions. Analyses of functional metabolisms revealed the differences of carbon metabolism, nitrogen metabolism, and sulfur metabolism at the species and/or strain level. The findings also showed that Acidithiobacillus spp. harbored specific adaptive mechanisms for thriving under extreme environments. 
The genus Acidithiobacillus had the genetic potential to resist and metabolize toxic substances such as heavy metals and organic solvents. Comparison across species and/or strains of Acidithiobacillus populations provided a deeper appreciation of metabolic differences and environmental adaptation, as well as highlighting the importance of cellular mechanisms that maintain the basal physiological functions under complex acidic environmental conditions.}, } @article {pmid27711162, year = {2016}, author = {Pucker, B and Holtgräwe, D and Rosleff Sörensen, T and Stracke, R and Viehöver, P and Weisshaar, B}, title = {A De Novo Genome Sequence Assembly of the Arabidopsis thaliana Accession Niederzenz-1 Displays Presence/Absence Variation and Strong Synteny.}, journal = {PloS one}, volume = {11}, number = {10}, pages = {e0164321}, pmid = {27711162}, issn = {1932-6203}, mesh = {Arabidopsis/genetics ; Chromosome Mapping ; DNA Copy Number Variations ; DNA, Plant/chemistry/isolation & purification/metabolism ; Databases, Genetic ; Expressed Sequence Tags ; *Genome, Plant ; High-Throughput Nucleotide Sequencing ; INDEL Mutation ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; }, abstract = {Arabidopsis thaliana is the most important model organism for fundamental plant biology. The genome diversity of different accessions of this species has been intensively studied, for example in the 1001 genome project which led to the identification of many small nucleotide polymorphisms (SNPs) and small insertions and deletions (InDels). In addition, presence/absence variation (PAV), copy number variation (CNV) and mobile genetic elements contribute to genomic differences between A. thaliana accessions. To address larger genome rearrangements between the A. thaliana reference accession Columbia-0 (Col-0) and another accession of about average distance to Col-0, we created a de novo next generation sequencing (NGS)-based assembly from the accession Niederzenz-1 (Nd-1). 
The result was evaluated with respect to assembly strategy and synteny to Col-0. We provide a high quality genome sequence of the A. thaliana accession (Nd-1, LXSY01000000). The assembly displays an N50 of 0.590 Mbp and covers 99% of the Col-0 reference sequence. Scaffolds from the de novo assembly were positioned on the basis of sequence similarity to the reference. Errors in this automatic scaffold anchoring were manually corrected based on analyzing reciprocal best BLAST hits (RBHs) of genes. Comparison of the final Nd-1 assembly to the reference revealed duplications and deletions (PAV). We identified 826 insertions and 746 deletions in Nd-1. Randomly selected candidates of PAV were experimentally validated. Our Nd-1 de novo assembly allowed reliable identification of larger genic and intergenic variants, which was difficult or error-prone by short read mapping approaches alone. While overall sequence similarity as well as synteny is very high, we detected short and larger (affecting more than 100 bp) differences between Col-0 and Nd-1 based on bi-directional comparisons. The de novo assembly provided here and additional assemblies that will certainly be published in the future will allow to describe the pan-genome of A. 
thaliana.}, } @article {pmid27697167, year = {2017}, author = {Stefanovic, E and Fitzgerald, G and McAuliffe, O}, title = {Advances in the genomics and metabolomics of dairy lactobacilli: A review.}, journal = {Food microbiology}, volume = {61}, number = {}, pages = {33-49}, doi = {10.1016/j.fm.2016.08.009}, pmid = {27697167}, issn = {1095-9998}, mesh = {Dairy Products/*microbiology ; Food Microbiology ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Genomics ; Lactobacillus/classification/*genetics/*metabolism/physiology ; *Metabolic Engineering ; *Metabolome ; Metabolomics ; Phylogeny ; }, abstract = {The Lactobacillus genus represents the largest and most diverse genera of all the lactic acid bacteria (LAB), encompassing species with applications in industrial, biotechnological and medical fields. The increasing number of available Lactobacillus genome sequences has allowed understanding of genetic and metabolic potential of this LAB group. Pangenome and core genome studies are available for numerous species, demonstrating the plasticity of the Lactobacillus genomes and providing the evidence of niche adaptability. Advancements in the application of lactobacilli in the dairy industry lie in exploring the genetic background of their commercially important characteristics, such as flavour development potential or resistance to the phage attack. The integration of available genomic and metabolomic data through the generation of genome scale metabolic models has enabled the development of computational models that predict the behaviour of organisms under specific conditions and present a route to metabolic engineering. Lactobacilli are recognised as potential cell factories, confirmed by the successful production of many compounds. In this review, we discuss the current knowledge of genomics, metabolomics and metabolic engineering of the prevalent Lactobacillus species associated with the production of fermented dairy foods. 
In-depth understanding of their characteristics opens the possibilities for their future knowledge-based applications.}, } @article {pmid27696900, year = {2016}, author = {Yu, D and Yin, Z and Li, B and Jin, Y and Ren, H and Zhou, J and Zhou, W and Liang, L and Yue, J}, title = {Gene flow, recombination, and positive selection in Stenotrophomonas maltophilia: mechanisms underlying the diversity of the widespread opportunistic pathogen.}, journal = {Genome}, volume = {59}, number = {12}, pages = {1063-1075}, doi = {10.1139/gen-2016-0073}, pmid = {27696900}, issn = {1480-3321}, mesh = {Bacterial Proteins/genetics ; Evolution, Molecular ; *Gene Flow ; Genetic Variation ; Genome, Bacterial ; Gram-Negative Bacterial Infections/microbiology ; Humans ; Opportunistic Infections/microbiology ; Phylogeny ; *Recombination, Genetic ; *Selection, Genetic ; Stenotrophomonas maltophilia/classification/*genetics ; }, abstract = {Stenotrophomonas maltophilia is a global multidrug-resistant human opportunistic pathogen in clinical environments. Stenotrophomonas maltophilia is also ubiquitous in aqueous environments, soil, and plants. Various molecular typing methods have revealed that S. maltophilia exhibits high levels of phenotypic and genotypic diversity. However, information regarding the genomic diversity within S. maltophilia and the corresponding genetic mechanisms resulting in said diversity remain scarce. The genome sequences of 17 S. maltophilia strains were selected to investigate the mechanisms contributing to genetic diversity at the genome level. The core and large pan-genomes of the species were first estimated, resulting in a large, open pan-genome. 
A species phylogeny was also reconstructed based on 344 orthologous genes with one copy per genome, and the contribution of four evolutionary mechanisms to the species genome diversity was quantified: 15%-35% of the genes showed evidence for recombination, 0%-25% of the genes in one genome were likely gained, 0%-44% of the genes in some genomes were likely lost, and less than 0.3% of the genes in a genome were under positive selection pressures. We observed that, among the four main mechanisms, homologous recombination plays a key role in maintaining diversity in S. maltophilia. In this study, we provide an overview of evolution in S. maltophilia to provide a better understanding of its evolutionary dynamics and its relationship with genome diversity.}, } @article {pmid27663497, year = {2017}, author = {Lanza, VF and Baquero, F and de la Cruz, F and Coque, TM}, title = {AcCNET (Accessory Genome Constellation Network): comparative genomics software for accessory genome analysis using bipartite networks.}, journal = {Bioinformatics (Oxford, England)}, volume = {33}, number = {2}, pages = {283-285}, doi = {10.1093/bioinformatics/btw601}, pmid = {27663497}, issn = {1367-4811}, mesh = {Bacteria/genetics ; *Genome, Bacterial ; Genomics/*methods ; *Metagenome ; *Phylogeny ; Proteome ; *Software ; }, abstract = {UNLABELLED: AcCNET (Accessory genome Constellation Network) is a Perl application that aims to compare accessory genomes of a large number of genomic units, both at qualitative and quantitative levels. Using the proteomes extracted from the analysed genomes, AcCNET creates a bipartite network compatible with standard network analysis platforms. AcCNET allows merging phylogenetic and functional information about the concerned genomes, thus improving the capability of current methods of network analysis. 
The AcCNET bipartite network opens a new perspective to explore the pangenome of bacterial species, focusing on the accessory genome behind the idiosyncrasy of a particular strain and/or population.

AcCNET is available under GNU General Public License version 3.0 (GPLv3) from http://sourceforge.net/projects/accnet CONTACT: valfernandez.vf@gmail.com SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid27659070, year = {2016}, author = {Shen, M and Le, S and Jin, X and Li, G and Tan, Y and Li, M and Zhao, X and Shen, W and Yang, Y and Wang, J and Zhu, H and Li, S and Rao, X and Hu, F and Lu, S}, title = {Characterization and Comparative Genomic Analyses of Pseudomonas aeruginosa Phage PaoP5: New Members Assigned to PAK_P1-like Viruses.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {34067}, pmid = {27659070}, issn = {2045-2322}, abstract = {As a potential alternative to antibiotics, phages can be used to treat multi-drug resistant bacteria. As such, the biological characteristics of phages should be investigated to utilize them as effective antimicrobial agents. In this study, phage PaoP5, a lytic virus that infects Pseudomonas aeruginosa PAO1, was isolated and genomically characterized. PaoP5 comprises an icosahedral head with an apex diameter of 69 nm and a contractile tail with a length of 120 nm. The PaoP5 genome is a linear dsDNA molecule containing 93,464 base pairs (bp) with 49.51% G + C content of 11 tRNA genes and a 1,200 bp terminal redundancy. A total of 176 protein-coding genes were predicted in the PaoP5 genome. Nine PaoP5 structural proteins were identified. Three hypothetical proteins were determined as structural. Comparative genomic analyses revealed that seven new Pseudomonas phages, namely, PaoP5, K8, C11, vB_PaeM_C2-10_Ab02, vB_PaeM_C2-10_Ab08, vB_PaeM_C2-10_Ab10, and vB_PaeM_C2-10_Ab15, were similar to PAK_P1-like viruses. Phylogenetic and pan-genome analyses suggested that the new phages should be assigned to PAK_P1-like viruses, which possess approximately 100 core genes and 150 accessory genes. 
This work presents a detailed and comparative analysis of PaoP5 to enhance our understanding of phage biology.}, } @article {pmid27634541, year = {2016}, author = {Hassan, A and Naz, A and Obaid, A and Paracha, RZ and Naz, K and Awan, FM and Muhmmad, SA and Janjua, HA and Ahmad, J and Ali, A}, title = {Pangenome and immuno-proteomics analysis of Acinetobacter baumannii strains revealed the core peptide vaccine targets.}, journal = {BMC genomics}, volume = {17}, number = {1}, pages = {732}, pmid = {27634541}, issn = {1471-2164}, mesh = {Acinetobacter baumannii/classification/*genetics/immunology/*metabolism ; Amino Acid Sequence ; Antigens, Bacterial/chemistry/genetics/immunology/metabolism ; Computational Biology/methods ; Epitope Mapping ; Epitopes/chemistry/genetics/immunology ; Evolution, Molecular ; *Genome, Bacterial ; *Genomics/methods ; Models, Molecular ; Molecular Sequence Annotation ; Peptides/chemistry/genetics/immunology/metabolism ; Phylogeny ; Protein Conformation ; Protein Interaction Mapping ; Protein Interaction Maps ; *Proteome ; *Proteomics/methods ; Vaccines, Subunit/genetics/immunology ; Virulence/genetics ; }, abstract = {BACKGROUND: Acinetobacter baumannii has emerged as a significant nosocomial pathogen during the last few years, exhibiting resistance to almost all major classes of antibiotics. Alternative treatment options such as vaccines tend to be most promising and cost effective approaches against this resistant pathogen. In the current study, we have explored the pan-genome of A. baumannii followed by immune-proteomics and reverse vaccinology approaches to identify potential core vaccine targets.

RESULTS: The pan-genome of all available A. baumannii strains (30 complete genomes) is estimated to contain 7,606 gene families and the core genome consists of 2,445 gene families (~32 % of the pan-genome). Phylogenetic tree, comparative genomic and proteomic analysis revealed both intra- and inter genomic similarities and evolutionary relationships. Among the conserved core genome, thirteen proteins, including P pilus assembly protein, pili assembly chaperone, AdeK, PonA, OmpA, general secretion pathway protein D, FhuE receptor, Type VI secretion system OmpA/MotB, TonB dependent siderophore receptor, general secretion pathway protein D, outer membrane protein, peptidoglycan associated lipoprotein and peptidyl-prolyl cis-trans isomerase are identified as highly antigenic. Epitope mapping of the target proteins revealed the presence of antigenic surface exposed 9-mer T-cell epitopes. Protein-protein interaction and functional annotation have shown their involvement in significant biological and molecular processes. The pipeline is validated by predicting already known immunogenic targets against Gram negative pathogen Helicobacter pylori as a positive control.

CONCLUSION: The study, based upon combinatorial approach of pan-genomics, core genomics, proteomics and reverse vaccinology led us to find out potential vaccine candidates against A. baumannii. The comprehensive analysis of all the completely sequenced genomes revealed thirteen putative antigens which could elicit substantial immune response. The integration of computational vaccinology strategies would facilitate in tackling the rapid dissemination of resistant A. baumannii strains. The scarcity of effective antibiotics and the global expansion of sequencing data make this approach desirable in the development of effective vaccines against A. baumannii and other bacterial pathogens.}, } @article {pmid27633769, year = {2016}, author = {Joseph, SJ and Cox, D and Wolff, B and Morrison, SS and Kozak-Muiznieks, NA and Frace, M and Didelot, X and Castillo-Ramirez, S and Winchell, J and Read, TD and Dean, D}, title = {Dynamics of genome change among Legionella species.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {33442}, pmid = {27633769}, issn = {2045-2322}, support = {MR/K010174/1/MRC_/Medical Research Council/United Kingdom ; R01 AI098843/AI/NIAID NIH HHS/United States ; }, mesh = {Bacterial Secretion Systems/genetics ; Base Sequence ; CRISPR-Cas Systems/genetics ; DNA, Bacterial/genetics ; Gene Transfer, Horizontal/genetics ; Genes, Bacterial ; *Genome, Bacterial ; Genomics ; Legionella/*genetics ; Phylogeny ; Recombination, Genetic/genetics ; Selection, Genetic ; Species Specificity ; }, abstract = {Legionella species inhabit freshwater and soil ecosystems where they parasitize protozoa. L. pneumophila (Lp) serogroup-1 (Lp1) is the major cause of Legionnaires' Disease (LD), a life-threatening pulmonary infection that can spread systemically. The increased global frequency of LD caused by Lp and non-Lp species underscores the need to expand our knowledge of evolutionary forces underlying disease pathogenesis. 
Whole genome analyses of 43 strains, including all known Lp serogroups 1-17 and 17 emergent LD-causing Legionella species (of which 33 were sequenced in this study) in addition to 10 publicly available genomes, resolved the strains into four phylogenetic clades along host virulence demarcations. Clade-specific genes were distinct for genetic exchange and signal-transduction, indicating adaptation to specific cellular and/or environmental niches. CRISPR spacer comparisons hinted at larger pools of accessory DNA sequences in Lp than predicted by the pan-genome analyses. While recombination within Lp was frequent and has been reported previously, population structure analysis identified surprisingly few DNA admixture events between species. In summary, diverse Legionella LD-causing species share a conserved core-genome, are genetically isolated from each other, and selectively acquire genes with potential for enhanced virulence.}, } @article {pmid27607357, year = {2016}, author = {van Opijnen, T and Dedrick, S and Bento, J}, title = {Strain Dependent Genetic Networks for Antibiotic-Sensitivity in a Bacterial Pathogen with a Large Pan-Genome.}, journal = {PLoS pathogens}, volume = {12}, number = {9}, pages = {e1005869}, pmid = {27607357}, issn = {1553-7374}, support = {R01 AI110724/AI/NIAID NIH HHS/United States ; R21 AI117247/AI/NIAID NIH HHS/United States ; U01 AI124302/AI/NIAID NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/*pharmacology ; Daptomycin/*pharmacology ; Drug Resistance, Bacterial/drug effects/*genetics ; Gene Expression Regulation, Bacterial/*drug effects ; Gene Regulatory Networks/*physiology ; *Genome, Bacterial ; Streptococcus pneumoniae/*genetics/growth & development ; }, abstract = {The interaction between an antibiotic and bacterium is not merely restricted to the drug and its direct target, rather antibiotic induced stress seems to resonate through the bacterium, creating selective pressures that drive the emergence of adaptive 
mutations not only in the direct target, but in genes involved in many different fundamental processes as well. Surprisingly, it has been shown that adaptive mutations do not necessarily have the same effect in all species, indicating that the genetic background influences how phenotypes are manifested. However, to what extent the genetic background affects the manner in which a bacterium experiences antibiotic stress, and how this stress is processed is unclear. Here we employ the genome-wide tool Tn-Seq to construct daptomycin-sensitivity profiles for two strains of the bacterial pathogen Streptococcus pneumoniae. Remarkably, over half of the genes that are important for dealing with antibiotic-induced stress in one strain are dispensable in another. By confirming over 100 genotype-phenotype relationships, probing potassium-loss, employing genetic interaction mapping as well as temporal gene-expression experiments we reveal genome-wide conditionally important/essential genes, we discover roles for genes with unknown function, and uncover parts of the antibiotic's mode-of-action. Moreover, by mapping the underlying genomic network for two query genes we encounter little conservation in network connectivity between strains as well as profound differences in regulatory relationships. 
Our approach uniquely enables genome-wide fitness comparisons across strains, facilitating the discovery that antibiotic responses are complex events that can vary widely between strains, which suggests that in some cases the emergence of resistance could be strain specific and at least for species with a large pan-genome less predictable.}, } @article {pmid27595771, year = {2016}, author = {Gómez-Garzón, C and Hernández-Santana, A and Dussán, J}, title = {Comparative genomics reveals Lysinibacillus sphaericus group comprises a novel species.}, journal = {BMC genomics}, volume = {17}, number = {1}, pages = {709}, pmid = {27595771}, issn = {1471-2164}, mesh = {Bacillus/*classification/genetics ; Evolution, Molecular ; Gene Transfer, Horizontal ; Genome, Bacterial ; Genomics/*methods ; Phylogeny ; Sequence Analysis, DNA/*methods ; Species Specificity ; }, abstract = {BACKGROUND: Early in the 1990s, it was recognized that Lysinibacillus sphaericus, one of the most popular and effective entomopathogenic bacteria, was a highly heterogeneous group. Many authors have even proposed it comprises more than one species, but the lack of phenotypic traits that guarantee an accurate differentiation has not allowed this issue to be clarified. Now that genomic technologies are rapidly advancing, it is possible to address the problem from a whole genome perspective, getting insights into the phylogeny, evolutive history and biology itself.

RESULTS: The genome of the Colombian strain L. sphaericus OT4b.49 was sequenced, assembled and annotated, obtaining 3 chromosomal contigs and no evidence of plasmids. Using these sequences and the 13 other L. sphaericus genomes available on the NCBI database, we carried out comparative genomic analyses that included whole genome alignments, searching for mobile elements, phylogenomic metrics (TETRA, ANI and in-silico DDH) and pan-genome assessments. The results support the hypothesis about this species as a very heterogeneous group. The entomopathogenic lineage is actually a single and independent species with 3728 core genes and 2153 accessory genes, whereas each non-toxic strain seems to be a separate species, though without a clear circumscription. Toxin-encoding genes, binA, B and mtx1, 2, 3 could be acquired via horizontal gene transfer in a single evolutionary event. The non-toxic strain OT4b.31 is the most related with the type strain KCTC 3346.

CONCLUSIONS: The current L. sphaericus is actually a sensu lato due to a sub-estimation of diversity accrued using traditional non-genomics based classification strategies. The toxic lineage is the most studied with regards to its larvicidal activity, which is a greatly conserved trait among these strains and thus, their differentiating feature. Further studies are needed in order to establish a univocal classification of the non-toxic strains that, according to our results, seem to be a paraphyletic group.}, } @article {pmid27587666, year = {2016}, author = {Sheikhizadeh, S and Schranz, ME and Akdel, M and de Ridder, D and Smit, S}, title = {PanTools: representation, storage and exploration of pan-genomic data.}, journal = {Bioinformatics (Oxford, England)}, volume = {32}, number = {17}, pages = {i487-i493}, doi = {10.1093/bioinformatics/btw455}, pmid = {27587666}, issn = {1367-4811}, mesh = {*Algorithms ; Arabidopsis ; Computational Biology/methods ; Escherichia coli ; *Genome ; Genome, Bacterial ; Genomics ; *High-Throughput Nucleotide Sequencing ; Humans ; Software ; }, abstract = {MOTIVATION: Next-generation sequencing technology is generating a wealth of highly similar genome sequences for many species, paving the way for a transition from single-genome to pan-genome analyses. Accordingly, genomics research is going to switch from reference-centric to pan-genomic approaches. We define the pan-genome as a comprehensive representation of multiple annotated genomes, facilitating analyses on the similarity and divergence of the constituent genomes at the nucleotide, gene and genome structure level. Current pan-genomic approaches do not thoroughly address scalability, functionality and usability.

RESULTS: We introduce a generalized De Bruijn graph as a pan-genome representation, as well as an online algorithm to construct it. This representation is stored in a Neo4j graph database, which makes our approach scalable to large eukaryotic genomes. Besides the construction algorithm, our software package, called PanTools, currently provides functionality for annotating pan-genomes, adding sequences, grouping genes, retrieving gene sequences or genomic regions, reconstructing genomes and comparing and querying pan-genomes. We demonstrate the performance of the tool using datasets of 62 E. coli genomes, 93 yeast genomes and 19 Arabidopsis thaliana genomes.

The Java implementation of PanTools is publicly available at http://www.bif.wur.nl

CONTACT: sandra.smit@wur.nl.}, } @article {pmid27574119, year = {2017}, author = {Millman, A and Dar, D and Shamir, M and Sorek, R}, title = {Computational prediction of regulatory, premature transcription termination in bacteria.}, journal = {Nucleic acids research}, volume = {45}, number = {2}, pages = {886-893}, pmid = {27574119}, issn = {1362-4962}, support = {260432/ERC_/European Research Council/International ; }, mesh = {Bacteria/*genetics ; *Computer Simulation ; *Gene Expression Regulation, Bacterial ; Machine Learning ; *Models, Biological ; Nucleic Acid Conformation ; RNA, Messenger/chemistry/genetics ; ROC Curve ; *Transcription Termination, Genetic ; }, abstract = {A common strategy for regulation of gene expression in bacteria is conditional transcription termination. This strategy is frequently employed by 5'UTR cis-acting RNA elements (riboregulators), including riboswitches and attenuators. Such riboregulators can assume two mutually exclusive RNA structures, one of which forms a transcriptional terminator and results in premature termination, and the other forms an antiterminator that allows read-through into the coding sequence to produce a full-length mRNA. We developed a machine-learning based approach, which, given a 5'UTR of a gene, predicts whether it can form the two alternative structures typical to riboregulators employing conditional termination. Using a large positive training set of riboregulators derived from 89 human microbiome bacteria, we show high specificity and sensitivity for our classifier. We further show that our approach allows the discovery of previously unidentified riboregulators, as exemplified by the detection of new LeuA leaders and T-boxes in Streptococci. Finally, we developed PASIFIC (www.weizmann.ac.il/molgen/Sorek/PASIFIC/), an online web-server that, given a user-provided 5'UTR sequence, predicts whether this sequence can adopt two alternative structures conforming with the conditional termination paradigm. 
This webserver is expected to assist in the identification of new riboswitches and attenuators in the bacterial pan-genome.}, } @article {pmid27552639, year = {2017}, author = {Browne, P and Tamaki, H and Kyrpides, N and Woyke, T and Goodwin, L and Imachi, H and Bräuer, S and Yavitt, JB and Liu, WT and Zinder, S and Cadillo-Quiroz, H}, title = {Genomic composition and dynamics among Methanomicrobiales predict adaptation to contrasting environments.}, journal = {The ISME journal}, volume = {11}, number = {1}, pages = {87-99}, pmid = {27552639}, issn = {1751-7370}, mesh = {Acclimatization ; Adaptation, Physiological ; Ecosystem ; *Genome, Archaeal ; Genomics ; Methane/metabolism ; Methanomicrobiales/classification/*genetics/isolation & purification/physiology ; Phylogeny ; Soil ; Soil Microbiology ; }, abstract = {Members of the order Methanomicrobiales are abundant, and sometimes dominant, hydrogenotrophic (H2-CO2 utilizing) methanoarchaea in a broad range of anoxic habitats. Despite their key roles in greenhouse gas emissions and waste conversion to methane, little is known about the physiological and genomic bases for their widespread distribution and abundance. In this study, we compared the genomes of nine diverse Methanomicrobiales strains, examined their pangenomes, reconstructed gene flow and identified genes putatively mediating their success across different habitats. Most strains slowly increased gene content whereas one, Methanocorpusculum labreanum, evidenced genome downsizing. Peat-dwelling Methanomicrobiales showed adaptations centered on improved transport of scarce inorganic nutrients and likely use H[+] rather than Na[+] transmembrane chemiosmotic gradients during energy conservation. In contrast, other Methanomicrobiales show the potential to concurrently use Na[+] and H[+] chemiosmotic gradients. 
Analyses also revealed that the Methanomicrobiales lack a canonical electron bifurcation system (MvhABGD) known to produce low potential electrons in other orders of hydrogenotrophic methanogens. Additional putative differences in anabolic metabolism suggest that the dynamics of interspecies electron transfer from Methanomicrobiales syntrophic partners can also differ considerably. Altogether, these findings suggest profound differences in electron trafficking in the Methanomicrobiales compared with other hydrogenotrophs, and warrant further functional evaluations.}, } @article {pmid27536275, year = {2016}, author = {Sun, S and Xiao, J and Zhang, H and Zhang, Z}, title = {Pangenome Evidence for Higher Codon Usage Bias and Stronger Translational Selection in Core Genes of Escherichia coli.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {1180}, pmid = {27536275}, issn = {1664-302X}, abstract = {Codon usage bias, as a combined interplay from mutation and selection, has been intensively studied in Escherichia coli. However, codon usage analysis in an E. coli pangenome remains unexplored and the relative importance of mutation and selection acting on core genes and strain-specific genes is unknown. Here we perform comprehensive codon usage analyses based on a collection of multiple complete genome sequences of E. coli. Our results show that core genes that are present in all strains have higher codon usage bias than strain-specific genes that are unique to single strains. We further explore the forces in influencing codon usage and investigate the difference of the major force between core and strain-specific genes. Our results demonstrate that although mutation may exert genome-wide influences on codon usage acting similarly in different gene sets, selection dominates as an important force to shape biased codon usage as genes are present in an increased number of strains. 
Together, our results provide important insights for better understanding genome plasticity and complexity as well as evolutionary mechanisms behind codon usage bias.}, } @article {pmid27506891, year = {2016}, author = {Platt, JL and Salama, R and Smythies, J and Choudhry, H and Davies, JO and Hughes, JR and Ratcliffe, PJ and Mole, DR}, title = {Capture-C reveals preformed chromatin interactions between HIF-binding sites and distant promoters.}, journal = {EMBO reports}, volume = {17}, number = {10}, pages = {1410-1421}, pmid = {27506891}, issn = {1469-3178}, support = {078333/Z/05/Z/WT_/Wellcome Trust/United Kingdom ; MC_UU_12009/15/MRC_/Medical Research Council/United Kingdom ; RP-2015-06-004/DH_/Department of Health/United Kingdom ; A16016/CRUK_/Cancer Research UK/United Kingdom ; WT091857MA/WT_/Wellcome Trust/United Kingdom ; 090532/Z/09/Z/WT_/Wellcome Trust/United Kingdom ; MR/N00969X/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Algorithms ; Basic Helix-Loop-Helix Transcription Factors/metabolism ; *Binding Sites ; Cell Line, Tumor ; Chromatin/*genetics/*metabolism ; Chromatin Immunoprecipitation ; Cluster Analysis ; Computational Biology/*methods ; Enhancer Elements, Genetic ; Gene Expression Regulation ; Glycolysis ; High-Throughput Nucleotide Sequencing ; Humans ; Hypoxia-Inducible Factor 1/*metabolism ; Organ Specificity/genetics ; *Promoter Regions, Genetic ; Protein Binding ; Transcriptional Activation ; }, abstract = {Hypoxia-inducible factor (HIF) directs an extensive transcriptional cascade that transduces numerous adaptive responses to hypoxia. Pan-genomic analyses, using chromatin immunoprecipitation and transcript profiling, have revealed large numbers of HIF-binding sites that are generally associated with hypoxia-inducible transcripts, even over long chromosomal distances. 
However, these studies do not define the specific targets of HIF-binding sites and do not reveal how induction of HIF affects chromatin conformation over distantly connected functional elements. To address these questions, we deployed a recently developed chromosome conformation assay that enables simultaneous high-resolution analyses from multiple viewpoints. These assays defined specific long-range interactions between intergenic HIF-binding regions and one or more promoters of hypoxia-inducible genes, revealing the existence of multiple enhancer-promoter, promoter-enhancer, and enhancer-enhancer interactions. However, neither short-term activation of HIF by hypoxia, nor long-term stabilization of HIF in von Hippel-Lindau (VHL)-defective cells greatly alters these interactions, indicating that at least under these conditions, HIF can operate on preexisting patterns of chromatin-chromatin interactions that define potential transcriptional targets and permit rapid gene activation by hypoxic stress.}, } @article {pmid27505681, year = {2016}, author = {Zheng, S and Cherniack, AD and Dewal, N and Moffitt, RA and Danilova, L and Murray, BA and Lerario, AM and Else, T and Knijnenburg, TA and Ciriello, G and Kim, S and Assie, G and Morozova, O and Akbani, R and Shih, J and Hoadley, KA and Choueiri, TK and Waldmann, J and Mete, O and Robertson, AG and Wu, HT and Raphael, BJ and Shao, L and Meyerson, M and Demeure, MJ and Beuschlein, F and Gill, AJ and Sidhu, SB and Almeida, MQ and Fragoso, MCBV and Cope, LM and Kebebew, E and Habra, MA and Whitsett, TG and Bussey, KJ and Rainey, WE and Asa, SL and Bertherat, J and Fassnacht, M and Wheeler, DA and {Cancer Genome Atlas Research Network} and Hammer, GD and Giordano, TJ and Verhaak, RGW}, title = {Comprehensive Pan-Genomic Characterization of Adrenocortical Carcinoma.}, journal = {Cancer cell}, volume = {30}, number = {2}, pages = {363}, doi = {10.1016/j.ccell.2016.07.013}, pmid = {27505681}, issn = {1878-3686}, support = {U24 CA143883/CA/NCI NIH HHS/United 
States ; }, } @article {pmid27504980, year = {2016}, author = {Uchiyama, I and Albritton, J and Fukuyo, M and Kojima, KK and Yahara, K and Kobayashi, I}, title = {A Novel Approach to Helicobacter pylori Pan-Genome Analysis for Identification of Genomic Islands.}, journal = {PloS one}, volume = {11}, number = {8}, pages = {e0159419}, pmid = {27504980}, issn = {1932-6203}, mesh = {Chromosomes, Bacterial/genetics ; DNA Transposable Elements/genetics ; DNA, Bacterial/genetics ; Genomic Islands/*genetics ; Genomics/*methods ; Helicobacter pylori/*genetics ; Multigene Family/genetics ; Phylogeny ; RNA-Directed DNA Polymerase/genetics ; }, abstract = {Genomes of a given bacterial species can show great variation in gene content and thus systematic analysis of the entire gene repertoire, termed the pan-genome, is important for understanding bacterial intra-species diversity, population genetics, and evolution. Here, we analyzed the pan-genome from 30 completely sequenced strains of the human gastric pathogen Helicobacter pylori belonging to various phylogeographic groups, focusing on 991 accessory (not fully conserved) orthologous groups (OGs). We developed a method to evaluate the mobility of genes within a genome, using the gene order in the syntenically conserved regions as a reference, and classified the 991 accessory OGs into five classes: Core, Stable, Intermediate, Mobile, and Unique. Phylogenetic networks based on the gene content of Core and Stable classes are highly congruent with that created from the concatenated alignment of fully conserved core genes, in contrast to those of Intermediate and Mobile classes, which show quite different topologies. By clustering the accessory OGs on the basis of phylogenetic pattern similarity and chromosomal proximity, we identified 60 co-occurring gene clusters (CGCs). In addition to known genomic islands, including cag pathogenicity island, bacteriophages, and integrating conjugative elements, we identified some novel ones. 
One island encodes TerY-phosphorylation triad, which includes the eukaryote-type protein kinase/phosphatase gene pair, and components of type VII secretion system. Another one contains a reverse-transcriptase homolog, which may be involved in the defense against phage infection through altruistic suicide. Many of the CGCs contained restriction-modification (RM) genes. Different RM systems sometimes occupied the same (orthologous) locus in the strains. We anticipate that our method will facilitate pan-genome studies in general and help identify novel genomic islands in various bacterial species.}, } @article {pmid27499133, year = {2016}, author = {Pinosio, S and Giacomello, S and Faivre-Rampant, P and Taylor, G and Jorge, V and Le Paslier, MC and Zaina, G and Bastien, C and Cattonaro, F and Marroni, F and Morgante, M}, title = {Characterization of the Poplar Pan-Genome by Genome-Wide Identification of Structural Variation.}, journal = {Molecular biology and evolution}, volume = {33}, number = {10}, pages = {2706-2719}, pmid = {27499133}, issn = {1537-1719}, mesh = {DNA Copy Number Variations ; Genes, Plant ; Genome, Plant ; Genome-Wide Association Study ; Genomics ; INDEL Mutation ; Populus/*genetics ; Structure-Activity Relationship ; }, abstract = {Many recent studies have emphasized the important role of structural variation (SV) in determining human genetic and phenotypic variation. In plants, studies aimed at elucidating the extent of SV are still in their infancy. Evidence has indicated a high presence and an active role of SV in driving plant genome evolution in different plant species. With the aim of characterizing the size and the composition of the poplar pan-genome, we performed a genome-wide analysis of structural variation in three intercrossable poplar species: Populus nigra, Populus deltoides, and Populus trichocarpa. We detected a total of 7,889 deletions and 10,586 insertions relative to the P. 
trichocarpa reference genome, covering respectively 33.2 Mb and 62.9 Mb of genomic sequence, and 3,230 genes affected by copy number variation (CNV). The majority of the detected variants are inter-specific in agreement with a recent origin following separation of species. Insertions and deletions (INDELs) were preferentially located in low-gene density regions of the poplar genome and were, for the majority, associated with the activity of transposable elements. Genes affected by SV showed lower-than-average expression levels and higher levels of dN/dS, suggesting that they are subject to relaxed selective pressure or correspond to pseudogenes. Functional annotation of genes affected by INDELs showed over-representation of categories associated with transposable elements activity, while genes affected by genic CNVs showed enrichment in categories related to resistance to stress and pathogens. This study provides a genome-wide catalogue of SV and the first insight on functional and structural properties of the poplar pan-genome.}, } @article {pmid27476200, year = {2016}, author = {Provorov, NA and Andronov, EE}, title = {[Evolution of Root Nodule Bacteria: Reconstruction of the Speciation Processes Resulting from Genomic Rearrangements in a Symbiotic System].}, journal = {Mikrobiologiia}, volume = {85}, number = {2}, pages = {115-125}, pmid = {27476200}, issn = {0026-3656}, mesh = {*Bacteria/genetics/metabolism ; *Evolution, Molecular ; Genome, Bacterial/*physiology ; Root Nodules, Plant/*microbiology ; Symbiosis/*physiology ; }, abstract = {The processes of speciation and macroevolution of root nodule bacteria (rhizobia), based on deep rearrangements of their genomes and occurring in the N2-fixing symbiotic system, are reconstructed. 
At the first stage of rhizobial evolution, transformation of free-living diazotrophs (related to Rhodopseudomonas) to symbiotic N2-fixers (Bradyrhizobium) occurred due to the acquisition of the fix gene system, which is responsible for providing nitrogenase with electrons and reducing equivalents, as well as for oxygen-dependent regulation of nitrogenase synthesis in planta, and then of the nod genes responsible for the synthesis of the lipo-chito-oligosaccharide Nod factors, which induce root nodule development. The subsequent rearrangements of bacterial genomes included: (1) increased volume of hereditary information supported by species, genera (pan-genome), and individual strains; (2) transition from the unitary genome to a multicomponent one; and (3) enhanced levels of bacterial genetic plasticity and horizontal gene transfer, resulting in formation of new genera, of which Mesorhizobium, Rhizobium, and Sinorhizobium are the largest, and of over 100 species. Rhizobial evolution caused by development and diversification of the Nod factor synthesizing systems may result in both increased host specificity range (transition of Bradyrhizobium from autotrophic to symbiotrophic carbon metabolism in interaction with a broad spectrum of legumes) and its contraction (transition of Rhizobium and Sinorhizobium to "altruistic" interaction with legumes of the galegoid clade). 
Reconstruction of the evolutionary pathway from symbiotic N2-fixers to their free-living ancestors makes it possible to initiate the studies based on up-to-date genome screening technologies and aimed at the issues of genetic integration of organisms into supraspecies complexes, ratios of the macro- and microevolutionary mechanisms, and development of cooperative adaptations based on altruistic relationship between the symbiotic partners.}, } @article {pmid27461509, year = {2016}, author = {Breurec, S and Criscuolo, A and Diancourt, L and Rendueles, O and Vandenbogaert, M and Passet, V and Caro, V and Rocha, EP and Touchon, M and Brisse, S}, title = {Genomic epidemiology and global diversity of the emerging bacterial pathogen Elizabethkingia anophelis.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {30379}, pmid = {27461509}, issn = {2045-2322}, support = {281605/ERC_/European Research Council/International ; }, mesh = {Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; DNA Transposable Elements ; Evolution, Molecular ; Flavobacteriaceae/classification/*genetics/pathogenicity ; Gene Transfer, Horizontal ; *Genetic Variation ; *Genome, Bacterial ; *Phylogeny ; Virulence/genetics ; }, abstract = {Elizabethkingia anophelis is an emerging pathogen involved in human infections and outbreaks in distinct world regions. We investigated the phylogenetic relationships and pathogenesis-associated genomic features of two neonatal meningitis isolates isolated 5 years apart from one hospital in Central African Republic and compared them with Elizabethkingia from other regions and sources. Average nucleotide identity firmly confirmed that E. anophelis, E. meningoseptica and E. miricola represent demarcated genomic species. A core genome multilocus sequence typing scheme, broadly applicable to Elizabethkingia species, was developed and made publicly available (http://bigsdb.pasteur.fr/elizabethkingia). 
Phylogenetic analysis revealed distinct E. anophelis sublineages and demonstrated high genetic relatedness between the African isolates, compatible with persistence of the strain in the hospital environment. CRISPR spacer variation between the African isolates was mirrored by the presence of a large mobile genetic element. The pan-genome of E. anophelis comprised 6,880 gene families, underlining genomic heterogeneity of this species. African isolates carried unique resistance genes acquired by horizontal transfer. We demonstrated the presence of extensive variation of the capsular polysaccharide synthesis gene cluster in E. anophelis. Our results demonstrate the dynamic evolution of this emerging pathogen and the power of genomic approaches for Elizabethkingia identification, population biology and epidemiology.}, } @article {pmid27460800, year = {2016}, author = {Ray, A and Kinch, LN and de Souza Santos, M and Grishin, NV and Orth, K and Salomon, D}, title = {Proteomics Analysis Reveals Previously Uncharacterized Virulence Factors in Vibrio proteolyticus.}, journal = {mBio}, volume = {7}, number = {4}, pages = {}, pmid = {27460800}, issn = {2150-7511}, support = {K99 AI116948/AI/NIAID NIH HHS/United States ; R01 AI056404/AI/NIAID NIH HHS/United States ; R01 GM094575/GM/NIGMS NIH HHS/United States ; T32 DK007745/DK/NIDDK NIH HHS/United States ; }, mesh = {Animals ; Aquatic Organisms/chemistry ; Cell Survival/drug effects ; Cytoskeleton/metabolism ; Epithelial Cells/microbiology/physiology ; HeLa Cells ; Hemolysin Proteins/*analysis/metabolism ; Humans ; Macrophages/microbiology/physiology ; Mice ; *Proteomics ; RAW 264.7 Cells ; Vibrio/*chemistry ; Virulence Factors/*analysis ; }, abstract = {UNLABELLED: Members of the genus Vibrio include many pathogens of humans and marine animals that share genetic information via horizontal gene transfer. 
Hence, the Vibrio pan-genome carries the potential to establish new pathogenic strains by sharing virulence determinants, many of which have yet to be characterized. Here, we investigated the virulence properties of Vibrio proteolyticus, a Gram-negative marine bacterium previously identified as part of the Vibrio consortium isolated from diseased corals. We found that V. proteolyticus causes actin cytoskeleton rearrangements followed by cell lysis in HeLa cells in a contact-independent manner. In search of the responsible virulence factor involved, we determined the V. proteolyticus secretome. This proteomics approach revealed various putative virulence factors, including active type VI secretion systems and effectors with virulence toxin domains; however, these type VI secretion systems were not responsible for the observed cytotoxic effects. Further examination of the V. proteolyticus secretome led us to hypothesize and subsequently demonstrate that a secreted hemolysin, belonging to a previously uncharacterized clan of the leukocidin superfamily, was the toxin responsible for the V. proteolyticus-mediated cytotoxicity in both HeLa cells and macrophages. Clearly, there remains an armory of yet-to-be-discovered virulence factors in the Vibrio pan-genome that will undoubtedly provide a wealth of knowledge on how a pathogen can manipulate host cells.

IMPORTANCE: The pan-genome of the genus Vibrio is a potential reservoir of unidentified toxins that can provide insight into how members of this genus have successfully risen as emerging pathogens worldwide. We focused on Vibrio proteolyticus, a marine bacterium that was previously implicated in virulence toward marine animals, and characterized its interaction with eukaryotic cells. We found that this bacterium causes actin cytoskeleton rearrangements and leads to cell death. Using a proteomics approach, we identified a previously unstudied member of the leukocidin family of pore-forming toxins as the virulence factor responsible for the observed cytotoxicity in eukaryotic cells, as well as a plethora of additional putative virulence factors secreted by this bacterium. Our findings reveal a functional new clan of the leukocidin toxin superfamily and establish this pathogen as a reservoir of potential toxins that can be used for biomedical applications.}, } @article {pmid27446038, year = {2016}, author = {Tian, X and Zhang, Z and Yang, T and Chen, M and Li, J and Chen, F and Yang, J and Li, W and Zhang, B and Zhang, Z and Wu, J and Zhang, C and Long, L and Xiao, J}, title = {Comparative Genomics Analysis of Streptomyces Species Reveals Their Adaptation to the Marine Environment and Their Diversity at the Genomic Level.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {998}, pmid = {27446038}, issn = {1664-302X}, abstract = {Over 200 genomes of streptomycete strains that were isolated from various environments are available from the NCBI. However, little is known about the characteristics that are linked to marine adaptation in marine-derived streptomycetes. The particularity and complexity of the marine environment suggest that marine streptomycetes are genetically diverse. Here, we sequenced nine strains from the Streptomyces genus that were isolated from different longitudes, latitudes, and depths of the South China Sea. 
Then we compared these strains to 22 NCBI downloaded streptomycete strains. Thirty-one streptomycete strains are clearly grouped into a marine-derived subgroup and multiple source subgroup-based phylogenetic tree. The phylogenetic analyses have revealed the dynamic process underlying streptomycete genome evolution, and lateral gene transfer is an important driving force during the process. Pan-genomics analyses have revealed that streptomycetes have an open pan-genome, which reflects the diversity of these streptomycetes and guarantees the species a quick and economical response to diverse environments. Functional and comparative genomics analyses indicate that the marine-derived streptomycetes subgroup possesses some common characteristics of marine adaptation. Our findings have expanded our knowledge of how ocean isolates of streptomycete strains adapt to marine environments. The availability of streptomycete genomes from the South China Sea will be beneficial for further analysis on marine streptomycetes and will enrich the South China Sea's genetic data sources.}, } @article {pmid27446024, year = {2016}, author = {Zheng, Q and Lin, W and Liu, Y and Chen, C and Jiao, N}, title = {A Comparison of 14 Erythrobacter Genomes Provides Insights into the Genomic Divergence and Scattered Distribution of Phototrophs.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {984}, pmid = {27446024}, issn = {1664-302X}, abstract = {Aerobic anoxygenic phototrophic bacteria (AAPB) are bacteriochlorophyll a (Bchl a)-containing microbial functional population. Erythrobacter is the first genus that was identified to contain AAPB species. Here, we compared 14 Erythrobacter genomes: seven phototrophic strains and seven non-phototrophic strains. Interestingly, AAPB strains are scattered in this genus based on their phylogenetic relationships. 
All 14 strains could be clustered into three groups based on phylo-genomic analysis, average genomic nucleotide identity and the phylogeny of signature genes (16S rRNA and virB4 genes). The AAPB strains were distributed in three groups, and gain and loss of phototrophic genes co-occurred in the evolutionary history of the genus Erythrobacter. The organization and structure of photosynthesis gene clusters (PGCs) in seven AAPB genomes displayed high synteny of major regions except for a few insertions. The 14 Erythrobacter genomes had a large range of genome sizes, from 2.72 to 3.60 M, and the sizes of the core and pan-genomes were 1231 and 8170 orthologous clusters, respectively. Integrative and conjugative elements (ICEs) were frequently identified in genomes we studied, which might play significant roles in shaping or contributing to the pan-genome of Erythrobacter. Our findings suggest the ongoing evolutionary divergence of Erythrobacter genomes and the scattered distribution characteristic of PGC.}, } @article {pmid27437028, year = {2016}, author = {Beller, T and Ohlebusch, E}, title = {A representation of a compressed de Bruijn graph for pan-genome analysis that enables search.}, journal = {Algorithms for molecular biology : AMB}, volume = {11}, number = {}, pages = {20}, pmid = {27437028}, issn = {1748-7188}, abstract = {BACKGROUND: Recently, Marcus et al. (Bioinformatics 30:3476-83, 2014) proposed to use a compressed de Bruijn graph to describe the relationship between the genomes of many individuals/strains of the same or closely related species. They devised an $O(n \log g)$ time algorithm called splitMEM that constructs this graph directly (i.e., without using the uncompressed de Bruijn graph) based on a suffix tree, where n is the total length of the genomes and g is the length of the longest genome. Baier et al. (Bioinformatics 32:497-504, 2016) improved their result.

RESULTS: In this paper, we propose a new space-efficient representation of the compressed de Bruijn graph that adds the possibility to search for a pattern (e.g. an allele—a variant form of a gene) within the pan-genome. The ability to search within the pan-genome graph is of utmost importance and is a design goal of pan-genome data structures.}, } @article {pmid27436046, year = {2016}, author = {Goldstone, RJ and Harris, S and Smith, DG}, title = {Genomic content typifying a prevalent clade of bovine mastitis-associated Escherichia coli.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {30115}, pmid = {27436046}, issn = {2045-2322}, support = {//Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Cattle ; Escherichia coli/*genetics ; Escherichia coli Infections/*microbiology ; Escherichia coli Proteins/genetics ; Female ; Genome, Bacterial/genetics ; Genomics/methods ; Mammary Glands, Animal/microbiology ; Mastitis, Bovine/*microbiology ; Phylogeny ; }, abstract = {E. coli represents a heterogeneous population with capabilities to cause disease in several anatomical sites. Among sites that can be colonised is the bovine mammary gland (udder) and a distinct class of mammary pathogenic E. coli (MPEC) has been proposed. MPEC are the principal causative agents of bovine mastitis in well-managed dairy farms, costing producers in the European Union an estimated €2 billion per year. Despite the economic impact, and the threat this disease presents to small and medium sized dairy farmers, the factors which mediate the ability for E. coli to thrive in bovine mammary tissue remain poorly elucidated. Strains belonging to E. coli phylogroup A are most frequently isolated from mastitis. In this paper, we apply a population level genomic analysis to this group of E. coli to uncover genomic signatures of mammary infectivity. Through a robust statistical analysis, we show that not all strains of E. 
coli are equally likely to cause mastitis, and those that do possess specific gene content that may promote their adaptation and survival in the bovine udder. Through a pan-genomic analysis, we identify just three genetic loci which are ubiquitous in MPEC, but appear dispensable for E. coli from other niches.}, } @article {pmid27420027, year = {2017}, author = {Porter, SS and Chang, PL and Conow, CA and Dunham, JP and Friesen, ML}, title = {Association mapping reveals novel serpentine adaptation gene clusters in a population of symbiotic Mesorhizobium.}, journal = {The ISME journal}, volume = {11}, number = {1}, pages = {248-262}, pmid = {27420027}, issn = {1751-7370}, mesh = {Acclimatization ; Adaptation, Physiological/genetics ; Bacterial Proteins/genetics/*metabolism ; Chromosome Mapping ; Ecotype ; Genetic Variation ; Genome-Wide Association Study ; Genomics ; Mesorhizobium/classification/genetics/isolation & purification/*physiology ; Secologanin Tryptamine Alkaloids/*metabolism ; Soil Microbiology ; }, abstract = {The genetic variants that underlie microbial environmental adaptation are key components of models of microbial diversification. Characterizing adaptive variants and the pangenomic context in which they evolve remains a frontier in understanding how microbial diversity is generated. The genomics of rhizobium adaptation to contrasting soil environments is ecologically and agriculturally important because these bacteria are responsible for half of all current biologically fixed nitrogen, yet they live the majority of their lives in soil. Our study uses whole-genome sequencing to describe the pan-genome of a focal clade of wild mesorhizobia that show contrasting levels of nickel adaptation despite high relatedness (99.8% identity at 16S). We observe ecotypic specialization within an otherwise genomically cohesive population, rather than finding distinct specialized bacterial lineages in contrasting soil types. 
This finding supports recent reports that heterogeneous environments impose selection that maintains differentiation only at a small fraction of the genome. Our work further uses a genome-wide association study to propose candidate genes for nickel adaptation. Several candidates show homology to genetic systems involved in nickel tolerance and one cluster of candidates correlates perfectly with soil origin, which validates our approach of ascribing genomic variation to adaptive divergence.}, } @article {pmid27381510, year = {2016}, author = {Pierron, A and Mimoun, S and Murate, LS and Loiseau, N and Lippi, Y and Bracarense, AP and Schatzmayr, G and He, JW and Zhou, T and Moll, WD and Oswald, IP}, title = {Microbial biotransformation of DON: molecular basis for reduced toxicity.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {29105}, pmid = {27381510}, issn = {2045-2322}, mesh = {Animals ; Bacteria/drug effects/genetics/*metabolism ; *Biotransformation ; Caco-2 Cells ; Epithelial Cells/drug effects ; Gene Expression Regulation/drug effects ; Humans ; Intestines/chemistry/drug effects ; Mitogen-Activated Protein Kinases/*genetics ; Oxygen Consumption/genetics ; Ribosomes/drug effects/genetics ; Signal Transduction/drug effects/genetics ; Swine ; Transcriptome/drug effects/genetics ; Trichothecenes/chemistry/*toxicity ; }, abstract = {Bacteria are able to de-epoxidize or epimerize deoxynivalenol (DON), a mycotoxin, to deepoxy-deoxynivalenol (deepoxy-DON or DOM-1) or 3-epi-deoxynivalenol (3-epi-DON), respectively. Using different approaches, the intestinal toxicity of 3 molecules was compared and the molecular basis for the reduced toxicity investigated. In human intestinal epithelial cells, deepoxy-DON and 3-epi-DON were not cytotoxic, did not change the oxygen consumption or impair the barrier function. 
In intestinal explants, exposure for 4 hours to 10 μM DON induced intestinal lesions not seen in explants treated with deepoxy-DON and 3-epi-DON. A pan-genomic transcriptomic analysis was performed on intestinal explants. 747 probes, representing 323 genes, were differentially expressed, between DON-treated and control explants. By contrast, no differentially expressed genes were observed between control, deepoxy-DON and 3-epi-DON treated explants. Both DON and its biotransformation products were able to fit into the pockets of the A-site of the ribosome peptidyl transferase center. DON forms three hydrogen bonds with the A site and activates MAPKinases (mitogen-activated protein kinases). By contrast deepoxy-DON and 3-epi-DON only form two hydrogen bonds and do not activate MAPKinases. Our data demonstrate that bacterial de-epoxidation or epimerization of DON altered their interaction with the ribosome, leading to an absence of MAPKinase activation and a reduced toxicity.}, } @article {pmid27363390, year = {2016}, author = {Thakur, S and Guttman, DS}, title = {A De-Novo Genome Analysis Pipeline (DeNoGAP) for large-scale comparative prokaryotic genomics studies.}, journal = {BMC bioinformatics}, volume = {17}, number = {1}, pages = {260}, pmid = {27363390}, issn = {1471-2105}, mesh = {Algorithms ; Amino Acid Sequence ; Cluster Analysis ; Computational Biology ; *Genome ; Genomics/*methods ; Markov Chains ; Molecular Sequence Annotation ; Prokaryotic Cells/*metabolism ; Reproducibility of Results ; Sequence Homology, Nucleic Acid ; *Software ; }, abstract = {BACKGROUND: Comparative analysis of whole genome sequence data from closely related prokaryotic species or strains is becoming an increasingly important and accessible approach for addressing both fundamental and applied biological questions. 
While there are a number of excellent tools developed for performing this task, most scale poorly when faced with hundreds of genome sequences, and many require extensive manual curation.

RESULTS: We have developed a de-novo genome analysis pipeline (DeNoGAP) for the automated, iterative and high-throughput analysis of data from comparative genomics projects involving hundreds of whole genome sequences. The pipeline is designed to perform reference-assisted and de novo gene prediction, homolog protein family assignment, ortholog prediction, functional annotation, and pan-genome analysis using a range of proven tools and databases. While most existing methods scale quadratically with the number of genomes since they rely on pairwise comparisons among predicted protein sequences, DeNoGAP scales linearly since the homology assignment is based on iteratively refined hidden Markov models. This iterative clustering strategy enables DeNoGAP to handle a very large number of genomes using minimal computational resources. Moreover, the modular structure of the pipeline permits easy updates as new analysis programs become available.

CONCLUSION: DeNoGAP integrates bioinformatics tools and databases for comparative analysis of a large number of genomes. The pipeline offers tools and algorithms for annotation and analysis of completed and draft genome sequences. The pipeline is developed using Perl, BioPerl and SQLite on Ubuntu Linux version 12.04 LTS. Currently, the software package accompanies script for automated installation of necessary external programs on Ubuntu Linux; however, the pipeline should be also compatible with other Linux and Unix systems after necessary external programs are installed. DeNoGAP is freely available at https://sourceforge.net/projects/denogap/ .}, } @article {pmid27358423, year = {2016}, author = {Ceapa, C and Davids, M and Ritari, J and Lambert, J and Wels, M and Douillard, FP and Smokvina, T and de Vos, WM and Knol, J and Kleerebezem, M}, title = {The Variable Regions of Lactobacillus rhamnosus Genomes Reveal the Dynamic Evolution of Metabolic and Host-Adaptation Repertoires.}, journal = {Genome biology and evolution}, volume = {8}, number = {6}, pages = {1889-1905}, pmid = {27358423}, issn = {1759-6653}, support = {250172/ERC_/European Research Council/International ; }, mesh = {CRISPR-Cas Systems ; Carbohydrate Metabolism/*genetics ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Lacticaseibacillus rhamnosus/*genetics ; Molecular Sequence Annotation ; Phylogeny ; }, abstract = {Lactobacillus rhamnosus is a diverse Gram-positive species with strains isolated from different ecological niches. Here, we report the genome sequence analysis of 40 diverse strains of L. rhamnosus and their genomic comparison, with a focus on the variable genome. Genomic comparison of 40 L. rhamnosus strains discriminated the conserved genes (core genome) and regions of plasticity involving frequent rearrangements and horizontal transfer (variome). The L. 
rhamnosus core genome encompasses 2,164 genes, out of 4,711 genes in total (the pan-genome). The accessory genome is dominated by genes encoding carbohydrate transport and metabolism, extracellular polysaccharides (EPS) biosynthesis, bacteriocin production, pili production, the cas system, and the associated clustered regularly interspaced short palindromic repeat (CRISPR) loci, and more than 100 transporter functions and mobile genetic elements like phages, plasmid genes, and transposons. A clade distribution based on amino acid differences between core (shared) proteins matched with the clade distribution obtained from the presence-absence of variable genes. The phylogenetic and variome tree overlap indicated that frequent events of gene acquisition and loss dominated the evolutionary segregation of the strains within this species, which is paralleled by evolutionary diversification of core gene functions. The CRISPR-Cas system could have contributed to this evolutionary segregation. Lactobacillus rhamnosus strains contain the genetic and metabolic machinery with strain-specific gene functions required to adapt to a large range of environments. A remarkable congruency of the evolutionary relatedness of the strains' core and variome functions, possibly favoring interspecies genetic exchanges, underlines the importance of gene-acquisition and loss within the L. 
rhamnosus strain diversification.}, } @article {pmid27341059, year = {2016}, author = {Bou Khalil, JY and Andreani, J and Raoult, D and La Scola, B}, title = {A Rapid Strategy for the Isolation of New Faustoviruses from Environmental Samples Using Vermamoeba vermiformis.}, journal = {Journal of visualized experiments : JoVE}, volume = {}, number = {112}, pages = {}, pmid = {27341059}, issn = {1940-087X}, mesh = {Amoeba ; Genotype ; Viruses/*isolation & purification ; }, abstract = {The isolation of giant viruses is of great interest in this new era of virology, especially since these giant viruses are related to protists. Giant viruses may be potentially pathogenic for many species of protists. They belong to the recently described order of Megavirales. The new lineage Faustovirus that has been isolated from sewage samples is distantly related to the mammalian pathogen African swine fever virus. This virus is also specific to its amoebal host, Vermamoeba vermiformis, a protist common in health care water systems. It is crucial to continue isolating new Faustovirus genotypes in order to enlarge its genotype collection and study its pan-genome. We developed new strategies for the isolation of additional strains by improving the use of antibiotic and antifungal combinations in order to avoid bacterial and fungal contaminations of the amoeba co-culture and favoring the virus multiplication. We also implemented a new starvation medium to maintain V. vermiformis in optimal conditions for viruses co-culture. Finally, we used flow cytometry rather than microscopic observation, which is time-consuming, to detect the cytopathogenic effect. 
We obtained two isolates from sewage samples, proving the efficiency of this method and thus widening the collection of Faustoviruses, to better understand their environment, host specificity and genetic content.}, } @article {pmid27316954, year = {2016}, author = {Perez, M and Juniper, SK}, title = {Insights into Symbiont Population Structure among Three Vestimentiferan Tubeworm Host Species at Eastern Pacific Spreading Centers.}, journal = {Applied and environmental microbiology}, volume = {82}, number = {17}, pages = {5197-5205}, pmid = {27316954}, issn = {1098-5336}, mesh = {Animals ; Biodiversity ; Biological Evolution ; Gammaproteobacteria/classification/genetics/*isolation & purification/*physiology ; Host Specificity ; Hydrothermal Vents/microbiology ; Polychaeta/classification/*microbiology/physiology ; Seawater/microbiology ; Symbiosis ; }, abstract = {UNLABELLED: The symbiotic relationship between vestimentiferan tubeworms and their intracellular chemosynthetic bacteria is one of the more noteworthy examples of adaptation to deep-sea hydrothermal vent environments. The tubeworm symbionts have never been cultured in the laboratory. Nucleotide sequences from the small subunit rRNA gene suggest that the intracellular symbionts of the eastern Pacific vent tubeworms Oasisia alvinae, Riftia pachyptila, Tevnia jerichonana, and Ridgeia piscesae belong to the same phylotype of gammaproteobacteria, "Candidatus Endoriftia persephone." Comparisons of symbiont genomes between the East Pacific Rise tubeworms R. pachyptila and T. jerichonana confirmed that these two hosts share the same symbionts. Two Ridgeia symbiont genomes were assembled from trophosome metagenomes from worms collected from the Juan de Fuca Ridge (one and five individuals, respectively). We compared these assemblies to those of the sequenced Riftia and Tevnia symbionts. 
Pangenome composition, genome-wide comparisons of the nucleotide sequences, and pairwise comparisons of 2,313 orthologous genes indicated that "Ca. Endoriftia persephone" symbionts are structured on large geographical scales but also on smaller scales and possibly through host specificity.

IMPORTANCE: Remarkably, the intracellular symbionts of four to six species of eastern Pacific vent tubeworms all belong to the same phylotype of gammaproteobacteria, "Candidatus Endoriftia persephone." Understanding the structure, dynamism, and interconnectivity of "Ca. Endoriftia persephone" populations is important to advancing our knowledge of the ecology and evolution of their host worms, which are often keystone species in vent communities. In this paper, we present the first genomes for symbionts associated with the species R. piscesae, from the Juan de Fuca Ridge. We then combine these genomes with published symbiont genomes from the East Pacific Rise tubeworms R. pachyptila and T. jerichonana to develop a portrait of the "Ca. Endoriftia persephone" pangenome and an initial outline of symbiont population structure in the different host species. Our study is the first to apply genome-wide comparisons of "Ca. Endoriftia persephone" assemblies in the context of population genetics and molecular evolution.}, } @article {pmid27286824, year = {2016}, author = {Bosi, E and Monk, JM and Aziz, RK and Fondi, M and Nizet, V and Palsson, BØ}, title = {Comparative genome-scale modelling of Staphylococcus aureus strains identifies strain-specific metabolic capabilities linked to pathogenicity.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {113}, number = {26}, pages = {E3801-9}, pmid = {27286824}, issn = {1091-6490}, support = {R01 GM057089/GM/NIGMS NIH HHS/United States ; R01 GM098105/GM/NIGMS NIH HHS/United States ; U01 AI124316/AI/NIAID NIH HHS/United States ; U54 HD071600/HD/NICHD NIH HHS/United States ; }, mesh = {*Genome, Bacterial ; Models, Molecular ; Species Specificity ; Staphylococcus aureus/*genetics/growth & development/metabolism/pathogenicity ; Virulence Factors/genetics ; }, abstract = {Staphylococcus aureus is a preeminent bacterial pathogen capable of colonizing diverse ecological niches within its 
human host. We describe here the pangenome of S. aureus based on analysis of genome sequences from 64 strains of S. aureus spanning a range of ecological niches, host types, and antibiotic resistance profiles. Based on this set, S. aureus is expected to have an open pangenome composed of 7,411 genes and a core genome composed of 1,441 genes. Metabolism was highly conserved in this core genome; however, differences were identified in amino acid and nucleotide biosynthesis pathways between the strains. Genome-scale models (GEMs) of metabolism were constructed for the 64 strains of S. aureus. These GEMs enabled a systems approach to characterizing the core metabolic and panmetabolic capabilities of the S. aureus species. All models were predicted to be auxotrophic for the vitamins niacin (vitamin B3) and thiamin (vitamin B1), whereas strain-specific auxotrophies were predicted for riboflavin (vitamin B2), guanosine, leucine, methionine, and cysteine, among others. GEMs were used to systematically analyze growth capabilities in more than 300 different growth-supporting environments. The results identified metabolic capabilities linked to pathogenic traits and virulence acquisitions. Such traits can be used to differentiate strains responsible for mild vs. severe infections and preference for hosts (e.g., animals vs. humans). 
Genome-scale analysis of multiple strains of a species can thus be used to identify metabolic determinants of virulence and increase our understanding of why certain strains of this deadly pathogen have spread rapidly throughout the world.}, } @article {pmid27252683, year = {2016}, author = {Bezuidt, OK and Pierneef, R and Gomri, AM and Adesioye, F and Makhalanyane, TP and Kharroub, K and Cowan, DA}, title = {The Geobacillus Pan-Genome: Implications for the Evolution of the Genus.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {723}, pmid = {27252683}, issn = {1664-302X}, abstract = {The genus Geobacillus is comprised of a diverse group of spore-forming Gram-positive thermophilic bacterial species and is well known for both its ecological diversity and as a source of novel thermostable enzymes. Although the mechanisms underlying the thermophilicity of the organism and the thermostability of its macromolecules are reasonably well understood, relatively little is known of the evolutionary mechanisms, which underlie the structural and functional properties of members of this genus. In this study, we have compared 29 Geobacillus genomes, with a specific focus on the elements, which comprise the conserved core and flexible genomes. Based on comparisons of conserved core and flexible genomes, we present evidence of habitat delineation with specific Geobacillus genomes linked to specific niches. Our analysis revealed that Geobacillus and Anoxybacillus share a high proportion of genes. 
Moreover, the results strongly suggest that horizontal gene transfer is a major factor deriving the evolution of Geobacillus from Bacillus, with genetic contributions from other phylogenetically distant taxa.}, } @article {pmid27230650, year = {2016}, author = {Rodríguez-Blanco, A and Lemos, ML and Osorio, CR}, title = {Unveiling the pan-genome of the SXT/R391 family of ICEs: molecular characterisation of new variable regions of SXT/R391-like ICEs detected in Pseudoalteromonas sp. and Vibrio scophthalmi.}, journal = {Antonie van Leeuwenhoek}, volume = {109}, number = {8}, pages = {1141-1152}, doi = {10.1007/s10482-016-0716-3}, pmid = {27230650}, issn = {1572-9699}, mesh = {Animals ; Aquaculture ; Bacterial Proteins/genetics ; Base Sequence ; *Conjugation, Genetic ; DNA Replication ; DNA Transposable Elements ; DNA, Bacterial/genetics ; Fishes/*microbiology ; Gene Transfer, Horizontal ; Genes, Bacterial ; Genome, Bacterial ; Phylogeny ; Pseudoalteromonas/*genetics ; Sequence Analysis, DNA ; Vibrio/*genetics ; }, abstract = {Integrating conjugative elements (ICEs) of the SXT/R391 family have been identified in fish-isolated bacterial strains collected from marine aquaculture environments of the northwestern Iberian Peninsula. Here we analysed the variable regions of two ICEs, one preliminarily characterised in a previous study (ICEVscSpa3) and one newly identified (ICEPspSpa1). Bacterial strains harboring these ICEs were phylogenetically assigned to Vibrio scophthalmi and Pseudoalteromonas sp., thus constituting the first evidence of SXT/R391-like ICEs in the genus Pseudoalteromonas to date. Variable DNA regions, which confer element-specific properties to ICEs of this family, were characterised. Interestingly, the two ICEs contained 29 genes not found in variable DNA insertions of previously described ICEs. 
Most notably, variable gene content for ICEVscSpa3 showed similarity to genes potentially involved in housekeeping functions of replication, nucleotide metabolism and transcription. For these genes, closest homologues were found clustered in the genome of Pseudomonas psychrotolerans L19, suggesting a transfer as a block to ICEVscSpa3. Genes encoding antibiotic resistance, restriction modification systems and toxin/antitoxin systems were absent from hotspots of ICEVscSpa3. In contrast, the variable gene content of ICEPspSpa1 included genes involved in restriction/modification functions in two different hotspots and genes related to ICE maintenance. The present study unveils a relatively large number of novel genes in SXT/R391-ICEs, and demonstrates the major role of ICE elements as contributors to horizontal gene transfer.}, } @article {pmid27189997, year = {2016}, author = {Kubasova, T and Cejkova, D and Matiasovicova, J and Sekelova, Z and Polansky, O and Medvecky, M and Rychlik, I and Juricova, H}, title = {Antibiotic Resistance, Core-Genome and Protein Expression in IncHI1 Plasmids in Salmonella Typhimurium.}, journal = {Genome biology and evolution}, volume = {8}, number = {6}, pages = {1661-1671}, pmid = {27189997}, issn = {1759-6653}, mesh = {Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/*genetics ; *Evolution, Molecular ; Gene Expression Regulation, Bacterial/drug effects ; Genome, Bacterial/genetics ; High-Throughput Nucleotide Sequencing ; Host-Pathogen Interactions/genetics ; Humans ; Plasmids/genetics ; Salmonella Infections/*genetics/microbiology ; Salmonella typhimurium/drug effects/*genetics/pathogenicity ; Sequence Analysis, DNA ; }, abstract = {Conjugative plasmids from the IncHI1 incompatibility group play an important role in transferring antibiotic resistance in Salmonella Typhimurium. However, knowledge of their genome structure or gene expression is limited. 
In this study, we determined the complete nucleotide sequences of four IncHI1 plasmids transferring resistance to antibiotics by two different next generation sequencing protocols and protein expression by mass spectrometry. Sequence data including additional 11 IncHI1 plasmids from GenBank were used for the definition of the IncHI1 plasmid core-genome and pan-genome. The core-genome consisted of approximately 123 kbp and 122 genes while the total pan-genome represented approximately 600 kbp. When the core-genome sequences were used for multiple alignments, the 15 tested IncHI1 plasmids were separated into two main lineages. GC content in core-genome genes was around 46% and 50% in accessory genome genes. A multidrug resistance region present in all 4 sequenced plasmids extended over 20 kbp and, except for tet(B), the genes responsible for antibiotic resistance were those with the highest GC content. IncHI1 plasmids therefore represent replicons that evolved in low GC content bacteria. From their original host, they spread to Salmonella and during this spread these plasmids acquired multiple accessory genes including those coding for antibiotic resistance. Antibiotic-resistance genes belonged to genes with the highest level of expression and were constitutively expressed even in the absence of antibiotics. 
This is the likely mechanism that facilitates host cell survival when antibiotics suddenly emerge in the environment.}, } @article {pmid27189983, year = {2016}, author = {López-Pérez, M and Rodriguez-Valera, F}, title = {Pangenome Evolution in the Marine Bacterium Alteromonas.}, journal = {Genome biology and evolution}, volume = {8}, number = {5}, pages = {1556-1570}, pmid = {27189983}, issn = {1759-6653}, mesh = {Alteromonas/*genetics ; Aquatic Organisms/*genetics ; *Evolution, Molecular ; *Genetic Variation ; Genome, Bacterial ; Molecular Sequence Annotation ; Phylogeny ; Sequence Analysis, DNA ; Species Specificity ; }, abstract = {We have examined a collection of the free-living marine bacterium Alteromonas genomes with cores diverging in average nucleotide identities ranging from 99.98% to 73.35%, i.e., from microbes that can be considered members of a natural clone (like in a clinical epidemiological outbreak) to borderline genus level. The genomes were largely syntenic allowing a precise delimitation of the core and flexible regions in each. The core was 1.4 Mb (ca. 30% of the typical strain genome size). Recombination rates along the core were high among strains belonging to the same species (37.7-83.7% of all nucleotide polymorphisms) but they decreased sharply between species (18.9-5.1%). Regarding the flexible genome, its main expansion occurred within the boundaries of the species, i.e., strains of the same species already have a large and diverse flexible genome. Flexible regions occupy mostly fixed genomic locations. Four large genomic islands are involved in the synthesis of strain-specific glycosydic receptors that we have called glycotypes. These genomic regions are exchanged by homologous recombination within and between species and there is evidence for their import from distant taxonomic units (other genera within the family). 
In addition, several hotspots for integration of gene cassettes by illegitimate recombination are distributed throughout the genome. They code for features that give each clone specific properties to interact with their ecological niche and must flow fast throughout the whole genus as they are found, with nearly identical sequences, in different species. Models for the generation of this genomic diversity involving phage predation are discussed.}, } @article {pmid27165744, year = {2016}, author = {Zheng, S and Cherniack, AD and Dewal, N and Moffitt, RA and Danilova, L and Murray, BA and Lerario, AM and Else, T and Knijnenburg, TA and Ciriello, G and Kim, S and Assie, G and Morozova, O and Akbani, R and Shih, J and Hoadley, KA and Choueiri, TK and Waldmann, J and Mete, O and Robertson, AG and Wu, HT and Raphael, BJ and Shao, L and Meyerson, M and Demeure, MJ and Beuschlein, F and Gill, AJ and Sidhu, SB and Almeida, MQ and Fragoso, MCBV and Cope, LM and Kebebew, E and Habra, MA and Whitsett, TG and Bussey, KJ and Rainey, WE and Asa, SL and Bertherat, J and Fassnacht, M and Wheeler, DA and {Cancer Genome Atlas Research Network} and Hammer, GD and Giordano, TJ and Verhaak, RGW}, title = {Comprehensive Pan-Genomic Characterization of Adrenocortical Carcinoma.}, journal = {Cancer cell}, volume = {29}, number = {5}, pages = {723-736}, pmid = {27165744}, issn = {1878-3686}, support = {P30 CA016672/CA/NCI NIH HHS/United States ; U24 CA143882/CA/NCI NIH HHS/United States ; U54 HG003067/HG/NHGRI NIH HHS/United States ; U24 CA143835/CA/NCI NIH HHS/United States ; P30 CA046592/CA/NCI NIH HHS/United States ; U24 CA143866/CA/NCI NIH HHS/United States ; U24 CA143845/CA/NCI NIH HHS/United States ; U24 CA143799/CA/NCI NIH HHS/United States ; U54 HG003273/HG/NHGRI NIH HHS/United States ; P30 CA008748/CA/NCI NIH HHS/United States ; U24 CA144025/CA/NCI NIH HHS/United States ; U24 CA180951/CA/NCI NIH HHS/United States ; U24 CA143840/CA/NCI NIH HHS/United States ; U24 CA143843/CA/NCI NIH HHS/United States ; U24 
CA210974/CA/NCI NIH HHS/United States ; U24 CA143858/CA/NCI NIH HHS/United States ; U24 CA143848/CA/NCI NIH HHS/United States ; U54 HG003079/HG/NHGRI NIH HHS/United States ; U24 CA143883/CA/NCI NIH HHS/United States ; U24 CA143867/CA/NCI NIH HHS/United States ; U24 CA199461/CA/NCI NIH HHS/United States ; }, mesh = {Adolescent ; Adrenal Cortex Neoplasms/*genetics/pathology/therapy ; Adrenocortical Carcinoma/*genetics/pathology/therapy ; Adult ; Aged ; Aged, 80 and over ; Child ; DNA Methylation ; Disease-Free Survival ; Female ; Gene Expression Profiling/methods ; Gene Expression Regulation, Neoplastic ; Genetic Predisposition to Disease/genetics ; Genome, Human/*genetics ; Genomics/*methods ; Humans ; Male ; Middle Aged ; Mutation ; Outcome Assessment, Health Care ; Prognosis ; Young Adult ; }, abstract = {We describe a comprehensive genomic characterization of adrenocortical carcinoma (ACC). Using this dataset, we expand the catalogue of known ACC driver genes to include PRKAR1A, RPL22, TERF2, CCNE1, and NF1. Genome wide DNA copy-number analysis revealed frequent occurrence of massive DNA loss followed by whole-genome doubling (WGD), which was associated with aggressive clinical course, suggesting WGD is a hallmark of disease progression. Corroborating this hypothesis were increased TERT expression, decreased telomere length, and activation of cell-cycle programs. 
Integrated subtype analysis identified three ACC subtypes with distinct clinical outcome and molecular alterations which could be captured by a 68-CpG probe DNA-methylation signature, proposing a strategy for clinical stratification of patients based on molecular markers.}, } @article {pmid27118061, year = {2016}, author = {Sternes, PR and Borneman, AR}, title = {Consensus pan-genome assembly of the specialised wine bacterium Oenococcus oeni.}, journal = {BMC genomics}, volume = {17}, number = {}, pages = {308}, pmid = {27118061}, issn = {1471-2164}, mesh = {Amino Acid Sequence ; Amino Acids/biosynthesis ; Carbohydrate Metabolism ; Fermentation ; Food Microbiology ; *Genetic Variation ; *Genome, Bacterial ; Genomics/*methods ; Lactic Acid/metabolism ; Molecular Sequence Data ; Oenococcus/*genetics ; Wine/*microbiology ; }, abstract = {BACKGROUND: Oenococcus oeni is a lactic acid bacterium that is specialised for growth in the ecological niche of wine, where it is noted for its ability to perform the secondary, malolactic fermentation that is often required for many types of wine. Expanding the understanding of strain-dependent genetic variations in its small and streamlined genome is important for realising its full potential in industrial fermentation processes.

RESULTS: Whole genome comparison was performed on 191 strains of O. oeni; from this rich source of genomic information consensus pan-genome assemblies of the invariant (core) and variable (flexible) regions of this organism were established. Genetic variation in amino acid biosynthesis and sugar transport and utilisation was found to be common between strains. Furthermore, we characterised previously-unreported intra-specific genetic variations in the natural competence of this microbe.

CONCLUSION: By assembling a consensus pan-genome from a large number of strains, this study provides a tool for researchers to readily compare protein-coding genes across strains and infer functional relationships between genes in conserved syntenic regions. This establishes a foundation for further genetic, and thus phenotypic, research of this industrially-important species.}, } @article {pmid27114887, year = {2016}, author = {Asenjo, F and Olmos, A and Henríquez-Piskulich, P and Polanco, V and Aldea, P and Ugalde, JA and Trombert, AN}, title = {Genome sequencing and analysis of the first complete genome of Lactobacillus kunkeei strain MP2, an Apis mellifera gut isolate.}, journal = {PeerJ}, volume = {4}, number = {}, pages = {e1950}, pmid = {27114887}, issn = {2167-8359}, abstract = {Background. The honey bee (Apis mellifera) is the most important pollinator in agriculture worldwide. However, the number of honey bees has fallen significantly since 2006, becoming a huge ecological problem nowadays. The principal cause is CCD, or Colony Collapse Disorder, characterized by the seemingly spontaneous abandonment of hives by their workers. One of the characteristics of CCD in honey bees is the alteration of the bacterial communities in their gastrointestinal tract, mainly due to the decrease of Firmicutes populations, such as the Lactobacilli. At this time, the causes of these alterations remain unknown. We recently isolated a strain of Lactobacillus kunkeei (L. kunkeei strain MP2) from the gut of Chilean honey bees. L. kunkeei, is one of the most commonly isolated bacterium from the honey bee gut and is highly versatile in different ecological niches. In this study, we aimed to elucidate in detail, the L. kunkeei genetic background and perform a comparative genome analysis with other Lactobacillus species. Methods. L. kunkeei MP2 was originally isolated from the guts of Chilean A. mellifera individuals. 
Genome sequencing was done using Pacific Biosciences single-molecule real-time sequencing technology. De novo assembly was performed using Celera assembler. The genome was annotated using Prokka, and functional information was added using the EggNOG 3.1 database. In addition, genomic islands were predicted using IslandViewer, and pro-phage sequences using PHAST. Comparisons between L. kunkeei MP2 with other L. kunkeei, and Lactobacillus strains were done using Roary. Results. The complete genome of L. kunkeei MP2 comprises one circular chromosome of 1,614,522 nt. with a GC content of 36,9%. Pangenome analysis with 16 L. kunkeei strains, identified 113 unique genes, most of them related to phage insertions. A large and unique region of L. kunkeei MP2 genome contains several genes that encode for phage structural protein and replication components. Comparative analysis of MP2 with other Lactobacillus species, identified several unique genes of L. kunkeei MP2 related with metabolism, biofilm generation, survival under stress conditions, and mobile genetic elements (MGEs). Discussion. The presence of multiple mobile genetic elements, including phage sequences, suggest a high degree of genetic variability in L. kunkeei. Its versatility and ability to survive in different ecological niches (bee guts, flowers, fruits among others) could be given by its genetic capacity to change and adapt to different environments. L. kunkeei could be a new source of Lactobacillus with beneficial properties. Indeed, L. 
kunkeei MP2 could play an important role in honey bee nutrition through the synthesis of components as isoprenoids.}, } @article {pmid27087830, year = {2016}, author = {Holley, G and Wittler, R and Stoye, J}, title = {Bloom Filter Trie: an alignment-free and reference-free data structure for pan-genome storage.}, journal = {Algorithms for molecular biology : AMB}, volume = {11}, number = {}, pages = {3}, pmid = {27087830}, issn = {1748-7188}, abstract = {BACKGROUND: High throughput sequencing technologies have become fast and cheap in the past years. As a result, large-scale projects started to sequence tens to several thousands of genomes per species, producing a high number of sequences sampled from each genome. Such a highly redundant collection of very similar sequences is called a pan-genome. It can be transformed into a set of sequences "colored" by the genomes to which they belong. A colored de Bruijn graph (C-DBG) extracts from the sequences all colored k-mers, strings of length k, and stores them in vertices.

RESULTS: In this paper, we present an alignment-free, reference-free and incremental data structure for storing a pan-genome as a C-DBG: the bloom filter trie (BFT). The data structure allows to store and compress a set of colored k-mers, and also to efficiently traverse the graph. Bloom filter trie was used to index and query different pangenome datasets. Compared to another state-of-the-art data structure, BFT was up to two times faster to build while using about the same amount of main memory. For querying k-mers, BFT was about 52-66 times faster while using about 5.5-14.3 times less memory.

CONCLUSION: We present a novel succinct data structure called the Bloom Filter Trie for indexing a pan-genome as a colored de Bruijn graph. The trie stores k-mers and their colors based on a new representation of vertices that compress and index shared substrings. Vertices use basic data structures for lightweight substrings storage as well as Bloom filters for efficient trie and graph traversals. Experimental results prove better performance compared to another state-of-the-art data structure.

AVAILABILITY: https://www.github.com/GuillaumeHolley/BloomFilterTrie.}, } @article {pmid27075453, year = {2016}, author = {Ghatak, S and Blom, J and Das, S and Sanjukta, R and Puro, K and Mawlong, M and Shakuntala, I and Sen, A and Goesmann, A and Kumar, A and Ngachan, SV}, title = {Pan-genome analysis of Aeromonas hydrophila, Aeromonas veronii and Aeromonas caviae indicates phylogenomic diversity and greater pathogenic potential for Aeromonas hydrophila.}, journal = {Antonie van Leeuwenhoek}, volume = {109}, number = {7}, pages = {945-956}, doi = {10.1007/s10482-016-0693-6}, pmid = {27075453}, issn = {1572-9699}, mesh = {Aeromonas caviae/drug effects/*genetics/pathogenicity ; Aeromonas hydrophila/drug effects/*genetics/pathogenicity ; Aeromonas veronii/drug effects/*genetics/pathogenicity ; Animals ; Anti-Bacterial Agents/pharmacology ; Drug Resistance, Microbial ; Evolution, Molecular ; Gene Transfer, Horizontal ; Genetic Variation ; Genome, Bacterial ; Genotype ; Homologous Recombination ; Humans ; Microbial Sensitivity Tests ; Phylogeny ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Aeromonas species are important pathogens of fishes and aquatic animals capable of infecting humans and other animals via food. Due to the paucity of pan-genomic studies on aeromonads, the present study was undertaken to analyse the pan-genome of three clinically important Aeromonas species (A. hydrophila, A. veronii, A. caviae). Results of pan-genome analysis revealed an open pan-genome for all three species with pan-genome sizes of 9181, 7214 and 6884 genes for A. hydrophila, A. veronii and A. caviae, respectively. Core-genome: pan-genome ratio (RCP) indicated greater genomic diversity for A. hydrophila and interestingly RCP emerged as an effective indicator to gauge genomic diversity which could possibly be extended to other organisms too. 
Phylogenomic network analysis highlighted the influence of homologous recombination and lateral gene transfer in the evolution of Aeromonas spp. Prediction of virulence factors indicated no significant difference among the three species though analysis of pathogenic potential and acquired antimicrobial resistance genes revealed greater hazards from A. hydrophila. In conclusion, the present study highlighted the usefulness of whole genome analyses to infer evolutionary cues for Aeromonas species which indicated considerable phylogenomic diversity for A. hydrophila and hitherto unknown genomic evidence for pathogenic potential of A. hydrophila compared to A. veronii and A. caviae.}, } @article {pmid27071527, year = {2016}, author = {Chaudhari, NM and Gupta, VK and Dutta, C}, title = {BPGA- an ultra-fast pan-genome analysis pipeline.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {24373}, pmid = {27071527}, issn = {2045-2322}, mesh = {*Genome, Bacterial ; High-Throughput Nucleotide Sequencing/*methods ; Phylogeny ; Software ; Streptococcus pyogenes/classification/genetics ; }, abstract = {Recent advances in ultra-high-throughput sequencing technology and metagenomics have led to a paradigm shift in microbial genomics from few genome comparisons to large-scale pan-genome studies at different scales of phylogenetic resolution. Pan-genome studies provide a framework for estimating the genomic diversity of the dataset, determining core (conserved), accessory (dispensable) and unique (strain-specific) gene pool of a species, tracing horizontal gene-flux across strains and providing insight into species evolution. The existing pan genome software tools suffer from various limitations like limited datasets, difficult installation/requirements, inadequate functional features etc. Here we present an ultra-fast computational pipeline BPGA (Bacterial Pan Genome Analysis tool) with seven functional modules. 
In addition to the routine pan genome analyses, BPGA introduces a number of novel features for downstream analyses like core/pan/MLST (Multi Locus Sequence Typing) phylogeny, exclusive presence/absence of genes in specific strains, subset analysis, atypical G + C content analysis and KEGG & COG mapping of core, accessory and unique genes. Other notable features include minimum running prerequisites, freedom to select the gene clustering method, ultra-fast execution, user friendly command line interface and high-quality graphics outputs. The performance of BPGA has been evaluated using a dataset of complete genome sequences of 28 Streptococcus pyogenes strains.}, } @article {pmid27071075, year = {2016}, author = {Castillo, D and Christiansen, RH and Dalsgaard, I and Madsen, L and Espejo, R and Middelboe, M}, title = {Comparative Genome Analysis Provides Insights into the Pathogenicity of Flavobacterium psychrophilum.}, journal = {PloS one}, volume = {11}, number = {4}, pages = {e0152515}, pmid = {27071075}, issn = {1932-6203}, mesh = {Biofilms/growth & development ; Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; Drug Resistance, Bacterial/genetics ; Flavobacterium/*genetics/*pathogenicity ; Sequence Analysis, DNA/*methods ; Virulence/*genetics ; }, abstract = {Flavobacterium psychrophilum is a fish pathogen in salmonid aquaculture worldwide that causes cold water disease (CWD) and rainbow trout fry syndrome (RTFS). Comparative genome analyses of 11 F. psychrophilum isolates representing temporally and geographically distant populations were used to describe the F. psychrophilum pan-genome and to examine virulence factors, prophages, CRISPR arrays, and genomic islands present in the genomes. Analysis of the genomic DNA sequences were complemented with selected phenotypic characteristics of the strains. The pan genome analysis showed that F. psychrophilum could hold at least 3373 genes, while the core genome contained 1743 genes. 
On average, 67 new genes were detected for every new genome added to the analysis, indicating that F. psychrophilum possesses an open pan genome. The putative virulence factors were equally distributed among isolates, independent of geographic location, year of isolation and source of isolates. Only one prophage-related sequence was found which corresponded to the previously described prophage 6H, and appeared in 5 out of 11 isolates. CRISPR array analysis revealed two different loci with dissimilar spacer content, which only matched one sequence in the database, the temperate bacteriophage 6H. Genomic Islands (GIs) were identified in F. psychrophilum isolates 950106-1/1 and CSF 259-93, associated with toxins and antibiotic resistance. Finally, phenotypic characterization revealed a high degree of similarity among the strains with respect to biofilm formation and secretion of extracellular enzymes. Global scale dispersion of virulence factors in the genomes and the abilities for biofilm formation, hemolytic activity and secretion of extracellular enzymes among the strains suggested that F. psychrophilum isolates have a similar mode of action on adhesion, colonization and destruction of fish tissues across large spatial and temporal scales of occurrence. Overall, the genomic characterization and phenotypic properties may provide new insights to the mechanisms of pathogenicity in F. psychrophilum.}, } @article {pmid27065955, year = {2016}, author = {Rivers, AR and Burns, AS and Chan, LK and Moran, MA}, title = {Experimental Identification of Small Non-Coding RNAs in the Model Marine Bacterium Ruegeria pomeroyi DSS-3.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {380}, pmid = {27065955}, issn = {1664-302X}, abstract = {In oligotrophic ocean waters where bacteria are often subjected to chronic nutrient limitation, community transcriptome sequencing has pointed to the presence of highly abundant small RNAs (sRNAs). 
The role of sRNAs in regulating response to nutrient stress was investigated in a model heterotrophic marine bacterium Ruegeria pomeroyi grown in continuous culture under carbon (C) and nitrogen (N) limitation. RNAseq analysis identified 99 putative sRNAs. Sixty-nine were cis-encoded and located antisense to a presumed target gene. Thirty were trans-encoded and initial target prediction was performed computationally. The most prevalent functional roles of genes anti-sense to the cis-sRNAs were transport, cell-cell interactions, signal transduction, and transcriptional regulation. Most sRNAs were transcribed equally under both C and N limitation, and may be involved in a general stress response. However, 14 were regulated differentially between the C and N treatments and may respond to specific nutrient limitations. A network analysis of the predicted target genes of the R. pomeroyi cis-sRNAs indicated that they average fewer connections than typical protein-encoding genes, and appear to be more important in peripheral or niche-defining functions encoded in the pan genome.}, } @article {pmid27048805, year = {2016}, author = {Hemme, CL and Green, SJ and Rishishwar, L and Prakash, O and Pettenato, A and Chakraborty, R and Deutschbauer, AM and Van Nostrand, JD and Wu, L and He, Z and Jordan, IK and Hazen, TC and Arkin, AP and Kostka, JE and Zhou, J}, title = {Lateral Gene Transfer in a Heavy Metal-Contaminated-Groundwater Microbial Community.}, journal = {mBio}, volume = {7}, number = {2}, pages = {e02234-15}, pmid = {27048805}, issn = {2150-7511}, mesh = {Gammaproteobacteria/*genetics/metabolism ; *Gene Transfer, Horizontal ; Groundwater/analysis/*microbiology ; Metals, Heavy/*analysis/metabolism ; Microbiota ; Water Pollutants, Chemical/*analysis/metabolism ; }, abstract = {UNLABELLED: Unraveling the drivers controlling the response and adaptation of biological communities to environmental change, especially anthropogenic activities, is a central but poorly 
understood issue in ecology and evolution. Comparative genomics studies suggest that lateral gene transfer (LGT) is a major force driving microbial genome evolution, but its role in the evolution of microbial communities remains elusive. To delineate the importance of LGT in mediating the response of a groundwater microbial community to heavy metal contamination, representative Rhodanobacter reference genomes were sequenced and compared to shotgun metagenome sequences. 16S rRNA gene-based amplicon sequence analysis indicated that Rhodanobacter populations were highly abundant in contaminated wells with low pHs and high levels of nitrate and heavy metals but remained rare in the uncontaminated wells. Sequence comparisons revealed that multiple geochemically important genes, including genes encoding Fe(2+)/Pb(2+) permeases, most denitrification enzymes, and cytochrome c553, were native to Rhodanobacter and not subjected to LGT. In contrast, the Rhodanobacter pangenome contained a recombinational hot spot in which numerous metal resistance genes were subjected to LGT and/or duplication. In particular, Co(2+)/Zn(2+)/Cd(2+) efflux and mercuric resistance operon genes appeared to be highly mobile within Rhodanobacter populations. Evidence of multiple duplications of a mercuric resistance operon common to most Rhodanobacter strains was also observed. Collectively, our analyses indicated the importance of LGT during the evolution of groundwater microbial communities in response to heavy metal contamination, and a conceptual model was developed to display such adaptive evolutionary processes for explaining the extreme dominance of Rhodanobacter populations in the contaminated groundwater microbiome.

IMPORTANCE: Lateral gene transfer (LGT), along with positive selection and gene duplication, are the three main mechanisms that drive adaptive evolution of microbial genomes and communities, but their relative importance is unclear. Some recent studies suggested that LGT is a major adaptive mechanism for microbial populations in response to changing environments, and hence, it could also be critical in shaping microbial community structure. However, direct evidence of LGT and its rates in extant natural microbial communities in response to changing environments is still lacking. Our results presented in this study provide explicit evidence that LGT played a crucial role in driving the evolution of a groundwater microbial community in response to extreme heavy metal contamination. It appears that acquisition of genes critical for survival, growth, and reproduction via LGT is the most rapid and effective way to enable microorganisms and associated microbial communities to quickly adapt to abrupt harsh environmental stresses.}, } @article {pmid27042991, year = {2016}, author = {Blanc-Mathieu, R and Ogata, H}, title = {DNA repair genes in the Megavirales pangenome.}, journal = {Current opinion in microbiology}, volume = {31}, number = {}, pages = {94-100}, doi = {10.1016/j.mib.2016.03.011}, pmid = {27042991}, issn = {1879-0364}, mesh = {DNA Repair/*genetics ; DNA, Viral/genetics ; Eukaryota/genetics/virology ; Evolution, Molecular ; Genome, Viral/genetics ; Giant Viruses/classification/*genetics/*metabolism ; }, abstract = {The order 'Megavirales' represents a group of eukaryotic viruses with a large genome encoding a few hundred up to two thousand five hundred genes. Several members of Megavirales possess genes involved in major DNA repair pathways. Some of these genes were likely inherited from an ancient virus world and some others were derived from the genomes of their hosts. 
Here we examine molecular phylogenies of key DNA repair enzymes in light of recent hypotheses on the origin of Megavirales, and propose that the last common ancestors of the individual families of the order Megavirales already possessed DNA repair functions to achieve and maintain a moderately large genome and that this repair capacity gradually increased, in a family-dependent manner, during their recent evolution.}, } @article {pmid27037122, year = {2016}, author = {Patel, IR and Gangiredla, J and Lacher, DW and Mammel, MK and Jackson, SA and Lampel, KA and Elkins, CA}, title = {FDA Escherichia coli Identification (FDA-ECID) Microarray: a Pangenome Molecular Toolbox for Serotyping, Virulence Profiling, Molecular Epidemiology, and Phylogeny.}, journal = {Applied and environmental microbiology}, volume = {82}, number = {11}, pages = {3384-3394}, pmid = {27037122}, issn = {1098-5336}, mesh = {Escherichia coli/*classification/*genetics ; Genetic Variation ; Genotyping Techniques/*methods ; Microarray Analysis/*methods ; Molecular Epidemiology/*methods ; Serotyping/*methods ; United States ; United States Food and Drug Administration ; Virulence Factors/*analysis/genetics ; }, abstract = {UNLABELLED: Most Escherichia coli strains are nonpathogenic. However, for clinical diagnosis and food safety analysis, current identification methods for pathogenic E. coli either are time-consuming and/or provide limited information. Here, we utilized a custom DNA microarray with informative genetic features extracted from 368 sequence sets for rapid and high-throughput pathogen identification. The FDA Escherichia coli Identification (FDA-ECID) platform contains three sets of molecularly informative features that together stratify strain identification and relatedness. First, 53 known flagellin alleles, 103 alleles of wzx and wzy, and 5 alleles of wzm provide molecular serotyping utility. Second, 41,932 probe sets representing the pan-genome of E. 
coli provide strain-level gene content information. Third, approximately 125,000 single nucleotide polymorphisms (SNPs) of available whole-genome sequences (WGS) were distilled to 9,984 SNPs capable of recapitulating the E. coli phylogeny. We analyzed 103 diverse E. coli strains with available WGS data, including those associated with past foodborne illnesses, to determine robustness and accuracy. The array was able to accurately identify the molecular O and H serotypes, potentially correcting serological failures and providing better resolution for H-nontypeable/nonmotile phenotypes. In addition, molecular risk assessment was possible with key virulence marker identifications. Epidemiologically, each strain had a unique comparative genomic fingerprint that was extended to an additional 507 food and clinical isolates. Finally, a 99.7% phylogenetic concordance was established between microarray analysis and WGS using SNP-level data for advanced genome typing. Our study demonstrates FDA-ECID as a powerful tool for epidemiology and molecular risk assessment with the capacity to profile the global landscape and diversity of E. coli.

IMPORTANCE: This study describes a robust, state-of-the-art platform developed from available whole-genome sequences of E. coli and Shigella spp. by distilling useful signatures for epidemiology and molecular risk assessment into one assay. The FDA-ECID microarray contains features that enable comprehensive molecular serotyping and virulence profiling along with genome-scale genotyping and SNP analysis. Hence, it is a molecular toolbox that stratifies strain identification and pathogenic potential in the contexts of epidemiology and phylogeny. We applied this tool to strains from food, environmental, and clinical sources, resulting in significantly greater phylogenetic and strain-specific resolution than previously reported for available typing methods.}, } @article {pmid27035119, year = {2016}, author = {Duranti, S and Milani, C and Lugli, GA and Mancabelli, L and Turroni, F and Ferrario, C and Mangifesta, M and Viappiani, A and Sánchez, B and Margolles, A and van Sinderen, D and Ventura, M}, title = {Evaluation of genetic diversity among strains of the human gut commensal Bifidobacterium adolescentis.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {23971}, pmid = {27035119}, issn = {2045-2322}, mesh = {Animals ; Bifidobacterium adolescentis/*genetics/isolation & purification ; Carbohydrates/chemistry ; Cattle ; DNA, Bacterial/genetics ; Diet ; Feces/microbiology ; *Gastrointestinal Microbiome ; *Genetic Variation ; Genome, Bacterial ; Genomics ; Humans ; Intestines/*microbiology ; Milk/microbiology ; Open Reading Frames ; Polysaccharides/chemistry ; Rumen/microbiology ; }, abstract = {Bifidobacteria are members of the human gut microbiota, being numerically dominant in the colon of infants, while also being prevalent in the large intestine of adults. In this study, we determined and analyzed the pan-genome of Bifidobacterium adolescentis, which is one of many bacteria found in the human adult gut microbiota. 
In silico analysis of the genome sequences of eighteen B. adolescentis strains isolated from various environments, such as human milk, human feces and bovine rumen, revealed a high level of genetic variability, resulting in an open pan-genome. Compared to other bifidobacterial taxa such as Bifidobacterium bifidum and Bifidobacterium breve, the more extensive B. adolescentis pan-genome supports the hypothesis that the genetic arsenal of this taxon expanded so as to become more adaptable to the variable and changing ecological niche of the gut. These increased genetic capabilities are particularly evident for genes required for dietary glycan-breakdown.}, } @article {pmid27018858, year = {2016}, author = {Munns, KD and Zaheer, R and Xu, Y and Stanford, K and Laing, CR and Gannon, VP and Selinger, LB and McAllister, TA}, title = {Comparative Genomic Analysis of Escherichia coli O157:H7 Isolated from Super-Shedder and Low-Shedder Cattle.}, journal = {PloS one}, volume = {11}, number = {3}, pages = {e0151673}, pmid = {27018858}, issn = {1932-6203}, mesh = {Animals ; *Bacterial Shedding ; Bacterial Typing Techniques/methods ; Cattle ; Cattle Diseases/microbiology ; Colony Count, Microbial ; Escherichia coli Infections/veterinary ; Escherichia coli O157/classification/*genetics/physiology ; Feces/*microbiology ; Genome, Bacterial/genetics ; Genomics/*methods ; Hydrogen-Ion Concentration ; Phylogeny ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA/methods ; }, abstract = {Cattle are the primary reservoir of the foodborne pathogen Escherichia coli O157:H7, with the concentration and frequency of E. coli O157:H7 shedding varying substantially among individual hosts. The term "super-shedder" has been applied to cattle that shed ≥10(4) cfu E. coli O157:H7/g of feces. Super-shedders have been reported to be responsible for the majority of E. coli O157:H7 shed into the environment. 
The objective of this study was to determine if there are phenotypic and/or genotypic differences between E. coli O157:H7 isolates obtained from super-shedder compared to low-shedder cattle. From a total of 784 isolates, four were selected from low-shedder steers and six isolates from super-shedder steers (4.01-8.45 log cfu/g feces) for whole genome sequencing. Isolates were phage and clade typed, screened for substrate utilization, pH sensitivity, virulence gene profiles and Stx bacteriophage insertion (SBI) sites. A range of 89-2473 total single nucleotide polymorphisms (SNPs) were identified when sequenced strains were compared to E. coli O157:H7 strain Sakai. More non-synonymous SNP mutations were observed in low-shedder isolates. Pan-genomic and SNPs comparisons did not identify genetic segregation between super-shedder or low-shedder isolates. All super-shedder isolates and 3 of 4 of low-shedder isolates were typed as phage type 14a, SBI cluster 3 and SNP clade 2. Super-shedder isolates displayed increased utilization of galactitol, thymidine and 3-O-β-D-galactopyranosyl-D-arabinose when compared to low-shedder isolates, but no differences in SNPs were observed in genes encoding for proteins involved in the metabolism of these substrates. While genetic traits specific to super-shedder isolates were not identified in this study, differences in the level of gene expression or genes of unknown function may still contribute to some strains of E. 
coli O157:H7 reaching high densities within bovine feces.}, } @article {pmid27014232, year = {2016}, author = {Patil, PP and Midha, S and Kumar, S and Patil, PB}, title = {Genome Sequence of Type Strains of Genus Stenotrophomonas.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {309}, pmid = {27014232}, issn = {1664-302X}, abstract = {Genomic resource of type strains and historically important strains of genus Stenotrophomonas allowed us to reveal the existence of 18 distinct species by applying modern phylogenomic criterions. Apart from Stenotrophomonas maltophilia, S. africana represents another species of clinical importance. Interestingly, Pseudomonas hibsicola, P. beteli, and S. pavani that are of plant origin are closer to S. maltophilia than the majority of the environmental isolates. The genus has an open pan-genome. By providing the case study on genes encoding metallo-β-lactamase and Clustered Regularly Interspaced Short Palindrome Repeats (CRISPR) regions, we have tried to show the importance of this genomic dataset in understanding its ecology.}, } @article {pmid27006628, year = {2015}, author = {Guimarães, LC and Florczak-Wyspianska, J and de Jesus, LB and Viana, MV and Silva, A and Ramos, RT and Soares, Sde C}, title = {Inside the Pan-genome - Methods and Software Overview.}, journal = {Current genomics}, volume = {16}, number = {4}, pages = {245-252}, pmid = {27006628}, issn = {1389-2029}, abstract = {The number of genomes that have been deposited in databases has increased exponentially after the advent of Next-Generation Sequencing (NGS), which produces high-throughput sequence data; this circumstance has demanded the development of new bioinformatics software and the creation of new areas, such as comparative genomics. 
In comparative genomics, the genetic content of an organism is compared against other organisms, which helps in the prediction of gene function and coding region sequences, identification of evolutionary events and determination of phylogenetic relationships. However, expanding comparative genomics to a large number of related bacteria, we can infer their lifestyles, gene repertoires and minimal genome size. In this context, a powerful approach called Pan-genome has been initiated and developed. This approach involves the genomic comparison of different strains of the same species, or even genus. Its main goal is to establish the total number of non-redundant genes that are present in a determined dataset. Pan-genome consists of three parts: core genome; accessory or dispensable genome; and species-specific or strain-specific genes. Furthermore, pan-genome is considered to be "open" as long as new genes are added significantly to the total repertoire for each new additional genome and "closed" when the newly added genomes cannot be inferred to significantly increase the total repertoire of the genes. 
To perform all of the required calculations, a substantial amount of software has been developed, based on orthologous and paralogous gene identification.}, } @article {pmid26999001, year = {2016}, author = {Scholz, M and Ward, DV and Pasolli, E and Tolio, T and Zolfo, M and Asnicar, F and Truong, DT and Tett, A and Morrow, AL and Segata, N}, title = {Strain-level microbial epidemiology and population genomics from shotgun metagenomics.}, journal = {Nature methods}, volume = {13}, number = {5}, pages = {435-438}, pmid = {26999001}, issn = {1548-7105}, support = {HHSN272200900018C/AI/NIAID NIH HHS/United States ; }, mesh = {Escherichia coli/classification/genetics/isolation & purification/pathogenicity ; Gene Expression Profiling ; Genome, Bacterial ; Germany ; Humans ; Intestinal Mucosa/*microbiology ; Metagenome/*genetics ; Metagenomics/*methods ; Microbial Consortia/*genetics ; *Phylogeny ; Skin/*microbiology ; Software ; Species Specificity ; }, abstract = {Identifying microbial strains and characterizing their functional potential is essential for pathogen discovery, epidemiology and population genomics. We present pangenome-based phylogenomic analysis (PanPhlAn; http://segatalab.cibio.unitn.it/tools/panphlan), a tool that uses metagenomic data to achieve strain-level microbial profiling resolution. PanPhlAn recognized outbreak strains, produced the largest strain-level population genomic study of human-associated bacteria and, in combination with metatranscriptomics, profiled the transcriptional activity of strains in complex communities.}, } @article {pmid26997279, year = {2016}, author = {Ward, DV and Scholz, M and Zolfo, M and Taft, DH and Schibler, KR and Tett, A and Segata, N and Morrow, AL}, title = {Metagenomic Sequencing with Strain-Level Resolution Implicates Uropathogenic E. 
coli in Necrotizing Enterocolitis and Mortality in Preterm Infants.}, journal = {Cell reports}, volume = {14}, number = {12}, pages = {2912-2924}, pmid = {26997279}, issn = {2211-1247}, support = {U54HG004969/HG/NHGRI NIH HHS/United States ; HHSN272200900018C/AI/NIAID NIH HHS/United States ; R01HD059140/HD/NICHD NIH HHS/United States ; HD27853/HD/NICHD NIH HHS/United States ; P01 HD013021/HD/NICHD NIH HHS/United States ; UG1 HD027853/HD/NICHD NIH HHS/United States ; 5UL1RR026314-03/RR/NCRR NIH HHS/United States ; U10 HD027853/HD/NICHD NIH HHS/United States ; P01HD13021/HD/NICHD NIH HHS/United States ; U54 HG004969/HG/NHGRI NIH HHS/United States ; UL1 TR001425/TR/NCATS NIH HHS/United States ; R01 HD059140/HD/NICHD NIH HHS/United States ; UL1 RR026314/RR/NCRR NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology ; DNA, Bacterial/chemistry/genetics/metabolism ; Drug Resistance, Bacterial/drug effects ; Enterocolitis, Necrotizing/microbiology/mortality/*pathology ; Feces/microbiology ; Female ; Humans ; Infant ; Infant, Newborn ; Infant, Premature ; Intestines/microbiology ; Klebsiella/genetics/isolation & purification/pathogenicity ; Male ; Metagenomics ; Microbiota ; Multilocus Sequence Typing ; Postpartum Period ; Sequence Analysis, DNA ; Survival Rate ; Uropathogenic Escherichia coli/*genetics/isolation & purification/pathogenicity ; }, abstract = {Necrotizing enterocolitis (NEC) afflicts approximately 10% of extremely preterm infants with high fatality. Inappropriate bacterial colonization with Enterobacteriaceae is implicated, but no specific pathogen has been identified. We identify uropathogenic E. coli (UPEC) colonization as a significant risk factor for the development of NEC and subsequent mortality. We describe a large-scale deep shotgun metagenomic sequence analysis of the early intestinal microbiome of 144 preterm and 22 term infants. Using a pan-genomic approach to functionally subtype the E. 
coli, we identify genes associated with NEC and mortality that indicate colonization by UPEC. Metagenomic multilocus sequence typing analysis further defined NEC-associated strains as sequence types often associated with urinary tract infections, including ST69, ST73, ST95, ST127, ST131, and ST144. Although other factors associated with prematurity may also contribute, this report suggests a link between UPEC and NEC and indicates that further attention to these sequence types as potential causal agents is needed.}, } @article {pmid26983005, year = {2016}, author = {Ross, DE and Marshall, CW and May, HD and Norman, RS}, title = {Comparative Genomic Analysis of Sulfurospirillum cavolei MES Reconstructed from the Metagenome of an Electrosynthetic Microbiome.}, journal = {PloS one}, volume = {11}, number = {3}, pages = {e0151214}, pmid = {26983005}, issn = {1932-6203}, mesh = {Epsilonproteobacteria/classification/*genetics ; *Genome, Bacterial ; *Microbiota ; Phylogeny ; }, abstract = {Sulfurospirillum spp. play an important role in sulfur and nitrogen cycling, and contain metabolic versatility that enables reduction of a wide range of electron acceptors, including thiosulfate, tetrathionate, polysulfide, nitrate, and nitrite. Here we describe the assembly of a Sulfurospirillum genome obtained from the metagenome of an electrosynthetic microbiome. The ubiquity and persistence of this organism in microbial electrosynthesis systems suggest it plays an important role in reactor stability and performance. Understanding why this organism is present and elucidating its genetic repertoire provide a genomic and ecological foundation for future studies where Sulfurospirillum are found, especially in electrode-associated communities. Metabolic comparisons and in-depth analysis of unique genes revealed potential ecological niche-specific capabilities within the Sulfurospirillum genus. 
The functional similarities common to all genomes, i.e., core genome, and unique gene clusters found only in a single genome were identified. Based upon 16S rRNA gene phylogenetic analysis and average nucleotide identity, the Sulfurospirillum draft genome was found to be most closely related to Sulfurospirillum cavolei. Characterization of the draft genome described herein provides pathway-specific details of the metabolic significance of the newly described Sulfurospirillum cavolei MES and, importantly, yields insight to the ecology of the genus as a whole. Comparison of eleven sequenced Sulfurospirillum genomes revealed a total of 6246 gene clusters in the pan-genome. Of the total gene clusters, 18.5% were shared among all eleven genomes and 50% were unique to a single genome. While most Sulfurospirillum spp. reduce nitrate to ammonium, five of the eleven Sulfurospirillum strains encode for a nitrous oxide reductase (nos) cluster with an atypical nitrous-oxide reductase, suggesting a utility for this genus in reduction of the nitrous oxide, and as a potential sink for this potent greenhouse gas.}, } @article {pmid26978387, year = {2016}, author = {Dornas, FP and Assis, FL and Aherfi, S and Arantes, T and Abrahão, JS and Colson, P and La Scola, B}, title = {A Brazilian Marseillevirus Is the Founding Member of a Lineage in Family Marseilleviridae.}, journal = {Viruses}, volume = {8}, number = {3}, pages = {76}, pmid = {26978387}, issn = {1999-4915}, mesh = {Brazil ; Cluster Analysis ; DNA Viruses/*genetics/*isolation & purification ; DNA, Viral/*chemistry/*genetics ; *Evolution, Molecular ; Gene Order ; *Genome, Viral ; Open Reading Frames ; *Phylogeny ; Sequence Analysis, DNA ; Sequence Homology ; Synteny ; }, abstract = {In 2003, Acanthamoeba polyphaga mimivirus (APMV) was discovered as parasitizing Acanthamoeba. It was revealed to exhibit remarkable features, especially odd genomic characteristics, and founded viral family Mimiviridae. 
Subsequently, a second family of giant amoebal viruses was described, Marseilleviridae, whose prototype member is Marseillevirus, discovered in 2009. Currently, the genomes of seven different members of this family have been fully sequenced. Previous phylogenetic analysis suggested the existence of three Marseilleviridae lineages: A, B and C. Here, we describe a new member of this family, Brazilian Marseillevirus (BrMV), which was isolated from a Brazilian sample and whose genome was fully sequenced and analyzed. Surprisingly, data from phylogenetic analyses and comparative genomics, including mean amino acid identity between BrMV and other Marseilleviridae members and the analyses of the core genome and pan-genome of marseilleviruses, indicated that this virus can be assigned to a new Marseilleviridae lineage. Even if the BrMV genome is one of the smallest among Marseilleviridae members, it harbors the second largest gene content into this family. In addition, the BrMV genome encodes 29 ORFans. 
Here, we describe the isolation and genome analyses of the BrMV strain, and propose its classification as the prototype virus of a new lineage D within the family Marseilleviridae.}, } @article {pmid26953603, year = {2016}, author = {de Barsy, M and Frandi, A and Panis, G and Théraulaz, L and Pillonel, T and Greub, G and Viollier, PH}, title = {Regulatory (pan-)genome of an obligate intracellular pathogen in the PVC superphylum.}, journal = {The ISME journal}, volume = {10}, number = {9}, pages = {2129-2144}, pmid = {26953603}, issn = {1751-7370}, mesh = {Animals ; Bacterial Proteins/genetics ; Chlamydiales/*genetics ; Chlorocebus aethiops ; Chromatin Immunoprecipitation ; Genome, Bacterial/*genetics ; Genomics ; Phylogeny ; Reproducibility of Results ; Transcription Factors/*genetics ; Vero Cells ; Verrucomicrobia/*genetics ; }, abstract = {Like other obligate intracellular bacteria, the Chlamydiae feature a compact regulatory genome that remains uncharted owing to poor genetic tractability. Exploiting the reduced number of transcription factors (TFs) encoded in the chlamydial (pan-)genome as a model for TF control supporting the intracellular lifestyle, we determined the conserved landscape of TF specificities by ChIP-Seq (chromatin immunoprecipitation-sequencing) in the chlamydial pathogen Waddlia chondrophila. Among 10 conserved TFs, Euo emerged as a master TF targeting >100 promoters through conserved residues in a DNA excisionase-like winged helix-turn-helix-like (wHTH) fold. Minimal target (Euo) boxes were found in conserved developmentally-regulated genes governing vertical genome transmission (cytokinesis and DNA replication) and genome plasticity (transposases). 
Our ChIP-Seq analysis with intracellular bacteria not only reveals that global TF regulation is maintained in the reduced regulatory genomes of Chlamydiae, but also predicts that master TFs interpret genomic information in the obligate intracellular α-proteobacteria, including the rickettsiae, from which modern day mitochondria evolved.}, } @article {pmid26941766, year = {2016}, author = {Figueroa, M and Upadhyaya, NM and Sperschneider, J and Park, RF and Szabo, LJ and Steffenson, B and Ellis, JG and Dodds, PN}, title = {Changing the Game: Using Integrative Genomics to Probe Virulence Mechanisms of the Stem Rust Pathogen Puccinia graminis f. sp. tritici.}, journal = {Frontiers in plant science}, volume = {7}, number = {}, pages = {205}, pmid = {26941766}, issn = {1664-462X}, abstract = {The recent resurgence of wheat stem rust caused by new virulent races of Puccinia graminis f. sp. tritici (Pgt) poses a threat to food security. These concerns have catalyzed an extensive global effort toward controlling this disease. Substantial research and breeding programs target the identification and introduction of new stem rust resistance (Sr) genes in cultivars for genetic protection against the disease. Such resistance genes typically encode immune receptor proteins that recognize specific components of the pathogen, known as avirulence (Avr) proteins. A significant drawback to deploying cultivars with single Sr genes is that they are often overcome by evolution of the pathogen to escape recognition through alterations in Avr genes. Thus, a key element in achieving durable rust control is the deployment of multiple effective Sr genes in combination, either through conventional breeding or transgenic approaches, to minimize the risk of resistance breakdown. In this situation, evolution of pathogen virulence would require changes in multiple Avr genes in order to bypass recognition. 
However, choosing the optimal Sr gene combinations to deploy is a challenge that requires detailed knowledge of the pathogen Avr genes with which they interact and the virulence phenotypes of Pgt existing in nature. Identifying specific Avr genes from Pgt will provide screening tools to enhance pathogen virulence monitoring, assess heterozygosity and propensity for mutation in pathogen populations, and confirm individual Sr gene functions in crop varieties carrying multiple effective resistance genes. Toward this goal, much progress has been made in assembling a high quality reference genome sequence for Pgt, as well as a Pan-genome encompassing variation between multiple field isolates with diverse virulence spectra. In turn this has allowed prediction of Pgt effector gene candidates based on known features of Avr genes in other plant pathogens, including the related flax rust fungus. Upregulation of gene expression in haustoria and evidence for diversifying selection are two useful parameters to identify candidate Avr genes. Recently, we have also applied machine learning approaches to agnostically predict candidate effectors. 
Here, we review progress in stem rust pathogenomics and approaches currently underway to identify Avr genes recognized by wheat Sr genes.}, } @article {pmid26930352, year = {2016}, author = {Pang, B and Du, P and Zhou, Z and Diao, B and Cui, Z and Zhou, H and Kan, B}, title = {The Transmission and Antibiotic Resistance Variation in a Multiple Drug Resistance Clade of Vibrio cholerae Circulating in Multiple Countries in Asia.}, journal = {PloS one}, volume = {11}, number = {3}, pages = {e0149742}, pmid = {26930352}, issn = {1932-6203}, mesh = {Anti-Bacterial Agents/pharmacology ; Asia/epidemiology ; Cholera/epidemiology/microbiology/*transmission ; Disease Outbreaks ; Drug Resistance, Microbial/drug effects/*genetics ; Drug Resistance, Multiple/drug effects/*genetics ; Epidemics ; Genetic Variation ; Genome, Bacterial/genetics ; Phylogeny ; Sequence Analysis, DNA ; Vibrio cholerae/classification/*genetics/physiology ; }, abstract = {Vibrio cholerae has caused massive outbreaks and even trans-continental epidemics. In 2008 and 2010, at least 3 remarkable cholera outbreaks occurred in Hainan, Anhui and Jiangsu provinces of China. To address the possible transmissions and the relationships to the 7th pandemic strains of those 3 outbreaks, we sequenced the whole genomes of the outbreak isolates and compared with the global isolates from the 7th pandemic. The three outbreaks in this study were caused by a cluster of V. cholerae in clade 3.B which is parallel to the clade 3.C that was transmitted from Nepal to Haiti and caused an outbreak in 2010. Pan-genome analysis provided additional evolution information on the mobile element and acquired multiple antibiotic resistance genes. We suggested that clade 3.B should be monitored because the multiple antibiotic resistant characteristics of this clade and the 'amplifier' function of China in the global transmission of current Cholera pandemic. 
We also show that dedicated whole genome sequencing analysis provided more information than the previous techniques and should be applied in the disease surveillance networks.}, } @article {pmid26915094, year = {2016}, author = {Garrido-Sanz, D and Meier-Kolthoff, JP and Göker, M and Martín, M and Rivilla, R and Redondo-Nieto, M}, title = {Genomic and Genetic Diversity within the Pseudomonas fluorescens Complex.}, journal = {PloS one}, volume = {11}, number = {2}, pages = {e0150183}, pmid = {26915094}, issn = {1932-6203}, mesh = {Base Sequence ; DNA, Bacterial/genetics ; Denitrification/physiology ; *Genetic Variation ; Genome, Bacterial/*genetics ; Genomics ; Phylogeny ; Pseudomonas fluorescens/classification/*genetics ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Siderophores/biosynthesis ; Soil Microbiology ; }, abstract = {The Pseudomonas fluorescens complex includes Pseudomonas strains that have been taxonomically assigned to more than fifty different species, many of which have been described as plant growth-promoting rhizobacteria (PGPR) with potential applications in biocontrol and biofertilization. So far the phylogeny of this complex has been analyzed according to phenotypic traits, 16S rDNA, MLSA and inferred by whole-genome analysis. However, since most of the type strains have not been fully sequenced and new species are frequently described, correlation between taxonomy and phylogenomic analysis is missing. In recent years, the genomes of a large number of strains have been sequenced, showing important genomic heterogeneity and providing information suitable for genomic studies that are important to understand the genomic and genetic diversity shown by strains of this complex. Based on MLSA and several whole-genome sequence-based analyses of 93 sequenced strains, we have divided the P. fluorescens complex into eight phylogenomic groups that agree with previous works based on type strains. 
Digital DDH (dDDH) identified 69 species and 75 subspecies within the 93 genomes. The eight groups corresponded to clustering with a threshold of 31.8% dDDH, in full agreement with our MLSA. The Average Nucleotide Identity (ANI) approach showed inconsistencies regarding the assignment to species and to the eight groups. The small core genome of 1,334 CDSs and the large pan-genome of 30,848 CDSs, show the large diversity and genetic heterogeneity of the P. fluorescens complex. However, a low number of strains were enough to explain most of the CDSs diversity at core and strain-specific genomic fractions. Finally, the identification and analysis of group-specific genome and the screening for distinctive characters revealed a phylogenomic distribution of traits among the groups that provided insights into biocontrol and bioremediation applications as well as their role as PGPR.}, } @article {pmid26912404, year = {2016}, author = {Earl, JP and de Vries, SP and Ahmed, A and Powell, E and Schultz, MP and Hermans, PW and Hill, DJ and Zhou, Z and Constantinidou, CI and Hu, FZ and Bootsma, HJ and Ehrlich, GD}, title = {Comparative Genomic Analyses of the Moraxella catarrhalis Serosensitive and Seroresistant Lineages Demonstrate Their Independent Evolution.}, journal = {Genome biology and evolution}, volume = {8}, number = {4}, pages = {955-974}, pmid = {26912404}, issn = {1759-6653}, support = {AI080935/AI/NIAID NIH HHS/United States ; R01 DC002148/DC/NIDCD NIH HHS/United States ; R01 DC005659/DC/NIDCD NIH HHS/United States ; DC02148/DC/NIDCD NIH HHS/United States ; DC05659/DC/NIDCD NIH HHS/United States ; R01 AI080935/AI/NIAID NIH HHS/United States ; }, mesh = {Cell Line ; Evolution, Molecular ; *Genome, Bacterial ; Genomics ; Humans ; Moraxella catarrhalis/*genetics/growth & development ; Moraxellaceae Infections/microbiology ; Multigene Family ; Phylogeny ; Virulence Factors/genetics ; }, abstract = {The bacterial species Moraxella catarrhalis has been hypothesized as 
being composed of two distinct lineages (referred to as the seroresistant [SR] and serosensitive [SS]) with separate evolutionary histories based on several molecular typing methods, whereas 16S ribotyping has suggested an additional split within the SS lineage. Previously, we characterized whole-genome sequences of 12 SR-lineage isolates, which revealed a relatively small supragenome when compared with other opportunistic nasopharyngeal pathogens, suggestive of a relatively short evolutionary history. Here, we performed whole-genome sequencing on 18 strains from both ribotypes of the SS lineage, an additional SR strain, as well as four previously identified highly divergent strains based on multilocus sequence typing analyses. All 35 strains were subjected to a battery of comparative genomic analyses which clearly show that there are three lineages - the SR, SS, and the divergent. The SR and SS lineages are closely related, but distinct from each other based on three different methods of comparison: Allelic differences observed among core genes; possession of lineage-specific sets of core and distributed genes; and by an alignment of concatenated core sequences irrespective of gene annotation. All these methods show that the SS lineage has much longer interstrain branches than the SR lineage indicating that this lineage has likely been evolving either longer or faster than the SR lineage. There is evidence of extensive horizontal gene transfer (HGT) within both of these lineages, and to a lesser degree between them. In particular, we identified very high rates of HGT between these two lineages for β-lactamase genes. The four divergent strains are sui generis, being much more distantly related to both the SR and SS groups than these other two groups are to each other. Based on average nucleotide identities, gene content, GC content, and genome size, this group could be considered as a separate taxonomic group. 
The SR and SS lineages, although distinct, clearly form a single species based on multiple criteria including a large common core genome, average nucleotide identity values, GC content, and genome size. Although neither of these lineages arose from within the other based on phylogenetic analyses, the question of how and when these lineages split and then subsequently reunited in the human nasopharynx is explored.}, } @article {pmid26903955, year = {2016}, author = {Gómez-Lunar, Z and Hernández-González, I and Rodríguez-Torres, MD and Souza, V and Olmedo-Álvarez, G}, title = {Microevolution Analysis of Bacillus coahuilensis Unveils Differences in Phosphorus Acquisition Strategies and Their Regulation.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {58}, pmid = {26903955}, issn = {1664-302X}, abstract = {Bacterial genomes undergo numerous events of gene losses and gains that generate genome variability among strains of the same species (microevolution). Our aim was to compare the genomes and relevant phenotypes of three Bacillus coahuilensis strains from two oligotrophic hydrological systems in the Cuatro Ciénegas Basin (México), to unveil the environmental challenges that this species cope with, and the microevolutionary differences in these genotypes. Since the strains were isolated from a low P environment, we placed emphasis on the search of different phosphorus acquisition strategies. The three B. coahuilensis strains exhibited similar numbers of coding DNA sequences, of which 82% (2,893) constituted the core genome, and 18% corresponded to accessory genes. Most of the genes in this last group were associated with mobile genetic elements (MGEs) or were annotated as hypothetical proteins. Ten percent of the pangenome consisted of strain-specific genes. Alignment of the three B. coahuilensis genomes indicated a high level of synteny and revealed the presence of several genomic islands. 
Unexpectedly, one of these islands contained genes that encode the 2-keto-3-deoxymannooctulosonic acid (Kdo) biosynthesis enzymes, a feature associated to cell walls of Gram-negative bacteria. Some microevolutionary changes were clearly associated with MGEs. Our analysis revealed inconsistencies between phenotype and genotype, which we suggest result from the impossibility to map regulatory features to genome analysis. Experimental results revealed variability in the types and numbers of auxotrophies between the strains that could not consistently be explained by in silico metabolic models. Several intraspecific differences in preferences for carbohydrate and phosphorus utilization were observed. Regarding phosphorus recycling, scavenging, and storage, variations were found between the three genomes. The three strains exhibited differences regarding alkaline phosphatase that revealed that in addition to gene gain and loss, regulation adjustment of gene expression also has contributed to the intraspecific diversity of B. coahuilensis.}, } @article {pmid26903952, year = {2016}, author = {Benamar, S and Reteno, DG and Bandaly, V and Labas, N and Raoult, D and La Scola, B}, title = {Faustoviruses: Comparative Genomics of New Megavirales Family Members.}, journal = {Frontiers in microbiology}, volume = {7}, number = {}, pages = {3}, pmid = {26903952}, issn = {1664-302X}, abstract = {An emerging interest for the giant virus discovery process, genome sequencing and analysis has allowed an expansion of the number of known Megavirales members. Using the protist Vermamoeba sp. as cell support, a new giant virus named Faustovirus has been isolated. In this study, we describe the genome sequences of nine Faustoviruses and build a genomic comparison in order to have a comprehensive overview of genomic composition and diversity among this new virus family. 
The average sequence length of these viruses is 467,592.44 bp (ranging from 455,803 to 491,024 bp), making them the fourth largest Megavirales genome after Mimiviruses, Pandoraviruses, and Pithovirus sibericum. Faustovirus genomes displayed an average G+C content of 37.14 % (ranging from 36.22 to 39.59%) which is close to the G+C content range of the Asfarviridae genomes (38%). The proportion of best matches and the phylogenetic analysis suggest a shared origin with Asfarviridae without belonging to the same family. The core-gene-based phylogeny of Faustoviruses study has identified four lineages. These results were confirmed by the analysis of amino acids and COGs category distribution. The diversity of the gene composition of these lineages is mainly explained by gene deletion or acquisition and some exceptions for gene duplications. The high proportion of best matches from Bacteria and Phycodnaviridae on the pan-genome and unique genes may be explained by an interaction occurring after the separation of the lineages. 
The Faustovirus core-genome appears to consolidate the surrounding of 207 genes whereas the pan-genome is described as an open pan-genome, its enrichment via the discovery of new Faustoviruses is required to better seize all the genomic diversity of this family.}, } @article {pmid26899827, year = {2016}, author = {Roisin, S and Gaudin, C and De Mendonça, R and Bellon, J and Van Vaerenbergh, K and De Bruyne, K and Byl, B and Pouseele, H and Denis, O and Supply, P}, title = {Pan-genome multilocus sequence typing and outbreak-specific reference-based single nucleotide polymorphism analysis to resolve two concurrent Staphylococcus aureus outbreaks in neonatal services.}, journal = {Clinical microbiology and infection : the official publication of the European Society of Clinical Microbiology and Infectious Diseases}, volume = {22}, number = {6}, pages = {520-526}, doi = {10.1016/j.cmi.2016.01.024}, pmid = {26899827}, issn = {1469-0691}, mesh = {Belgium/epidemiology ; Cross Infection/*epidemiology/microbiology ; *Disease Outbreaks ; *Genome, Bacterial ; Hospitals ; Humans ; Infant, Newborn ; Molecular Epidemiology ; *Multilocus Sequence Typing ; *Polymorphism, Single Nucleotide ; Staphylococcal Infections/*epidemiology/microbiology ; Staphylococcus aureus/*classification/genetics/isolation & purification ; }, abstract = {We used a two-step whole genome sequencing analysis for resolving two concurrent outbreaks in two neonatal services in Belgium, caused by exfoliative toxin A-encoding-gene-positive (eta+) methicillin-susceptible Staphylococcus aureus with an otherwise sporadic spa-type t209 (ST-109). Outbreak A involved 19 neonates and one healthcare worker in a Brussels hospital from May 2011 to October 2013. After a first episode interrupted by decolonization procedures applied over 7 months, the outbreak resumed concomitantly with the onset of outbreak B in a hospital in Asse, comprising 11 neonates and one healthcare worker from mid-2012 to January 2013. 
Pan-genome multilocus sequence typing, defined on the basis of 42 core and accessory reference genomes, and single-nucleotide polymorphisms mapped on an outbreak-specific de novo assembly were used to compare 28 available outbreak isolates and 19 eta+/spa-type t209 isolates identified by routine or nationwide surveillance. Pan-genome multilocus sequence typing showed that the outbreaks were caused by independent clones not closely related to any of the surveillance isolates. Isolates from only ten cases with overlapping stays in outbreak A, including four pairs of twins, showed no or only a single nucleotide polymorphism variation, indicating limited sequential transmission. Detection of larger genomic variation, even from the start of the outbreak, pointed to sporadic seeding from a pre-existing exogenous source, which persisted throughout the whole course of outbreak A. Whole genome sequencing analysis can provide unique fine-tuned insights into transmission pathways of complex outbreaks even at their inception, which, with timely use, could valuably guide efforts for early source identification.}, } @article {pmid26885654, year = {2016}, author = {Ding, T and Case, KA and Omolo, MA and Reiland, HA and Metz, ZP and Diao, X and Baumler, DJ}, title = {Predicting Essential Metabolic Genome Content of Niche-Specific Enterobacterial Human Pathogens during Simulation of Host Environments.}, journal = {PloS one}, volume = {11}, number = {2}, pages = {e0149423}, pmid = {26885654}, issn = {1932-6203}, mesh = {Computer Simulation ; Enterobacteriaceae/*genetics/growth & development ; Genes, Bacterial ; *Genome, Bacterial ; Host-Pathogen Interactions/*genetics ; Humans ; Metabolome/*genetics ; Salmonella/genetics ; }, abstract = {Microorganisms have evolved to occupy certain environmental niches, and the metabolic genes essential for growth in these locations are retained in the genomes. 
Many microorganisms inhabit niches located in the human body, sometimes causing disease, and may retain genes essential for growth in locations such as the bloodstream and urinary tract, or growth during intracellular invasion of the hosts' macrophage cells. Strains of Escherichia coli (E. coli) and Salmonella spp. are thought to have evolved over 100 million years from a common ancestor, and now cause disease in specific niches within humans. Here we have used a genome scale metabolic model representing the pangenome of E. coli which contains all metabolic reactions encoded by genes from 16 E. coli genomes, and have simulated environmental conditions found in the human bloodstream, urinary tract, and macrophage to determine essential metabolic genes needed for growth in each location. We compared the predicted essential genes for three E. coli strains and one Salmonella strain that cause disease in each host environment, and determined that essential gene retention could be accurately predicted using this approach. This project demonstrated that simulating human body environments such as the bloodstream can successfully lead to accurate computational predictions of essential/important genes.}, } @article {pmid26859489, year = {2016}, author = {Alcaraz, LD and Martínez-Sánchez, S and Torres, I and Ibarra-Laclette, E and Herrera-Estrella, L}, title = {The Metagenome of Utricularia gibba's Traps: Into the Microbial Input to a Carnivorous Plant.}, journal = {PloS one}, volume = {11}, number = {2}, pages = {e0148979}, pmid = {26859489}, issn = {1932-6203}, support = {55007646//Howard Hughes Medical Institute/United States ; }, mesh = {Carnivory ; DNA, Plant/genetics ; Gene Library ; Genome, Plant/*genetics ; Lamiales/*genetics/microbiology ; *Metagenome ; Microbiota ; Plant Structures/*genetics/microbiology ; }, abstract = {The genome and transcriptome sequences of the aquatic, rootless, and carnivorous plant Utricularia gibba L. 
(Lentibulariaceae), were recently determined. Traps are necessary for U. gibba because they help the plant to survive in nutrient-deprived environments. The U. gibba's traps (Ugt) are specialized structures that have been proposed to selectively filter microbial inhabitants. To determine whether the traps indeed have a microbiome that differs, in composition or abundance, from the microbiome in the surrounding environment, we used whole-genome shotgun (WGS) metagenomics to describe both the taxonomic and functional diversity of the Ugt microbiome. We collected U. gibba plants from their natural habitat and directly sequenced the metagenome of the Ugt microbiome and its surrounding water. The total predicted number of species in the Ugt was more than 1,100. Using pan-genome fragment recruitment analysis, we were able to identify to the species level of some key Ugt players, such as Pseudomonas monteilii. Functional analysis of the Ugt metagenome suggests that the trap microbiome plays an important role in nutrient scavenging and assimilation while complementing the hydrolytic functions of the plant.}, } @article {pmid26857276, year = {2016}, author = {Scholz, CF and Brüggemann, H and Lomholt, HB and Tettelin, H and Kilian, M}, title = {Genome stability of Propionibacterium acnes: a comprehensive study of indels and homopolymeric tracts.}, journal = {Scientific reports}, volume = {6}, number = {}, pages = {20662}, pmid = {26857276}, issn = {2045-2322}, mesh = {*Databases, Nucleic Acid ; *Genome, Bacterial ; *Genomic Instability ; *INDEL Mutation ; Propionibacterium acnes/*genetics ; }, abstract = {We present a species-wide comparative analysis of 90 genomes of Propionibacterium acnes that represent the known diversity of the species. Our results are augmented by six high-quality genomes and a manual investigation of all gene-sized indels found in the strains. Overall, the order of genes is conserved throughout the species. 
A public sybil database for easy comparative analysis of the 90 genomes was established. The analysis of indels revealed a total of 66 loci of non-core genes that correlate with phylogenetic clades. No gene was strain-specific in agreement with our conclusion that the P. acnes pan-genome is closed. An exhaustive search for homopolymeric tracts (HPTs) identified a total of 54 variable-length HPTs almost exclusively of guanine/cytosines located between genes or affecting the reading frame of genes. The repeat variation was consistent with phylogenetic clades suggesting slow accumulation over time rather than active modification. By transcriptome analysis we demonstrate how an HPT variation can affect the gene expression levels. Selected cases of both indels and HPTs are described. The catalogued data and the public P. acnes Sybil database provide a solid foundation for generating hypotheses and facilitate comparative genetic analyses in future P. acnes research.}, } @article {pmid26854360, year = {2016}, author = {D'Amato, F and Eldin, C and Raoult, D}, title = {The contribution of genomics to the study of Q fever.}, journal = {Future microbiology}, volume = {11}, number = {2}, pages = {253-272}, doi = {10.2217/fmb.15.137}, pmid = {26854360}, issn = {1746-0921}, mesh = {Animals ; Axenic Culture ; Computer Simulation ; Coxiella burnetii/*genetics/pathogenicity ; Disease Outbreaks ; Genetic Variation ; *Genome, Bacterial ; *Genomics ; Genotype ; Humans ; Mice ; Phylogeny ; Q Fever/diagnosis/*microbiology/physiopathology/therapy ; Real-Time Polymerase Chain Reaction ; Virulence/genetics ; Zoonoses ; }, abstract = {Coxiella burnetii is the etiological agent of Q fever, a worldwide zoonosis that can result in large outbreaks. The birth of genomics and sequencing of C. burnetii strains has revolutionized many fields of study of this infection. 
Accurate genotyping methods and comparative genomic analysis have enabled description of the diversity of strains around the world and their link with pathogenicity. Genomics has also permitted the development of qPCR tools and axenic culture medium, facilitating the diagnosis of Q fever. Moreover, several pathophysiological mechanisms can now be predicted and therapeutic strategies can be determined thanks to in silico genome analysis. An extensive pan-genomic analysis will allow for a comprehensive view of the clonal diversity of C. burnetii and its link with virulence.}, } @article {pmid26839740, year = {2016}, author = {Coutinho, F and Tschoeke, DA and Thompson, F and Thompson, C}, title = {Comparative genomics of Synechococcus and proposal of the new genus Parasynechococcus.}, journal = {PeerJ}, volume = {4}, number = {}, pages = {e1522}, pmid = {26839740}, issn = {2167-8359}, abstract = {Synechococcus is among the most important contributors to global primary productivity. The genomes of several strains of this taxon have been previously sequenced in an effort to understand the physiology and ecology of these highly diverse microorganisms. Here we present a comparative study of Synechococcus genomes. For that end, we developed GenTaxo, a program written in Perl to perform genomic taxonomy based on average nucleotide identity, average amino acid identity and dinucleotide signatures, which revealed that the analyzed strains are drastically distinct regarding their genomic content. Phylogenomic reconstruction indicated a division of Synechococcus in two clades (i.e. Synechococcus and the new genus Parasynechococcus), corroborating evidences that this is in fact a polyphyletic group. 
By clustering protein encoding genes into homologue groups we were able to trace the Pangenome and core genome of both marine and freshwater Synechococcus and determine the genotypic traits that differentiate these lineages.}, } @article {pmid26834703, year = {2015}, author = {Cherradi, N}, title = {microRNAs as Potential Biomarkers in Adrenocortical Cancer: Progress and Challenges.}, journal = {Frontiers in endocrinology}, volume = {6}, number = {}, pages = {195}, pmid = {26834703}, issn = {1664-2392}, abstract = {Adrenocortical carcinoma (ACC) is a rare malignancy with poor prognosis and limited therapeutic options. Over the last decade, pan-genomic analyses of genetic and epigenetic alterations and genome-wide expression profile studies allowed major advances in the understanding of the molecular genetics of ACC. Besides the well-known dysfunctional molecular pathways in adrenocortical tumors, such as the IGF2 pathway, the Wnt pathway, and TP53, high-throughput technologies enabled a more comprehensive genomic characterization of adrenocortical cancer. Integration of expression profile data with exome sequencing, SNP array analysis, methylation, and microRNA (miRNA) profiling led to the identification of subgroups of malignant tumors with distinct molecular alterations and clinical outcomes. miRNAs post-transcriptionally silence their target gene expression either by degrading mRNA or by inhibiting translation. Although our knowledge of the contribution of deregulated miRNAs to the pathogenesis of ACC is still in its infancy, recent studies support their relevance in gene expression alterations in these tumors. Some miRNAs have been shown to carry potential diagnostic and prognostic values, while others may be good candidates for therapeutic interventions. 
With the emergence of disease-specific blood-borne miRNAs signatures, analyses of small cohorts of patients with ACC suggest that circulating miRNAs represent promising non-invasive biomarkers of malignancy or recurrence. However, some technical challenges still remain, and most of the miRNAs reported in the literature have not yet been validated in sufficiently powered and longitudinal studies. In this review, we discuss the current knowledge regarding the deregulation of tumor-associated and circulating miRNAs in ACC patients, while emphasizing their potential significance in pathogenic pathways in light of recent insights into the role of miRNAs in shaping the tumor microenvironment.}, } @article {pmid26819511, year = {2016}, author = {González-Grande, R and Jiménez-Pérez, M and González Arjona, C and Mostazo Torres, J}, title = {New approaches in the treatment of hepatitis C.}, journal = {World journal of gastroenterology}, volume = {22}, number = {4}, pages = {1421-1432}, pmid = {26819511}, issn = {2219-2840}, mesh = {Antiviral Agents/adverse effects/*therapeutic use ; Drug Resistance, Viral/genetics ; Drug Therapy, Combination ; Genotype ; Hepacivirus/*drug effects/genetics/pathogenicity ; Hepatitis C/diagnosis/*drug therapy/virology ; Humans ; Phenotype ; Treatment Outcome ; }, abstract = {About 130-170 million people, is estimated to be infected with the hepatitis C virus (HCV). Chronic HCV infection is one of the leading causes of liver-related death and in many countries it is the primary reason for having a liver transplant. The main aim of antiviral treatment is to eradicate the virus. Until a few years ago the only treatment strategy was based on the combination of pegylated interferon and ribavirin (PEG/RBV). However, in genotypes 1 and 4 the rates of viral response did not surpass 50%, reaching up to 80% in the rest. 
In 2011 approval was given for the first direct acting antiviral agents (DAA), boceprevir and telaprevir, for treatment of genotype 1, in combination with traditional dual therapy. This strategy managed to increase the rates of sustained viral response (SVR) in both naive patients and in retreated patients, but with greater toxicity, interactions and cost, as well as being less safe in patients with advanced disease, in whom this treatment can trigger decompensation or even death. The recent, accelerated incorporation since 2013 of new more effective DAA, with pan-genomic properties and excellent tolerance, besides increasing the rates of SVR (even up to 100%), has also created a new scenario: shorter therapies, less toxicity and regimens free of PEG/RBV. This has enabled their almost generalised applicability in all patients. However, it should be noted that most of the scientific evidence available is based on expert opinion, case-control series, cohort studies and phase 2 and 3 trials, some with a reduced number of patients and select groups. Few data are currently available about the use of these drugs in daily clinical practice, particularly in relation to the appearance of side effects and interactions with other drugs, or their use in special populations or persons with the less common genotypes. This situation suggests the need for the generalised implementation of registries of patients receiving antiviral therapy. The main inconvenience of these new drugs is their high cost. 
This necessitates selection and prioritization of candidate patients to receive them, via strategies established by the various national organs, in accordance with the recommendations of scientific societies.}, } @article {pmid26775250, year = {2016}, author = {van der Lee, TAJ and Medema, MH}, title = {Computational strategies for genome-based natural product discovery and engineering in fungi.}, journal = {Fungal genetics and biology : FG & B}, volume = {89}, number = {}, pages = {29-36}, doi = {10.1016/j.fgb.2016.01.006}, pmid = {26775250}, issn = {1096-0937}, mesh = {*Biological Products/isolation & purification ; Biosynthetic Pathways/genetics ; *Computational Biology ; Fungi/*genetics/metabolism ; *Genetic Engineering/methods ; *Genome, Fungal ; Genomics ; Multigene Family ; }, abstract = {Fungal natural products possess biological activities that are of great value to medicine, agriculture and manufacturing. Recent metagenomic studies accentuate the vastness of fungal taxonomic diversity, and the accompanying specialized metabolic diversity offers a great and still largely untapped resource for natural product discovery. Although fungal natural products show an impressive variation in chemical structures and biological activities, their biosynthetic pathways share a number of key characteristics. First, genes encoding successive steps of a biosynthetic pathway tend to be located adjacently on the chromosome in biosynthetic gene clusters (BGCs). Second, these BGCs are often located on specific regions of the genome and show a discontinuous distribution among evolutionarily related species and isolates. Third, the same enzyme (super)families are often involved in the production of widely different compounds. Fourth, genes that function in the same pathway are often co-regulated, and therefore co-expressed across various growth conditions. 
In this mini-review, we describe how these partly interlinked characteristics can be exploited to computationally identify BGCs in fungal genomes and to connect them to their products. Particular attention will be given to novel algorithms to identify unusual classes of BGCs, as well as integrative pan-genomic approaches that use a combination of genomic and metabolomic data for parallelized natural product discovery across multiple strains. Such novel technologies will not only expedite the natural product discovery process, but will also allow the assembly of a high-quality toolbox for the re-design or even de novo design of biosynthetic pathways using synthetic biology approaches.}, } @article {pmid26754847, year = {2016}, author = {Mosquera-Rendón, J and Rada-Bravo, AM and Cárdenas-Brito, S and Corredor, M and Restrepo-Pineda, E and Benítez-Páez, A}, title = {Pangenome-wide and molecular evolution analyses of the Pseudomonas aeruginosa species.}, journal = {BMC genomics}, volume = {17}, number = {}, pages = {45}, pmid = {26754847}, issn = {1471-2164}, mesh = {Biofilms/growth & development ; *Evolution, Molecular ; Genome, Bacterial ; Genotype ; Humans ; *Phylogeny ; Pseudomonas Infections/*genetics/microbiology ; Pseudomonas aeruginosa/*genetics/pathogenicity ; }, abstract = {BACKGROUND: Drug treatments and vaccine designs against the opportunistic human pathogen Pseudomonas aeruginosa have multiple issues, all associated with the diverse genetic traits present in this pathogen, ranging from multi-drug resistant genes to the molecular machinery for the biosynthesis of biofilms. Several candidate vaccines against P. aeruginosa have been developed, which target the outer membrane proteins; however, major issues arise when attempting to establish complete protection against this pathogen due to its presumably genotypic variation at the strain level. To shed light on this concern, we proposed this study to assess the P. 
aeruginosa pangenome and its molecular evolution across multiple strains.

RESULTS: The P. aeruginosa pangenome was estimated to contain more than 16,000 non-redundant genes, and approximately 15 % of these constituted the core genome. Functional analyses of the accessory genome indicated a wide presence of genetic elements directly associated with pathogenicity. An in-depth molecular evolution analysis revealed the full landscape of selection forces acting on the P. aeruginosa pangenome, in which purifying selection drives evolution in the genome of this human pathogen. We also detected distinctive positive selection in a wide variety of outer membrane proteins, with the data supporting the concept of substantial genetic variation in proteins probably recognized as antigens. Approaching the evolutionary information of genes under extremely positive selection, we designed a new Multi-Locus Sequencing Typing assay for an informative, rapid, and cost-effective genotyping of P. aeruginosa clinical isolates.

CONCLUSIONS: We report the unprecedented pangenome characterization of P. aeruginosa on a large scale, which included almost 200 bacterial genomes from one single species and a molecular evolutionary analysis at the pangenome scale. Evolutionary information presented here provides a clear explanation of the issues associated with the use of protein conjugates from pili, flagella, or secretion systems as antigens for vaccine design, which exhibit high genetic variation in terms of non-synonymous substitutions in P. aeruginosa strains.}, } @article {pmid26748339, year = {2016}, author = {Dumas, E and Christina Boritsch, E and Vandenbogaert, M and Rodríguez de la Vega, RC and Thiberge, JM and Caro, V and Gaillard, JL and Heym, B and Girard-Misguich, F and Brosch, R and Sapriel, G}, title = {Mycobacterial Pan-Genome Analysis Suggests Important Role of Plasmids in the Radiation of Type VII Secretion Systems.}, journal = {Genome biology and evolution}, volume = {8}, number = {2}, pages = {387-402}, pmid = {26748339}, issn = {1759-6653}, mesh = {*Evolution, Molecular ; Gene Rearrangement ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Mycobacterium/classification/*genetics ; Phylogeny ; Plasmids/*genetics ; Synteny ; Type IV Secretion Systems/*genetics ; }, abstract = {In mycobacteria, various type VII secretion systems corresponding to different ESX (ESAT-6 secretory) types, are contributing to pathogenicity, iron acquisition, and/or conjugation. In addition to the known chromosomal ESX loci, the existence of plasmid-encoded ESX systems was recently reported. To investigate the potential role of ESX-encoding plasmids on mycobacterial evolution, we analyzed a large representative collection of mycobacterial genomes, including both chromosomal and plasmid-borne sequences. Data obtained for chromosomal ESX loci confirmed the previous five classical ESX types and identified a novel mycobacterial ESX-4-like type, termed ESX-4-bis. 
Moreover, analysis of the plasmid-encoded ESX loci showed extensive diversification, with at least seven new ESX profiles, identified. Three of them (ESX-P clusters 1-3) were found in multiple plasmids, while four corresponded to singletons. Our phylogenetic and gene-order-analyses revealed two main groups of ESX types: 1) ancestral types, including ESX-4 and ESX-4-like systems from mycobacterial and non-mycobacterial actinobacteria and 2) mycobacteria-specific ESX systems, including ESX-1-2-3-5 systems and the plasmid-encoded ESX types. Synteny analysis revealed that ESX-P systems are part of phylogenetic groups that derived from a common ancestor, which diversified and resulted in the different ESX types through extensive gene rearrangements. A converging body of evidence, derived from composition bias-, phylogenetic-, and synteny analyses points to a scenario in which ESX-encoding plasmids have been a major driving force for acquisition and diversification of type VII systems in mycobacteria, which likely played (and possibly still play) important roles in the adaptation to new environments and hosts during evolution of mycobacterial pathogenesis.}, } @article {pmid26739091, year = {2016}, author = {Faillot, S and Assie, G}, title = {ENDOCRINE TUMOURS: The genomics of adrenocortical tumors.}, journal = {European journal of endocrinology}, volume = {174}, number = {6}, pages = {R249-65}, doi = {10.1530/EJE-15-1118}, pmid = {26739091}, issn = {1479-683X}, mesh = {Adrenal Cortex Neoplasms/*genetics/pathology ; Adrenocortical Adenoma/*genetics/pathology ; Adrenocortical Carcinoma/*genetics/pathology ; Gene Expression Regulation, Neoplastic ; Humans ; Mutation ; Transcriptome ; }, abstract = {The last decade witnessed the emergence of genomics, a set of high-throughput molecular measurements in biological samples. These pan-genomic and agnostic approaches have revolutionized the molecular biology and genetics of malignant and benign tumors. 
These techniques have been applied successfully to adrenocortical tumors. Exome sequencing identified new major drivers in all tumor types, including KCNJ5, ATP1A1, ATP2B3 and CACNA1D mutations in aldosterone-producing adenomas (APA), PRKACA mutations in cortisol-producing adenomas (CPA), ARMC5 mutations in primary bilateral macronodular adrenocortical hyperplasia (PBMAH) and ZNRF3 mutations in adrenocortical carcinomas (ACC). Moreover, the various genomic approaches - including exome sequencing, transcriptome, miRNome, genome and methylome - converge into a single molecular classification of adrenocortical tumors. Especially for ACC, two main molecular groups have emerged, showing major differences in outcomes. These ACC groups differ by their gene expression profiles, but also by recurrent mutations and specific DNA hypermethylation patterns in the subgroup of poor outcome. The clinical impact of these findings is just starting. The main altered signaling pathways now become therapeutic targets. The molecular groups of diseases individualize robust subtypes within diseases such as APA, CPA, PBMAH and ACC. A revised nosology of adrenocortical tumors should impact the clinical research. Obvious consequences also include genetic counseling for the new genetic diseases such as ARMC5 mutations in PBMAH, and a better prognostication of ACC based on targeted measurements of a few discriminant molecular alterations. 
Identifying the main molecular groups of adrenocortical tumors by extensively gathering the molecular variations is a significant step forward towards precision medicine.}, } @article {pmid26724943, year = {2016}, author = {Yang, X and Li, Y and Zang, J and Li, Y and Bie, P and Lu, Y and Wu, Q}, title = {Analysis of pan-genome to identify the core genes and essential genes of Brucella spp.}, journal = {Molecular genetics and genomics : MGG}, volume = {291}, number = {2}, pages = {905-912}, pmid = {26724943}, issn = {1617-4623}, mesh = {Animals ; Brucella/*genetics/pathogenicity ; Computational Biology ; Energy Metabolism/*genetics ; Genes, Essential ; *Genome, Bacterial ; Humans ; Zoonoses/*genetics/microbiology ; }, abstract = {Brucella spp. are facultative intracellular pathogens, that cause a contagious zoonotic disease, that can result in such outcomes as abortion or sterility in susceptible animal hosts and grave, debilitating illness in humans. For deciphering the survival mechanism of Brucella spp. in vivo, 42 Brucella complete genomes from NCBI were analyzed for the pan-genome and core genome by identification of their composition and function of Brucella genomes. The results showed that the total 132,143 protein-coding genes in these genomes were divided into 5369 clusters. Among these, 1710 clusters were associated with the core genome, 1182 clusters with strain-specific genes and 2477 clusters with dispensable genomes. COG analysis indicated that 44 % of the core genes were devoted to metabolism, which were mainly responsible for energy production and conversion (COG category C), and amino acid transport and metabolism (COG category E). Meanwhile, approximately 35 % of the core genes were in positive selection. In addition, 1252 potential essential genes were predicted in the core genome by comparison with a prokaryote database of essential genes. 
The results suggested that the core genes in Brucella genomes are relatively conserved, and the energy and amino acid metabolism play a more important role in the process of growth and reproduction in Brucella spp. This study might help us to better understand the mechanisms of Brucella persistent infection and provide some clues for further exploring the gene modules of the intracellular survival in Brucella spp.}, } @article {pmid26723274, year = {2016}, author = {Lew-Tabor, AE and Rodriguez Valle, M}, title = {A review of reverse vaccinology approaches for the development of vaccines against ticks and tick borne diseases.}, journal = {Ticks and tick-borne diseases}, volume = {7}, number = {4}, pages = {573-585}, doi = {10.1016/j.ttbdis.2015.12.012}, pmid = {26723274}, issn = {1877-9603}, mesh = {Animals ; Cattle ; Drug Discovery/*methods ; Humans ; Tick Infestations/*prevention & control/veterinary ; Tick-Borne Diseases/*prevention & control/veterinary ; Vaccines/*immunology/*isolation & purification ; }, abstract = {The field of reverse vaccinology developed as an outcome of the genome sequence revolution. Following the introduction of live vaccinations in the western world by Edward Jenner in 1798 and the coining of the phrase 'vaccine', in 1881 Pasteur developed a rational design for vaccines. Pasteur proposed that in order to make a vaccine that one should 'isolate, inactivate and inject the microorganism' and these basic rules of vaccinology were largely followed for the next 100 years leading to the elimination of several highly infectious diseases. However, new technologies were needed to conquer many pathogens which could not be eliminated using these traditional technologies. Thus increasingly, computers were used to mine genome sequences to rationally design recombinant vaccines. Several vaccines for bacterial and viral diseases (i.e. 
meningococcus and HIV) have been developed, however the on-going challenge for parasite vaccines has been due to their comparatively larger genomes. Understanding the immune response is important in reverse vaccinology studies as this knowledge will influence how the genome mining is to be conducted. Vaccine candidates for anaplasmosis, cowdriosis, theileriosis, leishmaniasis, malaria, schistosomiasis, and the cattle tick have been identified using reverse vaccinology approaches. Some challenges for parasite vaccine development include the ability to address antigenic variability as well as the understanding of the complex interplay between antibody, mucosal and/or T cell immune responses. To understand the complex parasite interactions with the livestock host, there is the limitation where algorithms for epitope mining using the human genome cannot directly be adapted for bovine, for example the prediction of peptide binding to major histocompatibility complex motifs. As the number of genomes for both hosts and parasites increase, the development of new algorithms for pan-genomic mining will continue to impact the future of parasite and rickettsial (and other tick borne pathogens) disease vaccine development.}, } @article {pmid26717500, year = {2015}, author = {Chaves-Moreno, D and Wos-Oxley, ML and Jáuregui, R and Medina, E and Oxley, AP and Pieper, DH}, title = {Application of a Novel "Pan-Genome"-Based Strategy for Assigning RNAseq Transcript Reads to Staphylococcus aureus Strains.}, journal = {PloS one}, volume = {10}, number = {12}, pages = {e0145861}, pmid = {26717500}, issn = {1932-6203}, mesh = {Computational Biology/methods ; Databases, Nucleic Acid ; Gene Library ; *Genome, Bacterial ; *Genomics/methods ; *High-Throughput Nucleotide Sequencing ; Humans ; Metagenomics/methods ; Staphylococcal Infections/microbiology ; Staphylococcus aureus/*genetics ; *Transcriptome ; }, abstract = {Understanding the behaviour of opportunistic pathogens such as Staphylococcus 
aureus in their natural human niche holds great medical interest. With the development of sensitive molecular methods and deep-sequencing technology, it is now possible to robustly assess the global transcriptome of bacterial species in their human habitat. However, as the genomes of the colonizing strains are often not available compiling the pan-genome for the species of interest may provide an effective method to reliably and rapidly compile the transcriptome of a bacterial species. The pan-genome of S. aureus and its associated core and accessory components were compiled based on 25 genomes and comprises a total of 65,557 proteins clustering into 4,198 Orthologous Groups (OGs). The generated gene catalogue was used to assign RNAseq-derived sequence reads to S. aureus in a variety of in vitro and in vivo samples. In all cases, the number of reads that could be assigned to S. aureus was greater using the OG database than using a reference genome. Growth of two S. aureus strains in synthetic nasal medium confirmed that both strains experienced strong iron starvation. Traits such as purine metabolism appeared to be more affected in a typical nasal colonizer than in a strain representative of the S. aureus USA300 lineage. Mapping sequencing reads from a metatranscriptome generated from the human anterior nares allowed the identification of genes highly expressed by S. aureus in vivo. The OG database generated in this study represents a useful tool to obtain a snapshot of the functional attributes of S. aureus under different in vitro and in vivo conditions. 
The approach proved to be advantageous to assign sequencing reads to bacterial strains when RNAseq data is derived from samples where strain information and/or the corresponding genome/s are unavailable.}, } @article {pmid26713670, year = {2016}, author = {Scott, RA and Lindow, SE}, title = {Transcriptional control of quorum sensing and associated metabolic interactions in Pseudomonas syringae strain B728a.}, journal = {Molecular microbiology}, volume = {99}, number = {6}, pages = {1080-1098}, doi = {10.1111/mmi.13289}, pmid = {26713670}, issn = {1365-2958}, mesh = {Bacterial Proteins/genetics/metabolism ; Gene Expression Regulation, Bacterial ; Plant Diseases/microbiology ; Promoter Regions, Genetic ; Pseudomonas syringae/*genetics/*metabolism ; Quorum Sensing/*genetics ; Regulon ; Transcription Factors/genetics/metabolism ; }, abstract = {Pseudomonas syringae pv. syringae cell densities fluctuate regularly during host plant colonization. Previously we identified nine genes dependent on the quorum-sensing-associated luxR homolog ahlR during epiphytic and apoplastic stages of host colonization. Yet their contributions to host colonization remain obscure, despite ahlR regulon presence within and beyond the P. syringae pan-genome. To elucidate AhlR regulon member functions, we characterized their regulation, interactions with each other, and contributions to the metabolome. We report Psyr_1625, encoding a functional pyruvate dehydrogenase-E1 subunit PdhQ, is required to prevent the accumulation of pyruvate in rich media. Furthermore it is exquisitely regulated by both repression of its own promoter by QrpR within a novel clade of the MarR regulator family, and co-transcription on a 5kb transcript originating from the AhlR-driven ahlI promoter, that reads over ahlR and qrpR. 
Metabolites accumulated during expression of the second AhlR-driven operon (Psyr_1620-1616, paoABCDE), only in a pdhQ mutant background, in addition to pyruvate, are herein associated with derepression of QrpR-repressed pdhQ. AHL signaling, QrpR, and transcriptional read-through events integrate to ensure AHL-dependent expression of a novel metabolism in anticipation of environmental stress, while minimizing endogenously generated cytotoxicity.}, } @article {pmid26705468, year = {2015}, author = {Liu, S and Huang, S and Rao, J and Ye, W and Krogh, A and Wang, J}, title = {Discovery, genotyping and characterization of structural variation and novel sequence at single nucleotide resolution from de novo genome assemblies on a population scale.}, journal = {GigaScience}, volume = {4}, number = {}, pages = {64}, pmid = {26705468}, issn = {2047-217X}, support = {K24 DK002800/DK/NIDDK NIH HHS/United States ; }, mesh = {Genetic Variation ; *Genome, Human ; Genomics/*methods ; Genotype ; High-Throughput Nucleotide Sequencing/methods ; Humans ; Sequence Analysis/*methods ; *Software ; }, abstract = {BACKGROUND: Comprehensive recognition of genomic variation in one individual is important for understanding disease and developing personalized medication and treatment. Many tools based on DNA re-sequencing exist for identification of single nucleotide polymorphisms, small insertions and deletions (indels) as well as large deletions. However, these approaches consistently display a substantial bias against the recovery of complex structural variants and novel sequence in individual genomes and do not provide interpretation information such as the annotation of ancestral state and formation mechanism.

FINDINGS: We present a novel approach implemented in a single software package, AsmVar, to discover, genotype and characterize different forms of structural variation and novel sequence from population-scale de novo genome assemblies up to nucleotide resolution. Application of AsmVar to several human de novo genome assemblies captures a wide spectrum of structural variants and novel sequences present in the human population in high sensitivity and specificity.

CONCLUSIONS: Our method provides a direct solution for investigating structural variants and novel sequences from de novo genome assemblies, facilitating the construction of population-scale pan-genomes. Our study also highlights the usefulness of the de novo assembly strategy for definition of genome structure.}, } @article {pmid26671758, year = {2016}, author = {Martinucci, M and Roscetto, E and Iula, VD and Votsi, A and Catania, MR and De Gregorio, E}, title = {Accurate identification of members of the Burkholderia cepacia complex in cystic fibrosis sputum.}, journal = {Letters in applied microbiology}, volume = {62}, number = {3}, pages = {221-229}, doi = {10.1111/lam.12537}, pmid = {26671758}, issn = {1472-765X}, mesh = {Base Sequence ; Burkholderia Infections/microbiology ; Burkholderia cepacia complex/*classification/genetics/isolation & purification ; Cystic Fibrosis/*microbiology ; Humans ; Immunocompromised Host ; Male ; Real-Time Polymerase Chain Reaction/methods ; Rec A Recombinases/*genetics ; Respiratory Tract Infections/microbiology ; Sensitivity and Specificity ; Sequence Analysis, DNA ; Species Specificity ; Sputum/*microbiology ; }, abstract = {UNLABELLED: The Burkholderia cepacia complex (BCC) is a group of closely related species which includes opportunistic pathogens causing chronic respiratory infections in immunocompromised patients, or individuals affected by cystic fibrosis (CF). Other Burkholderia species causing infection in the CF population are Burkholderia gladioli and Burkholderia pseudomallei. Traditional phenotypic analyses have been demonstrated to be inadequate for reliable identifications of isolates of BCC and B. gladioli. A pan-genomic analysis approach was used to design species-specific probes for Burkholderia cenocepacia, B. cepacia, Burkholderia multivorans, Burkholderia vietnamiensis, Burkholderia ambifaria, Burkholderia dolosa, Burkholderia pyrrocinia and B. gladioli. 
Multiplex real-time PCR assay was developed and tested using sputum specimens collected from CF patients spiked with Burkholderia species. The assay exhibited 100% sensitivity for all eight target species and detected $10^{2}$ to $10^{3}$ CFU ml$^{-1}$ when applied to spiked sputum. Our PCR assay resulted highly specific for each of the Burkholderia species tested, allowing discrimination among Burkholderia and non-Burkholderia pathogens. Analysis carried out on 200 sputa positive for the presence of Burkholderia revealed that PCR assay and recA sequencing were fully comparable for identification of Burkholderia at the level of species.

Burkholderia cepacia complex (BCC) has a complex taxonomic organization and its identification is a challenge for microbiology laboratories. Nonidentification or misidentification of BCC isolates represent a problem in epidemiology and treatment of cystic fibrosis patients. The high specificity and sensitivity of the multiplex Real-time PCR assay developed in this study indicates its potential to be a rapid and reliable method for the detection of Burkholderia at the level of species from sputum samples of cystic fibrosis patients.}, } @article {pmid26659686, year = {2015}, author = {Couger, MB and Hanafy, RA and Mitacek, RM and Budd, C and French, DP and Hoff, WD and Elshahed, MS and Youssef, NH}, title = {The Draft Genome Sequence of Xanthomonas sp. Strain Mitacek01 Expands the Pangenome of a Genus of Plant Pathogens.}, journal = {Genome announcements}, volume = {3}, number = {6}, pages = {}, pmid = {26659686}, issn = {2169-8287}, abstract = {We report the draft genome sequence of Xanthomonas sp. strain Mitacek01, isolated from an indoor environment vending machine surface with frequent human use in Stillwater, Oklahoma, USA, as part of the Student-Initiated Microbial Discovery project. 
The genome has a total size of 3,617,426 bp and a contig N50 of 1,906,967 bp.}, } @article {pmid26657763, year = {2016}, author = {Ngugi, DK and Blom, J and Stepanauskas, R and Stingl, U}, title = {Diversification and niche adaptations of Nitrospina-like bacteria in the polyextreme interfaces of Red Sea brines.}, journal = {The ISME journal}, volume = {10}, number = {6}, pages = {1383-1399}, pmid = {26657763}, issn = {1751-7370}, mesh = {*Adaptation, Physiological ; Bacteria/classification/*genetics/metabolism ; DNA, Bacterial/chemistry/genetics ; DNA, Ribosomal/chemistry/genetics ; Ecosystem ; Indian Ocean ; Metabolic Networks and Pathways ; *Metagenomics ; Nitrites/metabolism ; Oxidation-Reduction ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Salinity ; Salts ; Seawater/microbiology ; Sequence Analysis, DNA ; }, abstract = {Nitrite-oxidizing bacteria (NOB) of the genus Nitrospina have exclusively been found in marine environments. In the brine-seawater interface layer of Atlantis II Deep (Red Sea), Nitrospina-like bacteria constitute up to one-third of the bacterial 16S ribosomal RNA (rRNA) gene sequences. This is much higher compared with that reported in other marine habitats (~10% of all bacteria), and was unexpected because no NOB culture has been observed to grow above 4.0% salinity, presumably due to the low net energy gained from their metabolism that is insufficient for both growth and osmoregulation. Using phylogenetics, single-cell genomics and metagenomic fragment recruitment approaches, we document here that these Nitrospina-like bacteria, designated as Candidatus Nitromaritima RS, are not only highly diverged from the type species Nitrospina gracilis (pairwise genome identity of 69%) but are also ubiquitous in the deeper, highly saline interface layers (up to 11.2% salinity) with temperatures of up to 52 °C. Comparative pan-genome analyses revealed that less than half of the predicted proteome of Ca. Nitromaritima RS is shared with N. gracilis. 
Interestingly, the capacity for nitrite oxidation is also conserved in both genomes. Although both lack acidic proteomes synonymous with extreme halophiles, the pangenome of Ca. Nitromaritima RS specifically encodes enzymes with osmoregulatory and thermoprotective roles (i.e., ectoine/hydroxyectoine biosynthesis) and of thermodynamic importance (i.e., nitrate and nitrite reductases). Ca. Nitromaritima RS also possesses many hallmark traits of microaerophiles and high-affinity NOB. The abundance of the uncultured Ca. Nitromaritima lineage in marine oxyclines suggests their unrecognized ecological significance in deoxygenated areas of the global ocean.}, } @article {pmid26633545, year = {2016}, author = {Posey, JE and Rosenfeld, JA and James, RA and Bainbridge, M and Niu, Z and Wang, X and Dhar, S and Wiszniewski, W and Akdemir, ZH and Gambin, T and Xia, F and Person, RE and Walkiewicz, M and Shaw, CA and Sutton, VR and Beaudet, AL and Muzny, D and Eng, CM and Yang, Y and Gibbs, RA and Lupski, JR and Boerwinkle, E and Plon, SE}, title = {Molecular diagnostic experience of whole-exome sequencing in adult patients.}, journal = {Genetics in medicine : official journal of the American College of Medical Genetics}, volume = {18}, number = {7}, pages = {678-685}, pmid = {26633545}, issn = {1530-0366}, support = {U54 HG006542/HG/NHGRI NIH HHS/United States ; T32 GM007526/GM/NIGMS NIH HHS/United States ; U54 HG003273/HG/NHGRI NIH HHS/United States ; U01 HG006485/HG/NHGRI NIH HHS/United States ; K23 NS078056/NS/NINDS NIH HHS/United States ; R01 NS058529/NS/NINDS NIH HHS/United States ; }, mesh = {Adult ; Exome/genetics ; Female ; Genetic Diseases, Inborn/*diagnosis/epidemiology ; Genetic Predisposition to Disease ; *Genetic Testing ; *Genome, Human ; High-Throughput Nucleotide Sequencing/*methods ; Humans ; Male ; Pathology, Molecular/methods ; }, abstract = {PURPOSE: Whole-exome sequencing (WES) is increasingly used as a diagnostic tool in medicine, but prior reports focus 
on predominantly pediatric cohorts with neurologic or developmental disorders. We describe the diagnostic yield and characteristics of WES in adults.

METHODS: We performed a retrospective analysis of consecutive WES reports for adults from a diagnostic laboratory. Phenotype composition was determined using Human Phenotype Ontology terms.

RESULTS: Molecular diagnoses were reported for 17.5% (85/486) of adults, which is lower than that for a primarily pediatric population (25.2%; P = 0.0003); the diagnostic rate was higher (23.9%) for those 18-30 years of age compared to patients older than 30 years (10.4%; P = 0.0001). Dual Mendelian diagnoses contributed to 7% of diagnoses, revealing blended phenotypes. Diagnoses were more frequent among individuals with abnormalities of the nervous system, skeletal system, head/neck, and growth. Diagnostic rate was independent of family history information, and de novo mutations contributed to 61.4% of autosomal dominant diagnoses.

CONCLUSION: Early WES experience in adults demonstrates molecular diagnoses in a substantial proportion of patients, informing clinical management, recurrence risk, and recommendations for relatives. A positive family history was not predictive, consistent with molecular diagnoses often revealed by de novo events, informing the Mendelian basis of genetic disease in adults. Genet Med 18 7, 678-685.}, } @article {pmid26626322, year = {2015}, author = {Wegmann, U and MacKenzie, DA and Zheng, J and Goesmann, A and Roos, S and Swarbreck, D and Walter, J and Crossman, LC and Juge, N}, title = {The pan-genome of Lactobacillus reuteri strains originating from the pig gastrointestinal tract.}, journal = {BMC genomics}, volume = {16}, number = {}, pages = {1023}, pmid = {26626322}, issn = {1471-2164}, support = {BB/J004529/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/K019554/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/00044452/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Bacterial Outer Membrane Proteins/genetics ; Bacterial Secretion Systems/genetics ; Bacteriophages ; Basal Metabolism/genetics ; Chromosomes, Bacterial ; Gastrointestinal Tract/microbiology ; Gene Order ; Gene Transfer, Horizontal ; Genetic Structures ; *Genome, Bacterial ; *Genomics/methods ; High-Throughput Nucleotide Sequencing ; Host-Pathogen Interactions ; Limosilactobacillus reuteri/*genetics/isolation & purification/metabolism/virology ; Multigene Family ; Phylogeny ; Pseudogenes ; Swine ; }, abstract = {BACKGROUND: Lactobacillus reuteri is a gut symbiont of a wide variety of vertebrate species that has diversified into distinct phylogenetic clades which are to a large degree host-specific. Previous work demonstrated host specificity in mice and began to determine the mechanisms by which gut colonisation and host restriction is achieved. However, how L. 
reuteri strains colonise the gastrointestinal (GI) tract of pigs is unknown.

RESULTS: To gain insight into the ecology of L. reuteri in the pig gut, the genome sequence of the porcine small intestinal isolate L. reuteri ATCC 53608 was completed and consisted of a chromosome of 1.94 Mbp and two plasmids of 138.5 kbp and 9.09 kbp, respectively. Furthermore, we generated draft genomes of four additional L. reuteri strains isolated from pig faeces or lower GI tract, lp167-67, pg-3b, 20-2 and 3c6, and subjected all five genomes to a comparative genomic analysis together with the previously completed genome of strain I5007. A phylogenetic analysis based on whole genomes showed that porcine L. reuteri strains fall into two distinct clades, as previously suggested by multi-locus sequence analysis. These six pig L. reuteri genomes contained a core set of 1364 orthologous gene clusters, as determined by OrthoMCL analysis, that contributed to a pan-genome totalling 3373 gene clusters. Genome comparisons of the six pig L. reuteri strains with 14 L. reuteri strains from other host origins gave a total pan-genome of 5225 gene clusters that included a core genome of 851 gene clusters but revealed that there were no pig-specific genes per se. However, genes specific for and conserved among strains of the two pig phylogenetic lineages were detected, some of which encoded cell surface proteins that could contribute to the diversification of the two lineages and their observed host specificity.

CONCLUSIONS: This study extends the phylogenetic analysis of L. reuteri strains at a genome-wide level, pointing to distinct evolutionary trajectories of porcine L. reuteri lineages, and providing new insights into the genomic events in L. reuteri that occurred during specialisation to their hosts. The occurrence of two distinct pig-derived clades may reflect differences in host genotype, environmental factors such as dietary components or to evolution from ancestral strains of human and rodent origin following contact with pig populations.}, } @article {pmid26615445, year = {2015}, author = {Goryunov, DV and Nagaev, BE and Nikolaev, MY and Alexeevski, AV and Troitsky, AV}, title = {Moss Phylogeny Reconstruction Using Nucleotide Pangenome of Complete Mitogenome Sequences.}, journal = {Biochemistry. Biokhimiia}, volume = {80}, number = {11}, pages = {1522-1527}, doi = {10.1134/S0006297915110152}, pmid = {26615445}, issn = {1608-3040}, mesh = {Bryophyta/classification/*genetics ; DNA, Mitochondrial/*metabolism ; Databases, Genetic ; Internet ; *Phylogeny ; User-Computer Interface ; }, abstract = {Stability of composition and sequence of genes was shown earlier in 13 mitochondrial genomes of mosses (Rensing, S. A., et al. (2008) Science, 319, 64-69). It is of interest to study the evolution of mitochondrial genomes not only at the gene level, but also on the level of nucleotide sequences. To do this, we have constructed a "nucleotide pangenome" for mitochondrial genomes of 24 moss species. The nucleotide pangenome is a set of aligned nucleotide sequences of orthologous genome fragments covering the totality of all genomes. The nucleotide pangenome was constructed using specially developed new software, NPG-explorer (NPGe). The stable part of the mitochondrial genome (232 stable blocks) is shown to be, on average, 45% of its length. In the joint alignment of stable blocks, 82% of positions are conserved. 
The phylogenetic tree constructed with the NPGe program is in good correlation with other phylogenetic reconstructions. With the NPGe program, 30 blocks have been identified with repeats no shorter than 50 bp. The maximal length of a block with repeats is 140 bp. Duplications in the mitochondrial genomes of mosses are rare. On average, the genome contains about 500 bp in large duplications. The total length of insertions and deletions was determined in each genome. The losses and gains of DNA regions are rather active in mitochondrial genomes of mosses, and such rearrangements presumably can be used as additional markers in the reconstruction of phylogeny.}, } @article {pmid26615220, year = {2015}, author = {Brito, AF and Braconi, CT and Weidmann, M and Dilcher, M and Alves, JM and Gruber, A and Zanotto, PM}, title = {The Pangenome of the Anticarsia gemmatalis Multiple Nucleopolyhedrovirus (AgMNPV).}, journal = {Genome biology and evolution}, volume = {8}, number = {1}, pages = {94-108}, pmid = {26615220}, issn = {1759-6653}, mesh = {Animals ; Baculoviridae/*genetics ; Base Sequence ; *Genome, Viral ; Genomic Instability ; Lepidoptera/*virology ; Molecular Sequence Data ; Open Reading Frames ; Polymorphism, Genetic ; Recombination, Genetic ; Ubiquitins/genetics ; Viral Proteins/genetics ; }, abstract = {The alphabaculovirus Anticarsia gemmatalis multiple nucleopolyhedrovirus (AgMNPV) is the world's most successful viral bioinsecticide. Through the 1980s and 1990s, this virus was extensively used for biological control of populations of Anticarsia gemmatalis (Velvetbean caterpillar) in soybean crops. During this period, genetic studies identified several variable loci in the AgMNPV; however, most of them were not characterized at the sequence level. In this study we report a full genome comparison among 17 wild-type isolates of AgMNPV. We found the pangenome of this virus to contain at least 167 hypothetical genes, 151 of which are shared by all genomes. 
The gene bro-a that might be involved in host specificity and carrying transporter is absent in some genomes, and new hypothetical genes were observed. Among these genes there is a unique rnf12-like gene, probably implicated in ubiquitination. Events of gene fission and fusion are common, as four genes have been observed as single or split open reading frames. Gains and losses of genomic fragments (from 20 to 900 bp) are observed within tandem repeats, such as in eight direct repeats and four homologous regions. Most AgMNPV genes present low nucleotide diversity, and variable genes are mainly located in a locus known to evolve through homologous recombination. The evolution of AgMNPV is mainly driven by small indels, substitutions, gain and loss of nucleotide stretches or entire coding sequences. These variations may cause relevant phenotypic alterations, which probably affect the infectivity of AgMNPV. This work provides novel information on genomic evolution of the AgMNPV in particular and of baculoviruses in general.}, } @article {pmid26597042, year = {2015}, author = {de Bruijn, I and Cheng, X and de Jager, V and Expósito, RG and Watrous, J and Patel, N and Postma, J and Dorrestein, PC and Kobayashi, D and Raaijmakers, JM}, title = {Comparative genomics and metabolic profiling of the genus Lysobacter.}, journal = {BMC genomics}, volume = {16}, number = {}, pages = {991}, pmid = {26597042}, issn = {1471-2164}, mesh = {*Genomics ; Lysobacter/*genetics/*metabolism/physiology ; *Metabolomics ; Movement ; Multigene Family ; Rhizoctonia/physiology ; }, abstract = {BACKGROUND: Lysobacter species are Gram-negative bacteria widely distributed in soil, plant and freshwater habitats. Lysobacter owes its name to the lytic effects on other microorganisms. To better understand their ecology and interactions with other (micro)organisms, five Lysobacter strains representing the four species L. enzymogenes, L. capsici, L. gummosus and L. 
antibioticus were subjected to genomics and metabolomics analyses.

RESULTS: Comparative genomics revealed a diverse genome content among the Lysobacter species with a core genome of 2,891 and a pangenome of 10,028 coding sequences. Genes encoding type I, II, III, IV, V secretion systems and type IV pili were highly conserved in all five genomes, whereas type VI secretion systems were only found in L. enzymogenes and L. gummosus. Genes encoding components of the flagellar apparatus were absent in the two sequenced L. antibioticus strains. The genomes contained a large number of genes encoding extracellular enzymes including chitinases, glucanases and peptidases. Various nonribosomal peptide synthase (NRPS) and polyketide synthase (PKS) gene clusters encoding putative bioactive metabolites were identified but only few of these clusters were shared between the different species. Metabolic profiling by imaging mass spectrometry complemented, in part, the in silico genome analyses and allowed visualisation of the spatial distribution patterns of several secondary metabolites produced by or induced in Lysobacter species during interactions with the soil-borne fungus Rhizoctonia solani.

CONCLUSIONS: Our work shows that mining the genomes of Lysobacter species in combination with metabolic profiling provides novel insights into the genomic and metabolic potential of this widely distributed but understudied and versatile bacterial genus.}, } @article {pmid26593040, year = {2016}, author = {Golicz, AA and Batley, J and Edwards, D}, title = {Towards plant pangenomics.}, journal = {Plant biotechnology journal}, volume = {14}, number = {4}, pages = {1099-1105}, doi = {10.1111/pbi.12499}, pmid = {26593040}, issn = {1467-7652}, mesh = {Arabidopsis/genetics ; *Genome, Plant ; Genomics/*methods ; Soybeans/genetics ; Zea mays/genetics ; }, abstract = {As an increasing number of genome sequences become available for a wide range of species, there is a growing understanding that the genome of a single individual is insufficient to represent the gene diversity within a whole species. Many studies examine the sequence diversity within genes, and this allelic variation is an important source of phenotypic variation which can be selected for by man or nature. However, the significant gene presence/absence variation that has been observed within species and the impact of this variation on traits is only now being studied in detail. The sum of the genes for a species is termed the pangenome, and the determination and characterization of the pangenome is a requirement to understand variation within a species. 
In this review, we explore the current progress in pangenomics as well as methods and approaches for the characterization of pangenomes for a wide range of plant species.}, } @article {pmid26585406, year = {2016}, author = {Kanehisa, M and Sato, Y and Morishima, K}, title = {BlastKOALA and GhostKOALA: KEGG Tools for Functional Characterization of Genome and Metagenome Sequences.}, journal = {Journal of molecular biology}, volume = {428}, number = {4}, pages = {726-731}, doi = {10.1016/j.jmb.2015.11.006}, pmid = {26585406}, issn = {1089-8638}, mesh = {Computational Biology/*methods ; *Genome ; Internet ; *Metagenome ; Sequence Analysis, DNA/*methods ; }, abstract = {BlastKOALA and GhostKOALA are automatic annotation servers for genome and metagenome sequences, which perform KO (KEGG Orthology) assignments to characterize individual gene functions and reconstruct KEGG pathways, BRITE hierarchies and KEGG modules to infer high-level functions of the organism or the ecosystem. Both servers are made freely available at the KEGG Web site (http://www.kegg.jp/blastkoala/). In BlastKOALA, the KO assignment is performed by a modified version of the internally used KOALA algorithm after the BLAST search against a non-redundant dataset of pangenome sequences at the species, genus or family level, which is generated from the KEGG GENES database by retaining the KO content of each taxonomic category. In GhostKOALA, which utilizes more rapid GHOSTX for database search and is suitable for metagenome annotation, the pangenome dataset is supplemented with Cd-hit clusters including those for viral genes. 
The result files may be downloaded and manipulated for further KEGG Mapper analysis, such as comparative pathway analysis using multiple BlastKOALA results.}, } @article {pmid26578582, year = {2016}, author = {Winsor, GL and Griffiths, EJ and Lo, R and Dhillon, BK and Shay, JA and Brinkman, FS}, title = {Enhanced annotations and features for comparing thousands of Pseudomonas genomes in the Pseudomonas genome database.}, journal = {Nucleic acids research}, volume = {44}, number = {D1}, pages = {D646-53}, pmid = {26578582}, issn = {1362-4962}, support = {//Canadian Institutes of Health Research/Canada ; }, mesh = {Bacterial Proteins/analysis/chemistry ; *Databases, Genetic ; Drug Resistance, Bacterial/genetics ; Gene Ontology ; *Genome, Bacterial ; Genomic Islands ; Internet ; *Molecular Sequence Annotation ; Pseudomonas/drug effects/*genetics/pathogenicity ; Virulence Factors ; }, abstract = {The Pseudomonas Genome Database (http://www.pseudomonas.com) is well known for the application of community-based annotation approaches for producing a high-quality Pseudomonas aeruginosa PAO1 genome annotation, and facilitating whole-genome comparative analyses with other Pseudomonas strains. To aid analysis of potentially thousands of complete and draft genome assemblies, this database and analysis platform was upgraded to integrate curated genome annotations and isolate metadata with enhanced tools for larger scale comparative analysis and visualization. Manually curated gene annotations are supplemented with improved computational analyses that help identify putative drug targets and vaccine candidates or assist with evolutionary studies by identifying orthologs, pathogen-associated genes and genomic islands. The database schema has been updated to integrate isolate metadata that will facilitate more powerful analysis of genomes across datasets in the future. 
We continue to place an emphasis on providing high-quality updates to gene annotations through regular review of the scientific literature and using community-based approaches including a major new Pseudomonas community initiative for the assignment of high-quality gene ontology terms to genes. As we further expand from thousands of genomes, we plan to provide enhancements that will aid data visualization and analysis arising from whole-genome comparative studies including more pan-genome and population-based approaches.}, } @article {pmid26578556, year = {2016}, author = {Sheppard, TK and Hitz, BC and Engel, SR and Song, G and Balakrishnan, R and Binkley, G and Costanzo, MC and Dalusag, KS and Demeter, J and Hellerstedt, ST and Karra, K and Nash, RS and Paskov, KM and Skrzypek, MS and Weng, S and Wong, ED and Cherry, JM}, title = {The Saccharomyces Genome Database Variant Viewer.}, journal = {Nucleic acids research}, volume = {44}, number = {D1}, pages = {D698-702}, pmid = {26578556}, issn = {1362-4962}, support = {U41 HG001315/HG/NHGRI NIH HHS/United States ; HG001315/HG/NHGRI NIH HHS/United States ; }, mesh = {*Databases, Genetic ; *Genetic Variation ; *Genome, Fungal ; Molecular Sequence Annotation ; Saccharomyces cerevisiae/*genetics ; Sequence Alignment ; Sequence Analysis, DNA ; Sequence Analysis, Protein ; User-Computer Interface ; }, abstract = {The Saccharomyces Genome Database (SGD; http://www.yeastgenome.org) is the authoritative community resource for the Saccharomyces cerevisiae reference genome sequence and its annotation. In recent years, we have moved toward increased representation of sequence variation and allelic differences within S. cerevisiae. The publication of numerous additional genomes has motivated the creation of new tools for their annotation and analysis. Here we present the Variant Viewer: a dynamic open-source web application for the visualization of genomic and proteomic differences. 
Multiple sequence alignments have been constructed across high quality genome sequences from 11 different S. cerevisiae strains and stored in the SGD. The alignments and summaries are encoded in JSON and used to create a two-tiered dynamic view of the budding yeast pan-genome, available at http://www.yeastgenome.org/variant-viewer.}, } @article {pmid26569403, year = {2015}, author = {Huang, S and Zhang, S and Jiao, N and Chen, F}, title = {Comparative Genomic and Phylogenomic Analyses Reveal a Conserved Core Genome Shared by Estuarine and Oceanic Cyanopodoviruses.}, journal = {PloS one}, volume = {10}, number = {11}, pages = {e0142962}, pmid = {26569403}, issn = {1932-6203}, mesh = {Aquatic Organisms/genetics ; Bacteriophages/genetics ; *Conserved Sequence/genetics ; Cyanobacteria/genetics ; *Estuaries ; Genes, Viral ; *Genome, Viral ; Genomics/*methods ; Likelihood Functions ; *Oceans and Seas ; *Phylogeny ; Podoviridae/*genetics ; Viral Proteins/genetics ; }, abstract = {Podoviruses are among the major viral groups that infect marine picocyanobacteria Prochlorococcus and Synechococcus. Here, we reported the genome sequences of five Synechococcus podoviruses isolated from the estuarine environment, and performed comparative genomic and phylogenomic analyses based on a total of 20 cyanopodovirus genomes. The genomes of all the known marine cyanopodoviruses are highly syntenic. A pan-genome of 349 clustered orthologous groups was determined, among which 15 were core genes. These core genes make up nearly half of each genome in length, reflecting the high level of genome conservation among this cyanophage type. The whole genome phylogenies based on concatenated core genes and gene content were highly consistent and confirmed the separation of two discrete marine cyanopodovirus clusters MPP-A and MPP-B. The genomes within cluster MPP-B grouped into subclusters mainly corresponding to Prochlorococcus or Synechococcus host types. 
Auxiliary metabolic genes tend to occur in a specific phylogenetic group of these cyanopodoviruses. All the MPP-B phages analyzed here encode the photosynthesis gene psbA, which are absent in all the MPP-A genomes thus far. Interestingly, all the MPP-B and two MPP-A Synechococcus podoviruses encode the thymidylate synthase gene thyX, while at the same genome locus all the MPP-B Prochlorococcus podoviruses encode the transaldolase gene talC. Both genes are hypothesized to have the potential to facilitate the biosynthesis of deoxynucleotide for phage replication. Inheritance of specific functional genes could be important to the evolution and ecological fitness of certain cyanophage genotypes. Our analyses demonstrate that cyanopodoviruses of estuarine and oceanic origins share a conserved core genome and suggest that accessory genes may be related to environmental adaptation.}, } @article {pmid26559891, year = {2015}, author = {Dias, L and Caetano, T and Pinheiro, M and Mendo, S}, title = {The lanthipeptides of Bacillus methylotrophicus and their association with genomic islands.}, journal = {Systematic and applied microbiology}, volume = {38}, number = {8}, pages = {525-533}, doi = {10.1016/j.syapm.2015.10.002}, pmid = {26559891}, issn = {1618-0984}, support = {097831/Z/11/Z//Wellcome Trust/United Kingdom ; }, mesh = {Bacillus/*genetics ; Bacteriocins/*genetics ; Gene Order ; Genetic Variation ; *Genomic Islands ; Peptides/*genetics ; }, abstract = {Bacillus methylotrophicus strains are known for their potential as plant-growth promoters and as microbial pesticides that effectively control plant diseases caused by bacteria and fungi. Over the past few years, a wide diversity of their secondary metabolites has been extensively characterized. Among these are the RiPPs lanthipeptides, which are an important and growing group of notable compounds. The increasing interest in B. 
methylotrophicus species, accompanied by the development of high throughput sequencing techniques, has resulted in a substantial number of full genomes being available. Here, an in silico analysis was performed on these genomes in order to survey the presence of lanthipeptide biosynthetic clusters. It was found that the pan genome of B. methylotrophicus only encoded the biosynthesis of mersacidin and amylolysin, which are lanthipeptides with antibacterial activity. However, the amylolysin gene cluster identified was comprised of more genetic elements than those previously described, and it had certain features of two-peptide lantibiotics. Additionally, it was also established that the association of lanthipeptides with genomic islands (GIs) was not confined to mersacidin. This was also found for the amylolysin cluster as well as other class I and class II lanthipeptides, supporting the idea that their production is probably related to functional adaptation.}, } @article {pmid26556047, year = {2015}, author = {Shelburne, SA and Ajami, NJ and Chibucos, MC and Beird, HC and Tarrand, J and Galloway-Peña, J and Albert, N and Chemaly, RF and Ghantoji, SS and Marsh, L and Pemmaraju, N and Andreeff, M and Shpall, EJ and Wargo, JA and Rezvani, K and Alousi, A and Bruno, VM and Futreal, PA and Petrosino, JF and Kontoyiannis, DP}, title = {Implementation of a Pan-Genomic Approach to Investigate Holobiont-Infecting Microbe Interaction: A Case Report of a Leukemic Patient with Invasive Mucormycosis.}, journal = {PloS one}, volume = {10}, number = {11}, pages = {e0139851}, pmid = {26556047}, issn = {1932-6203}, support = {P30 CA016672/CA/NCI NIH HHS/United States ; R01AI089891/AI/NIAID NIH HHS/United States ; R01 CA061508/CA/NCI NIH HHS/United States ; U19 AI110820/AI/NIAID NIH HHS/United States ; U19AI110820/AI/NIAID NIH HHS/United States ; HHSN272200900009C//PHS HHS/United States ; HHSN272200900009C/AI/NIAID NIH HHS/United States ; R01 AI089891/AI/NIAID NIH HHS/United States ; 
}, mesh = {Antifungal Agents/therapeutic use ; Antineoplastic Combined Chemotherapy Protocols/adverse effects ; Chemotherapy-Induced Febrile Neutropenia ; Fungal Proteins/genetics ; Fungemia/microbiology ; Gastrointestinal Microbiome/*genetics ; *Genome, Fungal ; Host-Pathogen Interactions ; Humans ; Leukemia, Myeloid, Acute/*complications ; Male ; Middle Aged ; Mucor/*genetics/isolation & purification ; Mucormycosis/drug therapy/*microbiology ; Neoplasm Proteins/genetics ; Onychomycosis/complications ; Opportunistic Infections/drug therapy/*microbiology ; }, abstract = {Disease can be conceptualized as the result of interactions between infecting microbe and holobiont, the combination of a host and its microbial communities. It is likely that genomic variation in the host, infecting microbe, and commensal microbiota are key determinants of infectious disease clinical outcomes. However, until recently, simultaneous, multiomic investigation of infecting microbe and holobiont components has rarely been explored. Herein, we characterized the infecting microbe, host, micro- and mycobiomes leading up to infection onset in a leukemia patient that developed invasive mucormycosis. We discovered that the patient was infected with a strain of the recently described Mucor velutinosus species which we determined was hypervirulent in a Drosophila challenge model and has a predisposition for skin dissemination. After completing the infecting M. velutinosus genome and genomes from four other Mucor species, comparative pathogenomics was performed and assisted in identifying 66 M. velutinosus-specific putatively secreted proteins, including multiple novel secreted aspartyl proteinases which may contribute to the unique clinical presentation of skin dissemination. Whole exome sequencing of the patient revealed multiple non-synonymous polymorphisms in genes critical to control of fungal proliferation, such as TLR6 and PTX3. 
Moreover, the patient had a non-synonymous polymorphism in the NOD2 gene and a missense mutation in FUT2, which have been linked to microbial dysbiosis and microbiome diversity maintenance during physiologic stress, respectively. In concert with host genetic polymorphism data, the micro- and mycobiome analyses revealed that the infection developed amid a dysbiotic microbiome with low α-diversity, dominated by staphylococci. Additionally, longitudinal mycobiome data showed that M. velutinosus DNA was detectable in oral samples preceding disease onset. Our genome-level study of the host-infecting microbe-commensal triad extends the concept of personalized genomic medicine to the holobiont-infecting microbe interface thereby offering novel opportunities for using synergistic genetic methods to increase understanding of infectious diseases pathogenesis and clinical outcomes.}, } @article {pmid26546738, year = {2015}, author = {Ghosh, P and Shippy, DC and Talaat, AM}, title = {Superior protection elicited by live-attenuated vaccines in the murine model of paratuberculosis.}, journal = {Vaccine}, volume = {33}, number = {51}, pages = {7262-7270}, doi = {10.1016/j.vaccine.2015.10.116}, pmid = {26546738}, issn = {1873-2518}, mesh = {Adjuvants, Immunologic/administration & dosage ; Animal Structures/pathology ; Animals ; Bacterial Load ; Bacterial Vaccines/administration & dosage/*immunology ; Disease Models, Animal ; Female ; Gene Knockout Techniques ; Interferon-gamma/metabolism ; Leukocytes, Mononuclear/immunology ; Mice, Inbred C57BL ; Mycobacterium avium subsp. paratuberculosis/genetics/*immunology ; Paratuberculosis/immunology/pathology/*prevention & control ; Quillaja Saponins/administration & dosage ; Sigma Factor/deficiency ; Vaccines, Attenuated/administration & dosage/immunology ; Virulence ; Virulence Factors/deficiency ; }, abstract = {Mycobacterium avium subspecies paratuberculosis (M. 
paratuberculosis) causes Johne's disease, a chronic enteric infection in ruminants with severe economic impact on the dairy industry in the USA and worldwide. Currently, available vaccines have limited protective efficacy against disease progression and do not prevent spread of the infection among animals. Because of their ability to elicit wide-spectrum immune responses, we adopted a live-attenuated vaccine approach based on a sigH knock-out strain of M. paratuberculosis (ΔsigH). Earlier analysis of the ΔsigH mutant in mice indicated their inadequate ability to colonize host tissues, unlike the isogenic wild-type strain, validating the role of this sigma factor in M. paratuberculosis virulence. In the present study, we evaluated the performance of the ΔsigH mutant compared to inactivated vaccine constructs in a vaccine/challenge model of murine paratuberculosis. The presented analysis indicated that ΔsigH mutant with or without QuilA adjuvant is capable of eliciting strong immune responses (such as interferon gamma, IFN-γ) suggesting their immunogenicity and ability to potentially initiate effective vaccine-induced immunity. Following a challenge with virulent strains of M. paratuberculosis, ΔsigH conferred protective immunity as indicated by the reduced bacterial burden accompanied with reduced lesions in main body organs (liver, spleen and intestine) usually infected with M. paratuberculosis. More importantly, our data indicated better ability of the ΔsigH vaccine to confer protection compared to the inactivated vaccine constructs even with the presence of oil-adjuvant. 
Overall, our approach provides a rational basis for using live-attenuated mutant strains to develop improved vaccines that elicit robust immunity against this chronic infection.}, } @article {pmid26519390, year = {2016}, author = {Jun, SR and Wassenaar, TM and Nookaew, I and Hauser, L and Wanchai, V and Land, M and Timm, CM and Lu, TY and Schadt, CW and Doktycz, MJ and Pelletier, DA and Ussery, DW}, title = {Diversity of Pseudomonas Genomes, Including Populus-Associated Isolates, as Revealed by Comparative Genome Analysis.}, journal = {Applied and environmental microbiology}, volume = {82}, number = {1}, pages = {375-383}, pmid = {26519390}, issn = {1098-5336}, mesh = {Comparative Genomic Hybridization ; *Genetic Variation ; *Genome, Bacterial ; Phylogeny ; Plant Roots/microbiology ; Populus/*microbiology ; Pseudomonas/*classification/*genetics/isolation & purification ; Pseudomonas aeruginosa/genetics/isolation & purification ; Pseudomonas fluorescens/classification/genetics/isolation & purification ; Pseudomonas putida/genetics/isolation & purification ; Rhizosphere ; Sequence Analysis, DNA ; }, abstract = {The Pseudomonas genus contains a metabolically versatile group of organisms that are known to occupy numerous ecological niches, including the rhizosphere and endosphere of many plants. Their diversity influences the phylogenetic diversity and heterogeneity of these communities. On the basis of average amino acid identity, comparative genome analysis of >1,000 Pseudomonas genomes, including 21 Pseudomonas strains isolated from the roots of native Populus deltoides (eastern cottonwood) trees resulted in consistent and robust genomic clusters with phylogenetic homogeneity. All Pseudomonas aeruginosa genomes clustered together, and these were clearly distinct from other Pseudomonas species groups on the basis of pangenome and core genome analyses. 
In contrast, the genomes of Pseudomonas fluorescens were organized into 20 distinct genomic clusters, representing enormous diversity and heterogeneity. Most of our 21 Populus-associated isolates formed three distinct subgroups within the major P. fluorescens group, supported by pathway profile analysis, while two isolates were more closely related to Pseudomonas chlororaphis and Pseudomonas putida. Genes specific to Populus-associated subgroups were identified. Genes specific to subgroup 1 include several sensory systems that act in two-component signal transduction, a TonB-dependent receptor, and a phosphorelay sensor. Genes specific to subgroup 2 contain hypothetical genes, and genes specific to subgroup 3 were annotated with hydrolase activity. This study justifies the need to sequence multiple isolates, especially from P. fluorescens, which displays the most genetic variation, in order to study functional capabilities from a pangenomic perspective. This information will prove useful when choosing Pseudomonas strains for use to promote growth and increase disease resistance in plants.}, } @article {pmid26518049, year = {2016}, author = {Alhashash, F and Wang, X and Paszkiewicz, K and Diggle, M and Zong, Z and McNally, A}, title = {Increase in bacteraemia cases in the East Midlands region of the UK due to MDR Escherichia coli ST73: high levels of genomic and plasmid diversity in causative isolates.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {71}, number = {2}, pages = {339-343}, doi = {10.1093/jac/dkv365}, pmid = {26518049}, issn = {1460-2091}, mesh = {Bacteremia/*epidemiology/microbiology ; Computational Biology ; *Drug Resistance, Multiple, Bacterial ; Electrophoresis, Gel, Pulsed-Field ; Epidemiologic Studies ; Escherichia coli/*classification/drug effects/*genetics/isolation & purification ; Escherichia coli Infections/*epidemiology/microbiology ; *Genetic Variation ; Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Humans ; 
Incidence ; Molecular Typing ; Plasmids/analysis ; Sequence Analysis, DNA ; United Kingdom/epidemiology ; }, abstract = {OBJECTIVES: The objective of this study was to determine the population structure of Escherichia coli ST73 isolated from human bacteraemia and urinary tract infections.

METHODS: The genomes of 22 E. coli ST73 isolates were sequenced using the Illumina HiSeq platform. High-resolution SNP typing was used to create a phylogenetic tree. Comparative genomics were also performed using a pangenome approach. In silico and S1-PFGE plasmid profiling was conducted, and isolates were checked for their ability to survive exposure to human serum.

RESULTS: E. coli ST73 isolates circulating in clinically unrelated episodes show a high degree of diversity at a whole-genome level, but exhibit conservation in gene content, particularly in virulence-associated gene carriage. The isolates also contain a highly diverse plasmid pool that confers MDR via carriage of CTX-M genes.

CONCLUSIONS: Our data show that a rise in incidence of MDR E. coli ST73 clinical isolates is not due to a circulating outbreak strain as in E. coli ST131. Rather the ST73 circulating strains are distantly related and carry a diverse set of resistance plasmids. This suggests that the evolutionary events behind emergence of drug-resistant E. coli differ between lineages.}, } @article {pmid26504144, year = {2016}, author = {Baier, U and Beller, T and Ohlebusch, E}, title = {Graphical pan-genome analysis with compressed suffix trees and the Burrows-Wheeler transform.}, journal = {Bioinformatics (Oxford, England)}, volume = {32}, number = {4}, pages = {497-504}, doi = {10.1093/bioinformatics/btv603}, pmid = {26504144}, issn = {1367-4811}, mesh = {*Algorithms ; Computational Biology/*methods ; Computer Simulation ; *Genome, Human ; Genomics/*methods ; Humans ; Models, Genetic ; Sequence Analysis, DNA/*methods ; }, abstract = {MOTIVATION: Low-cost genome sequencing gives unprecedented complete information about the genetic structure of populations, and a population graph captures the variations between many individuals of a population. Recently, Marcus et al. proposed to use a compressed de Bruijn graph for representing an entire population of genomes. They devised an O(n log g) time algorithm called splitMEM that constructs this graph directly (i.e. without using the uncompressed de Bruijn graph) based on a suffix tree, where n is the total length of the genomes and g is the length of the longest genome. Since the applicability of their algorithm is limited to rather small datasets, there is a strong need for space-efficient construction algorithms.

RESULTS: We present two algorithms that outperform splitMEM in theory and in practice. The first implements a novel linear-time suffix tree algorithm by means of a compressed suffix tree. The second algorithm uses the Burrows-Wheeler transform to build the compressed de Bruijn graph in O(n log σ) time, where σ is the size of the alphabet. To demonstrate the scalability of the algorithms, we applied them to seven human genomes.

AVAILABILITY AND IMPLEMENTATION: https://www.uni-ulm.de/in/theo/research/seqana/.}, } @article {pmid26497500, year = {2015}, author = {Ariff, A and Wise, MJ and Kahler, CM and Tay, CY and Peters, F and Perkins, TT and Chang, BJ}, title = {Novel Moraxella catarrhalis prophages display hyperconserved non-structural genes despite their genomic diversity.}, journal = {BMC genomics}, volume = {16}, number = {}, pages = {860}, pmid = {26497500}, issn = {1471-2164}, mesh = {Codon ; Computational Biology/methods ; *Conserved Sequence ; Evolution, Molecular ; *Genetic Variation ; *Genome, Viral ; Genomics/methods ; Moraxella catarrhalis/*virology ; Multilocus Sequence Typing ; Phylogeny ; Prophages/classification/*genetics ; Viral Nonstructural Proteins/chemistry/*genetics ; Viral Proteins/chemistry/genetics ; Virulence/genetics ; }, abstract = {BACKGROUND: Moraxella catarrhalis is an important pathogen that often causes otitis media in children, a disease that is not currently vaccine preventable. Asymptomatic colonisation of the human upper respiratory tract is common and lack of clearance by the immune system is likely due to the emergence of seroresistant genetic lineages. No active bacteriophages or prophages have been described in this species. This study was undertaken to identify and categorise prophages in M. catarrhalis, their genetic diversity and the relationship of such diversity with the host-species phylogeny.

RESULTS: This study presents a comparative analysis of 32 putative prophages identified in 95 phylogenetically variable, newly sequenced M. catarrhalis genomes. The prophages were genotypically classified into four diverse clades. The genetic synteny of each clade is similar to the group 1 phage family Siphoviridae, however, they form genotypic clusters that are distinct from other members of this family. No core genetic sequences exist across the 32 prophages despite clades 2, 3, and 4 sharing the most sequence identity. The analysis of non-structural prophage genes (coding the integrase, and terminase), and portal gene showed that the respective genes were identical for clades 2, 3, and 4, but unique for clade 1. Empirical analysis calculated that these genes are unexpectedly hyperconserved, under purifying selection, suggesting a tightly regulated functional role. As such, it is improbable that the prophages are decaying remnants but stable components of a fluctuating, flexible and unpredictable system ultimately maintained by functional constraints on non-structural and packaging genes. Additionally, the plate encoding genes were well conserved across all four prophage clades, and the tail fibre genes, commonly responsible for receptor recognition, were clustered into three major groups distributed across the prophage clades. A pan-genome of 283,622 bp was identified, and the prophages were mapped onto the diverse M. catarrhalis multi-locus sequence type (MLST) backbone.

CONCLUSION: This study has provided the first evidence of putatively mobile prophages in M. catarrhalis, identifying a diverse and fluctuating system dependent on the hyperconservation of a few key, non-structural genes. Some prophages harbour virulence-related genes, and potentially influence the physiology and virulence of M. catarrhalis. Importantly our data will provide supporting information on the identification of novel prophages in other species by adding greater weight to the identification of non-structural genes.}, } @article {pmid26497129, year = {2015}, author = {Baig, A and McNally, A and Dunn, S and Paszkiewicz, KH and Corander, J and Manning, G}, title = {Genetic import and phenotype specific alleles associated with hyper-invasion in Campylobacter jejuni.}, journal = {BMC genomics}, volume = {16}, number = {}, pages = {852}, pmid = {26497129}, issn = {1471-2164}, mesh = {*Alleles ; Bacterial Capsules/genetics/metabolism ; Campylobacter Infections/microbiology ; Campylobacter jejuni/classification/*genetics/pathogenicity ; Gene Order ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics/methods ; Humans ; *Phenotype ; Phylogeny ; Polysaccharides, Bacterial/metabolism ; Quantitative Trait Loci ; }, abstract = {BACKGROUND: Campylobacter jejuni is a major zoonotic pathogen, causing gastroenteritis in humans. Invasion is an important pathogenesis trait by which C. jejuni causes disease. Here we report the genomic analysis of 134 strains to identify traits unique to hyperinvasive isolates.

METHODS: A total of 134 C. jejuni genomes were used to create a phylogenetic tree to position the hyperinvasive strains. Comparative genomics led to the identification of mosaic capsule regions. A pan genome approach led to the discovery of unique loci, or loci with unique alleles, to the hyperinvasive strains.

RESULTS: Phylogenetic analysis showed that the hyper-invasive phenotype is a generalist trait. Despite the fact that hyperinvasive strains are only distantly related based on the whole genome phylogeny, they all possess genes within the capsule region with high identity to capsule genes from C. jejuni subsp. doylei and C. lari. In addition there were genes unique to the hyper-invasive strains with identity to non-C. jejuni genes, as well as allelic variants of mainly pathogenesis related genes already known in the other C. jejuni. In particular, the sequence of flagella genes, flgD-E and flgL were highly conserved amongst the hyper-invasive strains and divergent from sequences in other C. jejuni. A novel cytolethal distending toxin (cdt) operon was also identified as present in all hyper-invasive strains in addition to the classic cdt operon present in other C. jejuni.

CONCLUSIONS: Overall, the hyper-invasive phenotype is strongly linked to the presence of orthologous genes from other Campylobacter species in their genomes, notably within the capsule region, in addition to the observed association with unique allelic variants in flagellar genes and the secondary cdt operon which is unlikely under random sharing of accessory alleles in separate lineages.}, } @article {pmid26489930, year = {2015}, author = {O'Callaghan, A and Bottacini, F and O'Connell Motherway, M and van Sinderen, D}, title = {Pangenome analysis of Bifidobacterium longum and site-directed mutagenesis through by-pass of restriction-modification systems.}, journal = {BMC genomics}, volume = {16}, number = {}, pages = {832}, pmid = {26489930}, issn = {1471-2164}, mesh = {Bifidobacterium/classification/*genetics/metabolism ; Carbohydrate Metabolism/genetics ; Computational Biology/methods ; DNA Methylation ; Epigenesis, Genetic ; Genes, Bacterial ; Genetic Loci ; Genetic Variation ; *Genome, Bacterial ; *Genomics/methods ; Mutagenesis, Site-Directed ; Open Reading Frames ; Phenotype ; Phylogeny ; Sequence Analysis, DNA ; Transformation, Bacterial ; }, abstract = {BACKGROUND: Bifidobacterial genome analysis has provided insights as to how these gut commensals adapt to and persist in the human GIT, while also revealing genetic diversity among members of a given bifidobacterial (sub)species. Bifidobacteria are notoriously recalcitrant to genetic modification, which prevents exploration of their genomic functions, including those that convey (human) health benefits.

METHODS: PacBio SMRT sequencing was used to determine the whole genome sequences of two B. longum subsp. longum strains. The B. longum pan-genome was computed using PGAP v1.2 and the core B. longum phylogenetic tree was constructed using a maximum-likelihood based approach in PhyML v3.0. M.blmNCII was cloned in E. coli and an internal fragment of arfB was cloned into pORI19 for insertion mutagenesis.

RESULTS: In this study we present the complete genome sequences of two Bifidobacterium longum subsp. longum strains. Comparative analysis with thirty one publicly available B. longum genomes allowed the definition of the B. longum core and dispensable genomes. This analysis also highlighted differences in particular metabolic abilities between members of the B. longum subspecies infantis, longum and suis. Furthermore, phylogenetic analysis of the B. longum core genome indicated the existence of a novel subspecies. Methylome data, coupled to the analysis of restriction-modification systems, allowed us to substantially increase the genetic accessibility of B. longum subsp. longum NCIMB 8809 to a level that was shown to permit site-directed mutagenesis.

CONCLUSIONS: Comparative genomic analysis of thirty three B. longum representatives revealed a closed pan-genome for this bifidobacterial species. Phylogenetic analysis of the B. longum core genome also provides evidence for a novel fifth B. longum subspecies. Finally, we improved genetic accessibility for the strain B. longum subsp. longum NCIMB 8809, which allowed the generation of a mutant of this strain.}, } @article {pmid26484663, year = {2015}, author = {Spring-Pearson, SM and Stone, JK and Doyle, A and Allender, CJ and Okinaka, RT and Mayo, M and Broomall, SM and Hill, JM and Karavis, MA and Hubbard, KS and Insalaco, JM and McNew, LA and Rosenzweig, CN and Gibbons, HS and Currie, BJ and Wagner, DM and Keim, P and Tuanyok, A}, title = {Pangenome Analysis of Burkholderia pseudomallei: Genome Evolution Preserves Gene Order despite High Recombination Rates.}, journal = {PloS one}, volume = {10}, number = {10}, pages = {e0140274}, pmid = {26484663}, issn = {1932-6203}, mesh = {Algorithms ; Burkholderia pseudomallei/classification/*genetics/isolation & purification ; Evolution, Molecular ; *Gene Order ; Gene Transfer, Horizontal ; Genes, Bacterial/*genetics ; Genetic Variation ; Genome, Bacterial/*genetics ; Models, Genetic ; Recombination, Genetic ; Species Specificity ; }, abstract = {The pangenomic diversity in Burkholderia pseudomallei is high, with approximately 5.8% of the genome consisting of genomic islands. Genomic islands are known hotspots for recombination driven primarily by site-specific recombination associated with tRNAs. However, recombination rates in other portions of the genome are also high, a feature we expected to disrupt gene order. We analyzed the pangenome of 37 isolates of B. pseudomallei and demonstrate that the pangenome is 'open', with approximately 136 new genes identified with each new genome sequenced, and that the global core genome consists of 4568±16 homologs. 
Genes associated with metabolism were statistically overrepresented in the core genome, and genes associated with mobile elements, disease, and motility were primarily associated with accessory portions of the pangenome. The frequency distribution of genes present in between 1 and 37 of the genomes analyzed matches well with a model of genome evolution in which 96% of the genome has very low recombination rates but 4% of the genome recombines readily. Using homologous genes among pairs of genomes, we found that gene order was highly conserved among strains, despite the high recombination rates previously observed. High rates of gene transfer and recombination are incompatible with retaining gene order unless these processes are either highly localized to specific sites within the genome, or are characterized by symmetrical gene gain and loss. Our results demonstrate that both processes occur: localized recombination introduces many new genes at relatively few sites, and recombination throughout the genome generates the novel multi-locus sequence types previously observed while preserving gene order.}, } @article {pmid26476454, year = {2016}, author = {Kanehisa, M and Sato, Y and Kawashima, M and Furumichi, M and Tanabe, M}, title = {KEGG as a reference resource for gene and protein annotation.}, journal = {Nucleic acids research}, volume = {44}, number = {D1}, pages = {D457-62}, pmid = {26476454}, issn = {1362-4962}, mesh = {*Amino Acid Sequence ; *Databases, Genetic ; Drug Resistance, Microbial ; *Genes ; Genome ; Metabolic Networks and Pathways ; *Molecular Sequence Annotation ; Plasmids/genetics ; Proteins/genetics ; Viruses/genetics ; }, abstract = {KEGG (http://www.kegg.jp/ or http://www.genome.jp/kegg/) is an integrated database resource for biological interpretation of genome sequences and other high-throughput data. Molecular functions of genes and proteins are associated with ortholog groups and stored in the KEGG Orthology (KO) database. 
The KEGG pathway maps, BRITE hierarchies and KEGG modules are developed as networks of KO nodes, representing high-level functions of the cell and the organism. Currently, more than 4000 complete genomes are annotated with KOs in the KEGG GENES database, which can be used as a reference data set for KO assignment and subsequent reconstruction of KEGG pathways and other molecular networks. As an annotation resource, the following improvements have been made. First, each KO record is re-examined and associated with protein sequence data used in experiments of functional characterization. Second, the GENES database now includes viruses, plasmids, and the addendum category for functionally characterized proteins that are not represented in complete genomes. Third, new automatic annotation servers, BlastKOALA and GhostKOALA, are made available utilizing the non-redundant pangenome data set generated from the GENES database. As a resource for translational bioinformatics, various data sets are created for antimicrobial resistance and drug interaction networks.}, } @article {pmid26458099, year = {2015}, author = {Tang, X and Li, J and Millán-Aguiñaga, N and Zhang, JJ and O'Neill, EC and Ugalde, JA and Jensen, PR and Mantovani, SM and Moore, BS}, title = {Identification of Thiotetronic Acid Antibiotic Biosynthetic Pathways by Target-directed Genome Mining.}, journal = {ACS chemical biology}, volume = {10}, number = {12}, pages = {2841-2849}, pmid = {26458099}, issn = {1554-8937}, support = {R01 GM085770/GM/NIGMS NIH HHS/United States ; U19 TW007401/TW/FIC NIH HHS/United States ; R01-GM085770/GM/NIGMS NIH HHS/United States ; U19-TW007401/TW/FIC NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/chemistry/metabolism/pharmacology ; Biosynthetic Pathways/*genetics ; Computational Biology ; Gene Targeting ; *Genome, Bacterial ; Hydroxybutyrates/chemistry/metabolism/pharmacology ; Molecular Structure ; Multigene Family ; Streptomyces/drug effects/genetics/physiology ; 
Sulfhydryl Compounds/chemistry/metabolism/pharmacology ; Thiophenes/chemistry/pharmacology ; }, abstract = {Recent genome sequencing efforts have led to the rapid accumulation of uncharacterized or "orphaned" secondary metabolic biosynthesis gene clusters (BGCs) in public databases. This increase in DNA-sequenced big data has given rise to significant challenges in the applied field of natural product genome mining, including (i) how to prioritize the characterization of orphan BGCs and (ii) how to rapidly connect genes to biosynthesized small molecules. Here, we show that by correlating putative antibiotic resistance genes that encode target-modified proteins with orphan BGCs, we predict the biological function of pathway specific small molecules before they have been revealed in a process we call target-directed genome mining. By querying the pan-genome of 86 Salinispora bacterial genomes for duplicated house-keeping genes colocalized with natural product BGCs, we prioritized an orphan polyketide synthase-nonribosomal peptide synthetase hybrid BGC (tlm) with a putative fatty acid synthase resistance gene. We employed a new synthetic double-stranded DNA-mediated cloning strategy based on transformation-associated recombination to efficiently capture tlm and the related ttm BGCs directly from genomic DNA and to heterologously express them in Streptomyces hosts. We show the production of a group of unusual thiotetronic acid natural products, including the well-known fatty acid synthase inhibitor thiolactomycin that was first described over 30 years ago, yet never at the genetic level in regards to biosynthesis and autoresistance. 
This finding not only validates the target-directed genome mining strategy for the discovery of antibiotic producing gene clusters without a priori knowledge of the molecule synthesized but also paves the way for the investigation of novel enzymology involved in thiotetronic acid natural product biosynthesis.}, } @article {pmid26456591, year = {2015}, author = {Paul, S and Bhardwaj, A and Bag, SK and Sokurenko, EV and Chattopadhyay, S}, title = {PanCoreGen - Profiling, detecting, annotating protein-coding genes in microbial genomes.}, journal = {Genomics}, volume = {106}, number = {6}, pages = {367-372}, pmid = {26456591}, issn = {1089-8646}, support = {R01 AI106007/AI/NIAID NIH HHS/United States ; }, mesh = {Bacterial Proteins/genetics ; Computational Biology/*methods ; Gene Expression Profiling/*methods ; Gene Transfer, Horizontal/genetics ; Genome, Bacterial/genetics ; Genome, Microbial/*genetics ; Molecular Sequence Annotation/*methods ; Open Reading Frames/*genetics ; Phylogeny ; Reproducibility of Results ; Salmonella enterica/classification/genetics ; Species Specificity ; }, abstract = {A large amount of genomic data, especially from multiple isolates of a single species, has opened new vistas for microbial genomics analysis. Analyzing the pan-genome (i.e. the sum of genetic repertoire) of microbial species is crucial in understanding the dynamics of molecular evolution, where virulence evolution is of major interest. Here we present PanCoreGen - a standalone application for pan- and core-genomic profiling of microbial protein-coding genes. PanCoreGen overcomes key limitations of the existing pan-genomic analysis tools, and develops an integrated annotation-structure for a species-specific pan-genomic profile. It provides important new features for annotating draft genomes/contigs and detecting unidentified genes in annotated genomes. It also generates user-defined group-specific datasets within the pan-genome. 
Interestingly, analyzing an example-set of Salmonella genomes, we detect potential footprints of adaptive convergence of horizontally transferred genes in two human-restricted pathogenic serovars - Typhi and Paratyphi A. Overall, PanCoreGen represents a state-of-the-art tool for microbial phylogenomics and pathogenomics study.}, } @article {pmid26455417, year = {2015}, author = {Kayansamruaj, P and Pirarat, N and Kondo, H and Hirono, I and Rodkhum, C}, title = {Genomic comparison between pathogenic Streptococcus agalactiae isolated from Nile tilapia in Thailand and fish-derived ST7 strains.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {36}, number = {}, pages = {307-314}, doi = {10.1016/j.meegid.2015.10.009}, pmid = {26455417}, issn = {1567-7257}, mesh = {Animals ; Cichlids/*microbiology ; Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; Drug Resistance, Bacterial/genetics ; Evolution, Molecular ; Genome, Bacterial/*genetics ; Phylogeny ; Prophages/genetics ; Streptococcal Infections/*microbiology/*veterinary ; Streptococcus agalactiae/*genetics ; }, abstract = {Streptococcus agalactiae, or Group B streptococcus (GBS), is a highly virulent pathogen in aquatic animals, causing huge mortalities worldwide. In Thailand, the serotype Ia, β-hemolytic GBS, belonging to sequence type (ST) 7 of clonal complex (CC) 7, was found to be the major cause of streptococcosis outbreaks in fish farms. In this study, we performed an in silico genomic comparison, aiming to investigate the phylogenetic relationship between the pathogenic fish strains of Thai ST7 and other ST7 from different hosts and geographical origins. In general, the genomes of Thai ST7 strains are closely related to other fish ST7s, as the core genome is shared by 92-95% of any individual fish ST7 genome. 
Among the fish ST7 genomes, we observed only small dissimilarities, based on the analysis of clustered regularly interspaced short palindromic repeats (CRISPRs), surface protein markers, insertions sequence (IS) elements and putative virulence genes. The phylogenetic tree based on single nucleotide polymorphisms (SNPs) of the core genome sequences clearly categorized the ST7 strains according to their geographical and host origins, with the human ST7 being genetically distant from other fish ST7 strains. A pan-genome analysis of ST7 strains detected a 48-kb gene island specifically in the Thai ST7 isolates. The orientations and predicted amino acid sequences of the genes in the island closely matched those of Tn5252, a streptococcal conjugative transposon, in GBS 2603V/R serotype V, Streptococcus pneumoniae and Streptococcus suis. Thus, it was presumed that Thai ST7 acquired this Tn5252 homologue from related streptococci. The close phylogenetic relationship between the fish ST7 strains suggests that these strains were derived from a common ancestor and have diverged in different geographical regions and in different hosts.}, } @article {pmid26452736, year = {2015}, author = {Sangal, V and Blom, J and Sutcliffe, IC and von Hunolstein, C and Burkovski, A and Hoskisson, PA}, title = {Adherence and invasive properties of Corynebacterium diphtheriae strains correlates with the predicted membrane-associated and secreted proteome.}, journal = {BMC genomics}, volume = {16}, number = {}, pages = {765}, pmid = {26452736}, issn = {1471-2164}, mesh = {ATP-Binding Cassette Transporters/genetics ; Amino Acid Sequence ; Bacterial Adhesion/genetics ; Corynebacterium diphtheriae/*genetics/pathogenicity ; *Genome, Bacterial ; Humans ; Membrane Proteins/genetics ; Proteome/*genetics ; Splenic Diseases/*genetics/microbiology/pathology ; }, abstract = {BACKGROUND: Non-toxigenic Corynebacterium diphtheriae strains are emerging as a major cause of severe pharyngitis and tonsillitis as 
well as invasive diseases such as endocarditis, septic arthritis, splenic abscesses and osteomyelitis. C. diphtheriae strains have been reported to vary in their ability to adhere and invade different cell lines. To identify the genetic basis of variation in the degrees of pathogenicity, we sequenced the genomes of four strains of C. diphtheriae (ISS 3319, ISS 4060, ISS 4746 and ISS 4749) that are well characterised in terms of their ability to adhere and invade mammalian cells.

RESULTS: Comparative analyses of 20 C. diphtheriae genome sequences, including 16 publicly available genomes, revealed a pan-genome comprising 3,989 protein coding sequences that include 1,625 core genes and 2,364 accessory genes. Most of the genomic variation between these strains relates to uncharacterised genes encoding hypothetical proteins or transposases. Further analyses of protein sequences using an array of bioinformatic tools predicted most of the accessory proteome to be located in the cytoplasm. The membrane-associated and secreted proteins are generally involved in adhesion and virulence characteristics. The genes encoding membrane-associated proteins, especially the number and organisation of the pilus gene clusters (spa) including the number of genes encoding surface proteins with LPXTG motifs differed between different strains. Other variations were among the genes encoding extracellular proteins, especially substrate binding proteins of different functional classes of ABC transport systems and 'non-classical' secreted proteins.

CONCLUSIONS: The structure and organisation of the spa gene clusters correlates with differences in the ability of C. diphtheriae strains to adhere and invade the host cells. Furthermore, differences in the number of genes encoding membrane-associated proteins, e.g., additional proteins with LPXTG motifs could also result in variation in the adhesive properties between different strains. The variation in the secreted proteome may be associated with the degree of pathogenesis. While the role of the 'non-classical' secretome in virulence remains unclear, differences in the substrate binding proteins of various ABC transport systems and cytoplasmic proteins potentially suggest strain variation in nutritional requirements or a differential ability to utilize various carbon sources.}, } @article {pmid26442149, year = {2015}, author = {Rouli, L and Merhej, V and Fournier, PE and Raoult, D}, title = {The bacterial pangenome as a new tool for analysing pathogenic bacteria.}, journal = {New microbes and new infections}, volume = {7}, number = {}, pages = {72-85}, pmid = {26442149}, issn = {2052-2975}, abstract = {The bacterial pangenome was introduced in 2005 and, in recent years, has been the subject of many studies. Thanks to progress in next-generation sequencing methods, the pangenome can be divided into two parts, the core (common to the studied strains) and the accessory genome, offering a large panel of uses. In this review, we have presented the analysis methods, the pangenome composition and its application as a study of lifestyle. We have also shown that the pangenome may be used as a new tool for redefining the pathogenic species. 
We applied this to the Escherichia coli and Shigella species, which have been a subject of controversy regarding their taxonomic and pathogenic position.}, } @article {pmid26420254, year = {2015}, author = {Caputo, A and Merhej, V and Georgiades, K and Fournier, PE and Croce, O and Robert, C and Raoult, D}, title = {Pan-genomic analysis to redefine species and subspecies based on quantum discontinuous variation: the Klebsiella paradigm.}, journal = {Biology direct}, volume = {10}, number = {}, pages = {55}, pmid = {26420254}, issn = {1745-6150}, mesh = {*Classification ; *Genome, Bacterial ; *Genomics ; Klebsiella/*classification/*genetics ; Phylogeny ; }, abstract = {BACKGROUND: Various methods are currently used to define species and are based on the phylogenetic marker 16S ribosomal RNA gene sequence, DNA-DNA hybridization and DNA GC content. However, these are restricted genetic tools and showed significant limitations.

RESULTS: In this work, we describe an alternative method to build taxonomy by analyzing the pan-genome composition of different species of the Klebsiella genus. Klebsiella species are Gram-negative bacilli belonging to the large Enterobacteriaceae family. Interestingly, when comparing the core/pan-genome ratio, we found a clear discontinuous variation that can define a new species.

CONCLUSIONS: Using this pan-genomic approach, we showed that Klebsiella pneumoniae subsp. ozaenae and Klebsiella pneumoniae subsp. rhinoscleromatis are species of the Klebsiella genus, rather than subspecies of Klebsiella pneumoniae. This pan-genomic analysis helped to develop a new tool for defining species introducing a quantic perspective for taxonomy.}, } @article {pmid26409790, year = {2015}, author = {Jun, SR and Robeson, MS and Hauser, LJ and Schadt, CW and Gorin, AA}, title = {PanFP: pangenome-based functional profiles for microbial communities.}, journal = {BMC research notes}, volume = {8}, number = {}, pages = {479}, pmid = {26409790}, issn = {1756-0500}, mesh = {*Algorithms ; Bacteria/*genetics ; Metagenome/*genetics ; Metagenomics/*methods ; Sequence Analysis, DNA ; Statistics, Nonparametric ; }, abstract = {BACKGROUND: For decades there has been increasing interest in understanding the relationships between microbial communities and ecosystem functions. Current DNA sequencing technologies allow for the exploration of microbial communities in two principal ways: targeted rRNA gene surveys and shotgun metagenomics. For large study designs, it is often still prohibitively expensive to sequence metagenomes at both the breadth and depth necessary to statistically capture the true functional diversity of a community. Although rRNA gene surveys provide no direct evidence of function, they do provide a reasonable estimation of microbial diversity, while being a very cost-effective way to screen samples of interest for later shotgun metagenomic analyses. However, there is a great deal of 16S rRNA gene survey data currently available from diverse environments, and thus a need for tools to infer functional composition of environmental samples based on 16S rRNA gene survey data.

RESULTS: We present a computational method called pangenome-based functional profiles (PanFP), which infers functional profiles of microbial communities from 16S rRNA gene survey data for Bacteria and Archaea. PanFP is based on pangenome reconstruction of a 16S rRNA gene operational taxonomic unit (OTU) from known genes and genomes pooled from the OTU's taxonomic lineage. From this lineage, we derive an OTU functional profile by weighting a pangenome's functional profile with the OTU's abundance observed in a given sample. We validated our method by comparing PanFP to the functional profiles obtained from the direct shotgun metagenomic measurement of 65 diverse communities via Spearman correlation coefficients. These correlations improved with increasing sequencing depth, within the range of 0.8-0.9 for the most deeply sequenced Human Microbiome Project mock community samples. PanFP is very similar in performance to another recently released tool, PICRUSt, for almost all of the survey data analysed here. But, our method is unique in that any OTU building method can be used, as opposed to being limited to closed-reference OTU picking strategies against specific reference sequence databases.

CONCLUSIONS: We developed an automated computational method, which derives an inferred functional profile based on the 16S rRNA gene surveys of microbial communities. The inferred functional profile provides a cost effective way to study complex ecosystems through predicted comparative functional metagenomes and metadata analysis. All PanFP source code and additional documentation are freely available online at GitHub (https://github.com/srjun/PanFP).}, } @article {pmid26404761, year = {2016}, author = {Pierron, A and Mimoun, S and Murate, LS and Loiseau, N and Lippi, Y and Bracarense, AP and Liaubet, L and Schatzmayr, G and Berthiller, F and Moll, WD and Oswald, IP}, title = {Intestinal toxicity of the masked mycotoxin deoxynivalenol-3-β-D-glucoside.}, journal = {Archives of toxicology}, volume = {90}, number = {8}, pages = {2037-2046}, doi = {10.1007/s00204-015-1592-8}, pmid = {26404761}, issn = {1432-0738}, mesh = {Animals ; Caco-2 Cells ; Cell Culture Techniques ; Cell Survival/drug effects ; Cytokines/genetics ; Food Contamination/*analysis ; Glucosides/*toxicity ; Humans ; Jejunum/*drug effects/metabolism/pathology ; MAP Kinase Signaling System/drug effects ; Peptidyl Transferases/metabolism ; Protein Binding ; Ribosomes/drug effects/enzymology ; Swine ; Transcriptome/drug effects ; Trichothecenes/*toxicity ; p38 Mitogen-Activated Protein Kinases/metabolism ; }, abstract = {Natural food contaminants such as mycotoxins are an important problem for human health. Deoxynivalenol (DON) is one of the most common mycotoxins detected in cereals and grains. Its toxicological effects mainly concern the immune system and the gastrointestinal tract. This toxin is a potent ribotoxic stressor leading to MAP kinase activation and inflammatory response. DON frequently co-occurs with its glucosylated form, the masked mycotoxin deoxynivalenol-3-β-D-glucoside (D3G). The toxicity of this later compound remains unknown in mammals. 
This study aimed to assess the ability of D3G to elicit a ribotoxic stress and to induce intestinal toxicity. The toxicity of D3G and DON (0-10 µM) was studied in vitro, on the human intestinal Caco-2 cell line, and ex vivo, on porcine jejunal explants. First, an in silico analysis revealed that D3G, contrary to DON, was unable to bind to the A-site of the ribosome peptidyl transferase center, the main targets for DON toxicity. Accordingly, D3G did not activate JNK and P38 MAPKs in treated Caco-2 cells and did not alter viability and barrier function on cells, as measured by the trans-epithelial electrical resistance. Treatment of intestinal explants for 4 h with 10 µM DON induced morphological lesions and up-regulated the expression of pro-inflammatory cytokines as measured by qPCR and pan-genomic microarray analysis. By contrast, expression profile of D3G-treated explants was similar to that of controls, and these explants did not show histomorphology alteration. In conclusion, our data demonstrated that glucosylation of DON suppresses its ability to bind to the ribosome and decreases its intestinal toxicity.}, } @article {pmid26383601, year = {2015}, author = {Yu, G and Wang, XC and Tian, WH and Shi, JC and Wang, B and Ye, Q and Dong, SG and Zeng, M and Wang, JZ}, title = {Genomic Diversity and Evolution of Bacillus subtilis.}, journal = {Biomedical and environmental sciences : BES}, volume = {28}, number = {8}, pages = {620-625}, doi = {10.3967/bes2015.087}, pmid = {26383601}, issn = {0895-3988}, mesh = {Bacillus subtilis/*genetics ; *Evolution, Molecular ; *Genes, Bacterial ; Polymorphism, Single Nucleotide ; }, abstract = {Bacillus subtilis is the focus of both academic and industrial research. Previous studies have reported a number of sequence variations in different B. subtilis strains. To uncover the genetic variation and evolutionary pressure in B. subtilis strains, we performed whole genome sequencing of two B. subtilis isolates, KM and CGMCC63528. 
Comparative genomic analyses of these two strains with other B. subtilis strains identified high sequence variations including large insertions, deletions and SNPs. Most SNPs in genes were synonymous and the average frequency of synonymous mutations was significantly higher than that of the non-synonymous mutations. Pan-genome analysis of B. subtilis strains showed that the core genome had lower dN/dS values than the accessory genome. Whole genome comparisons of these two isolates with other B. subtilis strains showed that strains in different subspecies have similar dN/dS values. Nucleotide diversity analysis showed that spizizenii subspecies have higher nucleotide diversity than subtilis subspecies. Our results indicate that genes in B. subtilis strains are under high purifying selection pressure. The evolutionary pressure in different subspecies of B. subtilis is complex.}, } @article {pmid26357267, year = {2015}, author = {Mehmood, T and Bohlin, J and Snipen, L}, title = {A Partial Least Squares Based Procedure for Upstream Sequence Classification in Prokaryotes.}, journal = {IEEE/ACM transactions on computational biology and bioinformatics}, volume = {12}, number = {3}, pages = {560-567}, doi = {10.1109/TCBB.2014.2366146}, pmid = {26357267}, issn = {1557-9964}, mesh = {Algorithms ; DNA, Bacterial/analysis/genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; Least-Squares Analysis ; Regulatory Sequences, Nucleic Acid/*genetics ; Sequence Analysis, DNA/*methods ; }, abstract = {The upstream region of coding genes is important for several reasons, for instance locating transcription factor, binding sites, and start site initiation in genomic DNA. Motivated by a recently conducted study, where multivariate approach was successfully applied to coding sequence modeling, we have introduced a partial least squares (PLS) based procedure for the classification of true upstream prokaryotic sequence from background upstream sequence. 
The upstream sequences of conserved coding genes over genomes were considered in analysis, where conserved coding genes were found by using pan-genomics concept for each considered prokaryotic species. PLS uses position specific scoring matrix (PSSM) to study the characteristics of upstream region. Results obtained by PLS based method were compared with Gini importance of random forest (RF) and support vector machine (SVM), which is much used method for sequence classification. The upstream sequence classification performance was evaluated by using cross validation, and suggested approach identifies prokaryotic upstream region significantly better to RF (p-value < 0.01) and SVM (p-value < 0.01). Further, the proposed method also produced results that concurred with known biological characteristics of the upstream region.}, } @article {pmid26340565, year = {2015}, author = {Galardini, M and Brilli, M and Spini, G and Rossi, M and Roncaglia, B and Bani, A and Chiancianesi, M and Moretto, M and Engelen, K and Bacci, G and Pini, F and Biondi, EG and Bazzicalupo, M and Mengoni, A}, title = {Evolution of Intra-specific Regulatory Networks in a Multipartite Bacterial Genome.}, journal = {PLoS computational biology}, volume = {11}, number = {9}, pages = {e1004478}, pmid = {26340565}, issn = {1553-7358}, mesh = {Computational Biology ; Evolution, Molecular ; Gene Regulatory Networks/*genetics ; Genome, Bacterial/*genetics ; *Models, Genetic ; Sinorhizobium meliloti/genetics ; }, abstract = {Reconstruction of the regulatory network is an important step in understanding how organisms control the expression of gene products and therefore phenotypes. Recent studies have pointed out the importance of regulatory network plasticity in bacterial adaptation and evolution. The evolution of such networks within and outside the species boundary is however still obscure. 
Sinorhizobium meliloti is an ideal species for such study, having three large replicons, many genomes available and a significant knowledge of its transcription factors (TF). Each replicon has a specific functional and evolutionary mark; which might also emerge from the analysis of their regulatory signatures. Here we have studied the plasticity of the regulatory network within and outside the S. meliloti species, looking for the presence of 41 TFs binding motifs in 51 strains and 5 related rhizobial species. We have detected a preference of several TFs for one of the three replicons, and the function of regulated genes was found to be in accordance with the overall replicon functional signature: house-keeping functions for the chromosome, metabolism for the chromid, symbiosis for the megaplasmid. This therefore suggests a replicon-specific wiring of the regulatory network in the S. meliloti species. At the same time a significant part of the predicted regulatory network is shared between the chromosome and the chromid, thus adding an additional layer by which the chromid integrates itself in the core genome. Furthermore, the regulatory network distance was found to be correlated with both promoter regions and accessory genome evolution inside the species, indicating that both pangenome compartments are involved in the regulatory network evolution. We also observed that genes which are not included in the species regulatory network are more likely to belong to the accessory genome, indicating that regulatory interactions should also be considered to predict gene conservation in bacterial pangenomes.}, } @article {pmid26336600, year = {2015}, author = {Cock, PJ and Chilton, JM and Grüning, B and Johnson, JE and Soranzo, N}, title = {NCBI BLAST+ integrated into Galaxy.}, journal = {GigaScience}, volume = {4}, number = {}, pages = {39}, pmid = {26336600}, issn = {2047-217X}, mesh = {*Computational Biology ; Internet ; National Institutes of Health (U.S.) 
; United States ; }, abstract = {BACKGROUND: The NCBI BLAST suite has become ubiquitous in modern molecular biology and is used for small tasks such as checking capillary sequencing results of single PCR products, genome annotation or even larger scale pan-genome analyses. For early adopters of the Galaxy web-based biomedical data analysis platform, integrating BLAST into Galaxy was a natural step for sequence comparison workflows.

FINDINGS: The command line NCBI BLAST+ tool suite was wrapped for use within Galaxy. Appropriate datatypes were defined as needed. The integration of the BLAST+ tool suite into Galaxy has the goal of making common BLAST tasks easy and advanced tasks possible.

CONCLUSIONS: This project is an informal international collaborative effort, and is deployed and used on Galaxy servers worldwide. Several examples of applications are described here.}, } @article {pmid26328606, year = {2015}, author = {Hennig, A and Bernhardt, J and Nieselt, K}, title = {Pan-Tetris: an interactive visualisation for Pan-genomes.}, journal = {BMC bioinformatics}, volume = {16}, number = {Suppl 11}, pages = {S3}, pmid = {26328606}, issn = {1471-2105}, mesh = {Algorithms ; Bacterial Proteins/*genetics ; Computational Biology/*methods ; *Computer Graphics ; *Genome, Bacterial ; Genomics/*methods ; *Software ; Staphylococcus aureus/*genetics ; }, abstract = {BACKGROUND: Large-scale genome projects have paved the way to microbial pan-genome analyses. Pan-genomes describe the union of all genes shared by all members of the species or taxon under investigation. They offer a framework to assess the genomic diversity of a given collection of individual genomes and moreover they help to consolidate gene predictions and annotations. The computation of pan-genomes is often a challenge, and many techniques that use a global alignment-independent approach run the risk of not separating paralogs from orthologs. Also alignment-based approaches which take the gene neighbourhood into account often need additional manual curation of the results. This is quite time consuming and so far there is no visualisation tool available that offers an interactive GUI for the pan-genome to support curating pan-genomic computations or annotations of orthologous genes.

RESULTS: We introduce Pan-Tetris, a Java based interactive software tool that provides a clearly structured and suitable way for the visual inspection of gene occurrences in a pan-genome table. The main features of Pan-Tetris are a standard coordinate based presentation of multiple genomes complemented by easy to use tools compensating for algorithmic weaknesses in the pan-genome generation workflow. We demonstrate an application of Pan-Tetris to the pan-genome of Staphylococcus aureus.

CONCLUSIONS: Pan-Tetris is currently the only interactive pan-genome visualisation tool. Pan-Tetris is available from http://bit.ly/1vVxYZT.}, } @article {pmid26317361, year = {2015}, author = {Boussaha, M and Esquerré, D and Barbieri, J and Djari, A and Pinton, A and Letaief, R and Salin, G and Escudié, F and Roulet, A and Fritz, S and Samson, F and Grohs, C and Bernard, M and Klopp, C and Boichard, D and Rocha, D}, title = {Genome-Wide Study of Structural Variants in Bovine Holstein, Montbéliarde and Normande Dairy Breeds.}, journal = {PloS one}, volume = {10}, number = {8}, pages = {e0135931}, pmid = {26317361}, issn = {1932-6203}, mesh = {Animals ; Animals, Inbred Strains ; Cattle/*genetics ; Dairying ; Genome-Wide Association Study ; *Genomic Structural Variation ; Genotype ; Quantitative Trait Loci ; }, abstract = {High-throughput sequencing technologies have offered in recent years new opportunities to study genome variations. These studies have mostly focused on single nucleotide polymorphisms, small insertions or deletions and on copy number variants. Other structural variants, such as large insertions or deletions, tandem duplications, translocations, and inversions are less well-studied, despite that some have an important impact on phenotypes. In the present study, we performed a large-scale survey of structural variants in cattle. We report the identification of 6,426 putative structural variants in cattle extracted from whole-genome sequence data of 62 bulls representing the three major French dairy breeds. These genomic variants affect DNA segments greater than 50 base pairs and correspond to deletions, inversions and tandem duplications. Out of these, we identified a total of 547 deletions and 410 tandem duplications which could potentially code for CNVs. Experimental validation was carried out on 331 structural variants using a novel high-throughput genotyping method. 
Out of these, 255 structural variants (77%) generated good quality genotypes and 191 (75%) of them were validated. Gene content analyses in structural variant regions revealed 941 large deletions removing completely one or several genes, including 10 single-copy genes. In addition, some of the structural variants are located within quantitative trait loci for dairy traits. This study is a pan-genome assessment of genomic variations in cattle and may provide a new glimpse into the bovine genome architecture. Our results may also help to study the effects of structural variants on gene expression and consequently their effect on certain phenotypes of interest.}, } @article {pmid26303830, year = {2016}, author = {Povolotsky, TL and Hengge, R}, title = {Genome-Based Comparison of Cyclic Di-GMP Signaling in Pathogenic and Commensal Escherichia coli Strains.}, journal = {Journal of bacteriology}, volume = {198}, number = {1}, pages = {111--126}, pmid = {26303830}, issn = {1098-5530}, mesh = {Amino Acid Sequence ; Conserved Sequence ; Cyclic GMP/*analogs & derivatives/genetics/metabolism ; Escherichia coli/*classification/genetics/*metabolism/pathogenicity ; Escherichia coli Proteins/genetics/*metabolism ; Gene Expression Regulation, Bacterial/*physiology ; Genome, Bacterial ; Protein Structure, Tertiary ; Signal Transduction/*physiology ; }, abstract = {UNLABELLED: The ubiquitous bacterial second messenger cyclic di-GMP (c-di-GMP) has recently become prominent as a trigger for biofilm formation in many bacteria. It is generated by diguanylate cyclases (DGCs; with GGDEF domains) and degraded by specific phosphodiesterases (PDEs; containing either EAL or HD-GYP domains). Most bacterial species contain multiples of these proteins with some having specific functions that are based on direct molecular interactions in addition to their enzymatic activities. 
Escherichia coli K-12 laboratory strains feature 29 genes encoding GGDEF and/or EAL domains, resulting in a set of 12 DGCs, 13 PDEs, and four enzymatically inactive "degenerate" proteins that act by direct macromolecular interactions. We present here a comparative analysis of GGDEF/EAL domain-encoding genes in 61 genomes of pathogenic, commensal, and probiotic E. coli strains (including enteric pathogens such as enteroaggregative, enterohemorrhagic, enteropathogenic, enterotoxigenic, and adherent and invasive Escherichia coli and the 2011 German outbreak O104:H4 strain, as well as extraintestinal pathogenic E. coli, such as uropathogenic and meningitis-associated E. coli). We describe additional genes for two membrane-associated DGCs (DgcX and DgcY) and four PDEs (the membrane-associated PdeT, as well as the EAL domain-only proteins PdeW, PdeX, and PdeY), thus showing the pangenome of E. coli to contain at least 35 GGDEF/EAL domain proteins. A core set of only eight proteins is absolutely conserved in all 61 strains: DgcC (YaiC), DgcI (YliF), PdeB (YlaB), PdeH (YhjH), PdeK (YhjK), PdeN (Rtn), and the degenerate proteins CsrD and CdgI (YeaI). In all other GGDEF/EAL domain genes, diverse point and frameshift mutations, as well as small or large deletions, were discovered in various strains.

IMPORTANCE: Our analysis reveals interesting trends in pathogenic Escherichia coli that could reflect different host cell adherence mechanisms. These may either benefit from or be counteracted by the c-di-GMP-stimulated production of amyloid curli fibers and cellulose. Thus, EAEC, which adhere in a "stacked brick" biofilm mode, have a potential for high c-di-GMP accumulation due to DgcX, a strongly expressed additional DGC. In contrast, EHEC and UPEC, which use alternative adherence mechanisms, tend to have extra PDEs, suggesting that low cellular c-di-GMP levels are crucial for these strains under specific conditions. Overall, our study also indicates that GGDEF/EAL domain proteins evolve rapidly and thereby contribute to adaptation to host-specific and environmental niches of various types of E. coli.}, } @article {pmid26284032, year = {2015}, author = {Fullmer, MS and Soucy, SM and Gogarten, JP}, title = {The pan-genome as a shared genomic resource: mutual cheating, cooperation and the black queen hypothesis.}, journal = {Frontiers in microbiology}, volume = {6}, pages = {728}, pmid = {26284032}, issn = {1664-302X}, } @article {pmid26275230, year = {2015}, author = {Chaplin, AV and Efimov, BA and Smeianov, VV and Kafarskaia, LI and Pikina, AP and Shkoporov, AN}, title = {Intraspecies Genomic Diversity and Long-Term Persistence of Bifidobacterium longum.}, journal = {PloS one}, volume = {10}, number = {8}, pages = {e0135658}, pmid = {26275230}, issn = {1932-6203}, mesh = {Bifidobacterium/*genetics/isolation & purification ; Child ; Child, Preschool ; Clustered Regularly Interspaced Short Palindromic Repeats ; Feces/microbiology ; *Gastrointestinal Microbiome/genetics ; *Genetic Variation ; Genome, Bacterial ; Glycoside Hydrolases/genetics ; Humans ; Infant ; Longitudinal Studies ; Minisatellite Repeats ; Molecular Sequence Data ; *Phylogeny ; Plasmids ; }, abstract = {Members of genus Bifidobacterium are Gram-positive bacteria, representing a large 
part of the human infant microbiota and moderately common in adults. However, our knowledge about their diversity, intraspecific phylogeny and long-term persistence in humans is still limited. Bifidobacterium longum is generally considered to be the most common and prevalent species in the intestinal microbiota. In this work we studied whole genome sequences of 28 strains of B. longum, including 8 sequences described in this paper. Part of these strains were isolated from healthy children during a long observation period (up to 10 years between isolation from the same patient). The three known subspecies (longum, infantis and suis) could be clearly divided using sequence-based phylogenetic methods, gene content and the average nucleotide identity. The profiles of glycoside hydrolase genes reflected the different ecological specializations of these three subspecies. The high impact of horizontal gene transfer on genomic diversity was observed, which is possibly due to a large number of prophages and rapidly spreading plasmids. The pan-genome characteristics of the subspecies longum corresponded to the open pan-genome model. While the major part of the strain-specific genetic loci represented transposons and phage-derived regions, a large number of cell envelope synthesis genes were also observed within this category, representing high variability of cell surface molecules. We observed the cases of isolation of high genetically similar strains of B. 
longum from the same patients after long periods of time, however, we didn't succeed in the isolation of genetically identical bacteria: a fact, reflecting the high plasticity of microbiota in children.}, } @article {pmid26262842, year = {2015}, author = {Salama, R and Masson, N and Simpson, P and Sciesielski, LK and Sun, M and Tian, YM and Ratcliffe, PJ and Mole, DR}, title = {Heterogeneous Effects of Direct Hypoxia Pathway Activation in Kidney Cancer.}, journal = {PloS one}, volume = {10}, number = {8}, pages = {e0134645}, pmid = {26262842}, issn = {1932-6203}, support = {/WT_/Wellcome Trust/United Kingdom ; 078333/Z/05/Z/WT_/Wellcome Trust/United Kingdom ; A16016/CRUK_/Cancer Research UK/United Kingdom ; WT091857MA/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Basic Helix-Loop-Helix Transcription Factors/genetics/metabolism ; Binding Sites ; Cell Line, Tumor ; Gene Expression ; Humans ; Hypoxia/*metabolism ; Hypoxia-Inducible Factor 1/genetics/metabolism ; Hypoxia-Inducible Factor 1, alpha Subunit/genetics/metabolism ; Kidney Neoplasms/genetics/*metabolism/mortality ; Prognosis ; Protein Binding ; Protein Interaction Domains and Motifs ; *Signal Transduction ; Transcriptional Activation ; }, abstract = {General activation of hypoxia-inducible factor (HIF) pathways is classically associated with adverse prognosis in cancer and has been proposed to contribute to oncogenic drive. In clear cell renal carcinoma (CCRC) HIF pathways are upregulated by inactivation of the von-Hippel-Lindau tumor suppressor. However HIF-1α and HIF-2α have contrasting effects on experimental tumor progression. To better understand this paradox we examined pan-genomic patterns of HIF DNA binding and associated gene expression in response to manipulation of HIF-1α and HIF-2α and related the findings to CCRC prognosis. Our findings reveal distinct pan-genomic organization of canonical and non-canonical HIF isoform-specific DNA binding at thousands of sites. 
Overall associations were observed between HIF-1α-specific binding, and genes associated with favorable prognosis and between HIF-2α-specific binding and adverse prognosis. However within each isoform-specific set, individual gene associations were heterogeneous in sign and magnitude, suggesting that activation of each HIF-α isoform contributes a highly complex mix of pro- and anti-tumorigenic effects.}, } @article {pmid26261031, year = {2016}, author = {Udaondo, Z and Molina, L and Segura, A and Duque, E and Ramos, JL}, title = {Analysis of the core genome and pangenome of Pseudomonas putida.}, journal = {Environmental microbiology}, volume = {18}, number = {10}, pages = {3268--3283}, doi = {10.1111/1462-2920.13015}, pmid = {26261031}, issn = {1462-2920}, mesh = {Biological Transport/*genetics ; Carbon/metabolism ; Energy Metabolism/*genetics ; Genome, Bacterial/*genetics ; Membrane Transport Proteins/*genetics/metabolism ; Open Reading Frames ; Pseudomonas putida/*genetics/metabolism ; }, abstract = {Pseudomonas putida are strict aerobes that proliferate in a range of temperate niches and are of interest for environmental applications due to their capacity to degrade pollutants and ability to promote plant growth. Furthermore solvent-tolerant strains are useful for biosynthesis of added-value chemicals. We present a comprehensive comparative analysis of nine strains and the first characterization of the Pseudomonas putida pangenome. The core genome of P. putida comprises approximately 3386 genes. The most abundant genes within the core genome are those that encode nutrient transporters. Other conserved genes include those for central carbon metabolism through the Entner-Doudoroff pathway, the pentose phosphate cycle, arginine and proline metabolism, and pathways for degradation of aromatic chemicals. 
Genes that encode transporters, enzymes and regulators for amino acid metabolism (synthesis and degradation) are all part of the core genome, as well as various electron transporters, which enable aerobic metabolism under different oxygen regimes. Within the core genome are 30 genes for flagella biosynthesis and 12 key genes for biofilm formation. Pseudomonas putida strains share 85% of the coding regions with Pseudomonas aeruginosa; however, in P. putida, virulence factors such as exotoxins and type III secretion systems are absent.}, } @article {pmid26259823, year = {2015}, author = {Martín-Moldes, Z and Zamarro, MT and Del Cerro, C and Valencia, A and Gómez, MJ and Arcas, A and Udaondo, Z and García, JL and Nogales, J and Carmona, M and Díaz, E}, title = {Whole-genome analysis of Azoarcus sp. strain CIB provides genetic insights to its different lifestyles and predicts novel metabolic features.}, journal = {Systematic and applied microbiology}, volume = {38}, number = {7}, pages = {462--471}, doi = {10.1016/j.syapm.2015.07.002}, pmid = {26259823}, issn = {1618-0984}, mesh = {Adaptation, Biological ; Aerobiosis ; Anaerobiosis ; Azoarcus/*genetics/*physiology ; *Computational Biology ; Drug Resistance, Bacterial ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Interspersed Repetitive Sequences ; Metabolic Networks and Pathways/*genetics ; Metals, Heavy/metabolism/toxicity ; Molecular Sequence Data ; *Multigene Family ; Nitrogen Fixation ; *Sequence Analysis, DNA ; }, abstract = {The genomic features of Azoarcus sp. CIB reflect its most distinguishing phenotypes as a diazotroph, facultative anaerobe, capable of degrading either aerobically and/or anaerobically a wide range of aromatic compounds, including some toxic hydrocarbons such as toluene and m-xylene, as well as its endophytic lifestyle. 
The analyses of its genome have expanded the catabolic potential of strain CIB toward common natural compounds, such as certain diterpenes, that were not anticipated as carbon sources. The high number of predicted solvent efflux pumps and heavy metal resistance gene clusters has provided the first evidence for two environmentally relevant features of this bacterium that remained unknown. Genome mining has revealed several gene clusters likely involved in the endophytic lifestyle of strain CIB, opening the door to the molecular characterization of some plant growth promoting traits. Horizontal gene transfer and mobile genetic elements appear to have played a major role as a mechanism of adaptation of this bacterium to different lifestyles. This work paves the way for a systems biology-based understanding of the abilities of Azoarcus sp. CIB to integrate aerobic and anaerobic metabolism of aromatic compounds, tolerate stress conditions, and interact with plants as an endophyte of great potential for phytostimulation and phytoremediation strategies. Comparative genomics provides an Azoarcus pan genome that confirms the global metabolic flexibility of this genus, and suggests that its phylogeny should be revisited.}, } @article {pmid26257709, year = {2015}, author = {Nelson, WC and Stegen, JC}, title = {The reduced genomes of Parcubacteria (OD1) contain signatures of a symbiotic lifestyle.}, journal = {Frontiers in microbiology}, volume = {6}, pages = {713}, pmid = {26257709}, issn = {1664-302X}, abstract = {Candidate phylum OD1 bacteria (also referred to as Parcubacteria) have been identified in a broad range of anoxic environments through community survey analysis. Although none of these species have been isolated in the laboratory, several genome sequences have been reconstructed from metagenomic sequence data and single-cell sequencing. The organisms have small (generally <1 Mb) genomes with severely reduced metabolic capabilities. 
We have reconstructed 8 partial to near-complete OD1 genomes from oxic groundwater samples, and compared them against existing genomic data. The conserved core gene set comprises 202 genes, or ~28% of the genomic complement. "Housekeeping" genes and genes for biosynthesis of peptidoglycan and Type IV pilus production are conserved. Gene sets for biosynthesis of cofactors, amino acids, nucleotides, and fatty acids are absent entirely or greatly reduced. The only aspects of energy metabolism conserved are the non-oxidative branch of the pentose-phosphate shunt and central glycolysis. These organisms also lack some activities conserved in almost all other known bacterial genomes, including signal recognition particle, pseudouridine synthase A, and FAD synthase. Pan-genome analysis indicates a broad genotypic diversity and perhaps a highly fluid gene complement, indicating historical adaptation to a wide range of growth environments and a high degree of specialization. The genomes were examined for signatures suggesting either a free-living, streamlined lifestyle, or a symbiotic lifestyle. The lack of biosynthetic capabilities and DNA repair, along with the presence of potential attachment and adhesion proteins suggest that the Parcubacteria are ectosymbionts or parasites of other organisms. 
The wide diversity of genes that potentially mediate cell-cell contact suggests a broad range of partner/prey organisms across the phylum.}, } @article {pmid26254574, year = {2015}, author = {Lomonaco, S and Nucera, D and Filipello, V}, title = {The evolution and epidemiology of Listeria monocytogenes in Europe and the United States.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {35}, pages = {172--183}, doi = {10.1016/j.meegid.2015.08.008}, pmid = {26254574}, issn = {1567-7257}, mesh = {Europe/epidemiology ; Evolution, Molecular ; Food Microbiology ; Genome, Bacterial ; Genomics ; Humans ; Listeria monocytogenes/*classification/genetics/*pathogenicity ; Listeriosis/*epidemiology/*microbiology ; Phylogeny ; Selection, Genetic ; United States/epidemiology ; }, abstract = {Listeria monocytogenes is an opportunistic food-borne pathogen responsible for listeriosis, a disease associated with high mortality rates. L. monocytogenes causes invasive syndromes and case-fatality can be as high as 30%, in specific high-risk population groups such as the elderly, immuno-compromised individuals, fetuses and newborns. Acquisition of the disease is mainly due to consumption of contaminated (predominantly ready-to-eat) food. We aimed to provide a state-of-the-art collection of different likely evolutionary models, based on recombination and positive selection, and the phylogenetic relationship between lineages of L. monocytogenes and between them and other Listeria species. We described the most recent findings in comparative pan-genomics, considering the core and accessory genome in relation to virulence and adaptation to different environments. Finally, this review illustrates L. 
monocytogenes epidemiology and transmission in humans, foods and animals, the surveillance systems of the European Union and United States and the application of molecular techniques as a core tool in epidemiological investigation.}, } @article {pmid26253671, year = {2015}, author = {Zheng, J and Ruan, L and Sun, M and Gänzle, M}, title = {A Genomic View of Lactobacilli and Pediococci Demonstrates that Phylogeny Matches Ecology and Physiology.}, journal = {Applied and environmental microbiology}, volume = {81}, number = {20}, pages = {7233--7243}, pmid = {26253671}, issn = {1098-5336}, mesh = {Ecology ; Genome, Bacterial/*genetics ; Lactobacillus/classification/*genetics ; Pediococcus/classification/*genetics ; Phylogeny ; }, abstract = {Lactobacilli are used widely in food, feed, and health applications. The taxonomy of the genus Lactobacillus, however, is confounded by the apparent lack of physiological markers for phylogenetic groups of lactobacilli and the unclear relationships between the diverse phylogenetic groups. This study used the core and pan-genomes of 174 type strains of Lactobacillus and Pediococcus to establish phylogenetic relationships and to identify metabolic properties differentiating phylogenetic groups. The core genome phylogenetic tree separated homofermentative lactobacilli and pediococci from heterofermentative lactobacilli. Aldolase and phosphofructokinase were generally present in homofermentative but not in heterofermentative lactobacilli; a two-domain alcohol dehydrogenase and mannitol dehydrogenase were present in most heterofermentative lactobacilli but absent in most homofermentative organisms. Other genes were predominantly present in homofermentative lactobacilli (pyruvate formate lyase) or heterofermentative lactobacilli (lactaldehyde dehydrogenase and glycerol dehydratase). 
Cluster analysis of the phylogenomic tree and the average nucleotide identity grouped the genus Lactobacillus sensu lato into 24 phylogenetic groups, including pediococci, with stable intra- and intergroup relationships. Individual groups may be differentiated by characteristic metabolic properties. The link between phylogeny and physiology that is proposed in this study facilitates future studies on the ecology, physiology, and industrial applications of lactobacilli.}, } @article {pmid26239655, year = {2015}, author = {Gaborit, B and Venteclef, N and Ancel, P and Pelloux, V and Gariboldi, V and Leprince, P and Amour, J and Hatem, SN and Jouve, E and Dutour, A and Clément, K}, title = {Human epicardial adipose tissue has a specific transcriptomic signature depending on its anatomical peri-atrial, peri-ventricular, or peri-coronary location.}, journal = {Cardiovascular research}, volume = {108}, number = {1}, pages = {62--73}, doi = {10.1093/cvr/cvv208}, pmid = {26239655}, issn = {1755-3245}, mesh = {Adipose Tissue/*metabolism ; Aged ; Aged, 80 and over ; Female ; Humans ; Male ; Middle Aged ; Pericardium/*metabolism ; *Transcriptome ; }, abstract = {AIMS: Human epicardial adipose tissue (EAT) is a visceral and perivascular fat that has been shown to act locally on myocardium, atria, and coronary arteries. Its abundance has been linked to coronary artery disease (CAD) and atrial fibrillation. However, its physiological function remains highly debated. The aim of this study was to determine a specific EAT transcriptomic signature, depending on its anatomical peri-atrial (PA), peri-ventricular (PV), or peri-coronary location.

METHODS AND RESULTS: Samples of EAT and thoracic subcutaneous fat, obtained from 41 patients paired for cardiovascular risk factors, CAD, and atrial fibrillation were analysed using a pangenomic approach. We found 2728 significantly up-regulated genes in the EAT vs. subcutaneous fat with 400 genes being common between PA, PV, and peri-coronary EAT. These common genes were related to extracellular matrix remodelling, inflammation, infection, and thrombosis pathways. Omentin (ITLN1) was the most up-regulated gene and secreted adipokine in EAT (fold-change >12, P < 0.0001). Among EAT-enriched genes, we observed different patterns depending on adipose tissue location. A beige expression phenotype was found in EAT but PV EAT highly expressed uncoupled protein 1 (P = 0.01). Genes overexpressed in peri-coronary EAT were implicated in proliferation, O-N glycan biosynthesis, and sphingolipid metabolism. PA EAT displayed an atypical pattern with genes implicated in cardiac muscle contraction and intracellular calcium signalling pathway.

CONCLUSION: This study opens new perspectives in understanding the physiology of human EAT and its local interaction with neighbouring structures.}, } @article {pmid26230489, year = {2015}, author = {Roach, DJ and Burton, JN and Lee, C and Stackhouse, B and Butler-Wu, SM and Cookson, BT and Shendure, J and Salipante, SJ}, title = {A Year of Infection in the Intensive Care Unit: Prospective Whole Genome Sequencing of Bacterial Clinical Isolates Reveals Cryptic Transmissions and Novel Microbiota.}, journal = {PLoS genetics}, volume = {11}, number = {7}, pages = {e1005413}, pmid = {26230489}, issn = {1553-7404}, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Bacteria/classification/genetics/*isolation & purification ; Bacterial Infections/microbiology/*transmission ; Bacterial Typing Techniques ; Biodiversity ; Cross Infection/microbiology/transmission ; DNA, Bacterial/genetics ; Female ; Genetic Variation ; Genome, Bacterial/*genetics ; Humans ; Infant ; Infant, Newborn ; *Intensive Care Units ; Male ; Microbiota/*genetics ; Middle Aged ; Molecular Epidemiology ; Prospective Studies ; Tertiary Care Centers ; Young Adult ; }, abstract = {Bacterial whole genome sequencing holds promise as a disruptive technology in clinical microbiology, but it has not yet been applied systematically or comprehensively within a clinical context. Here, over the course of one year, we performed prospective collection and whole genome sequencing of nearly all bacterial isolates obtained from a tertiary care hospital's intensive care units (ICUs). This unbiased collection of 1,229 bacterial genomes from 391 patients enables detailed exploration of several features of clinical pathogens. A sizable fraction of isolates identified as clinically relevant corresponded to previously undescribed species: 12% of isolates assigned a species-level classification by conventional methods actually qualified as distinct, novel genomospecies on the basis of genomic similarity. 
Pan-genome analysis of the most frequently encountered pathogens in the collection revealed substantial variation in pan-genome size (1,420 to 20,432 genes) and the rate of gene discovery (1 to 152 genes per isolate sequenced). Surprisingly, although potential nosocomial transmission of actively surveilled pathogens was rare, 8.7% of isolates belonged to genomically related clonal lineages that were present among multiple patients, usually with overlapping hospital admissions, and were associated with clinically significant infection in 62% of patients from which they were recovered. Multi-patient clonal lineages were particularly evident in the neonatal care unit, where seven separate Staphylococcus epidermidis clonal lineages were identified, including one lineage associated with bacteremia in 5/9 neonates. Our study highlights key differences in the information made available by conventional microbiological practices versus whole genome sequencing, and motivates the further integration of microbial genome sequencing into routine clinical care.}, } @article {pmid26215705, year = {2014}, author = {Cazalis, MA and Lepape, A and Venet, F and Frager, F and Mougin, B and Vallin, H and Paye, M and Pachot, A and Monneret, G}, title = {Early and dynamic changes in gene expression in septic shock patients: a genome-wide approach.}, journal = {Intensive care medicine experimental}, volume = {2}, number = {1}, pages = {20}, pmid = {26215705}, issn = {2197-425X}, abstract = {BACKGROUND: As early and appropriate care of severe septic patients is associated with better outcome, understanding of the very first events in the disease process is needed. Pan-genomic analyses offer an interesting opportunity to study global genomic response within the very first hours after sepsis. 
The objective of this study was to investigate the systemic genomic response in severe intensive care unit (ICU) patients and determine whether patterns of gene expression could be associated with clinical severity evaluated by the severity score.

METHODS: Twenty-eight ICU patients were enrolled at the onset of septic shock. Blood samples were collected within 30 min and 24 and 48 h after shock and genomic response was evaluated using microarrays. The genome-wide expression pattern of blood leukocytes was sequentially compared to healthy volunteers and after stratification based on Simplified Acute Physiology Score II (SAPSII) score to identify potential mechanisms of dysregulation.

RESULTS: Septic shock induces a global reprogramming of the whole leukocyte transcriptome affecting multiple functions and pathways (>71% of the whole genome was modified). Most altered pathways were not significantly different between SAPSII-high and SAPSII-low groups of patients. However, the magnitude and the duration of these alterations were different between these two groups. Importantly, we observed that the more severe patients did not exhibit the strongest modulation. This indicates that some regulation mechanisms leading to recovery seem to take place at the early stage.

CONCLUSIONS: In conclusion, both pro- and anti-inflammatory processes, measured at the transcriptomic level, are induced within the very first hours after septic shock. Interestingly, the more severe patients did not exhibit the strongest modulation. This highlights that not only the responses mechanisms by themselves but mainly their early and appropriate regulation are crucial for patient recovery. This reinforces the idea that an immediate and tailored aggressive care of patients, aimed at restoring an appropriately regulated immune response, may have a beneficial impact on the outcome.}, } @article {pmid26198743, year = {2015}, author = {Espinoza-Valles, I and Vora, GJ and Lin, B and Leekitcharoenphon, P and González-Castillo, A and Ussery, D and Høj, L and Gomez-Gil, B}, title = {Unique and conserved genome regions in Vibrio harveyi and related species in comparison with the shrimp pathogen Vibrio harveyi CAIM 1792.}, journal = {Microbiology (Reading, England)}, volume = {161}, number = {9}, pages = {1762--1779}, doi = {10.1099/mic.0.000141}, pmid = {26198743}, issn = {1465-2080}, mesh = {Animals ; Cluster Analysis ; Computational Biology ; *Conserved Sequence ; Decapoda/microbiology ; *Evolution, Molecular ; Genes, Bacterial ; *Genome, Bacterial ; Genomics/methods ; Multigene Family ; Multilocus Sequence Typing ; Phylogeny ; Polymorphism, Single Nucleotide ; Proteome ; Vibrio/classification/*genetics/metabolism ; }, abstract = {Vibrio harveyi CAIM 1792 is a marine bacterial strain that causes mortality in farmed shrimp in north-west Mexico, and the identification of virulence genes in this strain is important for understanding its pathogenicity. The aim of this work was to compare the V. harveyi CAIM 1792 genome with related genome sequences to determine their phylogenic relationship and explore unique regions in silico that differentiate this strain from other V. harveyi strains. 
Twenty-one newly sequenced genomes were compared in silico against the CAIM 1792 genome at nucleotidic and predicted proteome levels. The proteome of CAIM 1792 had higher similarity to those of other V. harveyi strains (78%) than to those of the other closely related species Vibrio owensii (67%), Vibrio rotiferianus (63%) and Vibrio campbellii (59%). Pan-genome ORFans trees showed the best fit with the accepted phylogeny based on DNA-DNA hybridization and multi-locus sequence analysis of 11 concatenated housekeeping genes. SNP analysis clustered 34/38 genomes within their accepted species. The pangenomic and SNP trees showed that V. harveyi is the most conserved of the four species studied and V. campbellii may be divided into at least three subspecies, supported by intergenomic distance analysis. blastp atlases were created to identify unique regions among the genomes most related to V. harveyi CAIM 1792; these regions included genes encoding glycosyltransferases, specific type restriction modification systems and a transcriptional regulator, LysR, reported to be involved in virulence, metabolism, quorum sensing and motility.}, } @article {pmid26198102, year = {2015}, author = {Page, AJ and Cummins, CA and Hunt, M and Wong, VK and Reuter, S and Holden, MT and Fookes, M and Falush, D and Keane, JA and Parkhill, J}, title = {Roary: rapid large-scale prokaryote pan genome analysis.}, journal = {Bioinformatics (Oxford, England)}, volume = {31}, number = {22}, pages = {3691-3693}, pmid = {26198102}, issn = {1367-4811}, support = {MR/M501608/1/MRC_/Medical Research Council/United Kingdom ; WT 098051//Wellcome Trust/United Kingdom ; }, mesh = {Computer Simulation ; Databases, Genetic ; *Genome, Bacterial ; Prokaryotic Cells/*metabolism ; Salmonella typhi/genetics ; *Software ; }, abstract = {UNLABELLED: A typical prokaryote population sequencing study can now consist of hundreds or thousands of isolates. 
Interrogating these datasets can provide detailed insights into the genetic structure of prokaryotic genomes. We introduce Roary, a tool that rapidly builds large-scale pan genomes, identifying the core and accessory genes. Roary makes construction of the pan genome of thousands of prokaryote samples possible on a standard desktop without compromising on the accuracy of results. Using a single CPU Roary can produce a pan genome consisting of 1000 isolates in 4.5 hours using 13 GB of RAM, with further speedups possible using multiple processors.

Roary is implemented in Perl and is freely available under an open source GPLv3 license from http://sanger-pathogens.github.io/Roary

CONTACT: roary@sanger.ac.uk

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid26195261, year = {2015}, author = {Chan, AP and Sutton, G and DePew, J and Krishnakumar, R and Choi, Y and Huang, XZ and Beck, E and Harkins, DM and Kim, M and Lesho, EP and Nikolich, MP and Fouts, DE}, title = {A novel method of consensus pan-chromosome assembly and large-scale comparative analysis reveal the highly flexible pan-genome of Acinetobacter baumannii.}, journal = {Genome biology}, volume = {16}, number = {1}, pages = {143}, pmid = {26195261}, issn = {1474-760X}, support = {HHSN272200900007C/AI/NIAID NIH HHS/United States ; HHSN272200900007C//PHS HHS/United States ; }, mesh = {Acinetobacter baumannii/*genetics/isolation & purification/pathogenicity ; Algorithms ; *Chromosomes, Bacterial ; Gene Order ; Genes, Essential ; *Genome, Bacterial ; Genomic Islands ; Genomics/*methods ; Humans ; Metabolic Networks and Pathways/genetics ; Military Personnel ; Virulence/genetics ; }, abstract = {BACKGROUND: Infections by pan-drug resistant Acinetobacter baumannii plague military and civilian healthcare systems. Previous A. baumannii pan-genomic studies used modest sample sizes of low diversity and comparisons to a single reference genome, limiting our understanding of gene order and content. A consensus representation of multiple genomes will provide a better framework for comparison. A large-scale comparative study will identify genomic determinants associated with their diversity and adaptation as a successful pathogen.

RESULTS: We determine draft-level genomic sequence of 50 diverse military isolates and conduct the largest bacterial pan-genome analysis of 249 genomes. The pan-genome of A. baumannii is open when the input genomes are normalized for diversity with 1867 core proteins and a paralog-collapsed pan-genome size of 11,694 proteins. We developed a novel graph-based algorithm and use it to assemble the first consensus pan-chromosome, identifying both the order and orientation of core genes and flexible genomic regions. Comparative genome analyses demonstrate the existence of novel resistance islands and isolates with increased numbers of resistance island insertions over time, from single insertions in the 1950s to triple insertions in 2011. Gene clusters responsible for carbon utilization, siderophore production, and pilus assembly demonstrate frequent gain or loss among isolates.

CONCLUSIONS: The highly variable and dynamic nature of the A. baumannii genome may be the result of its success in rapidly adapting to both abiotic and biotic environments through the gain and loss of gene clusters controlling fitness. Importantly, some archaic adaptation mechanisms appear to have reemerged among recent isolates.}, } @article {pmid26163675, year = {2015}, author = {Bolotin, E and Hershberg, R}, title = {Gene Loss Dominates As a Source of Genetic Variation within Clonal Pathogenic Bacterial Species.}, journal = {Genome biology and evolution}, volume = {7}, number = {8}, pages = {2173-2187}, pmid = {26163675}, issn = {1759-6653}, mesh = {Bacteria/genetics ; Bacterial Proteins/genetics ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genes, Bacterial ; *Genetic Variation ; Genome, Bacterial ; Molecular Sequence Annotation ; Pseudogenes ; }, abstract = {Some of the most dangerous pathogens such as Mycobacterium tuberculosis and Yersinia pestis evolve clonally. This means that little or no recombination occurs between strains belonging to these species. Paradoxically, although different members of these species show extreme sequence similarity of orthologous genes, some show considerable intraspecies phenotypic variation, the source of which remains elusive. To examine the possible sources of phenotypic variation within clonal pathogenic bacterial species, we carried out an extensive genomic and pan-genomic analysis of the sources of genetic variation available to a large collection of clonal and nonclonal pathogenic bacterial species. We show that while nonclonal species diversify through a combination of changes to gene sequences, gene loss and gene gain, gene loss completely dominates as a source of genetic variation within clonal species. 
Indeed, gene loss is so prevalent within clonal species as to lead to levels of gene content variation comparable to those found in some nonclonal species that are much more diverged in their gene sequences and that acquire a substantial number of genes horizontally. Gene loss therefore needs to be taken into account as a potential dominant source of phenotypic variation within clonal bacterial species.}, } @article {pmid26163434, year = {2015}, author = {Loguercio, LL and Argôlo-Filho, RC}, title = {Anthropogenic action shapes the evolutionary ecology of Bacillus thuringiensis: response to Ruan et al.}, journal = {Trends in microbiology}, volume = {23}, number = {9}, pages = {519-520}, doi = {10.1016/j.tim.2015.06.002}, pmid = {26163434}, issn = {1878-4380}, mesh = {Animals ; Bacillus thuringiensis/*classification/*isolation & purification ; *Ecosystem ; Nematoda/*microbiology ; *Soil Microbiology ; }, } @article {pmid26131958, year = {2015}, author = {Assis, FL and Bajrai, L and Abrahao, JS and Kroon, EG and Dornas, FP and Andrade, KR and Boratto, PV and Pilotto, MR and Robert, C and Benamar, S and Scola, BL and Colson, P}, title = {Pan-Genome Analysis of Brazilian Lineage A Amoebal Mimiviruses.}, journal = {Viruses}, volume = {7}, number = {7}, pages = {3483-3499}, pmid = {26131958}, issn = {1999-4915}, mesh = {Base Sequence ; Brazil ; Fresh Water/*virology ; *Genome, Viral ; Mimiviridae/chemistry/classification/*genetics/isolation & purification ; Molecular Sequence Data ; Phylogeny ; Sequence Alignment ; }, abstract = {Since the recent discovery of Samba virus, the first representative of the family Mimiviridae from Brazil, prospecting for mimiviruses has been conducted in different environmental conditions in Brazil. Recently, we isolated using Acanthamoeba sp. three new mimiviruses, all of lineage A of amoebal mimiviruses: Kroon virus from urban lake water; Amazonia virus from the Brazilian Amazon river; and Oyster virus from farmed oysters. 
The aims of this work were to sequence and analyze the genome of these new Brazilian mimiviruses (mimi-BR) and update the analysis of the Samba virus genome. The genomes of Samba virus, Amazonia virus and Oyster virus were 97%-99% similar, whereas Kroon virus had a low similarity (90%-91%) with other mimi-BR. A total of 3877 proteins encoded by mimi-BR were grouped into 974 orthologous clusters. In addition, we identified three new ORFans in the Kroon virus genome. Additional work is needed to expand our knowledge of the diversity of mimiviruses from Brazil, including if and why among amoebal mimiviruses those of lineage A predominate in the Brazilian environment.}, } @article {pmid26119945, year = {2016}, author = {Berbegall, AP and Villamón, E and Piqueras, M and Tadeo, I and Djos, A and Ambros, PF and Martinsson, T and Ambros, IM and Cañete, A and Castel, V and Navarro, S and Noguera, R}, title = {Comparative genetic study of intratumoral heterogenous MYCN amplified neuroblastoma versus aggressive genetic profile neuroblastic tumors.}, journal = {Oncogene}, volume = {35}, number = {11}, pages = {1423-1432}, pmid = {26119945}, issn = {1476-5594}, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Child ; Child, Preschool ; Chromosome Deletion ; Chromosomes, Human, Pair 1/genetics ; Chromosomes, Human, Pair 11/genetics ; Chromosomes, Human, Pair 17/genetics ; Chromosomes, Human, Pair 2/genetics ; Cohort Studies ; Gene Dosage/*genetics ; Humans ; In Situ Hybridization, Fluorescence ; Middle Aged ; N-Myc Proto-Oncogene Protein ; Neuroblastoma/classification/*genetics ; Nuclear Proteins/*genetics ; Oncogene Proteins/*genetics ; Polymorphism, Single Nucleotide/genetics ; Young Adult ; }, abstract = {Intratumoral heterogeneous MYCN amplification (hetMNA) is an unusual event in neuroblastoma with unascertained biological and clinical implications. 
Diagnosis is based on the detection of MYCN amplification surrounded by non-amplified tumor cells by fluorescence in situ hybridization (FISH). To better define the genetic features of hetMNA tumors, we studied the Spanish cohort of neuroblastic tumors by FISH and single nucleotide polymorphism arrays. We compared hetMNA tumors with homogeneous MNA (homMNA) and nonMNA tumors with 11q deletion (nonMNA w11q-). Of 1091 primary tumors, 28 were hetMNA by FISH. Intratumoral heterogeneity of 1p, 2p, 11q and 17q was closely associated with hetMNA tumors when analyzing different pieces for each case. For chromosome 2, 16 cases showed 2p intact, 4 focal gain at 2p24.3 and 8 MNA. The lengths of the smallest regions of overlap (SROs) for 2p gains and 1p deletions were between the SRO lengths observed in homMNA and nonMNA w11q- tumors. Co-occurrence of 11q- and +17q was frequently found with the largest SROs for both aberrations. The evidence for and frequency of different genetic subpopulations representing a hallmark of the hetMNA subgroup of NB indicates, on the one hand, the presence of a considerable genetic instability with different SROs of either gains or losses compared with those of the other NB groups and highlights, on the other hand, the need for multiple sampling from distant and macroscopically and microscopically distinct tumor areas. Narrowing down the different SROs for both deletions and gains in NB groups would be crucial to pinpointing the candidate gene(s) and the critical gene dosage with prognostic and therapeutic significance. 
This complexity of segmental chromosomal aberration patterns reinforces the necessity for a larger cohort study using FISH and pangenomic techniques to develop a suitable therapeutic strategy for these patients.}, } @article {pmid26085550, year = {2015}, author = {Knight, DR and Elliott, B and Chang, BJ and Perkins, TT and Riley, TV}, title = {Diversity and Evolution in the Genome of Clostridium difficile.}, journal = {Clinical microbiology reviews}, volume = {28}, number = {3}, pages = {721-741}, pmid = {26085550}, issn = {1098-6618}, mesh = {Clostridioides difficile/classification/*genetics/pathogenicity ; Clostridium Infections/*microbiology ; *Evolution, Molecular ; *Genetic Variation ; Genome, Bacterial/*genetics ; Humans ; Phylogeny ; }, abstract = {Clostridium difficile infection (CDI) is the leading cause of antimicrobial and health care-associated diarrhea in humans, presenting a significant burden to global health care systems. In the last 2 decades, PCR- and sequence-based techniques, particularly whole-genome sequencing (WGS), have significantly furthered our knowledge of the genetic diversity, evolution, epidemiology, and pathogenicity of this once enigmatic pathogen. C. difficile is taxonomically distinct from many other well-known clostridia, with a diverse population structure comprising hundreds of strain types spread across at least 6 phylogenetic clades. The C. difficile species is defined by a large diverse pangenome with extreme levels of evolutionary plasticity that has been shaped over long time periods by gene flux and recombination, often between divergent lineages. These evolutionary events are in response to environmental and anthropogenic activities and have led to the rapid emergence and worldwide dissemination of virulent clonal lineages. Moreover, genome analysis of large clinically relevant data sets has improved our understanding of CDI outbreaks, transmission, and recurrence. 
The epidemiology of CDI has changed dramatically over the last 15 years, and CDI may have a foodborne or zoonotic etiology. The WGS era promises to continue to redefine our view of this significant pathogen.}, } @article {pmid26063294, year = {2015}, author = {Fernández-Romero, N and Romero-Gómez, MP and Mora-Rillo, M and Rodríguez-Baño, J and López-Cerero, L and Pascual, Á and Mingorance, J}, title = {Uncoupling between core genome and virulome in extraintestinal pathogenic Escherichia coli.}, journal = {Canadian journal of microbiology}, volume = {61}, number = {9}, pages = {647-652}, doi = {10.1139/cjm-2014-0835}, pmid = {26063294}, issn = {1480-3275}, mesh = {Escherichia coli/classification/*genetics/isolation & purification/*pathogenicity ; Escherichia coli Infections/*microbiology ; Escherichia coli Proteins/genetics/metabolism ; Genetic Variation ; *Genome, Bacterial ; Genotype ; Humans ; Multilocus Sequence Typing ; Polymorphism, Single Nucleotide ; Virulence ; }, abstract = {Extraintestinal pathogenic Escherichia coli (ExPEC) are among the most frequently isolated bacterial pathogens in hospitals. They are considered opportunistic pathogens and are found mostly in urinary and bloodstream infections. They are genetically diverse, and many studies have sought associations between genotypes or virulence genes and infection site, severity, or outcome, with varied, often contradictory, results. To understand these difficulties, we have analyzed the diversity patterns in the core genomes and virulomes of more than 500 ExPEC isolates from 5 different collections. The core genome was analyzed using a multilocus sequence type-based single-nucleotide polymorphism (SNP) pyrosequencing approach, while the virulence gene content (the virulome) was studied by polymerase chain reaction detection of 25 representative genes. 
SNP typing showed a similar population structure in the different collections: half of the isolates belong to a few sequence types (5 to 8), while the other half is composed of a large diversity of sequence types that are found once or twice. Sampling analysis by rarefaction plots of SNP profiles showed saturation curves indicative of a limited diversity. Contrary to this, the virulome shows an extremely high diversity, with almost as many gene profiles as isolates, and linear, nonsaturating, rarefaction plots, even within sequence types. These data show that genetic exchange rates are very heterogeneous along the chromosome, being much higher in the virulome fraction of the genome than in the core genome.}, } @article {pmid26051958, year = {2015}, author = {Andreani, NA and Carraro, L and Martino, ME and Fondi, M and Fasolato, L and Miotto, G and Magro, M and Vianello, F and Cardazzo, B}, title = {A genomic and transcriptomic approach to investigate the blue pigment phenotype in Pseudomonas fluorescens.}, journal = {International journal of food microbiology}, volume = {213}, number = {}, pages = {88-98}, doi = {10.1016/j.ijfoodmicro.2015.05.024}, pmid = {26051958}, issn = {1879-3460}, mesh = {Dairy Products/*microbiology ; Down-Regulation ; Energy Metabolism/genetics ; *Food Microbiology ; Gene Expression Profiling ; Genomics ; Oxidoreductases/genetics ; Oxygen Consumption/genetics ; Phenotype ; Phylogeny ; Pigments, Biological/*genetics ; Pseudomonas fluorescens/genetics/*metabolism ; Transcriptome/genetics ; Tryptophan/biosynthesis ; Up-Regulation ; }, abstract = {Pseudomonas fluorescens is a well-known food spoiler, able to cause serious economic losses in the food industry due to its ability to produce many extracellular, and often thermostable, compounds. The most outstanding spoilage events involving P. fluorescens were blue discoloration of several food stuffs, mainly dairy products. 
The bacteria involved in such high-profile cases have been identified as belonging to a clearly distinct phylogenetic cluster of the P. fluorescens group. Although the blue pigment has recently been investigated in several studies, the biosynthetic pathway leading to the pigment formation, as well as its chemical nature, remain challenging and unsolved points. In the present paper, genomic and transcriptomic data of 4 P. fluorescens strains (2 blue-pigmenting strains and 2 non-pigmenting strains) were analyzed to evaluate the presence and the expression of blue strain-specific genes. In particular, the pangenome analysis showed the presence in the blue-pigmenting strains of two copies of genes involved in the tryptophan biosynthesis pathway (including trpABCDF). The global expression profiling of blue-pigmenting strains versus non-pigmenting strains showed a general up-regulation of genes involved in iron uptake and a down-regulation of genes involved in primary metabolism. Chromogenic reaction of the blue-pigmenting bacterial cells with Kovac's reagent indicated an indole-derivative as the precursor of the blue pigment. 
Finally, solubility tests and MALDI-TOF mass spectrometry analysis of the isolated pigment suggested that its molecular structure is very probably a hydrophobic indigo analog.}, } @article {pmid26041381, year = {2015}, author = {Radusky, LG and Hassan, S and Lanzarotti, E and Tiwari, S and Jamal, S and Ali, J and Ali, A and Ferreira, R and Barh, D and Silva, A and Turjanski, AG and Azevedo, VA}, title = {An integrated structural proteomics approach along the druggable genome of Corynebacterium pseudotuberculosis species for putative druggable targets.}, journal = {BMC genomics}, volume = {16 Suppl 5}, number = {Suppl 5}, pages = {S9}, pmid = {26041381}, issn = {1471-2164}, mesh = {Algorithms ; Animals ; Anti-Bacterial Agents/*pharmacology ; Base Sequence ; Binding Sites ; Computational Biology/*methods ; Corynebacterium Infections/drug therapy/*veterinary ; Corynebacterium pseudotuberculosis/*drug effects/*genetics ; Genome, Bacterial/drug effects/genetics ; Humans ; Open Reading Frames/genetics ; Proteomics/methods ; }, abstract = {BACKGROUND: The bacterium Corynebacterium pseudotuberculosis (Cp) causes caseous lymphadenitis (CLA), mastitis, ulcerative lymphangitis, and oedema in a number of hosts, comprising ruminants, thereby intimidating economic and dairy industries worldwide. So far there is no effective drug or vaccine available against Cp. Previously, a pan-genomic analysis was performed for both biovar equi and biovar ovis and a Pathogenicity Islands (PAIS) analysis within the strains highlighted a large set of proteins that could be relevant therapeutic targets for controlling the onset of CLA. In the present work, a structural druggability analysis pipeline was accomplished along 15 previously sequenced Cp strains from both biovar equi and biovar ovis.

METHODS AND RESULTS: We computed the whole modelome of the reference strain Cp1002 (NCBI Accession: NC_017300.1), and homology models were then also built for those proteins of 14 different Cp strains that have high identity (≥ 85%) to the reference strain. The druggability score of every protein pocket was calculated, and only those targets that have a highly druggable (HD) pocket in all strains were kept, yielding a set of 58 proteins. Finally, this information was merged with the previous PAIS analysis, giving two possible highly relevant targets for drug discovery projects. Also, off-targeting information against host organisms, including Homo sapiens, and a further analysis of protein essentiality provided a final set of 31 druggable, essential and non-host homologous targets, tabulated in Table S4, Additional file 1. Out of the 31 globally druggable targets, 9 have already been reported in other pathogenic microorganisms, 3 of them (3-isopropylmalate dehydratase small subunit, 50S ribosomal protein L30, Chromosomal replication initiator protein DnaA) in C. pseudotuberculosis.

CONCLUSION: Overall we provide valuable information of possible targets against C. pseudotuberculosis where some of these targets have already been reported in other microorganisms for drug discovery projects, also discarding targets that might be physiologically relevant but are not amenable for drug binding. We propose that the constructed in silico dataset might serve as a guidance for the scientific community to have a better understanding while selecting putative therapeutic protein candidates as druggable ones as effective measures against C. pseudotuberculosis.}, } @article {pmid26032364, year = {2015}, author = {Kim, JN and Kim, Y and Jeong, Y and Roe, JH and Kim, BG and Cho, BK}, title = {Comparative Genomics Reveals the Core and Accessory Genomes of Streptomyces Species.}, journal = {Journal of microbiology and biotechnology}, volume = {25}, number = {10}, pages = {1599-1605}, doi = {10.4014/jmb.1504.04008}, pmid = {26032364}, issn = {1738-8872}, mesh = {Computational Biology ; Genes, Bacterial ; *Genetic Variation ; *Genome, Bacterial ; Genomics ; Streptomyces/*classification/*genetics ; }, abstract = {The development of rapid and efficient genome sequencing methods has enabled us to study the evolutionary background of bacterial genetic information. Here, we present comparative genomic analysis of 17 Streptomyces species, for which the genome has been completely sequenced, using the pan-genome approach. The analysis revealed that 34,592 ortholog clusters constituted the pan-genome of these Streptomyces species, including 2,018 in the core genome, 11,743 in the dispensable genome, and 20,831 in the unique genome. The core genome was converged to a smaller number of genes than reported previously, with 3,096 gene families. Functional enrichment analysis showed that genes involved in transcription were most abundant in the Streptomyces pan-genome. 
Finally, we investigated core genes for the sigma factors, mycothiol biosynthesis pathway, and secondary metabolism pathways; our data showed that many genes involved in stress response and morphological differentiation were commonly expressed in Streptomyces species. Elucidation of the core genome offers a basis for understanding the functional evolution of Streptomyces species and provides insights into target selection for the construction of industrial strains.}, } @article {pmid26017550, year = {2015}, author = {Song, G and Dickins, BJ and Demeter, J and Engel, S and Gallagher, J and Choe, K and Dunn, B and Snyder, M and Cherry, JM}, title = {Correction: AGAPE (Automated Genome Analysis PipelinE) for Pan-Genome Analysis of Saccharomyces cerevisiae.}, journal = {PloS one}, volume = {10}, number = {5}, pages = {e0129184}, pmid = {26017550}, issn = {1932-6203}, } @article {pmid25995193, year = {2015}, author = {Murphy, TF and Kirkham, C and Jones, MM and Sethi, S and Kong, Y and Pettigrew, MM}, title = {Expression of IgA Proteases by Haemophilus influenzae in the Respiratory Tract of Adults With Chronic Obstructive Pulmonary Disease.}, journal = {The Journal of infectious diseases}, volume = {212}, number = {11}, pages = {1798-1805}, pmid = {25995193}, issn = {1537-6613}, support = {R01 AI019641/AI/NIAID NIH HHS/United States ; UL1 TR001412/TR/NCATS NIH HHS/United States ; AI19641/AI/NIAID NIH HHS/United States ; }, mesh = {Adult ; Bacterial Proteins/chemistry/genetics/*metabolism ; Cohort Studies ; Haemophilus Infections/complications/*microbiology ; Haemophilus influenzae/*enzymology/genetics ; Humans ; Pulmonary Disease, Chronic Obstructive/complications/*microbiology ; Respiratory Tract Infections/complications/microbiology ; Serine Endopeptidases/chemistry/genetics/*metabolism ; Sputum/microbiology ; }, abstract = {BACKGROUND: Immunoglobulin (Ig)A proteases of Haemophilus influenzae are highly specific endopeptidases that cleave the hinge region of human 
IgA1 and also mediate invasion and trafficking in human respiratory epithelial cells, facilitating persistence of H. influenzae. Little is known about the expression of IgA proteases in clinical settings of H. influenzae infection.

METHODS: We identified and characterized IgA protease genes in H. influenzae and studied their expression and proteolytic specificity, in vitro and in vivo, in 169 independent strains of H. influenzae collected longitudinally over 10 years from adults with chronic obstructive pulmonary disease.

RESULTS: The H. influenzae pangenome has 2 alleles of IgA protease genes; all strains have igaA, and 40% of strains have igaB. Each allele has 2 variants with differing proteolytic specificities for human IgA1. A total of 88% of 169 strains express IgA protease activity. Expression of the 4 forms of IgA protease varies among strains. Based on the presence of IgA1 fragments in sputum samples, each of the different forms of IgA protease is selectively expressed in the human airways during infection.

CONCLUSIONS: Four variants of IgA proteases are variably expressed by H. influenzae during infection of the human airways.}, } @article {pmid25984558, year = {2015}, author = {Harrison, OB and Bray, JE and Maiden, MC and Caugant, DA}, title = {Genomic Analysis of the Evolution and Global Spread of Hyper-invasive Meningococcal Lineage 5.}, journal = {EBioMedicine}, volume = {2}, number = {3}, pages = {234-243}, pmid = {25984558}, issn = {2352-3964}, support = {087622//Wellcome Trust/United Kingdom ; }, abstract = {BACKGROUND: The predominant model for bacterial pandemics is the emergence of a virulent variant that diversifies as it spreads in human populations. We investigated a 40-year meningococcal disease pandemic caused by the hyper-invasive ET-5/ST-32 complex.

METHODS: A global collection of Neisseria meningitidis isolates dating from 1969 to 2008 was whole genome sequenced (WGS) and analysed using a gene-by-gene approach at http://pubmlst.org/neisseria.

FINDINGS: Analysis of WGS data identified a 'Lineage 5 pan genome' of 1940 genes, 1752 (92%) of which were present in all isolates (Lineage 5 'core genome'). Genetic diversity, which was mostly generated by horizontal gene transfer, was unevenly distributed in the genome; however, genealogical analysis of diverse and conserved core genes, accessory genes, and antigen encoding genes, robustly identified a star phylogeny with a number of sub-lineages. Most European and American isolates belonged to one of two closely related sub-lineages, which had diversified before the identification of the pandemic in the 1970s. A third, genetically more diverse sub-lineage, was associated with Asian isolates. Several isolates had acquired DNA from the related gonococcus.

INTERPRETATION: These data were inconsistent with a single point of origin followed by pandemic spread, rather suggesting that the sub-lineages had diversified and spread by asymptomatic transmission, with multiple distinct strains causing localised hyperendemic outbreaks.}, } @article {pmid25974630, year = {2015}, author = {Uchiyama, T and Irie, M and Mori, H and Kurokawa, K and Yamada, T}, title = {FuncTree: Functional Analysis and Visualization for Large-Scale Omics Data.}, journal = {PloS one}, volume = {10}, number = {5}, pages = {e0126967}, pmid = {25974630}, issn = {1932-6203}, mesh = {Bacteria/genetics ; Chromosome Mapping ; Computational Biology ; Databases, Factual ; Genome, Bacterial ; Genomics ; Humans ; Internet ; Intestines/microbiology ; Metagenome ; *User-Computer Interface ; }, abstract = {Exponential growth of high-throughput data and the increasing complexity of omics information have been making processing and interpreting biological data an extremely difficult and daunting task. Here we developed FuncTree (http://bioviz.tokyo/functree), a web-based application for analyzing and visualizing large-scale omics data, including but not limited to genomic, metagenomic, and transcriptomic data. FuncTree allows user to map their omics data onto the "Functional Tree map", a predefined circular dendrogram, which represents the hierarchical relationship of all known biological functions defined in the KEGG database. This novel visualization method allows user to overview the broad functionality of their data, thus allowing a more accurate and comprehensive understanding of the omics information. FuncTree provides extensive customization and calculation methods to not only allow user to directly map their omics data to identify the functionality of their data, but also to compute statistically enriched functions by comparing it to other predefined omics data. 
We have validated FuncTree's analysis and visualization capability by mapping pan-genomic data of three different types of bacterial genera, metagenomic data of the human gut, and transcriptomic data of two different types of human cell expression. All three mappings strongly confirm FuncTree's capability to analyze and visually represent key functional features of the omics data. We believe that FuncTree's capability to conduct various functional calculations and visualize the results as a holistic overview of biological function would make it an integral analysis/visualization tool for extensive omics-based research.}, } @article {pmid25953741, year = {2015}, author = {Sangwan, N and Lambert, C and Sharma, A and Gupta, V and Khurana, P and Khurana, JP and Sockett, RE and Gilbert, JA and Lal, R}, title = {Arsenic rich Himalayan hot spring metagenomics reveal genetically novel predator-prey genotypes.}, journal = {Environmental microbiology reports}, volume = {7}, number = {6}, pages = {812-823}, doi = {10.1111/1758-2229.12297}, pmid = {25953741}, issn = {1758-2229}, mesh = {Adaptation, Biological/genetics ; Arsenic/*analysis/toxicity ; *Genotype ; Hot Springs/*chemistry/*microbiology ; *Metagenome ; *Metagenomics ; *Microbiota ; Phylogeny ; Stress, Physiological ; *Water Microbiology ; }, abstract = {Bdellovibrio bacteriovorus are small Deltaproteobacteria that invade, kill and assimilate their prey. Metagenomic assembly analysis of the microbial mats of an arsenic rich, hot spring was performed to describe the genotypes of the predator Bdellovibrio and the ecogenetically adapted taxa Enterobacter. The microbial mats were enriched with Bdellovibrio (1.3%) and several Gram-negative bacteria including Bordetella (16%), Enterobacter (6.8%), Burkholderia (4.8%), Acinetobacter (2.3%) and Yersinia (1%). 
A high-quality (47 contigs, 25X coverage; 3.5 Mbp) draft genome of Bdellovibrio (strain ArHS; Arsenic Hot Spring) was reassembled, which lacked the marker gene Bd0108 associated with the usual method of prey interaction and invasion for this genus, while maintaining genes coding for the hydrolytic enzymes necessary for prey assimilation. By filtering microbial mat samples (< 0.45 μm) to enrich for small predatory cell sizes, we observed Bdellovibrio-like cells attached side-on to E. coli through electron microscopy. Furthermore, a draft pan-genome of the dominant potential host taxon, Enterobacter cloacae ArHS (4.8 Mb), along with three of its viral genotypes (n = 3; 42 kb, 49 kb and 50 kb), was assembled. These data were further used to analyse the population level evolutionary dynamics (taxonomical and functional) of reconstructed genotypes.}, } @article {pmid25934264, year = {2015}, author = {Podicheti, R and Mockaitis, K}, title = {FEATnotator: A tool for integrated annotation of sequence features and variation, facilitating interpretation in genomics experiments.}, journal = {Methods (San Diego, Calif.)}, volume = {79-80}, number = {}, pages = {11-17}, doi = {10.1016/j.ymeth.2015.04.028}, pmid = {25934264}, issn = {1095-9130}, mesh = {Arabidopsis/*genetics ; Databases, Genetic ; *Genetic Variation ; Genome, Plant ; Genomics/methods ; High-Throughput Nucleotide Sequencing ; Molecular Sequence Annotation/*methods ; Polymorphism, Single Nucleotide/genetics ; *Software ; }, abstract = {As approaches are sought for more efficient and democratized uses of non-model and expanded model genomics references, ease of integration of genomic feature datasets is especially desirable in multidisciplinary research communities. Valuable conclusions are often missed or slowed when researchers refer experimental results to a single reference sequence that lacks integrated pan-genomic and multi-experiment data in accessible formats. 
Association of genomic positional information, such as results from an expansive variety of next-generation sequencing experiments, with annotated reference features such as genes or predicted protein binding sites, provides the context essential for conclusions and ongoing research. When the experimental system includes polymorphic genomic inputs, rapid calculation of gene structural and protein translational effects of sequence variation from the reference can be invaluable. Here we present FEATnotator, a lightweight, fast and easy to use open source software program that integrates and reports overlap and proximity in genomic information from any user-defined datasets including those from next generation sequencing applications. We illustrate use of the tool by summarizing whole genome sequence variation of a widely used natural isolate of Arabidopsis thaliana in the context of gene models of the reference accession. Previous discovery of a protein coding deletion influencing root development is replicated rapidly. Appropriate even in investigations of a single gene or genic regions such as QTL, comprehensive reports provided by FEATnotator better prepare researchers for interpretation of their experimental results. The tool is available for download at http://featnotator.sourceforge.net.}, } @article {pmid25908654, year = {2015}, author = {Tkacz, A and Poole, P}, title = {Role of root microbiota in plant productivity.}, journal = {Journal of experimental botany}, volume = {66}, number = {8}, pages = {2167-2175}, pmid = {25908654}, issn = {1460-2431}, mesh = {Metagenomics ; *Microbiota ; Mycorrhizae/*physiology ; *Plant Development ; Plant Root Nodulation ; Plant Roots/*microbiology ; }, abstract = {The growing human population requires increasing amounts of food, but modern agriculture has limited possibilities for increasing yields. New crop varieties may be bred to have increased yields and be more resistant to environmental stress and pests. 
However, they still require fertilization to supplement essential nutrients that are normally limited in the soil. Soil microorganisms present an opportunity to reduce the requirement for inorganic fertilization in agriculture. Microorganisms, due to their enormous genetic pool, are also a potential source of biochemical reactions that recycle essential nutrients for plant growth. Microbes that associate with plants can be considered to be part of the plant's pan-genome. Therefore, it is essential for us to understand microbial community structure and their 'metagenome' and how it is influenced by different soil types and crop varieties. In the future we may be able to modify and better utilize the soil microbiota potential for promoting plant growth.}, } @article {pmid25898829, year = {2015}, author = {Sharma, A and Sangwan, N and Negi, V and Kohli, P and Khurana, JP and Rao, DL and Lal, R}, title = {Pan-genome dynamics of Pseudomonas gene complements enriched across hexachlorocyclohexane dumpsite.}, journal = {BMC genomics}, volume = {16}, number = {1}, pages = {313}, pmid = {25898829}, issn = {1471-2164}, mesh = {Base Sequence ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Genotype ; Hexachlorocyclohexane/chemistry/*metabolism ; Integrons/genetics ; Metagenomics ; Phylogeny ; Pseudomonas/classification/*genetics/isolation & purification ; RNA, Ribosomal, 16S/analysis ; Sequence Analysis, DNA ; Soil Microbiology ; Water Microbiology ; Water Pollutants, Chemical/chemistry/metabolism ; }, abstract = {BACKGROUND: Phylogenetic heterogeneity across Pseudomonas genus is complemented by its diverse genome architecture enriched by accessory genetic elements (plasmids, transposons, and integrons) conferring resistance across this genus. Here, we sequenced a stress tolerant genotype i.e. Pseudomonas sp. 
strain RL isolated from a hexachlorocyclohexane (HCH) contaminated pond (45 mg of total HCH g⁻¹ sediment) and further compared its gene repertoire with 17 reference ecotypes belonging to P. stutzeri, P. mendocina, P. aeruginosa, P. psychrotolerans and P. denitrificans, representing metabolically diverse ecosystems (i.e. marine, clinical, and soil/sludge). Metagenomic data from HCH contaminated pond sediment and similar HCH contaminated sites were further used to analyze the pan-genome dynamics of Pseudomonas genotypes enriched across increasing HCH gradient.

RESULTS: Although strain RL demonstrated clear species demarcation (ANI ≤ 80.03%) from the rest of its phylogenetic relatives, it was found to be closest to P. stutzeri clade which was further complemented functionally. Comparative functional analysis elucidated strain specific enrichment of metabolic pathways like α-linoleic acid degradation and carbazole degradation in Pseudomonas sp. strain RL and P. stutzeri XLDN-R, respectively. Composition based methods (%codon bias and %G + C difference) further highlighted the significance of horizontal gene transfer (HGT) in evolution of nitrogen metabolism, two-component system (TCS) and methionine metabolism across the Pseudomonas genomes used in this study. An intact mobile class-I integron (3,552 bp) with a captured gene cassette encoding for dihydrofolate reductase (dhfra1) was detected in strain RL, distinctly demarcated from other integron harboring species (i.e. P. aeruginosa, P. stutzeri, and P. putida). Mobility of this integron was confirmed by its association with Tnp21-like transposon (95% identity) suggesting stress specific mobilization across HCH contaminated sites. Metagenomics data from pond sediment and recently surveyed HCH adulterated soils revealed the in situ enrichment of integron associated transposase gene (TnpA6100) across increasing HCH contamination (0.7 to 450 mg HCH g⁻¹ of soil).

CONCLUSIONS: Unlocking the potential of comparative genomics supplemented with metagenomics, we have attempted to resolve the environment and strain specific demarcations across 18 Pseudomonas gene complements. Pan-genome analyses of these strains indicate astoundingly diverse metabolic strategies and provide a genetic basis for the cosmopolitan existence of this taxon.}, } @article {pmid25896518, year = {2015}, author = {Zhu, A and Sunagawa, S and Mende, DR and Bork, P}, title = {Inter-individual differences in the gene content of human gut bacterial species.}, journal = {Genome biology}, volume = {16}, number = {1}, pages = {82}, pmid = {25896518}, issn = {1474-760X}, support = {268985/ERC_/European Research Council/International ; }, mesh = {Chromosome Mapping ; Databases, Genetic ; Feces/microbiology ; Gastrointestinal Microbiome/*genetics ; Gastrointestinal Tract/*microbiology ; Gene Deletion ; Genetic Loci ; Genetic Variation ; Genome Size ; *Genome, Bacterial ; Humans ; *Metagenome ; Phylogeny ; Polysaccharides/metabolism ; Selection, Genetic ; }, abstract = {BACKGROUND: Gene content differences in human gut microbes can lead to inter-individual phenotypic variations such as digestive capacity. It is unclear whether gene content variation is caused by differences in microbial species composition or by the presence of different strains of the same species; the extent of gene content variation in the latter is unknown. Unlike pan-genome studies of cultivable strains, the use of metagenomic data can provide an unbiased view of structural variation of gut bacterial strains by measuring them in their natural habitats, the gut of each individual in this case, representing native boundaries between gut bacterial populations. We analyzed publicly available metagenomic data from fecal samples to characterize inter-individual variation in gut bacterial species.

RESULTS: A comparison of 11 abundant gut bacterial species showed that the gene content of strains from the same species differed, on average, by 13% between individuals. This number is based on gene deletions only and represents a lower limit, yet the variation is already in a similar range as observed between completely sequenced strains of cultivable species. We show that accessory genes that differ considerably between individuals can encode important functions, such as polysaccharide utilization and capsular polysaccharide synthesis loci.

CONCLUSION: Metagenomics can yield insights into gene content variation of strains in complex communities, which cannot be predicted by phylogenetic marker genes alone. The large degree of inter-individual variability in gene content implies that strain resolution must be considered in order to fully assess the functional potential of an individual's human gut microbiome.}, } @article {pmid25888688, year = {2015}, author = {Méric, G and Miragaia, M and de Been, M and Yahara, K and Pascoe, B and Mageiros, L and Mikhail, J and Harris, LG and Wilkinson, TS and Rolo, J and Lamble, S and Bray, JE and Jolley, KA and Hanage, WP and Bowden, R and Maiden, MC and Mack, D and de Lencastre, H and Feil, EJ and Corander, J and Sheppard, SK}, title = {Ecological Overlap and Horizontal Gene Transfer in Staphylococcus aureus and Staphylococcus epidermidis.}, journal = {Genome biology and evolution}, volume = {7}, number = {5}, pages = {1313-1328}, pmid = {25888688}, issn = {1759-6653}, support = {090532/Z/09/Z/WT_/Wellcome Trust/United Kingdom ; BB/I02464X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 087622/WT_/Wellcome Trust/United Kingdom ; G0801929/MRC_/Medical Research Council/United Kingdom ; MR/M501608/1/MRC_/Medical Research Council/United Kingdom ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; U54 GM088558/GM/NIGMS NIH HHS/United States ; U54GM088558/GM/NIGMS NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Ecological and Environmental Phenomena ; Evolution, Molecular ; *Gene Transfer, Horizontal ; Genes, Fungal ; Genetic Variation ; Genome, Fungal ; Homologous Recombination ; Staphylococcus aureus/*genetics ; Staphylococcus epidermidis/*genetics ; }, abstract = {The opportunistic pathogens Staphylococcus aureus and Staphylococcus epidermidis represent major causes of severe nosocomial infection, and are associated with high levels of mortality and morbidity worldwide. 
These species are both common commensals on the human skin and in the nasal pharynx, but are genetically distinct, differing at 24% average nucleotide divergence in 1,478 core genes. To better understand the genome dynamics of these ecologically similar staphylococcal species, we carried out a comparative analysis of 324 S. aureus and S. epidermidis genomes, including 83 novel S. epidermidis sequences. A reference pan-genome approach and whole genome multilocus-sequence typing revealed that around half of the genome was shared between the species. Based on a BratNextGen analysis, homologous recombination was found to have impacted on 40% of the core genes in S. epidermidis, but on only 24% of the core genes in S. aureus. Homologous recombination between the species is rare, with a maximum of nine gene alleles shared between any two S. epidermidis and S. aureus isolates. In contrast, there was considerable interspecies admixture of mobile elements, in particular genes associated with the SaPIn1 pathogenicity island, metal detoxification, and the methicillin-resistance island SCCmec. Our data and analysis provide a context for considering the nature of recombinational boundaries between S. aureus and S. epidermidis and the selective forces that influence realized recombination between these species.}, } @article {pmid25888166, year = {2015}, author = {Snipen, L and Liland, KH}, title = {micropan: an R-package for microbial pan-genomics.}, journal = {BMC bioinformatics}, volume = {16}, number = {}, pages = {79}, pmid = {25888166}, issn = {1471-2105}, mesh = {*Algorithms ; Computational Biology/*methods ; Enterococcus/classification/*genetics ; *Genome, Bacterial ; Genomics/*methods ; *Software ; }, abstract = {BACKGROUND: A pan-genome is defined as the set of all unique gene families found in one or more strains of a prokaryotic species. 
Due to the extensive within-species diversity in the microbial world, the pan-genome is often many times larger than a single genome. Studies of pan-genomes have become popular due to the easy access to whole-genome sequence data for prokaryotes. A pan-genome study reveals species diversity and gene families that may be of special interest, e.g., because of their role in bacterial survival or their ability to discriminate strains.

RESULTS: We present an R package for the study of prokaryotic pan-genomes. The R computing environment harbors endless possibilities with respect to statistical analyses and graphics. External free software is used for the heavy computations involved, and the R package provides functions for building a computational pipeline.

CONCLUSIONS: We demonstrate parts of the package on a data set for the gram positive bacterium Enterococcus faecalis. The package is free to download and install from The Comprehensive R Archive Network.}, } @article {pmid25887946, year = {2015}, author = {Gupta, VK and Chaudhari, NM and Iskepalli, S and Dutta, C}, title = {Divergences in gene repertoire among the reference Prevotella genomes derived from distinct body sites of human.}, journal = {BMC genomics}, volume = {16}, number = {1}, pages = {153}, pmid = {25887946}, issn = {1471-2164}, mesh = {Gastrointestinal Tract/microbiology ; *Genome, Bacterial ; Humans ; Metagenome ; Microbiota/*genetics ; Mouth/microbiology ; *Phylogeny ; Prevotella/*genetics ; Skin/microbiology ; Tissue Distribution ; Urogenital System/microbiology ; }, abstract = {BACKGROUND: The community composition of the human microbiome is known to vary at distinct anatomical niches. But little is known about the nature of variations, if any, at the genome/sub-genome levels of a specific microbial community across different niches. The present report aims to explore, as a case study, the variations in gene repertoire of 28 Prevotella reference genomes derived from different body-sites of human, as reported earlier by the Human Microbiome Consortium.

RESULTS: The pan-genome for Prevotella remains "open". On average, 17% of predicted protein-coding genes of any particular Prevotella genome represent the conserved core genes, while the remaining 83% contribute to the flexible genes and singletons. The study reveals exclusive presence of 11798, 3673, 3348 and 934 gene families and exclusive absence of 17, 221, 115 and 645 gene families in Prevotella genomes derived from human oral cavity, gastro-intestinal tracts (GIT), urogenital tract (UGT) and skin, respectively. Distribution of various functional COG categories differs significantly among the habitat-specific genes. No niche-specific variations could be observed in distribution of KEGG pathways.

CONCLUSIONS: Prevotella genomes derived from different body sites differ appreciably in gene repertoire, suggesting that these microbiome components might have developed distinct genetic strategies for niche adaptation within the host. Each individual microbe might also have a component of its own genetic machinery for host adaptation, as appeared from the huge number of singletons.}, } @article {pmid25887558, year = {2015}, author = {Shinozuka, H and Cogan, NO and Shinozuka, M and Marshall, A and Kay, P and Lin, YH and Spangenberg, GC and Forster, JW}, title = {A simple method for semi-random DNA amplicon fragmentation using the methylation-dependent restriction enzyme MspJI.}, journal = {BMC biotechnology}, volume = {15}, number = {}, pages = {25}, pmid = {25887558}, issn = {1472-6750}, mesh = {Agrobacterium/genetics ; Arabidopsis/genetics ; Bacterial Proteins/*metabolism ; DNA Restriction Enzymes/*metabolism ; DNA, Bacterial/analysis/genetics ; DNA, Plant/analysis/genetics ; Deoxycytosine Nucleotides ; Genotyping Techniques ; High-Throughput Nucleotide Sequencing/*methods ; Nucleic Acid Amplification Techniques ; Sequence Analysis, DNA/*methods ; }, abstract = {BACKGROUND: Fragmentation at random nucleotide locations is an essential process for preparation of DNA libraries to be used on massively parallel short-read DNA sequencing platforms. Although instruments for physical shearing, such as the Covaris S2 focused-ultrasonicator system, and products for enzymatic shearing, such as the Nextera technology and NEBNext dsDNA Fragmentase kit, are commercially available, a simple and inexpensive method is desirable for high-throughput sequencing library preparation. MspJI is a recently characterised restriction enzyme which recognises the sequence motif CNNR (where R = G or A) when the first base is modified to 5-methylcytosine or 5-hydroxymethylcytosine.

RESULTS: A semi-random enzymatic DNA amplicon fragmentation method was developed based on the unique cleavage properties of MspJI. In this method, random incorporation of 5-methyl-2'-deoxycytidine-5'-triphosphate is achieved through DNA amplification with DNA polymerase, followed by DNA digestion with MspJI. Due to the recognition sequence of the enzyme, DNA amplicons are fragmented in a relatively sequence-independent manner. The size range of the resulting fragments was capable of control through optimisation of 5-methyl-2'-deoxycytidine-5'-triphosphate concentration in the reaction mixture. A library suitable for sequencing using the Illumina MiSeq platform was prepared and processed using the proposed method. Alignment of generated short reads to a reference sequence demonstrated a relatively high level of random fragmentation.

CONCLUSIONS: The proposed method may be performed with standard laboratory equipment. Although the uniformity of coverage was slightly inferior to the Covaris physical shearing procedure, due to efficiencies of cost and labour, the method may be more suitable than existing approaches for implementation in large-scale sequencing activities, such as bacterial artificial chromosome (BAC)-based genome sequence assembly, pan-genomic studies and locus-targeted genotyping-by-sequencing.}, } @article {pmid25881062, year = {2015}, author = {Lu, F and Romay, MC and Glaubitz, JC and Bradbury, PJ and Elshire, RJ and Wang, T and Li, Y and Li, Y and Semagn, K and Zhang, X and Hernandez, AG and Mikel, MA and Soifer, I and Barad, O and Buckler, ES}, title = {High-resolution genetic mapping of maize pan-genome sequence anchors.}, journal = {Nature communications}, volume = {6}, number = {}, pages = {6914}, pmid = {25881062}, issn = {2041-1723}, mesh = {Chromosome Mapping ; Genome, Plant/*genetics ; Machine Learning ; Models, Genetic ; Polymorphism, Single Nucleotide ; Sequence Alignment ; Sequence Analysis, DNA ; Zea mays/*genetics ; }, abstract = {In addition to single-nucleotide polymorphisms, structural variation is abundant in many plant genomes. The structural variation across a species can be represented by a 'pan-genome', which is essential to fully understand the genetic control of phenotypes. However, the pan-genome's complexity hinders its accurate assembly via sequence alignment. Here we demonstrate an approach to facilitate pan-genome construction in maize. By performing 18 trillion association tests we map 26 million tags generated by reduced representation sequencing of 14,129 maize inbred lines. Using machine-learning models we select 4.4 million accurately mapped tags as sequence anchors, 1.1 million of which are presence/absence variations. 
Structural variations exhibit enriched association with phenotypic traits, indicating that they are a significant source of adaptive variation in maize. The ability to efficiently map ultrahigh-density pan-genome sequence anchors enables fine characterization of structural variation and will advance both genetic research and breeding in many crops.}, } @article {pmid25880171, year = {2015}, author = {Kweon, O and Kim, SJ and Blom, J and Kim, SK and Kim, BS and Baek, DH and Park, SI and Sutherland, JB and Cerniglia, CE}, title = {Comparative functional pan-genome analyses to build connections between genomic dynamics and phenotypic evolution in polycyclic aromatic hydrocarbon metabolism in the genus Mycobacterium.}, journal = {BMC evolutionary biology}, volume = {15}, number = {}, pages = {21}, pmid = {25880171}, issn = {1471-2148}, mesh = {Biodegradation, Environmental ; Biological Evolution ; Epistasis, Genetic ; Gene Transfer, Horizontal ; Genes, Bacterial ; Genomics ; Mycobacterium/classification/*genetics/*metabolism ; Phylogeny ; Polycyclic Aromatic Hydrocarbons/metabolism ; }, abstract = {BACKGROUND: The bacterial genus Mycobacterium is of great interest in the medical and biotechnological fields. Despite a flood of genome sequencing and functional genomics data, significant gaps in knowledge between genome and phenome seriously hinder efforts toward the treatment of mycobacterial diseases and practical biotechnological applications. In this study, we propose the use of systematic, comparative functional pan-genomic analysis to build connections between genomic dynamics and phenotypic evolution in polycyclic aromatic hydrocarbon (PAH) metabolism in the genus Mycobacterium.

RESULTS: Phylogenetic, phenotypic, and genomic information for 27 completely genome-sequenced mycobacteria was systematically integrated to reconstruct a mycobacterial phenotype network (MPN) with a pan-genomic concept at a network level. In the MPN, mycobacterial phenotypes show typical scale-free relationships. PAH degradation is an isolated phenotype with the lowest connection degree, consistent with phylogenetic and environmental isolation of PAH degraders. A series of functional pan-genomic analyses provide conserved and unique types of genomic evidence for strong epistatic and pleiotropic impacts on evolutionary trajectories of the PAH-degrading phenotype. Under strong natural selection, the detailed gene gain/loss patterns from horizontal gene transfer (HGT)/deletion events hypothesize a plausible evolutionary path, an epistasis-based birth and pleiotropy-dependent death, for PAH metabolism in the genus Mycobacterium. This study generated a practical mycobacterial compendium of phenotypic and genomic changes, focusing on the PAH-degrading phenotype, with a pan-genomic perspective of the evolutionary events and the environmental challenges.

CONCLUSIONS: Our findings suggest that when selection acts on PAH metabolism, only a small fraction of possible trajectories is likely to be observed, owing mainly to a combination of the ambiguous phenotypic effects of PAHs and the corresponding pleiotropy- and epistasis-dependent evolutionary adaptation. Evolutionary constraints on the selection of trajectories, like those seen in PAH-degrading phenotypes, are likely to apply to the evolution of other phenotypes in the genus Mycobacterium.}, } @article {pmid25872255, year = {2014}, author = {Falgenhauer, L and Schmiedel, J and Ghosh, H and Fritzenwanker, M and Yao, Y and Bauerfeind, R and Imirzalioglu, C and Chakraborty, T}, title = {Resistance plasmids in ESBL-encoding Escherichia coli isolates from humans, dogs and cats.}, journal = {Berliner und Munchener tierarztliche Wochenschrift}, volume = {127}, number = {11-12}, pages = {458-463}, pmid = {25872255}, issn = {0005-9366}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Cat Diseases/*microbiology ; Cats ; Dog Diseases/*microbiology ; Dogs ; Escherichia coli/drug effects/*genetics ; Escherichia coli Infections/*microbiology/*veterinary ; Humans ; Microbial Sensitivity Tests ; Plasmids/*genetics/isolation & purification ; beta-Lactamases/*genetics ; }, abstract = {We characterized ESBL-producing Escherichia coli isolates from diseased dog, cat and human sources for their plasmid content. Plasmids with different Inc groups and combinations of resistance genes were detected in these isolates. The pan-genome of the plasmid-associated genes was found to be large, indicating diversity of the gene pool among the plasmids. 
No commonly occurring plasmids with similar gene content in isolates from dogs, cats and humans were detected.}, } @article {pmid25869283, year = {2015}, author = {Treven, P}, title = {Strategies to develop strain-specific PCR based assays for probiotics.}, journal = {Beneficial microbes}, volume = {6}, number = {6}, pages = {887-898}, doi = {10.3920/BM2015.0009}, pmid = {25869283}, issn = {1876-2891}, mesh = {DNA, Bacterial/genetics ; Genome, Bacterial ; Polymerase Chain Reaction/*methods ; Probiotics/*analysis ; Sensitivity and Specificity ; }, abstract = {Since health benefits conferred by probiotics are strain-specific, identification to the strain level is mandatory to allow the monitoring of the presence and the abundance of specific probiotic in a product or in a gastrointestinal tract. Compared to standard plate counts, the reduced duration of the assays and higher specificity makes PCR-based methods (standard PCR and quantitative PCR) very appropriate for detection or quantification of probiotics. Development of strain-specific assay consists of 4 main stages: (1) strain-specific marker identification; (2) construction of potential strain-specific primers; (3) validation on DNA from pure cultures of target and related strains; and (4) validation on spiked samples. The most important and also the most challenging step is the identification of strain-specific sequences, which can be subsequently targeted by specific primers or probes. Such regions can be identified on sequences derived from 16S-23S internally transcribed spacers, randomly amplified polymorphic DNA, representational difference analysis and suppression subtractive hybridisation. Already known phenotypic or genotypic characteristics of the target strain can also be used to develop the strain-specific assay. However, the initial stage of strain-specific assay development can be replaced by comparative genomics analysis of target genome with related genomes in public databases. 
Advances in whole genome sequencing (WGS) have resulted in a cost reduction for bacterial genome sequencing and consequently have made this approach available to most laboratories. In the present paper I reviewed the available literature on PCR and qPCR assays developed for detection of a specific probiotic strain and discussed future WGS and comparative genomics-based approaches.}, } @article {pmid25856195, year = {2015}, author = {Li, P and Kwok, AH and Jiang, J and Ran, T and Xu, D and Wang, W and Leung, FC}, title = {Comparative genome analyses of Serratia marcescens FS14 reveals its high antagonistic potential.}, journal = {PloS one}, volume = {10}, number = {4}, pages = {e0123061}, pmid = {25856195}, issn = {1932-6203}, mesh = {Antibiosis/*genetics ; Atractylodes/*microbiology ; Biological Assay ; DNA Restriction-Modification Enzymes/*genetics ; Fusarium/physiology ; *Genetic Variation ; Genome, Bacterial/*genetics ; Serratia marcescens/*genetics ; Type VI Secretion Systems/*genetics ; }, abstract = {S. marcescens FS14 was isolated from an Atractylodes macrocephala Koidz plant that was infected by Fusarium oxysporum and showed symptoms of root rot. With the completion of the genome sequence of FS14, the first comprehensive comparative-genomic analysis of the Serratia genus was performed. Pan-genome and COG analyses showed that the majority of the conserved core genes are involved in basic cellular functions, while genomic factors such as prophages contribute considerably to genome diversity. Additionally, a Type I restriction-modification system, a Type III secretion system and tellurium resistance genes are found in only some Serratia species. Comparative analysis further identified that S. marcescens FS14 possesses multiple mechanisms for antagonism against other microorganisms, including the production of prodigiosin, bacteriocins, and multi-antibiotic resistant determinants as well as chitinases. 
The presence of two evolutionarily distinct Type VI secretion systems (T6SSs) in FS14 may provide further competitive advantages for FS14 against other microbes. To our knowledge, this is the first report of comparative analysis on T6SSs in the genus, which identifies four types of T6SSs in Serratia spp. Competition bioassays of FS14 against the vital plant pathogenic bacterium Ralstonia solanacearum and fungi Fusarium oxysporum and Sclerotinia sclerotiorum were performed to support our genomic analyses, in which FS14 demonstrated high antagonistic activities against both bacterial and fungal phytopathogens.}, } @article {pmid25853708, year = {2015}, author = {Periwal, V and Patowary, A and Vellarikkal, SK and Gupta, A and Singh, M and Mittal, A and Jeyapaul, S and Chauhan, RK and Singh, AV and Singh, PK and Garg, P and Katoch, VM and Katoch, K and Chauhan, DS and Sivasubbu, S and Scaria, V}, title = {Comparative whole-genome analysis of clinical isolates reveals characteristic architecture of Mycobacterium tuberculosis pangenome.}, journal = {PloS one}, volume = {10}, number = {4}, pages = {e0122979}, pmid = {25853708}, issn = {1932-6203}, mesh = {Base Sequence ; Comparative Genomic Hybridization ; DNA, Bacterial/genetics ; *Genetic Variation ; Genome, Bacterial ; Humans ; Mycobacterium tuberculosis/classification/*genetics/pathogenicity ; *Phylogeny ; Tuberculosis/*genetics/microbiology/pathology ; }, abstract = {The tubercle complex consists of closely related mycobacterium species which appear to be variants of a single species. Comparative genome analysis of different strains could provide useful clues and insights into the genetic diversity of the species. We integrated genome assemblies of 96 strains from Mycobacterium tuberculosis complex (MTBC), which included 8 Indian clinical isolates sequenced and assembled in this study, to understand its pangenome architecture. 
We predicted genes for all the 96 strains and clustered their respective CDSs into homologous gene clusters (HGCs) to reveal a hard-core, soft-core and accessory genome component of MTBC. The hard-core (HGCs shared amongst 100% of the strains) was comprised of 2,066 gene clusters whereas the soft-core (HGCs shared amongst at least 95% of the strains) comprised of 3,374 gene clusters. The change in the core and accessory genome components when observed as a function of their size revealed that MTBC has an open pangenome. We identified 74 HGCs that were absent from reference strains H37Rv and H37Ra but were present in most of clinical isolates. We report PCR validation on 9 candidate genes depicting 7 genes completely absent from H37Rv and H37Ra whereas 2 genes shared partial homology with them accounting to probable insertion and deletion events. The pangenome approach is a promising tool for studying strain specific genetic differences occurring within species. We also suggest that since selecting appropriate target genes for typing purposes requires the expected target gene be present in all isolates being typed, therefore estimating the core-component of the species becomes a subject of prime importance.}, } @article {pmid25843513, year = {2015}, author = {Huang, DS and Wang, Z and He, XJ and Diplas, BH and Yang, R and Killela, PJ and Meng, Q and Ye, ZY and Wang, W and Jiang, XT and Xu, L and He, XL and Zhao, ZS and Xu, WJ and Wang, HJ and Ma, YY and Xia, YJ and Li, L and Zhang, RX and Jin, T and Zhao, ZK and Xu, J and Yu, S and Wu, F and Liang, J and Wang, S and Jiao, Y and Yan, H and Tao, HQ}, title = {Recurrent TERT promoter mutations identified in a large-scale study of multiple tumour types are associated with increased TERT expression and telomerase activation.}, journal = {European journal of cancer (Oxford, England : 1990)}, volume = {51}, number = {8}, pages = {969-976}, pmid = {25843513}, issn = {1879-0852}, support = {P50 NS020023/NS/NINDS NIH 
HHS/United States ; P01 CA154291/CA/NCI NIH HHS/United States ; R01 CA140316/CA/NCI NIH HHS/United States ; 5 P01 CA154291-01/CA/NCI NIH HHS/United States ; 5 R01-CA140316/CA/NCI NIH HHS/United States ; 5 P50 NS020023-30/NS/NINDS NIH HHS/United States ; T32 GM007171/GM/NIGMS NIH HHS/United States ; }, mesh = {Adult ; Asian People/genetics/statistics & numerical data ; Base Sequence ; DNA Mutational Analysis ; Enzyme Activation/genetics ; Gene Expression Regulation, Neoplastic ; Gene Frequency ; Genetic Association Studies ; Humans ; *Mutation ; Neoplasms/epidemiology/*genetics/pathology ; Polymorphism, Single Nucleotide ; Promoter Regions, Genetic/*genetics ; Telomerase/*genetics/*metabolism ; Tumor Cells, Cultured ; Up-Regulation/genetics ; }, abstract = {BACKGROUND: Several somatic mutation hotspots were recently identified in the telomerase reverse transcriptase (TERT) promoter region in human cancers. Large scale studies of these mutations in multiple tumour types are limited, in particular in Asian populations. This study aimed to: analyse TERT promoter mutations in multiple tumour types in a large Chinese patient cohort, investigate novel tumour types and assess the functional significance of the mutations.

METHODS: TERT promoter mutation status was assessed by Sanger sequencing for 13 different tumour types and 799 tumour tissues from Chinese cancer patients. Thymic epithelial tumours, gastrointestinal leiomyoma, and gastric schwannoma were included, for which the TERT promoter has not been previously sequenced. Functional studies included TERT expression by reverse-transcriptase quantitative polymerase chain reaction (RT-qPCR), telomerase activity by the telomeric repeat amplification protocol (TRAP) assay and promoter activity by the luciferase reporter assay.

RESULTS: TERT promoter mutations were highly frequent in glioblastoma (83.9%), urothelial carcinoma (64.5%), oligodendroglioma (70.0%), medulloblastoma (33.3%) and hepatocellular carcinoma (31.4%). C228T and C250T were the most common mutations. In urothelial carcinoma, several novel rare mutations were identified. TERT promoter mutations were absent in gastrointestinal stromal tumour (GIST), thymic epithelial tumours, gastrointestinal leiomyoma, gastric schwannoma, cholangiocarcinoma, gastric and pancreatic cancer. TERT promoter mutations highly correlated with upregulated TERT mRNA expression and telomerase activity in adult gliomas. These mutations differentially enhanced the transcriptional activity of the TERT core promoter.

CONCLUSIONS: TERT promoter mutations are frequent in multiple tumour types and have similar distributions in Chinese cancer patients. The functional significance of these mutations reflect the importance to telomere maintenance and hence tumourigenesis, making them potential therapeutic targets.}, } @article {pmid25832353, year = {2015}, author = {Puymège, A and Bertin, S and Guédon, G and Payot, S}, title = {Analysis of Streptococcus agalactiae pan-genome for prevalence, diversity and functionality of integrative and conjugative or mobilizable elements integrated in the tRNA(Lys CTT) gene.}, journal = {Molecular genetics and genomics : MGG}, volume = {290}, number = {5}, pages = {1727-1740}, pmid = {25832353}, issn = {1617-4623}, mesh = {*Conjugation, Genetic ; *Genome, Bacterial ; Phylogeny ; RNA, Transfer, Lys/*genetics ; Streptococcus agalactiae/*genetics ; }, abstract = {Streptococcus agalactiae is the first cause of invasive infections in human neonates and is also a major bovine and fish pathogen. High genomic diversity was observed in this species that hosts numerous mobile genetic elements, in particular elements transferable by conjugation. This works aims to evaluate the contribution of these elements to GBS genome diversity. Focusing on genomic islands integrated in the tRNA(Lys) (CTT) gene, a known hotspot of recombination, an extensive in silico search was performed on the sequenced genome of 303 strains of S. agalactiae isolated from different hosts. In all the isolates (except 9), whatever their origin (human, bovine, camel, dog, gray seal, dolphin, fish species or bullfrog), this locus carries highly diverse genomic islands transferable by conjugation such as integrative and conjugative elements (ICEs), integrative and mobilizable elements (IMEs), CIs-mobilizable elements (CIMEs) or composite elements. 
Transfer of an ICE from an ST67 bovine strain to a phylogenetically distant ST23 human isolate was obtained experimentally indicating that there was no barrier to ICE transfer between strains from different hosts. Interestingly, a novel family of putative IMEs that site-specifically integrate in the nic site of oriT of ICEs belonging to Tn916/ICESt3 superfamily was detected in silico. These elements carry an antibiotic resistance gene (lsa(C)) already described to confer cross-resistance to lincosamides, streptogramins A and pleuromutilins. Further work is needed to evaluate the impact of these IMEs on the transfer of targeted ICEs and the mobility and the dissemination of these IMEs.}, } @article {pmid25803742, year = {2015}, author = {Sahl, JW and Allender, CJ and Colman, RE and Califf, KJ and Schupp, JM and Currie, BJ and Van Zandt, KE and Gelhaus, HC and Keim, P and Tuanyok, A}, title = {Genomic characterization of Burkholderia pseudomallei isolates selected for medical countermeasures testing: comparative genomics associated with differential virulence.}, journal = {PloS one}, volume = {10}, number = {3}, pages = {e0121052}, pmid = {25803742}, issn = {1932-6203}, support = {U01 AI075568/AI/NIAID NIH HHS/United States ; U54 AI065359/AI/NIAID NIH HHS/United States ; U54 AI-065359/AI/NIAID NIH HHS/United States ; U01 AI-075568/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Burkholderia pseudomallei/drug effects/*genetics/isolation & purification/*pathogenicity ; *Drug Discovery ; Evolution, Molecular ; Female ; Genome, Bacterial/genetics ; *Genomics ; Genotype ; Mice ; Mice, Inbred BALB C ; Phenotype ; Phylogeny ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; Virulence/drug effects/genetics ; }, abstract = {Burkholderia pseudomallei is the causative agent of melioidosis and a potential bioterrorism agent. In the development of medical countermeasures against B. 
pseudomallei infection, the US Food and Drug Administration (FDA) animal Rule recommends using well-characterized strains in animal challenge studies. In this study, whole genome sequence data were generated for 6 B. pseudomallei isolates previously identified as candidates for animal challenge studies; an additional 5 isolates were sequenced that were associated with human inhalational melioidosis. A core genome single nucleotide polymorphism (SNP) phylogeny inferred from a concatenated SNP alignment from the 11 isolates sequenced in this study and a diverse global collection of isolates demonstrated the diversity of the proposed Animal Rule isolates. To understand the genomic composition of each isolate, a large-scale blast score ratio (LS-BSR) analysis was performed on the entire pan-genome; this demonstrated the variable composition of genes across the panel and also helped to identify genes unique to individual isolates. In addition, a set of ~550 genes associated with pathogenesis in B. pseudomallei were screened against the 11 sequenced genomes with LS-BSR. Differential gene distribution for 54 virulence-associated genes was observed between genomes and three of these genes were correlated with differential virulence observed in animal challenge studies using BALB/c mice. Differentially conserved genes and SNPs associated with disease severity were identified and could be the basis for future studies investigating the pathogenesis of B. pseudomallei. Overall, the genetic characterization of the 11 proposed Animal Rule isolates provides context for future studies involving B. 
pseudomallei pathogenesis, differential virulence, and efficacy to therapeutics.}, } @article {pmid25781462, year = {2015}, author = {Song, G and Dickins, BJ and Demeter, J and Engel, S and Gallagher, J and Choe, K and Dunn, B and Snyder, M and Cherry, JM}, title = {AGAPE (Automated Genome Analysis PipelinE) for pan-genome analysis of Saccharomyces cerevisiae.}, journal = {PloS one}, volume = {10}, number = {3}, pages = {e0120671}, pmid = {25781462}, issn = {1932-6203}, support = {P41 HG001315/HG/NHGRI NIH HHS/United States ; U24 HG001315/HG/NHGRI NIH HHS/United States ; U41 HG001315/HG/NHGRI NIH HHS/United States ; }, mesh = {Contig Mapping/*methods ; *Genome, Fungal ; Saccharomyces cerevisiae/*genetics ; Sequence Analysis, DNA/*methods ; *Software ; }, abstract = {The characterization and public release of genome sequences from thousands of organisms is expanding the scope for genetic variation studies. However, understanding the phenotypic consequences of genetic variation remains a challenge in eukaryotes due to the complexity of the genotype-phenotype map. One approach to this is the intensive study of model systems for which diverse sources of information can be accumulated and integrated. Saccharomyces cerevisiae is an extensively studied model organism, with well-known protein functions and thoroughly curated phenotype data. To develop and expand the available resources linking genomic variation with function in yeast, we aim to model the pan-genome of S. cerevisiae. To initiate the yeast pan-genome, we newly sequenced or re-sequenced the genomes of 25 strains that are commonly used in the yeast research community using advanced sequencing technology at high quality. We also developed a pipeline for automated pan-genome analysis, which integrates the steps of assembly, annotation, and variation calling. To assign strain-specific functional annotations, we identified genes that were not present in the reference genome. 
We classified these according to their presence or absence across strains and characterized each group of genes with known functional and phenotypic features. The functional roles of novel genes not found in the reference genome and associated with strains or groups of strains appear to be consistent with anticipated adaptations in specific lineages. As more S. cerevisiae strain genomes are released, our analysis can be used to collate genome data and relate it to lineage-specific patterns of genome evolution. Our new tool set will enhance our understanding of genomic and functional evolution in S. cerevisiae, and will be available to the yeast genetics and molecular biology community.}, } @article {pmid25766446, year = {2015}, author = {Liu, L and Zhu, W and Cao, Z and Xu, B and Wang, G and Luo, M}, title = {High correlation between genotypes and phenotypes of environmental bacteria Comamonas testosteroni strains.}, journal = {BMC genomics}, volume = {16}, number = {1}, pages = {110}, pmid = {25766446}, issn = {1471-2164}, mesh = {Base Sequence ; Comamonas testosteroni/*genetics ; Environment ; *Genetic Association Studies ; Genome, Bacterial/*genetics ; }, abstract = {BACKGROUND: Members of Comamonas testosteroni are environmental microorganisms that are usually found in polluted environment samples. They utilize steroids and aromatic compounds but rarely sugars, and show resistance to multiple heavy metals and multiple drugs. However, comprehensive genomic analysis among the C. testosteroni strains is lacked.

RESULTS: To understand the genome bases of the features of C. testosteroni, we sequenced 10 strains of this species and analyzed them together with other related published genome sequences. The results revealed that: 1) the strains of C. testosteroni have genome sizes ranging from 5.1 to 6.0 Mb and G + C contents ranging from 61.1% to 61.8%. The pan-genome contained 10,165 gene families and the core genome contained 3,599 gene families. Heap's law analysis indicated that the pan-genome of C. testosteroni may be open (α = 0.639); 2) by analyzing 31 phenotypes of 11 available C. testosteroni strains, 99.4% of the genotypes (putative genes) were found to be correlated to the phenotypes, indicating a high correlation between phenotypes and genotypes; 3) gene clusters for nitrate reduction, steroids degradation and metal and multi-drug resistance were found and were highly conserved among all the genomes of this species; 4) the genome similarity of C. testosteroni may be related to the geographical distances.

CONCLUSIONS: This work provided an overview on the genomes of C. testosteroni and new genome resources that would accelerate the further investigations of this species. Importantly, this work focused on the analysis of potential genetic determinants for the typical characters and found high correlation between the phenotypes and their corresponding genotypes.}, } @article {pmid25764568, year = {2015}, author = {Ferrario, C and Milani, C and Mancabelli, L and Lugli, GA and Turroni, F and Duranti, S and Mangifesta, M and Viappiani, A and van Sinderen, D and Ventura, M}, title = {A genome-based identification approach for members of the genus Bifidobacterium.}, journal = {FEMS microbiology ecology}, volume = {91}, number = {3}, pages = {}, doi = {10.1093/femsec/fiv009}, pmid = {25764568}, issn = {1574-6941}, mesh = {Base Sequence ; Bifidobacterium/*classification/genetics/*isolation & purification ; DNA Primers/genetics ; DNA, Bacterial/*genetics ; Genome, Bacterial/*genetics ; Polymerase Chain Reaction/*methods ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {During recent years, the significant and increasing interest in novel bifidobacterial strains with health-promoting characteristics has catalyzed the development of methods for efficient and reliable identification of Bifidobacterium strains at (sub) species level. We developed an assay based on recently acquired bifidobacterial genomic data and involving 98 primer pairs, called the Bifidobacterium-ampliseq panel. This panel includes multiplex PCR primers that target both core and variable genes of the pangenome of this genus. 
Our results demonstrate that the employment of the Bifidobacterium-ampliseq panel allows rapid and specific identification of the so far recognized 48 (sub)species harboring the Bifidobacterium genus, and thus represents a cost- and time-effective bifidobacterial screening methodology.}, } @article {pmid25736410, year = {2015}, author = {Rossi-Tamisier, M and Benamar, S and Raoult, D and Fournier, PE}, title = {Cautionary tale of using 16S rRNA gene sequence similarity values in identification of human-associated bacterial species.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {65}, number = {Pt 6}, pages = {1929-1934}, doi = {10.1099/ijs.0.000161}, pmid = {25736410}, issn = {1466-5034}, mesh = {Bacteria/*classification ; Bacterial Typing Techniques/*methods ; DNA, Bacterial/genetics ; Genomics/*methods ; Genotype ; Humans ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Modern bacterial taxonomy is based on a polyphasic approach that combines phenotypic and genotypic characteristics, including 16S rRNA sequence similarity. However, the 95 % (for genus) and 98.7 % (for species) sequence similarity thresholds that are currently recommended to classify bacterial isolates were defined by comparison of a limited number of bacterial species, and may not apply to many genera that contain human-associated species. For each of 158 bacterial genera containing human-associated species, we computed pairwise sequence similarities between all species that have names with standing in nomenclature and then analysed the results, considering as abnormal any similarity value lower than 95 % or greater than 98.7 %. Many of the current bacterial species with validly published names do not respect the 95 and 98.7 % thresholds, with 57.1 % of species exhibiting 16S rRNA gene sequence similarity rates ≥98.7 %, and 60.1 % of genera containing species exhibiting a 16S rRNA gene sequence similarity rate <95 %. 
In only 17 of the 158 genera studied (10.8 %), all species respected the 95 and 98.7 % thresholds. As we need powerful and reliable taxonomical tools, and as potential new tools such as pan-genomics have not yet been fully evaluated for taxonomic purposes, we propose to use as thresholds, genus by genus, the minimum and maximum similarity values observed among species.}, } @article {pmid25733873, year = {2015}, author = {Ku, C and Nelson-Sathi, S and Roettger, M and Garg, S and Hazkani-Covo, E and Martin, WF}, title = {Endosymbiotic gene transfer from prokaryotic pangenomes: Inherited chimerism in eukaryotes.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {112}, number = {33}, pages = {10139-10146}, pmid = {25733873}, issn = {1091-6490}, support = {232975/ERC_/European Research Council/International ; }, mesh = {Alleles ; Animals ; Chloroplasts/genetics ; Computational Biology ; Cyanobacteria/genetics ; DNA, Bacterial/genetics ; Escherichia coli/genetics ; *Evolution, Molecular ; *Gene Transfer, Horizontal ; Genome ; Genome, Bacterial ; Humans ; Mitochondria/genetics ; Phylogeny ; Plastids/genetics ; Recombination, Genetic ; Symbiosis/*genetics ; }, abstract = {Endosymbiotic theory in eukaryotic-cell evolution rests upon a foundation of three cornerstone partners--the plastid (a cyanobacterium), the mitochondrion (a proteobacterium), and its host (an archaeon)--and carries a corollary that, over time, the majority of genes once present in the organelle genomes were relinquished to the chromosomes of the host (endosymbiotic gene transfer). However, notwithstanding eukaryote-specific gene inventions, single-gene phylogenies have never traced eukaryotic genes to three single prokaryotic sources, an issue that hinges crucially upon factors influencing phylogenetic inference. 
In the age of genomes, single-gene trees, once used to test the predictions of endosymbiotic theory, now spawn new theories that stand to eventually replace endosymbiotic theory with descriptive, gene tree-based variants featuring supernumerary symbionts: prokaryotic partners distinct from the cornerstone trio and whose existence is inferred solely from single-gene trees. We reason that the endosymbiotic ancestors of mitochondria and chloroplasts brought into the eukaryotic--and plant and algal--lineage a genome-sized sample of genes from the proteobacterial and cyanobacterial pangenomes of their respective day and that, even if molecular phylogeny were artifact-free, sampling prokaryotic pangenomes through endosymbiotic gene transfer would lead to inherited chimerism. Recombination in prokaryotes (transduction, conjugation, transformation) differs from recombination in eukaryotes (sex). Prokaryotic recombination leads to pangenomes, and eukaryotic recombination leads to vertical inheritance. Viewed from the perspective of endosymbiotic theory, the critical transition at the eukaryote origin that allowed escape from Muller's ratchet--the origin of eukaryotic recombination, or sex--might have required surprisingly little evolutionary innovation.}, } @article {pmid25727385, year = {2015}, author = {Lyu, Z and Lu, Y}, title = {Comparative genomics of three Methanocellales strains reveal novel taxonomic and metabolic features.}, journal = {Environmental microbiology reports}, volume = {7}, number = {3}, pages = {526-537}, doi = {10.1111/1758-2229.12283}, pmid = {25727385}, issn = {1758-2229}, mesh = {Euryarchaeota/classification/*genetics/*metabolism ; *Genetic Variation ; *Genomics ; Metabolic Networks and Pathways/*genetics ; Phylogeny ; }, abstract = {Methanocellales represents a new order of methanogens, which is widespread in environments and plays specifically the important role in methane emissions from paddy fields. 
To gain more insights into Methanocellales, comparative genomic studies were performed among three Methanocellales strains through the same annotation pipeline. Genetic relationships among strains revealed by genome alignment, pan-genome reconstruction and comparison of amino average identity suggest that they should be classified in different genera. In addition, multiple copies of cell cycle regulator proteins were identified for the first time in Archaea. Core metabolisms were reconstructed, predicting certain unique and novel features for Methanocellales, including a set of methanogenesis genes potentially organized toward specialization in utilizing low concentrations of H2, a new route of disulfide reduction catalysed by a disulfide-reducing hydrogenase (Drh) complex phylogenetically related to sulfate-reducing prokaryotes, an oxidative tricarboxylic acid (TCA) cycle, a sophisticated nitrogen uptake and regulation system as well as a versatile sulfur utilization system. These core metabolisms are largely conserved among the three strains, but differences in gene copy number and metabolic diversity are evident. The present study thus adds new dimensions to the unique ecophysiology of Methanocellales and offers a road map for further experimental characterization of this methanogen lineage.}, } @article {pmid25721608, year = {2015}, author = {Xiao, J and Zhang, Z and Wu, J and Yu, J}, title = {A brief review of software tools for pangenomics.}, journal = {Genomics, proteomics \& bioinformatics}, volume = {13}, number = {1}, pages = {73-76}, pmid = {25721608}, issn = {2210-3244}, mesh = {Bacteria/*genetics ; *Computational Biology ; Databases, Factual ; *Genome ; Genomics/*methods ; Humans ; Molecular Sequence Annotation/*methods ; Open Reading Frames/genetics ; Phylogeny ; *Software ; }, abstract = {Since the proposal for pangenomic study, there have been a dozen software tools actively in use for pangenomic analysis. 
By the end of 2014, Panseq and the pan-genomes analysis pipeline (PGAP) ranked as the top two most popular packages according to cumulative citations of peer-reviewed scientific publications. The functions of the software packages and tools, albeit variable among them, include categorizing orthologous genes, calculating pangenomic profiles, integrating gene annotations, and constructing phylogenies. As epigenomic elements are being gradually revealed in prokaryotes, it is expected that pangenomic databases and toolkits have to be extended to handle information of detailed functional annotations for genes and non-protein-coding sequences including non-coding RNAs, insertion elements, and conserved structural elements. To develop better bioinformatic tools, user feedback and integration of novel features are both of essence.}, } @article {pmid25705648, year = {2015}, author = {Ali, A and Naz, A and Soares, SC and Bakhtiar, M and Tiwari, S and Hassan, SS and Hanan, F and Ramos, R and Pereira, U and Barh, D and Figueiredo, HC and Ussery, DW and Miyoshi, A and Silva, A and Azevedo, V}, title = {Pan-genome analysis of human gastric pathogen H. pylori: comparative genomics and pathogenomics approaches to identify regions associated with pathogenicity and prediction of potential core therapeutic targets.}, journal = {BioMed research international}, volume = {2015}, number = {}, pages = {139580}, pmid = {25705648}, issn = {2314-6141}, mesh = {DNA, Bacterial/genetics ; Genetic Variation/genetics ; Genome, Bacterial/*genetics ; Genomic Islands/*genetics ; Genomics/methods ; Helicobacter Infections/microbiology/pathology ; Helicobacter pylori/*genetics ; Humans ; Phylogeny ; Stomach/*microbiology/pathology ; Stomach Neoplasms/microbiology/pathology ; Virulence/*genetics ; }, abstract = {Helicobacter pylori is a human gastric pathogen implicated as the major cause of peptic ulcer and second leading cause of gastric cancer (~70%) around the world. 
Conversely, an increased resistance to antibiotics and hindrances in the development of vaccines against H. pylori are observed. Pan-genome analyses of the global representative H. pylori isolates consisting of 39 complete genomes are presented in this paper. Phylogenetic analyses have revealed close relationships among geographically diverse strains of H. pylori. The conservation among these genomes was further analyzed by pan-genome approach; the predicted conserved gene families (1,193) constitute ~77% of the average H. pylori genome and 45% of the global gene repertoire of the species. Reverse vaccinology strategies have been adopted to identify and narrow down the potential core-immunogenic candidates. Total of 28 nonhost homolog proteins were characterized as universal therapeutic targets against H. pylori based on their functional annotation and protein-protein interaction. Finally, pathogenomics and genome plasticity analysis revealed 3 highly conserved and 2 highly variable putative pathogenicity islands in all of the H. pylori genomes been analyzed.}, } @article {pmid25682011, year = {2015}, author = {Thynne, E and McDonald, MC and Solomon, PS}, title = {Phytopathogen emergence in the genomics era.}, journal = {Trends in plant science}, volume = {20}, number = {4}, pages = {246-255}, doi = {10.1016/j.tplants.2015.01.009}, pmid = {25682011}, issn = {1878-4372}, mesh = {*Bacterial Physiological Phenomena/genetics ; Fungi/genetics/*physiology ; Gene Transfer, Horizontal ; Genome, Microbial ; Host Specificity ; *Host-Pathogen Interactions ; Plant Diseases/genetics/*microbiology/virology ; *Virus Physiological Phenomena/genetics ; }, abstract = {Phytopathogens are a global threat to plant agriculture and biodiversity. The genomics era has lead to an exponential rise in comparative gene and genome studies of both economically significant and insignificant microorganisms. 
In this review we highlight some recent comparisons and discuss how they identify shared genes or genomic regions associated with host virulence. The two major mechanisms of rapid genome adaptation - horizontal gene transfer and hybridisation - are reviewed and we consider how intra-specific pan-genome sequences encode alternative host specificity. We also discuss the power that access to expansive gene databases provides in aiding the study of phytopathogen emergence. These databases can rapidly enable the identification of an unknown pathogen and its origin, as well as genomic adaptations required for emergence.}, } @article {pmid25649684, year = {2015}, author = {Thode, SK and Kahlke, T and Robertsen, EM and Hansen, H and Haugen, P}, title = {The immediate global responses of Aliivibrio salmonicida to iron limitations.}, journal = {BMC microbiology}, volume = {15}, number = {1}, pages = {9}, pmid = {25649684}, issn = {1471-2180}, mesh = {Aliivibrio salmonicida/*genetics/growth & development/metabolism/*physiology ; Gene Expression Profiling ; *Gene Expression Regulation, Bacterial ; Iron/*metabolism ; Microarray Analysis ; Molecular Sequence Data ; Sequence Analysis, DNA ; Siderophores/*biosynthesis/genetics ; *Stress, Physiological ; Vibrio cholerae/genetics/metabolism ; Vibrio vulnificus/genetics/metabolism ; }, abstract = {BACKGROUND: Iron is an essential micronutrient for all living organisms, and virulence and sequestration of iron in pathogenic bacteria are believed to be correlated. As a defence mechanism, potential hosts therefore keep the level of free iron inside the body to a minimum. In general, iron metabolism is well studied for some bacteria (mostly human or animal pathogens). However, this area is still under-investigated for a number of important bacterial pathogens. 
Aliivibrio salmonicida is a fish pathogen, and previous studies of this bacterium have shown that production of siderophores is temperature regulated and dependent on low iron conditions. In this work we studied the immediate changes in transcription in response to a sudden decrease in iron levels in cultures of A. salmonicida. In addition, we compared our results to studies performed with Vibrio cholerae and Vibrio vulnificus using a pan-genomic approach.

RESULTS: Microarray technology was used to monitor global changes in transcriptional levels. Cultures of A. salmonicida were grown to mid log phase before the iron chelator 2,2'-dipyridyl was added and samples were collected after 15 minutes of growth. Using our statistical cut-off values, we retrieved thirty-two differentially expressed genes where the most up-regulated genes belong to an operon encoding proteins responsible for producing the siderophore bisucaberin. A subsequent pan-transcriptome analysis revealed that nine of the up-regulated genes from our dataset were also up-regulated in datasets from similar experiments using V. cholerae and V. vulnificus, thus indicating that these genes are involved in a shared strategy to mitigate low iron conditions.

CONCLUSIONS: The present work highlights the effect of iron limitation on the gene regulatory network of the fish pathogen A. salmonicida, and provides insights into common and unique strategies of Vibrionaceae species to mitigate low iron conditions.}, } @article {pmid25642229, year = {2014}, author = {Koton, Y and Gordon, M and Chalifa-Caspi, V and Bisharat, N}, title = {Comparative genomic analysis of clinical and environmental Vibrio vulnificus isolates revealed biotype 3 evolutionary relationships.}, journal = {Frontiers in microbiology}, volume = {5}, number = {}, pages = {803}, pmid = {25642229}, issn = {1664-302X}, abstract = {In 1996 a common-source outbreak of severe soft tissue and bloodstream infections erupted among Israeli fish farmers and fish consumers due to changes in fish marketing policies. The causative pathogen was a new strain of Vibrio vulnificus, named biotype 3, which displayed a unique biochemical and genotypic profile. Initial observations suggested that the pathogen erupted as a result of genetic recombination between two distinct populations. We applied a whole genome shotgun sequencing approach using several V. vulnificus strains from Israel in order to study the pan genome of V. vulnificus and determine the phylogenetic relationship of biotype 3 with existing populations. The core genome of V. vulnificus based on 16 draft and complete genomes consisted of 3068 genes, representing between 59 and 78% of the whole genome of 16 strains. The accessory genome varied in size from 781 to 2044 kbp. Phylogenetic analysis based on whole, core, and accessory genomes displayed similar clustering patterns with two main clusters, clinical (C) and environmental (E), all biotype 3 strains formed a distinct group within the E cluster. 
Annotation of accessory genomic regions found in biotype 3 strains and absent from the core genome yielded 1732 genes, of which the vast majority encoded hypothetical proteins, phage-related proteins, and mobile element proteins. A total of 1916 proteins (including 713 hypothetical proteins) were present in all human pathogenic strains (both biotype 3 and non-biotype 3) and absent from the environmental strains. Clustering analysis of the non-hypothetical proteins revealed 148 protein clusters shared by all human pathogenic strains; these included transcriptional regulators, arylsulfatases, methyl-accepting chemotaxis proteins, acetyltransferases, GGDEF family proteins, transposases, type IV secretory system (T4SS) proteins, and integrases. Our study showed that V. vulnificus biotype 3 evolved from environmental populations and formed a genetically distinct group within the E-cluster. The unique epidemiological circumstances facilitated disease outbreak and brought this genotype to the attention of the scientific community.}, } @article {pmid25641101, year = {2015}, author = {Kadam, A and Janto, B and Eutsey, R and Earl, JP and Powell, E and Dahlgren, ME and Hu, FZ and Ehrlich, GD and Hiller, NL}, title = {Streptococcus pneumoniae Supragenome Hybridization Arrays for Profiling of Genetic Content and Gene Expression.}, journal = {Current protocols in microbiology}, volume = {36}, number = {}, pages = {9D.4.1-9D.4.20}, pmid = {25641101}, issn = {1934-8533}, support = {R00 DC011322/DC/NIDCD NIH HHS/United States ; AI080935/AI/NIAID NIH HHS/United States ; K99 DC011322/DC/NIDCD NIH HHS/United States ; R01 DC002148/DC/NIDCD NIH HHS/United States ; DC011322/DC/NIDCD NIH HHS/United States ; DC02148/DC/NIDCD NIH HHS/United States ; R01 AI080935/AI/NIAID NIH HHS/United States ; }, mesh = {Gene Expression Profiling/methods ; *Genetic Variation ; Genetics, Microbial/*methods ; Genomics/methods ; Molecular Biology/*methods ; Nucleic Acid Hybridization/*methods ; Streptococcus 
pneumoniae/*classification/*genetics ; }, abstract = {There is extensive genomic diversity among Streptococcus pneumoniae isolates. Approximately half of the comprehensive set of genes in the species (the supragenome or pangenome) is present in all the isolates (core set), and the remaining is unevenly distributed among strains (distributed set). The Streptococcus pneumoniae Supragenome Hybridization (SpSGH) array provides coverage for an extensive set of genes and polymorphisms encountered within this species, capturing this genomic diversity. Further, the capture is quantitative. In this manner, the SpSGH array allows for both genomic and transcriptomic analyses of diverse S. pneumoniae isolates on a single platform. In this unit, we present the SpSGH array, and describe in detail its design and implementation for both genomic and transcriptomic analyses. The methodology can be applied to construction and modification of SpSGH array platforms, as well to other bacterial species as long as multiple whole-genome sequences are available that collectively capture the vast majority of the species supragenome.}, } @article {pmid25620970, year = {2014}, author = {Upadhyaya, NM and Garnica, DP and Karaoglu, H and Sperschneider, J and Nemri, A and Xu, B and Mago, R and Cuomo, CA and Rathjen, JP and Park, RF and Ellis, JG and Dodds, PN}, title = {Comparative genomics of Australian isolates of the wheat stem rust pathogen Puccinia graminis f. sp. tritici reveals extensive polymorphism in candidate effector genes.}, journal = {Frontiers in plant science}, volume = {5}, number = {}, pages = {759}, pmid = {25620970}, issn = {1664-462X}, abstract = {The wheat stem rust fungus Puccinia graminis f. sp. tritici (Pgt) is one of the most destructive pathogens of wheat. In this study, a draft genome was built for a founder Australian Pgt isolate of pathotype (pt.) 21-0 (collected in 1954) by next generation DNA sequencing. 
A combination of reference-based assembly using the genome of the previously sequenced American Pgt isolate CDL 75-36-700-3 (p7a) and de novo assembly were performed resulting in a 92 Mbp reference genome for Pgt isolate 21-0. Approximately 13 Mbp of de novo assembled sequence in this genome is not present in the p7a reference assembly. This novel sequence is not specific to 21-0 as it is also present in three other Pgt rust isolates of independent origin. The new reference genome was subsequently used to build a pan-genome based on five Australian Pgt isolates. Transcriptomes from germinated urediniospores and haustoria were separately assembled for pt. 21-0 and comparison of gene expression profiles showed differential expression in ∼10% of the genes each in germinated spores and haustoria. A total of 1,924 secreted proteins were predicted from the 21-0 transcriptome, of which 520 were classified as haustorial secreted proteins (HSPs). Comparison of 21-0 with two presumed clonal field derivatives of this lineage (collected in 1982 and 1984) that had evolved virulence on four additional resistance genes (Sr5, Sr11, Sr27, SrSatu) identified mutations in 25 HSP effector candidates. 
Some of these mutations could explain their novel virulence phenotypes.}, } @article {pmid25608745, year = {2015}, author = {Zheng, J and Guan, Z and Cao, S and Peng, D and Ruan, L and Jiang, D and Sun, M}, title = {Plasmids are vectors for redundant chromosomal genes in the Bacillus cereus group.}, journal = {BMC genomics}, volume = {16}, number = {1}, pages = {6}, pmid = {25608745}, issn = {1471-2164}, mesh = {Bacillus cereus/*classification/*genetics ; Bacterial Proteins/genetics ; Biological Evolution ; Chromosomes, Bacterial/*genetics/metabolism ; Cluster Analysis ; Databases, Genetic ; Phylogeny ; Plasmids/*genetics/metabolism ; }, abstract = {BACKGROUND: Prokaryotic plasmids have played significant roles in the evolution of bacterial genomes and have a great impact on the metabolic functions of the host cell. Many bacterial strains contain multiple plasmids, but the relationships between bacterial plasmids and chromosomes are unclear. We focused on plasmids from the Bacillus cereus group because most strains contain several plasmids.

RESULTS: We collected the genome sequences of 104 plasmids and 20 chromosomes from B. cereus group strains, and we studied the relationships between plasmids and chromosomes by focusing on the pan-genomes of these plasmids and chromosomes. In terms of basic features (base composition and codon usage), the genes on plasmids were more similar to the chromosomal variable genes (distributed genes and unique genes) than to the chromosomal core genes. Although all the functional categories of the chromosomal genes were exhibited by the plasmid genes, the proportions of each category differed between these two gene sets. The 598 gene families shared between chromosomes and plasmids displayed a uniform distribution between the two groups. A phylogenetic analysis of the shared genes, including the chromosomal core gene set, indicated that gene exchange events between plasmids and chromosomes occurred frequently during the evolutionary histories of the strains and species in this group. Moreover, the shared genes between plasmids and chromosomes usually had different promoter and terminator sequences, suggesting that they are regulated by different elements at the transcriptional level.

CONCLUSIONS: We speculate that for the entire B. cereus group, adaptive genes are preserved on both plasmids and chromosomes; however, in a single cell, homologous genes on plasmids and the chromosome are controlled by different regulators to reduce the burden of maintaining redundant genes.}, } @article {pmid25597990, year = {2015}, author = {Besenbacher, S and Liu, S and Izarzugaza, JM and Grove, J and Belling, K and Bork-Jensen, J and Huang, S and Als, TD and Li, S and Yadav, R and Rubio-García, A and Lescai, F and Demontis, D and Rao, J and Ye, W and Mailund, T and Friborg, RM and Pedersen, CN and Xu, R and Sun, J and Liu, H and Wang, O and Cheng, X and Flores, D and Rydza, E and Rapacki, K and Damm Sørensen, J and Chmura, P and Westergaard, D and Dworzynski, P and Sørensen, TI and Lund, O and Hansen, T and Xu, X and Li, N and Bolund, L and Pedersen, O and Eiberg, H and Krogh, A and Børglum, AD and Brunak, S and Kristiansen, K and Schierup, MH and Wang, J and Gupta, R and Villesen, P and Rasmussen, S}, title = {Novel variation and de novo mutation rates in population-wide de novo assembled Danish trios.}, journal = {Nature communications}, volume = {6}, number = {}, pages = {5969}, pmid = {25597990}, issn = {2041-1723}, mesh = {Algorithms ; Genome, Human/*genetics ; Humans ; Mutation Rate ; Polymorphism, Single Nucleotide/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {Building a population-specific catalogue of single nucleotide variants (SNVs), indels and structural variants (SVs) with frequencies, termed a national pan-genome, is critical for further advancing clinical and public health genetics in large cohorts. Here we report a Danish pan-genome obtained from sequencing 10 trios to high depth (50 ×). We report 536k novel SNVs and 283k novel short indels from mapping approaches and develop a population-wide de novo assembly approach to identify 132k novel indels larger than 10 nucleotides with low false discovery rates. 
We identify a higher proportion of indels and SVs than previous efforts showing the merits of high coverage and de novo assembly approaches. In addition, we use trio information to identify de novo mutations and use a probabilistic method to provide direct estimates of 1.27e-8 and 1.5e-9 per nucleotide per generation for SNVs and indels, respectively.}, } @article {pmid25566394, year = {2014}, author = {Rouli, L and MBengue, M and Robert, C and Ndiaye, M and La Scola, B and Raoult, D}, title = {Genomic analysis of three African strains of Bacillus anthracis demonstrates that they are part of the clonal expansion of an exclusively pathogenic bacterium.}, journal = {New microbes and new infections}, volume = {2}, number = {6}, pages = {161--169}, pmid = {25566394}, issn = {2052-2975}, abstract = {Bacillus anthracis is the causative agent of anthrax and is classified as a 'Category A' biological weapon. Six complete genomes of B. anthracis (A0248, Ames, Ames Ancestor, CDC684, H0491, and Sterne) are currently available. In this report, we add three African strain genomes: Sen2Col2, Sen3 and Gmb1. To study the pan-genome of B. anthracis, we used bioinformatics tools, such as Cluster of Orthologous Groups, and performed phylogenetic analysis. We found that the three African strains contained the pX01 and pX02 plasmids, the nonsense mutation in the plcR gene and the four known prophages. These strains are most similar to the CDC684 strain and belong to the A cluster. We estimated that the B. anthracis pan-genome has 2893 core genes (99% of the genome size) and 85 accessory genes. We validated the hypothesis that B. anthracis has a closed pan-genome and found that the three African strains carry the two plasmids associated with bacterial virulence. The pan-genome nature of B. anthracis confirms its lack of exchange (similar to Clostridium tetani) and supports its exclusively pathogenic role, despite its survival in the environment. 
Moreover, thanks to the study of the core content single nucleotide polymorphisms, we can see that our three African strains diverged very recently from the other B. anthracis strains.}, } @article {pmid25565268, year = {2015}, author = {Nguyen, N and Hickey, G and Zerbino, DR and Raney, B and Earl, D and Armstrong, J and Kent, WJ and Haussler, D and Paten, B}, title = {Building a pan-genome reference for a population.}, journal = {Journal of computational biology : a journal of computational molecular cell biology}, volume = {22}, number = {5}, pages = {387--401}, pmid = {25565268}, issn = {1557-8666}, support = {U41 HG002371/HG/NHGRI NIH HHS/United States ; U41HG004568/HG/NHGRI NIH HHS/United States ; U01HG004695/HG/NHGRI NIH HHS/United States ; 095908//Wellcome Trust/United Kingdom ; U54 HG007990/HG/NHGRI NIH HHS/United States ; P41HG002371/HG/NHGRI NIH HHS/United States ; U54HG004555/HG/NHGRI NIH HHS/United States ; }, mesh = {*Algorithms ; Computer Graphics ; Evolution, Molecular ; Genetics, Population/*standards/statistics & numerical data ; *Genome, Human ; Humans ; Reference Standards ; Sequence Alignment ; Sequence Analysis, DNA ; *Software ; }, abstract = {A reference genome is a high quality individual genome that is used as a coordinate system for the genomes of a population, or genomes of closely related subspecies. Given a set of genomes partitioned by homology into alignment blocks we formalize the problem of ordering and orienting the blocks such that the resulting ordering maximally agrees with the underlying genomes' ordering and orientation, creating a pan-genome reference ordering. We show this problem is NP-hard, but also demonstrate, empirically and within simulations, the performance of heuristic algorithms based upon a cactus graph decomposition to find locally maximal solutions. 
We describe an extension of our Cactus software to create a pan-genome reference for whole genome alignments, and demonstrate how it can be used to create novel genome browser visualizations using human variation data as a test. In addition, we test the use of a pan-genome for describing variations and as a reference for read mapping.}, } @article {pmid25563635, year = {2015}, author = {Bacci, G and Ceccherini, MT and Bani, A and Bazzicalupo, M and Castaldini, M and Galardini, M and Giovannetti, L and Mocali, S and Pastorelli, R and Pantani, OL and Arfaioli, P and Pietramellara, G and Viti, C and Nannipieri, P and Mengoni, A}, title = {Exploring the dynamics of bacterial community composition in soil: the pan-bacteriome approach.}, journal = {Antonie van Leeuwenhoek}, volume = {107}, number = {3}, pages = {785--797}, doi = {10.1007/s10482-014-0372-4}, pmid = {25563635}, issn = {1572-9699}, mesh = {*Biota ; Desiccation ; Longitudinal Studies ; Soil/chemistry ; *Soil Microbiology ; }, abstract = {We performed a longitudinal study (repeated observations of the same sample over time) to investigate both the composition and structure of temporal changes of bacterial community composition in soil mesocosms, subjected to three different treatments (water and 5 or 25 mg kg(-1) of dried soil Cd(2+)). By analogy with the pan genome concept, we identified a core bacteriome and an accessory bacteriome. Resident taxa were assigned to the core bacteriome, while occasional taxa were assigned to the accessory bacteriome. Core and accessory bacteriome represented roughly 35 and 50 % of the taxa detected, respectively, and were characterized by different taxonomic signatures from phylum to genus level while 15 % of the taxa were found to be unique to a particular sample. 
In particular, the core bacteriome was characterized by higher abundance of members of Planctomycetes, Actinobacteria, Verrucomicrobia and Acidobacteria, while the accessory bacteriome included more members of Firmicutes, Chlamydiae and Proteobacteria, suggesting potentially different responses to environmental changes of members from these phyla. We conclude that the pan-bacteriome model may be a useful approach to gain insight for modeling bacterial community structure and inferring different abilities of bacteria taxa.}, } @article {pmid25539682, year = {2014}, author = {Howell, KJ and Weinert, LA and Chaudhuri, RR and Luan, SL and Peters, SE and Corander, J and Harris, D and Angen, Ø and Aragon, V and Bensaid, A and Williamson, SM and Parkhill, J and Langford, PR and Rycroft, AN and Wren, BW and Holden, MT and Tucker, AW and Maskell, DJ and others}, title = {The use of genome wide association methods to investigate pathogenicity, population structure and serovar in Haemophilus parasuis.}, journal = {BMC genomics}, volume = {15}, number = {}, pages = {1179}, pmid = {25539682}, issn = {1471-2164}, support = {BB/G019177/1//Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/G019274/1//Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/G003203/1//Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/G018553/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/G020744/1//Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Genome, Viral ; *Genome-Wide Association Study ; Haemophilus parasuis/classification/genetics/*pathogenicity ; Recombination, Genetic ; Swine/virology ; Virulence/genetics ; }, abstract = {BACKGROUND: Haemophilus parasuis is the etiologic agent of Glässer's disease in pigs and causes devastating losses to the farming industry. 
Whilst some hyper-virulent isolates have been described, the relationship between genetics and disease outcome has been only partially established. In particular, there is weak correlation between serovar and disease phenotype. We sequenced the genomes of 212 isolates of H. parasuis and have used this to describe the pan-genome and to correlate this with clinical and carrier status, as well as with serotype.

RESULTS: Recombination and population structure analyses identified five groups with very high rates of recombination, separated into two clades of H. parasuis with no signs of recombination between them. We used genome-wide association methods including discriminant analysis of principal components (DAPC) and generalised linear modelling (glm) to look for genetic determinants of this population partition, serovar and pathogenicity. We were able to identify genes from the accessory genome that were significantly associated with phenotypes such as potential serovar specific genes including capsule genes, and 48 putative virulence factors that were significantly different between the clinical and non-clinical isolates. We also show that the presence of many previously suggested virulence factors is not an appropriate marker of virulence.

CONCLUSIONS: These genes will inform the generation of new molecular diagnostics and vaccines, and refinement of existing typing schemes and show the importance of the accessory genome of a diverse species when investigating the relationship between genotypes and phenotypes.}, } @article {pmid25523018, year = {2015}, author = {Duranti, S and Milani, C and Lugli, GA and Turroni, F and Mancabelli, L and Sanchez, B and Ferrario, C and Viappiani, A and Mangifesta, M and Mancino, W and Gueimonde, M and Margolles, A and van Sinderen, D and Ventura, M}, title = {Insights from genomes of representatives of the human gut commensal Bifidobacterium bifidum.}, journal = {Environmental microbiology}, volume = {17}, number = {7}, pages = {2515--2531}, doi = {10.1111/1462-2920.12743}, pmid = {25523018}, issn = {1462-2920}, mesh = {Animals ; Bifidobacterium/*genetics/growth & development/*metabolism ; Biological Evolution ; *Gastrointestinal Microbiome ; Gastrointestinal Tract/*microbiology ; Genome, Bacterial ; Humans ; Infant ; Infant, Newborn ; Molecular Sequence Data ; Mucins/metabolism ; Polysaccharides/*metabolism ; }, abstract = {Bifidobacteria are bacterial gut commensals of mammals, birds and social insects that are perceived to influence the metabolism/physiology of their host. In this context, members of the Bifidobacterium bifidum species are believed to significantly contribute to the overall microbiota of the human gut at infant stage. However, the molecular reasons for their adaptation to this environment are poorly understood. In this study, we analysed the pan-genome of B. bifidum species by decoding genomes of 15 B. bifidum strains, which highlighted the existence of a conserved gene uniquely present in this bifidobacterial taxon, underscoring a nutrient acquisition strategy that targets host-derived glycans, such as those present in mucin. 
Growth experiments and corresponding transcriptomic analyses confirmed the in silico data and supported these intriguing and unique host glycan-specific saccharolytic features. The ubiquity of the genetic features of B. bifidum for the breakdown of host glycans was confirmed by interrogating metagenomic datasets, thereby supporting the notion that metabolic access to host-derived glycans is a potent evolutionary force that has shaped B. bifidum genomes and consequently the ecology of the infant intestinal microbiota.}, } @article {pmid25522143, year = {2014}, author = {Lanza, VF and de Toro, M and Garcillán-Barcia, MP and Mora, A and Blanco, J and Coque, TM and de la Cruz, F}, title = {Plasmid flux in Escherichia coli ST131 sublineages, analyzed by plasmid constellation network (PLACNET), a new method for plasmid reconstruction from whole genome sequences.}, journal = {PLoS genetics}, volume = {10}, number = {12}, pages = {e1004766}, pmid = {25522143}, issn = {1553-7404}, support = {L60 MD002414/MD/NIMHD NIH HHS/United States ; }, mesh = {DNA, Bacterial/genetics ; Escherichia coli/classification/*genetics ; Evolution, Molecular ; *Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; Multigene Family ; Phylogeny ; Plasmids/*genetics ; Sequence Analysis, DNA ; }, abstract = {Bacterial whole genome sequence (WGS) methods are rapidly overtaking classical sequence analysis. Many bacterial sequencing projects focus on mobilome changes, since macroevolutionary events, such as the acquisition or loss of mobile genetic elements, mainly plasmids, play essential roles in adaptive evolution. Existing WGS analysis protocols do not assort contigs between plasmids and the main chromosome, thus hampering full analysis of plasmid sequences. 
We developed a method (called plasmid constellation networks or PLACNET) that identifies, visualizes and analyzes plasmids in WGS projects by creating a network of contig interactions, thus allowing comprehensive plasmid analysis within WGS datasets. The workflow of the method is based on three types of data: assembly information (including scaffold links and coverage), comparison to reference sequences and plasmid-diagnostic sequence features. The resulting network is pruned by expert analysis, to eliminate confounding data, and implemented in a Cytoscape-based graphic representation. To demonstrate PLACNET sensitivity and efficacy, the plasmidome of the Escherichia coli lineage ST131 was analyzed. ST131 is a globally spread clonal group of extraintestinal pathogenic E. coli (ExPEC), comprising different sublineages with ability to acquire and spread antibiotic resistance and virulence genes via plasmids. Results show that plasmids flux in the evolution of this lineage, which is wide open for plasmid exchange. MOBF12/IncF plasmids were pervasive, adding just by themselves more than 350 protein families to the ST131 pangenome. Nearly 50% of the most frequent γ-proteobacterial plasmid groups were found to be present in our limited sample of ten analyzed ST131 genomes, which represent the main ST131 sublineages.}, } @article {pmid25513663, year = {2015}, author = {Mengoni, A and Fondi, M and Galardini, M}, title = {Preface. 
Bacterial pangenomics.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1231}, number = {}, pages = {v--vi}, doi = {10.1007/978-1-4939-1720-4}, pmid = {25513663}, issn = {1940-6029}, mesh = {Bacteria/classification/*genetics ; Evolution, Molecular ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Metagenomics/instrumentation/*methods ; Molecular Sequence Annotation/*methods/statistics & numerical data ; Phylogeny ; *Software ; }, } @article {pmid25510495, year = {2015}, author = {Tatusova, T and Ciufo, S and Federhen, S and Fedorov, B and McVeigh, R and O'Neill, K and Tolstoy, I and Zaslavsky, L}, title = {Update on RefSeq microbial genomes resources.}, journal = {Nucleic acids research}, volume = {43}, number = {Database issue}, pages = {D599--D605}, pmid = {25510495}, issn = {1362-4962}, support = {//Intramural NIH HHS/United States ; }, mesh = {*Databases, Nucleic Acid ; *Genome, Archaeal ; *Genome, Bacterial ; Internet ; Molecular Sequence Annotation ; }, abstract = {NCBI RefSeq genome collection http://www.ncbi.nlm.nih.gov/genome represents all three major domains of life: Eukarya, Bacteria and Archaea as well as Viruses. Prokaryotic genome sequences are the most rapidly growing part of the collection. During the year of 2014 more than 10,000 microbial genome assemblies have been publicly released bringing the total number of prokaryotic genomes close to 30,000. We continue to improve the quality and usability of the microbial genome resources by providing easy access to the data and the results of the pre-computed analysis, and improving analysis and visualization tools. A number of improvements have been incorporated into the Prokaryotic Genome Annotation Pipeline. 
Several new features have been added to RefSeq prokaryotic genomes data processing pipeline including the calculation of genome groups (clades) and the optimization of protein clusters generation using pan-genome approach.}, } @article {pmid25504040, year = {2014}, author = {Baddam, R and Kumar, N and Shaik, S and Lankapalli, AK and Ahmed, N}, title = {Genome dynamics and evolution of Salmonella Typhi strains from the typhoid-endemic zones.}, journal = {Scientific reports}, volume = {4}, number = {}, pages = {7457}, pmid = {25504040}, issn = {2045-2322}, mesh = {Asia, Southeastern/epidemiology ; Cluster Analysis ; Endemic Diseases ; Genome, Bacterial ; Humans ; Interspersed Repetitive Sequences ; Phylogeny ; Polymorphism, Single Nucleotide ; Salmonella typhi/*genetics ; Typhoid Fever/epidemiology/*microbiology ; }, abstract = {Typhoid fever poses significant burden on healthcare systems in Southeast Asia and other endemic countries. Several epidemiological and genomic studies have attributed pseudogenisation to be the major driving force for the evolution of Salmonella Typhi although its real potential remains elusive. In the present study, we analyzed genomes of S. Typhi from different parts of Southeast Asia and Oceania, comprising of isolates from outbreak, sporadic and carrier cases. The genomes showed high genetic relatedness with limited opportunity for gene acquisition as evident from pan-genome structure. Given that pseudogenisation is an active process in S. Typhi, we further investigated core and pan-genome profiles of functional and pseudogenes separately. We observed a decline in core functional gene content and a significant increase in accessory pseudogene content. Upon functional classification, genes encoding metabolic functions formed a major constituent of pseudogenes as well as core functional gene clusters with SNPs. 
Further, an in-depth analysis of accessory pseudogene content revealed the existence of heterogeneous complements of functional and pseudogenes among the strains. In addition, these polymorphic genes were also enriched in metabolism related functions. Thus, the study highlights the existence of heterogeneous strains in a population with varying metabolic potential and that S. Typhi possibly resorts to metabolic fine tuning for its adaptation.}, } @article {pmid25502908, year = {2014}, author = {Li, J and Wong, CF and Wong, MT and Huang, H and Leung, FC}, title = {Modularized evolution in archaeal methanogens phylogenetic forest.}, journal = {Genome biology and evolution}, volume = {6}, number = {12}, pages = {3344--3359}, pmid = {25502908}, issn = {1759-6653}, mesh = {Euryarchaeota/classification/*genetics/metabolism ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genes, Archaeal ; Methane/metabolism ; *Phylogeny ; }, abstract = {Methanogens are methane-producing archaea that play a key role in the global carbon cycle. To date, the evolutionary history of methanogens and closely related nonmethanogen species remains unresolved among studies conducted upon different genetic markers, attributing to horizontal gene transfers (HGTs). With an effort to decipher both congruent and conflicting evolutionary events, reconstruction of coevolved gene clusters and hierarchical structure in the archaeal methanogen phylogenetic forest, comprehensive evolution, and network analyses were performed upon 3,694 gene families from 41 methanogens and 33 closely related archaea. 
Our results show that 1) greater than 50% of genes are in topological dissonance with others; 2) the prevalent interorder HGTs, even for core genes, in methanogen genomes led to their scrambled phylogenetic relationships; 3) most methanogenesis-related genes have experienced at least one HGT; 4) greater than 20% of the genes in methanogen genomes were transferred horizontally from other archaea, with genes involved in cell-wall synthesis and defense system having been transferred most frequently; 5) the coevolution network contains seven statistically robust modules, wherein the central module has the highest average node strength and comprises a majority of the core genes; 6) different coevolutionary module genes boomed in different time and evolutionary lineage, constructing diversified pan-genome structures; 7) the modularized evolution is also closely related to the vertical evolution signals and the HGT rate of the genes. Overall, this study presented a modularized phylogenetic forest that describes a combination of complicated vertical and nonvertical evolutionary processes for methanogenic archaeal species.}, } @article {pmid25499580, year = {2015}, author = {Baumdicker, F}, title = {The site frequency spectrum of dispensable genes.}, journal = {Theoretical population biology}, volume = {100C}, number = {}, pages = {13--25}, doi = {10.1016/j.tpb.2014.12.001}, pmid = {25499580}, issn = {1096-0325}, abstract = {The differences between DNA-sequences within a population are the basis to infer the ancestral relationship of the individuals. Within the classical infinitely many sites model, it is possible to estimate the mutation rate based on the site frequency spectrum, which is comprised by the numbers $C_1,\ldots,C_{n-1}$ where n is the sample size and $C_s$ is the number of site mutations (Single Nucleotide Polymorphisms, SNPs) which are seen in s genomes. 
Classical results can be used to compare the observed site frequency spectrum with its neutral expectation, $E[C_s]=\theta_2/s$, where $\theta_2$ is the scaled site mutation rate. In this paper, we will relax the assumption of the infinitely many sites model that all individuals only carry homologous genetic material. Especially, it is today well-known that bacterial genomes have the ability to gain and lose genes, such that every single genome is a mosaic of genes, and genes are present and absent in a random fashion, giving rise to the dispensable genome. While this presence and absence has been modeled under neutral evolution within the infinitely many genes model in Baumdicker et al. (2010), we link presence and absence of genes with the numbers of site mutations seen within each gene. In this work we derive a formula for the expectation of the joint gene and site frequency spectrum, denoted by $G_{k,s}$, the number of mutated sites occurring in exactly s gene sequences, while the corresponding gene is present in exactly k individuals. We show that standard estimators of $\theta_2$ for dispensable genes are biased and that the site frequency spectrum for dispensable genes differs from the classical result.}, } @article {pmid25483351, year = {2015}, author = {Vernikos, G and Medini, D and Riley, DR and Tettelin, H}, title = {Ten years of pan-genome analyses.}, journal = {Current opinion in microbiology}, volume = {23}, number = {}, pages = {148--154}, doi = {10.1016/j.mib.2014.11.016}, pmid = {25483351}, issn = {1879-0364}, mesh = {Animals ; Computational Biology/*methods/trends ; *Genome, Microbial ; Genomics/*methods/trends ; Humans ; Sequence Alignment/methods ; }, abstract = {Next generation sequencing technologies have engendered a genome sequence data deluge in public databases. Genome analyses have transitioned from single or few genomes to hundreds to thousands of genomes. 
Pan-genome analyses provide a framework for estimating the genomic diversity of the dataset at hand and predicting the number of additional whole genomes sequences that would be necessary to fully characterize that diversity. We review recent implementations of the pan-genome approach, its impact and limits, and we propose possible extensions, including analyses at the whole genome multiple sequence alignment level.}, } @article {pmid25480015, year = {2014}, author = {Ojala, T and Kankainen, M and Castro, J and Cerca, N and Edelman, S and Westerlund-Wikström, B and Paulin, L and Holm, L and Auvinen, P}, title = {Comparative genomics of Lactobacillus crispatus suggests novel mechanisms for the competitive exclusion of Gardnerella vaginalis.}, journal = {BMC genomics}, volume = {15}, number = {}, pages = {1070}, pmid = {25480015}, issn = {1471-2164}, mesh = {Antibiosis/*genetics ; Bacterial Adhesion/genetics ; Bacteriophages ; Cell Wall/metabolism ; Clustered Regularly Interspaced Short Palindromic Repeats ; Computational Biology ; Female ; Gardnerella vaginalis/*genetics ; Gene Order ; Gene Transfer, Horizontal ; Genetic Variation ; *Genome, Bacterial ; *Genomics ; HeLa Cells ; Humans ; Lactobacillus/classification/*genetics/metabolism/virology ; Metabolic Networks and Pathways ; Molecular Sequence Annotation ; Multigene Family ; Phylogeny ; Polysaccharides, Bacterial/metabolism ; }, abstract = {BACKGROUND: Lactobacillus crispatus is a ubiquitous micro-organism encountered in a wide range of host-associated habitats. It can be recovered from the gastrointestinal tract of animals and it is a common constituent of the vaginal microbiota of humans. Moreover, L. crispatus can contribute to the urogenital health of the host through competitive exclusion and the production of antimicrobial agents. In order to investigate the genetic diversity of this important urogenital species, we performed a comparative genomic analysis of L. crispatus.

RESULTS: Utilizing the completed genome sequence of a strain ST1 and the draft genome sequences of nine other L. crispatus isolates, we defined the scale and scope of the pan- and core genomic potential of L. crispatus. Our comparative analysis identified 1,224 and 2,705 ortholog groups present in all or only some of the ten strains, respectively. Based on mathematical modeling, sequencing of additional L. crispatus isolates would result in the identification of new genes and functions, whereas the conserved core of the ten strains was a good representation of the final L. crispatus core genome, estimated to level at about 1,116 ortholog groups. Importantly, the current core was observed to encode bacterial components potentially promoting urogenital health. Using antibody fragments specific for one of the conserved L. crispatus adhesins, we demonstrated that the L. crispatus core proteins have a potential to reduce the ability of Gardnerella vaginalis to adhere to epithelial cells. These findings thereby suggest that L. crispatus core proteins could protect the vagina from G. vaginalis and bacterial vaginosis.

CONCLUSIONS: Our pan-genome analysis provides insights into the intraspecific genome variability and the collective molecular mechanisms of the species L. crispatus. Using this approach, we described the differences and similarities between the genomes and identified features likely to be important for urogenital health. Notably, the conserved genetic backbone of L. crispatus accounted for close to 60% of the ortholog groups of an average L. crispatus strain and included factors for the competitive exclusion of G. vaginalis, providing an explanation on how this urogenital species could improve vaginal health.}, } @article {pmid25468217, year = {2014}, author = {Schatz, MC and Maron, LG and Stein, JC and Hernandez Wences, A and Gurtowski, J and Biggers, E and Lee, H and Kramer, M and Antoniou, E and Ghiban, E and Wright, MH and Chia, JM and Ware, D and McCouch, SR and McCombie, WR}, title = {Whole genome de novo assemblies of three divergent strains of rice, Oryza sativa, document novel gene space of aus and indica.}, journal = {Genome biology}, volume = {15}, number = {11}, pages = {506}, pmid = {25468217}, issn = {1474-760X}, support = {R01 HG006677/HG/NHGRI NIH HHS/United States ; R01-HG006677/HG/NHGRI NIH HHS/United States ; }, mesh = {Breeding ; Chromosome Mapping ; *Genetic Variation ; *Genome, Plant ; High-Throughput Nucleotide Sequencing ; Oryza/*genetics ; Phenotype ; Quantitative Trait Loci/*genetics ; Sequence Alignment ; }, abstract = {BACKGROUND: The use of high throughput genome-sequencing technologies has uncovered a large extent of structural variation in eukaryotic genomes that makes important contributions to genomic diversity and phenotypic variation. When the genomes of different strains of a given organism are compared, whole genome resequencing data are typically aligned to an established reference sequence. 
However, when the reference differs in significant structural ways from the individuals under study, the analysis is often incomplete or inaccurate.

RESULTS: Here, we use rice as a model to demonstrate how improvements in sequencing and assembly technology allow rapid and inexpensive de novo assembly of next generation sequence data into high-quality assemblies that can be directly compared using whole genome alignment to provide an unbiased assessment. Using this approach, we are able to accurately assess the "pan-genome" of three divergent rice varieties and document several megabases of each genome absent in the other two.

CONCLUSIONS: Many of the genome-specific loci are annotated to contain genes, reflecting the potential for new biological properties that would be missed by standard reference-mapping approaches. We further provide a detailed analysis of several loci associated with agriculturally important traits, including the S5 hybrid sterility locus, the Sub1 submergence tolerance locus, the LRK gene cluster associated with improved yield, and the Pup1 cluster associated with phosphorus deficiency, illustrating the utility of our approach for biological discovery. All of the data and software are openly available to support further breeding and functional studies of rice and other species.}, } @article {pmid25445602, year = {2014}, author = {Di, H and Ye, L and Yan, H and Meng, H and Yamasak, S and Shi, L}, title = {Comparative analysis of CRISPR loci in different Listeria monocytogenes lineages.}, journal = {Biochemical and biophysical research communications}, volume = {454}, number = {3}, pages = {399-403}, doi = {10.1016/j.bbrc.2014.10.018}, pmid = {25445602}, issn = {1090-2104}, mesh = {Base Sequence ; CRISPR-Cas Systems ; *Clustered Regularly Interspaced Short Palindromic Repeats ; Genetic Variation ; Genome, Bacterial ; Genotyping Techniques ; Humans ; Listeria monocytogenes/*genetics ; Listeriosis/*microbiology ; Phylogeny ; }, abstract = {Listeria monocytogenes, an important food-borne pathogen, causes high mortality rate of listeriosis. Pan-genomic comparisons revealed the species genome of L. monocytogenes is highly stable but not completely clonal. The population structure of this species displays at least four evolutionary lineages (I-IV). Isolates of different lineages displayed distinct genetic, phenotypic and ecologic characteristics, which appear to affect their ability to be transmitted through foods and to cause human disease, as well as their ability to thrive in markedly phage-rich environments. 
CRISPR (clustered regularly interspaced short palindromic repeats), a recently described adaptive immunity system, not only confers defense against invading elements derived from bacteriophages or plasmids in many bacteria and archaea, but also displays strain-level variations in almost any given endowed species. This work was aimed to investigate CRISPR diversity in L. monocytogenes strains of different lineages and to estimate the potential practicability of the CRISPR-based approach to resolve this species' biodiversity. Only a third of strains contained all three CRISPR loci (here defined as LMa, LMb and LMc) at the same time. Combining the strain-level variations in presence/absence of each CRISPR locus and its relative size and spacer arrangements, a total of 29 CRISPR genotypes and 11 groups were defined within a collection of 128 strains covering all serotypes. The CRISPR-based approach showed powerful ability to subtype the more commonly food-borne isolates of serotype 1/2a (lineage II) and serotypes 1/2b (lineage I), but was limited by the absence of typical CRISPR structure in many lineage I isolates. 
Strikingly, we found a long associated cas1 gene as well as two self-targeting LMb spacers accidentally homologous with endogenous genes in a fraction of serotype 1/2a isolates, demonstrating that the CRISPR I B system might be involved in bacterial physiology besides antiviral immunity.}, } @article {pmid25437801, year = {2014}, author = {Lehmann, JS and Matthias, MA and Vinetz, JM and Fouts, DE}, title = {Leptospiral pathogenomics.}, journal = {Pathogens (Basel, Switzerland)}, volume = {3}, number = {2}, pages = {280-308}, pmid = {25437801}, issn = {2076-0817}, support = {R25 GM083275/GM/NIGMS NIH HHS/United States ; HHSN272200900007C/AI/NIAID NIH HHS/United States ; R01 TW005860/TW/FIC NIH HHS/United States ; R01 AI108276/AI/NIAID NIH HHS/United States ; T32 GM008666/GM/NIGMS NIH HHS/United States ; D43 TW007120/TW/FIC NIH HHS/United States ; R21 AI064466/AI/NIAID NIH HHS/United States ; }, abstract = {Leptospirosis, caused by pathogenic spirochetes belonging to the genus Leptospira, is a zoonosis with important impacts on human and animal health worldwide. Research on the mechanisms of Leptospira pathogenesis has been hindered due to slow growth of infectious strains, poor transformability, and a paucity of genetic tools. As a result of second generation sequencing technologies, there has been an acceleration of leptospiral genome sequencing efforts in the past decade, which has enabled a concomitant increase in functional genomics analyses of Leptospira pathogenesis. A pathogenomics approach, by coupling of pan-genomic analysis of multiple isolates with sequencing of experimentally attenuated highly pathogenic Leptospira, has resulted in the functional inference of virulence factors. The global Leptospira Genome Project supported by the U.S. 
National Institute of Allergy and Infectious Diseases to which key scientific contributions have been made from the international leptospirosis research community has provided a new roadmap for comprehensive studies of Leptospira and leptospirosis well into the future. This review describes functional genomics approaches to apply the data generated by the Leptospira Genome Project towards deepening our knowledge of virulence factors of Leptospira using the emerging discipline of pathogenomics.}, } @article {pmid25437609, year = {2014}, author = {D'Auria, G and Schneider, MV and Moya, A}, title = {Live genomics for pathogen monitoring in public health.}, journal = {Pathogens (Basel, Switzerland)}, volume = {3}, number = {1}, pages = {93-108}, pmid = {25437609}, issn = {2076-0817}, abstract = {Whole genome analysis based on next generation sequencing (NGS) now represents an affordable framework in public health systems. Robust analytical pipelines of genomic data provide, in a short lapse of time (hours), information about taxonomy, comparative genomics (pan-genome) and single polymorphisms profiles. Pathogenic organisms of interest can be tracked at the genomic level, allowing several variables to be monitored at one time, including: epidemiology, pathogenicity, resistance to antibiotics, virulence, persistence factors, mobile elements and adaptation features. Such information can be obtained not only at large spectra, but also at the "local" level, such as in the event of a recurrent or emergency outbreak. This paper reviews the state of the art in infection diagnostics in the context of modern NGS methodologies. We describe how actuation protocols in a public health environment will benefit from a "streaming approach" (pipeline). 
Such a pipeline would include NGS data quality assessment, data mining for comparative analysis, searching differential genetic features, such as virulence, resistance persistence factors and mutation profiles (SNPs and InDels) and formatted "comprehensible" results. Such analytical protocols will enable a quick response to the needs of locally circumscribed outbreaks, providing information on the causes of resistance and genetic tracking elements for rapid detection, and monitoring actuations for present and future occurrences.}, } @article {pmid25425232, year = {2014}, author = {Kang, Y and Gu, C and Yuan, L and Wang, Y and Zhu, Y and Li, X and Luo, Q and Xiao, J and Jiang, D and Qian, M and Ahmed Khan, A and Chen, F and Zhang, Z and Yu, J}, title = {Flexibility and symmetry of prokaryotic genome rearrangement reveal lineage-associated core-gene-defined genome organizational frameworks.}, journal = {mBio}, volume = {5}, number = {6}, pages = {e01867}, pmid = {25425232}, issn = {2150-7511}, mesh = {Archaea/*genetics ; Bacteria/*genetics ; Computational Biology ; Gene Rearrangement ; *Genes, Essential ; Genome, Archaeal ; Genome, Bacterial ; Genomic Instability ; *Genomic Structural Variation ; Synteny ; }, abstract = {UNLABELLED: The prokaryotic pangenome partitions genes into core and dispensable genes. The order of core genes, albeit assumed to be stable under selection in general, is frequently interrupted by horizontal gene transfer and rearrangement, but how a core-gene-defined genome maintains its stability or flexibility remains to be investigated. Based on data from 30 species, including 425 genomes from six phyla, we grouped core genes into syntenic blocks in the context of a pangenome according to their stability across multiple isolates. A subset of the core genes, often species specific and lineage associated, formed a core-gene-defined genome organizational framework (cGOF). 
Such cGOFs are either single segmental (one-third of the species analyzed) or multisegmental (the rest). Multisegment cGOFs were further classified into symmetric or asymmetric according to segment orientations toward the origin-terminus axis. The cGOFs in Gram-positive species are exclusively symmetric and often reversible in orientation, as opposed to those of the Gram-negative bacteria, which are all asymmetric and irreversible. Meanwhile, all species showing strong strand-biased gene distribution contain symmetric cGOFs and often specific DnaE (α subunit of DNA polymerase III) isoforms. Furthermore, functional evaluations revealed that cGOF genes are hub associated with regard to cellular activities, and the stability of cGOF provides efficient indexes for scaffold orientation as demonstrated by assembling virtual and empirical genome drafts. cGOFs show species specificity, and the symmetry of multisegmental cGOFs is conserved among taxa and constrained by DNA polymerase-centric strand-biased gene distribution. The definition of species-specific cGOFs provides powerful guidance for genome assembly and other structure-based analysis.

IMPORTANCE: Prokaryotic genomes are frequently interrupted by horizontal gene transfer (HGT) and rearrangement. To know whether there is a set of genes not only conserved in position among isolates but also functionally essential for a given species and to further evaluate the stability or flexibility of such genome structures across lineages are of importance. Based on a large number of multi-isolate pangenomic data, our analysis reveals that a subset of core genes is organized into a core-gene-defined genome organizational framework, or cGOF. Furthermore, the lineage-associated cGOFs among Gram-positive and Gram-negative bacteria behave differently: the former, composed of 2 to 4 segments, have their fragments symmetrically rearranged around the origin-terminus axis, whereas the latter show more complex segmentation and are partitioned asymmetrically into chromosomal structures. The definition of cGOFs provides new insights into prokaryotic genome organization and efficient guidance for genome assembly and analysis.}, } @article {pmid25401060, year = {2014}, author = {Sharma, PK and Fu, J and Zhang, X and Fristensky, B and Sparling, R and Levin, DB}, title = {Genome features of Pseudomonas putida LS46, a novel polyhydroxyalkanoate producer and its comparison with other P. putida strains.}, journal = {AMB Express}, volume = {4}, number = {}, pages = {37}, pmid = {25401060}, issn = {2191-0855}, abstract = {A novel strain of Pseudomonas putida LS46 was isolated from wastewater on the basis of its ability to synthesize medium chain-length polyhydroxyalkanoates (mcl-PHAs). P.putida LS46 was differentiated from other P.putida strains on the basis of cpn60 (UT). The complete genome of P.putida LS46 was sequenced and annotated. Its chromosome is 5,862,556 bp in size with GC ratio of 61.69. It is encoding 5316 genes, including 7 rRNA genes and 76 tRNA genes. Nucleotide sequence data of the complete P. putida LS46 genome was compared with nine other P. 
putida strains (KT2440, F1, BIRD-1, S16, ND6, DOT-T1E, UW4, W619 and GB-1) identified either as biocontrol agents or as bioremediation agents and isolated from different geographical region and different environment. BLASTn analysis of whole genome sequences of the ten P. putida strains revealed nucleotide sequence identities of 86.54 to 97.52%. P.putida LS46 genome arrangement was highly similar to P.putida BIRD1 and P.putida ND6 but was markedly different than P.putida DOT-T1E, P.putida UW4 and P.putida W619. Fatty acid biosynthesis (fab), fatty acid degradation (fad) and PHA synthesis genes were highly conserved among biocontrol and bioremediation P.putida strains. Six genes in pha operon of P. putida LS46 showed >98% homology at gene and proteins level. It appears that polyhydroxyalkanoate (PHA) synthesis is an intrinsic property of P. putida and was not affected by its geographic origin. However, all strains, including P. putida LS46, were different from one another on the basis of housekeeping genes, and presence of plasmid, prophages, insertion sequence elements and genomic islands. While P. putida LS46 was not selected for plant growth promotion or bioremediation capacity, its genome also encoded genes for root colonization, pyoverdine synthesis, oxidative stress (present in other soil isolates), degradation of aromatic compounds, heavy metal resistance and nicotinic acid degradation, manganese (Mn II) oxidation. Genes for toluene or naphthalene degradation found in the genomes of P. putida F1, DOT-T1E, and ND6 were absent in the P. putida LS46 genome. Heavy metal resistant genes encoded by the P. putida W619 genome were also not present in the P. putida LS46 genome. Despite the overall similarity among genome of P.putida strains isolated for different applications and from different geographical location a number of differences were observed in genome arrangement, occurrence of transposon, genomic islands and prophage. 
It appears that P.putida strains had a common ancestor and by acquiring some specific genes by horizontal gene transfer it differed from other related strains.}, } @article {pmid25398865, year = {2015}, author = {Gibbons, SM and Schwartz, T and Fouquier, J and Mitchell, M and Sangwan, N and Gilbert, JA and Kelley, ST}, title = {Ecological succession and viability of human-associated microbiota on restroom surfaces.}, journal = {Applied and environmental microbiology}, volume = {81}, number = {2}, pages = {765-773}, pmid = {25398865}, issn = {1098-5336}, support = {T32 EB009412/EB/NIBIB NIH HHS/United States ; 5T-32EB-009412/EB/NIBIB NIH HHS/United States ; }, mesh = {Bacteria/*classification/*isolation & purification ; *Biota ; *Environmental Microbiology ; Humans ; Microbial Viability ; Viruses/*classification/*isolation & purification ; }, abstract = {Human-associated bacteria dominate the built environment (BE). Following decontamination of floors, toilet seats, and soap dispensers in four public restrooms, in situ bacterial communities were characterized hourly, daily, and weekly to determine their successional ecology. The viability of cultivable bacteria, following the removal of dispersal agents (humans), was also assessed hourly. A late-successional community developed within 5 to 8 h on restroom floors and showed remarkable stability over weeks to months. Despite late-successional dominance by skin- and outdoor-associated bacteria, the most ubiquitous organisms were predominantly gut-associated taxa, which persisted following exclusion of humans. Staphylococcus represented the majority of the cultivable community, even after several hours of human exclusion. Methicillin-resistant Staphylococcus aureus (MRSA)-associated virulence genes were found on floors but were not present in assembled Staphylococcus pan-genomes. 
Viral abundances, which were predominantly enterophages, human papilloma virus, and herpesviruses, were significantly correlated with bacterial abundances and showed an unexpectedly low virus-to-bacterium ratio in surface-associated samples, suggesting that bacterial hosts are mostly dormant on BE surfaces.}, } @article {pmid25398610, year = {2014}, author = {Marcus, S and Lee, H and Schatz, MC}, title = {SplitMEM: a graphical algorithm for pan-genome analysis with suffix skips.}, journal = {Bioinformatics (Oxford, England)}, volume = {30}, number = {24}, pages = {3476-3483}, pmid = {25398610}, issn = {1367-4811}, support = {R01 HG006677/HG/NHGRI NIH HHS/United States ; R01-HG006677/HG/NHGRI NIH HHS/United States ; }, mesh = {*Algorithms ; Bacillus anthracis/genetics ; Escherichia coli/genetics ; Genome, Bacterial ; Genomics/*methods ; Sequence Analysis, DNA/*methods ; }, abstract = {MOTIVATION: Genomics is expanding from a single reference per species paradigm into a more comprehensive pan-genome approach that analyzes multiple individuals together. A compressed de Bruijn graph is a sophisticated data structure for representing the genomes of entire populations. It robustly encodes shared segments, simple single-nucleotide polymorphisms and complex structural variations far beyond what can be represented in a collection of linear sequences alone.

RESULTS: We explore deep topological relationships between suffix trees and compressed de Bruijn graphs and introduce an algorithm, splitMEM, that directly constructs the compressed de Bruijn graph in time and space linear to the total number of genomes for a given maximum genome size. We introduce suffix skips to traverse several suffix links simultaneously and use them to efficiently decompose maximal exact matches into graph nodes. We demonstrate the utility of splitMEM by analyzing the nine-strain pan-genome of Bacillus anthracis and up to 62 strains of Escherichia coli, revealing their core-genome properties.}, } @article {pmid25397580, year = {2014}, author = {Zhu Ge, X and Jiang, J and Pan, Z and Hu, L and Wang, S and Wang, H and Leung, FC and Dai, J and Fan, H}, title = {Comparative genomic analysis shows that avian pathogenic Escherichia coli isolate IMT5155 (O2:K1:H5; ST complex 95, ST140) shares close relationship with ST95 APEC O1:K1 and human ExPEC O18:K1 strains.}, journal = {PloS one}, volume = {9}, number = {11}, pages = {e112048}, pmid = {25397580}, issn = {1932-6203}, mesh = {Animals ; Animals, Newborn ; Birds/*microbiology ; Chick Embryo ; CpG Islands/genetics ; Disease Models, Animal ; Escherichia coli/classification/*genetics/isolation & purification/*pathogenicity ; Escherichia coli Infections/genetics/microbiology ; Genes, Bacterial ; Genome, Bacterial/*genetics ; Genomics/*methods ; Humans ; Mice ; Molecular Sequence Data ; Open Reading Frames/genetics ; *Phylogeny ; Plasmids/metabolism ; Rats ; Sequence Analysis, DNA ; Serotyping ; Virulence/genetics ; }, abstract = {Avian pathogenic E. coli and human extraintestinal pathogenic E. coli serotypes O1, O2 and O18 strains isolated from different hosts are generally located in phylogroup B2 and ST complex 95, and they share similar genetic characteristics and pathogenicity, with no or minimal host specificity. 
They are popular objects for the study of ExPEC genetic characteristics and pathogenesis in recent years. Here, we investigated the evolution and genetic blueprint of APEC pathotype by performing phylogenetic and comparative genome analysis of avian pathogenic E. coli strain IMT5155 (O2:K1:H5; ST complex 95, ST140) with other E. coli pathotypes. Phylogeny analyses indicated that IMT5155 has closest evolutionary relationship with APEC O1, IHE3034, and UTI89. Comparative genomic analysis showed that IMT5155 and APEC O1 shared significant genetic overlap/similarities with human ExPEC dominant O18:K1 strains (IHE3034 and UTI89). Furthermore, the unique PAI I5155 (GI-12) was identified and found to be conserved in APEC O2 serotype isolates. GI-7 and GI-16 encoding two typical T6SSs in IMT5155 might be useful markers for the identification of ExPEC dominant serotypes (O1, O2, and O18) strains. IMT5155 contained a ColV plasmid p1ColV5155, which defined the APEC pathotype. The distribution analysis of 10 sequenced ExPEC pan-genome virulence factors among 47 sequenced E. coli strains provided meaningful information for B2 APEC/ExPEC-specific virulence factors, including several adhesins, invasins, toxins, iron acquisition systems, and so on. 
The pathogenicity tests of IMT5155 and other APEC O1:K1 and O2:K1 serotypes strains (isolated in China) through four animal models showed that they were highly virulent for avian colisepticemia and able to cause septicemia and meningitis in neonatal rats, suggesting zoonotic potential of these APEC O1:K1 and O2:K1 isolates.}, } @article {pmid25373147, year = {2015}, author = {Salipante, SJ and Roach, DJ and Kitzman, JO and Snyder, MW and Stackhouse, B and Butler-Wu, SM and Lee, C and Cookson, BT and Shendure, J}, title = {Large-scale genomic sequencing of extraintestinal pathogenic Escherichia coli strains.}, journal = {Genome research}, volume = {25}, number = {1}, pages = {119-128}, pmid = {25373147}, issn = {1549-5469}, support = {U54 AI057141/AI/NIAID NIH HHS/United States ; 5U54AI057141-08REV/AI/NIAID NIH HHS/United States ; }, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Child ; Child, Preschool ; DNA, Bacterial/genetics ; Drug Resistance, Multiple, Bacterial/genetics ; Escherichia coli/classification/*genetics/isolation & purification ; Female ; Gene Library ; Genetic Association Studies ; *Genome, Bacterial ; Humans ; Infant ; Infant, Newborn ; Logistic Models ; Longitudinal Studies ; Male ; Middle Aged ; Phenotype ; Phylogeny ; Sequence Analysis, DNA/*methods ; Urinary Tract Infections/microbiology ; Virulence Factors/genetics ; Young Adult ; }, abstract = {Large-scale bacterial genome sequencing efforts to date have provided limited information on the most prevalent category of disease: sporadically acquired infections caused by common pathogenic bacteria. Here, we performed whole-genome sequencing and de novo assembly of 312 blood- or urine-derived isolates of extraintestinal pathogenic (ExPEC) Escherichia coli, a common agent of sepsis and community-acquired urinary tract infections, obtained during the course of routine clinical care at a single institution. We find that ExPEC E. 
coli are highly genomically heterogeneous, consistent with pan-genome analyses encompassing the larger species. Investigation of differential virulence factor content and antibiotic resistance phenotypes reveals markedly different profiles among lineages and among strains infecting different body sites. We use high-resolution molecular epidemiology to explore the dynamics of infections at the level of individual patients, including identification of possible person-to-person transmission. Notably, a limited number of discrete lineages caused the majority of bloodstream infections, including one subclone (ST131-H30) responsible for 28% of bacteremic E. coli infections over a 3-yr period. We additionally use a microbial genome-wide-association study (GWAS) approach to identify individual genes responsible for antibiotic resistance, successfully recovering known genes but notably not identifying any novel factors. We anticipate that in the near future, whole-genome sequencing of microorganisms associated with clinical disease will become routine. Our study reveals what kind of information can be obtained from sequencing clinical isolates on a large scale, even well-characterized organisms such as E. 
coli, and provides insight into how this information might be utilized in a healthcare setting.}, } @article {pmid25356804, year = {2015}, author = {Defferrari, R and Mazzocco, K and Ambros, IM and Ambros, PF and Bedwell, C and Beiske, K and Bénard, J and Berbegall, AP and Bown, N and Combaret, V and Couturier, J and Erminio, G and Gambini, C and Garaventa, A and Gross, N and Haupt, R and Kohler, J and Jeison, M and Lunec, J and Marques, B and Martinsson, T and Noguera, R and Parodi, S and Schleiermacher, G and Tweddle, DA and Valent, A and Van Roy, N and Vicha, A and Villamon, E and Tonini, GP}, title = {Influence of segmental chromosome abnormalities on survival in children over the age of 12 months with unresectable localised peripheral neuroblastic tumours without MYCN amplification.}, journal = {British journal of cancer}, volume = {112}, number = {2}, pages = {290-295}, pmid = {25356804}, issn = {1532-1827}, support = {//Cancer Research UK/United Kingdom ; }, mesh = {Chromosome Aberrations ; Comparative Genomic Hybridization ; Disease-Free Survival ; Gene Amplification ; Humans ; Infant ; Kaplan-Meier Estimate ; N-Myc Proto-Oncogene Protein ; Neuroblastoma/diagnosis/*genetics/mortality ; Nuclear Proteins/genetics ; Oncogene Proteins/genetics ; Peripheral Nervous System Neoplasms/diagnosis/*genetics/mortality ; Prognosis ; }, abstract = {BACKGROUND: The prognostic impact of segmental chromosome alterations (SCAs) in children older than 1 year, diagnosed with localised unresectable neuroblastoma (NB) without MYCN amplification enrolled in the European Unresectable Neuroblastoma (EUNB) protocol is still to be clarified, while, for other group of patients, the presence of SCAs is associated with poor prognosis.

METHODS: To understand the role of SCAs we performed multilocus/pangenomic analysis of 98 tumour samples from patients enrolled in the EUNB protocol.

RESULTS: Age at diagnosis was categorised into two groups using 18 months as the age cutoff. Significant difference in the presence of SCAs was seen in tumours of patients between 12 and 18 months and over 18 months of age at diagnosis, respectively (P=0.04). A significant correlation (P=0.03) was observed between number of SCAs per tumour and age. Event-free (EFS) and overall survival (OS) were calculated in both age groups, according to both the presence and number of SCAs. In older patients, a poorer survival was associated with the presence of SCAs (EFS=46% vs 75%, P=0.023; OS=66.8% vs 100%, P=0.003). Moreover, OS of older patients inversely correlated with number of SCAs (P=0.002). Finally, SCAs provided additional prognostic information beyond histoprognosis, as their presence was associated with poorer OS in patients over 18 months with unfavourable International Neuroblastoma Pathology Classification (INPC) histopathology (P=0.018).

CONCLUSIONS: The presence of SCAs is a negative prognostic marker that impairs outcome of patients over the age of 18 months with localised unresectable NB without MYCN amplification, especially when more than one SCA is present. Moreover, in older patients with unfavourable INPC tumour histoprognosis, the presence of SCAs significantly affects OS.}, } @article {pmid25344468, year = {2014}, author = {Siewert, C and Hess, WR and Duduk, B and Huettel, B and Reinhardt, R and Büttner, C and Kube, M}, title = {Complete genome determination and analysis of Acholeplasma oculi strain 19L, highlighting the loss of basic genetic features in the Acholeplasmataceae.}, journal = {BMC genomics}, volume = {15}, number = {1}, pages = {931}, pmid = {25344468}, issn = {1471-2164}, mesh = {Acholeplasma/*genetics ; Evolution, Molecular ; Genome, Bacterial/genetics ; *Genomics ; Sequence Analysis ; Species Specificity ; }, abstract = {BACKGROUND: Acholeplasma oculi belongs to the Acholeplasmataceae family, comprising the genera Acholeplasma and 'Candidatus Phytoplasma'. Acholeplasmas are ubiquitous saprophytic bacteria. Several isolates are derived from plants or animals, whereas phytoplasmas are characterised as intracellular parasitic pathogens of plant phloem and depend on insect vectors for their spread. The complete genome sequences for eight strains of this family have been resolved so far, all of which were determined depending on clone-based sequencing.

RESULTS: The A. oculi strain 19L chromosome was sequenced using two independent approaches. The first approach comprised sequencing by synthesis (Illumina) in combination with Sanger sequencing, while single molecule real time sequencing (PacBio) was used in the second. The genome was determined to be 1,587,120 bp in size. Sequencing by synthesis resulted in six large genome fragments, while the single molecule real time sequencing approach yielded one circular chromosome sequence. High-quality sequences were obtained by both strategies differing in six positions, which are interpreted as reliable variations present in the culture population. Our genome analysis revealed 1,471 protein-coding genes and highlighted the absence of the F1FO-type Na+ ATPase system and GroEL/ES chaperone. Comparison of the four available Acholeplasma sequences revealed a core-genome encoding 703 proteins and a pan-genome of 2,867 proteins.

CONCLUSIONS: The application of two state-of-the-art sequencing technologies highlights the potential of single molecule real time sequencing for complete genome determination. Comparative genome analyses revealed that the process of losing particular basic genetic features during genome reduction occurs in both genera, as indicated for several phytoplasma strains and at least A. oculi. The loss of the F1FO-type Na+ ATPase system may separate Acholeplasmataceae from other Mollicutes, while the loss of those genes encoding the chaperone GroEL/ES is not a rare exception in this bacterial class.}, } @article {pmid25343872, year = {2015}, author = {Checcucci, A and Mengoni, A}, title = {The integrated microbial genome resource of analysis.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1231}, number = {}, pages = {289-295}, doi = {10.1007/978-1-4939-1720-4_18}, pmid = {25343872}, issn = {1940-6029}, mesh = {Archaea/genetics ; Bacteria/genetics ; Database Management Systems ; Eukaryota/genetics ; *Genome, Microbial ; Metagenomics/*methods/statistics & numerical data ; Plasmids/chemistry ; *Software ; Synteny ; United States ; }, abstract = {Integrated Microbial Genomes and Metagenomes (IMG) is a biocomputational system that allows to provide information and support for annotation and comparative analysis of microbial genomes and metagenomes. IMG has been developed by the US Department of Energy (DOE)-Joint Genome Institute (JGI). IMG platform contains both draft and complete genomes, sequenced by Joint Genome Institute and other public and available genomes. Genomes of strains belonging to Archaea, Bacteria, and Eukarya domains are present as well as those of viruses and plasmids. 
Here, we provide some essential features of IMG system and case study for pangenome analysis.}, } @article {pmid25343870, year = {2015}, author = {Galardini, M and Mengoni, A and Mocali, S}, title = {From pangenome to panphenome and back.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1231}, number = {}, pages = {257-270}, doi = {10.1007/978-1-4939-1720-4_16}, pmid = {25343870}, issn = {1940-6029}, mesh = {Databases, Chemical ; Electronic Data Processing ; Genetic Association Studies ; *Genome, Bacterial ; *Genotype ; Metabolic Networks and Pathways/genetics ; Microarray Analysis ; Multigene Family ; *Phenotype ; Sinorhizobium meliloti/classification/*genetics/metabolism ; *Software ; }, abstract = {The ability to relate genomic differences in bacterial species to their variability in expressed phenotypes is one of the most challenging tasks in today's biology. Such task is of paramount importance towards the understanding of biotechnologically relevant pathways and possibly for their manipulation. Fundamental prerequisites are the genome-wide reconstruction of metabolic pathways and a comprehensive measurement of cellular phenotypes. Cellular pathways can be reliably reconstructed using the KEGG database, while the OmniLog™ Phenotype Microarray (PM) technology may be used to measure nearly 2,000 growth conditions over time. However, few computational tools that can directly link PM data with the gene(s) of interest followed by the extraction of information on gene-phenotype correlation are available. In this chapter the use of the DuctApe software suite is presented, which allows the joint analysis of bacterial genomic and phenomic data, highlighting those pathways and reactions most probably associated with phenotypic variability. 
A case study on four Sinorhizobium meliloti strains is presented; more example datasets are available online.}, } @article {pmid25343868, year = {2015}, author = {Vinuesa, P and Contreras-Moreira, B}, title = {Robust identification of orthologues and paralogues for microbial pan-genomics using GET_HOMOLOGUES: a case study of pIncA/C plasmids.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1231}, number = {}, pages = {203-232}, doi = {10.1007/978-1-4939-1720-4_14}, pmid = {25343868}, issn = {1940-6029}, mesh = {*Algorithms ; Bacteria/classification/*genetics ; Base Sequence ; Chromosome Mapping ; Databases, Genetic ; Electronic Data Processing ; Genome Size ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Metagenomics/instrumentation/*methods ; Molecular Sequence Annotation ; Molecular Sequence Data ; Multigene Family ; Phylogeny ; Plasmids/*chemistry ; Sequence Alignment ; Sequence Analysis, DNA ; Sequence Homology, Nucleic Acid ; *Software ; Synteny ; }, abstract = {GET_HOMOLOGUES is an open-source software package written in Perl and R to define robust core- and pan-genomes by computing consensus clusters of orthologous gene families from whole-genome sequences using the bidirectional best-hit, COGtriangles, and OrthoMCL clustering algorithms. The granularity of the clusters can be fine-tuned by a user-configurable filtering strategy based on a combination of blastp pairwise alignment parameters, hmmscan-based scanning of Pfam domain composition of the proteins in each cluster, and a partial synteny criterion. We present detailed protocols to fit exponential and binomial mixture models to estimate core- and pan-genome sizes, compute pan-genome trees from the pan-genome matrix using a parsimony criterion, analyze and graphically represent the pan-genome structure, and identify lineage-specific gene families for the 12 complete pIncA/C plasmids currently available in NCBI's RefSeq. 
The software package, license, and detailed user manual can be downloaded for free for academic use from two mirrors: http://www.eead.csic.es/compbio/soft/gethoms.php and http://maya.ccg.unam.mx/soft/gethoms.php.}, } @article {pmid25343867, year = {2015}, author = {Bosi, E and Fani, R and Fondi, M}, title = {Defining orthologs and pangenome size metrics.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1231}, number = {}, pages = {191-202}, doi = {10.1007/978-1-4939-1720-4_13}, pmid = {25343867}, issn = {1940-6029}, mesh = {Artificial Intelligence ; Bacteria/classification/*genetics ; Base Sequence ; Electronic Data Processing ; Genome Size ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Metagenomics/instrumentation/*methods ; Molecular Sequence Annotation ; Molecular Sequence Data ; Phylogeny ; Sequence Alignment ; Sequence Analysis, DNA/*instrumentation/methods ; Sequence Homology, Nucleic Acid ; *Software ; }, abstract = {Since the advent of ultra-massive sequencing techniques, the consequent drop-off in both price and time required made feasible the sequencing of increasingly more genomes from microbes belonging to the same taxonomic unit. Eventually, this led to the concept of pangenome, that is, the entire set of genes present in a group of representatives of the same genus/species, which, in turn, can be divided into core genome, defined as the set of those genes present in all the genomes under study, and a dispensable genome, the set of genes possessed only by one or a subset of organism. When analyzing a pangenome, an interesting point is to measure its size, thus estimating the gene repertoire of a given taxonomic group. This is usually performed counting the novel genes added to the overall pangenome when new genomes are sequenced and annotated. 
A pangenome can also be classified as open or closed: in an open pangenome its size increases indefinitely when adding new genomes; thus sequencing additional strains will likely yield novel genes. Conversely, in a closed pangenome, adding new genomes will not lead to the discovery of new coding capabilities. A central point in pangenomics is the definition of homology relationships between genes belonging to different genomes. This may turn into the search of those genes with similar sequences between different organisms (and including both paralogous and orthologous genes). In this chapter, methods for finding groups of orthologs between genomes and for estimating the pangenome size are discussed. Also, working codes to address these tasks are provided.}, } @article {pmid25343862, year = {2015}, author = {Sonego, P and Meysman, P and Moretto, M and Viola, R and Laukens, K and Cavalieri, D and Engelen, K}, title = {Comparative analysis of gene expression: uncovering expression conservation and divergence between Salmonella enterica serovar Typhimurium strains LT2 and 14028S.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1231}, number = {}, pages = {125-135}, doi = {10.1007/978-1-4939-1720-4_8}, pmid = {25343862}, issn = {1940-6029}, mesh = {Base Sequence ; *Gene Expression Regulation, Bacterial ; *Genes, Bacterial ; *Genome, Bacterial ; Molecular Sequence Annotation ; Molecular Sequence Data ; Position-Specific Scoring Matrices ; Salmonella typhimurium/classification/*genetics/pathogenicity ; Sequence Alignment ; Sequence Analysis, DNA ; Sequence Homology, Nucleic Acid ; *Software ; Virulence ; }, abstract = {Different strains of the same organism can share a large amount of their genetic material, the so called core pangenome. Nevertheless, these species can display different lifestyles and it is still not well known to what extent the core pangenome plays a role in the divergence of lifestyles between the two organisms. 
Here, we present a procedure for uncovering the conservation and divergence of gene expression by using large expression compendia. We will use data from two Salmonella enterica serovar Typhimurium strains as an example here, strain LT2 and strain 14028S, to assess if there are orthologous gene pairs with different expression domains related in both strains.}, } @article {pmid25264906, year = {2014}, author = {Pupacdi, B and Javed, A and Zaki, MJ and Ruchirawat, M}, title = {NSIT: novel sequence identification tool.}, journal = {PloS one}, volume = {9}, number = {9}, pages = {e108011}, pmid = {25264906}, issn = {1932-6203}, mesh = {Sequence Alignment ; Sequence Analysis, DNA/*methods ; }, abstract = {Novel sequences are DNA sequences present in an individual's genome but absent in the human reference assembly. They are predicted to be biologically important, both individual and population specific, and consistent with the known human migration paths. Recent works have shown that an average person harbors 2-5 Mb of such sequences and estimated that the human pan-genome contains as high as 19-40 Mb of novel sequences. To identify them in a de novo genome assembly, some existing sequence aligners have been used but no computational method has been specifically proposed for this task. In this work, we developed NSIT (Novel Sequence Identification Tool), a software that can accurately and efficiently identify novel sequences in an individual's de novo whole genome assembly. We identified and characterized 1.1 Mb, 1.2 Mb, and 1.0 Mb of novel sequences in NA18507 (African), YH (Asian), and NA12878 (European) de novo genome assemblies, respectively. Our results show very high concordance with the previous work using the respective reference assembly. In addition, our results using the latest human reference assembly suggest that the amount of novel sequences per individual may not be as high as previously reported. 
We additionally developed a graphical viewer for comparisons of novel sequence contents. The viewer also helped in identifying sequence contamination; we found 130 kb of Epstein-Barr virus sequence in the previously published NA18507 novel sequences as well as 287 kb of zebrafish repeats in NA12878 de novo assembly. NSIT requires approximately 2GB of RAM and 1.5-2 hrs on a commodity desktop. The program is applicable to input assemblies with varying contig/scaffold sizes, ranging from 100 bp to as high as 50 Mb. It works in both 32-bit and 64-bit systems and outperforms, by large margins, other fast sequence aligners previously applied to this task. To our knowledge, NSIT is the first software designed specifically for novel sequence identification in a de novo human genome assembly.}, } @article {pmid25249233, year = {2014}, author = {D'Amato, F and Rouli, L and Edouard, S and Tyczka, J and Million, M and Robert, C and Nguyen, TT and Raoult, D}, title = {The genome of Coxiella burnetii Z3055, a clone linked to the Netherlands Q fever outbreaks, provides evidence for the role of drift in the emergence of epidemic clones.}, journal = {Comparative immunology, microbiology and infectious diseases}, volume = {37}, number = {5-6}, pages = {281-288}, doi = {10.1016/j.cimid.2014.08.003}, pmid = {25249233}, issn = {1878-1667}, mesh = {Ankyrin Repeat/genetics ; Bacterial Proteins/*genetics/metabolism ; Clone Cells ; Coxiella burnetii/classification/*genetics/pathogenicity ; *Disease Outbreaks ; *Genetic Drift ; *Genome, Bacterial ; Genotype ; Humans ; INDEL Mutation ; Membrane Proteins/genetics/metabolism ; Netherlands/epidemiology ; Phylogeny ; Point Mutation ; Q Fever/*epidemiology/microbiology/pathology ; Transcription Factors/genetics/metabolism ; }, abstract = {Coxiella burnetii is a pathogen causing Q fever. The aim of our work was to study Z3055, a strain that is genotypically related to the strain causing the Netherlands outbreak. 
We compared Z3055 to 5 other completed genomes available in GenBank. We calculated the blast score ratio (BSR) to analyze genetic differences among the strains. The ratio core genome/pangenome was 98%, like other bacteria with closed pangenomes. Differences between Z3055 and the reference NMI consisted only of point mutations and insertions/deletions (INDELs). Non-synonymous mutations significantly increased in genes coding for membrane proteins (16/156 vs 103/1757, bilateral Chi(2) test, p<0.05), ankyrin repeat domains containing proteins (2/9 vs 117/1904, bilateral Chi(2) test, p<0.05), transcription factors (7/53 vs 112/1860, bilateral Chi(2) test, p<0.05) and translation proteins (15/144 vs 109/1655, bilateral Chi(2) test, p<0.05). The evolution of this strain may have been driven by mutations in critical genes.}, } @article {pmid25229054, year = {2014}, author = {Elbir, H and Abi-Rached, L and Pontarotti, P and Yoosuf, N and Drancourt, M}, title = {African relapsing Fever borreliae genomospecies revealed by comparative genomics.}, journal = {Frontiers in public health}, volume = {2}, number = {}, pages = {43}, pmid = {25229054}, issn = {2296-2565}, abstract = {BACKGROUND: Relapsing fever borreliae are vector-borne bacteria responsible for febrile infection in humans in North America, Africa, Asia, and in the Iberian Peninsula in Europe. Relapsing fever borreliae are phylogenetically closely related, yet they differ in pathogenicity and vectors. Their long-term taxonomy, based on geography and vector grouping, needs to be re-appraised in a genomic context. We therefore embarked on genomic analyses of relapsing fever borreliae, focusing on species found in Africa.

RESULTS: Genome-wide phylogenetic analyses group Old World Borrelia crocidurae, Borrelia hispanica, B. duttonii, and B. recurrentis in one clade, and New World Borrelia turicatae and Borrelia hermsii in a second clade. Accordingly, average nucleotide identity is 99% among B. duttonii, B. recurrentis, and B. crocidurae and 96% between latter borreliae and B. hispanica while the similarity is 86% between Old World and New World borreliae. Comparative genomics indicates that the Old World relapsing fever B. duttonii, B. recurrentis, B. crocidurae, and B. hispanica have a 2,514-gene pan genome and a 933-gene core genome that includes 788 chromosomal and 145 plasmidic genes. Analyzing the role that natural selection has played in the evolution of Old World borreliae species revealed that 55 loci were under positive diversifying selection, including loci coding for membrane, flagellar, and chemotaxis proteins, three categories associated with adaption to specific niches.

CONCLUSION: Genomic analyses led to a reappraisal of the taxonomy of relapsing fever borreliae in Africa. These analyses suggest that B. crocidurae, B. duttonii, and B. recurrentis are ecotypes of a unique genomospecies, while B. hispanica is a distinct species.}, } @article {pmid25223320, year = {2014}, author = {Bager, RJ and Kudirkiene, E and da Piedade, I and Seemann, T and Nielsen, TK and Pors, SE and Mattsson, AH and Boyce, JD and Adler, B and Bojesen, AM}, title = {In silico prediction of Gallibacterium anatis pan-immunogens.}, journal = {Veterinary research}, volume = {45}, number = {1}, pages = {80}, pmid = {25223320}, issn = {1297-9716}, mesh = {Amino Acid Sequence ; Animals ; Bacterial Proteins/chemistry/genetics/*immunology ; Bacterial Vaccines/*genetics/immunology ; Chickens ; Computer Simulation ; Escherichia coli/genetics ; Pasteurellaceae/*genetics/*immunology/metabolism/pathogenicity ; Pasteurellaceae Infections/immunology/microbiology/prevention & control/*veterinary ; Poultry Diseases/immunology/microbiology/*prevention & control ; Virulence Factors ; }, abstract = {The Gram-negative bacterium Gallibacterium anatis is a major cause of salpingitis and peritonitis in commercial egg-layers, leading to reduced egg production and increased mortality. Unfortunately, widespread multidrug resistance and antigenic diversity makes it difficult to control infections and novel prevention strategies are urgently needed. In this study, a pan-genomic reverse vaccinology (RV) approach was used to identify potential vaccine candidates. Firstly, the genomes of 10 selected Gallibacterium strains were analyzed and proteins selected on the following criteria; predicted surface-exposure or secretion, none or one transmembrane helix (TMH), and presence in six or more of the 10 genomes. In total, 42 proteins were selected. The genes encoding 27 of these proteins were successfully cloned in Escherichia coli and the proteins expressed and purified. 
To reduce the number of vaccine candidates for in vivo testing, each of the purified recombinant proteins was screened by ELISA for their ability to elicit a significant serological response with serum from chickens that had been infected with G. anatis. Additionally, an in silico prediction of the protective potential was carried out based on a protein property prediction method. Of the 27 proteins, two novel putative immunogens were identified; Gab_1309 and Gab_2312. Moreover, three previously characterized virulence factors; GtxA, FlfA and Gab_2156, were identified. Thus, by combining the pan-genomic RV approach with subsequent in vitro and in silico screening, we have narrowed down the pan-proteome of G. anatis to five vaccine candidates. Importantly, preliminary immunization trials indicated an in vivo protective potential of GtxA-N, FlfA and Gab_1309.}, } @article {pmid25218520, year = {2014}, author = {Li, YH and Zhou, G and Ma, J and Jiang, W and Jin, LG and Zhang, Z and Guo, Y and Zhang, J and Sui, Y and Zheng, L and Zhang, SS and Zuo, Q and Shi, XH and Li, YF and Zhang, WK and Hu, Y and Kong, G and Hong, HL and Tan, B and Song, J and Liu, ZX and Wang, Y and Ruan, H and Yeung, CK and Liu, J and Wang, H and Zhang, LJ and Guan, RX and Wang, KJ and Li, WB and Chen, SY and Chang, RZ and Jiang, Z and Jackson, SA and Li, R and Qiu, LJ}, title = {De novo assembly of soybean wild relatives for pan-genome analysis of diversity and agronomic traits.}, journal = {Nature biotechnology}, volume = {32}, number = {10}, pages = {1045-1052}, pmid = {25218520}, issn = {1546-1696}, mesh = {Agriculture ; Amino Acid Sequence ; Biomass ; DNA, Plant/analysis/genetics ; Disease Resistance/genetics ; Genome, Plant/*genetics ; Genomics/*methods ; Molecular Sequence Data ; Phylogeny ; Polymorphism, Single Nucleotide/*genetics ; Seeds/genetics ; Sequence Alignment ; Sequence Analysis, DNA ; Soybeans/classification/*genetics/*physiology ; }, abstract = {Wild relatives of crops are an 
important source of genetic diversity for agriculture, but their gene repertoire remains largely unexplored. We report the establishment and analysis of a pan-genome of Glycine soja, the wild relative of cultivated soybean Glycine max, by sequencing and de novo assembly of seven phylogenetically and geographically representative accessions. Intergenomic comparisons identified lineage-specific genes and genes with copy number variation or large-effect mutations, some of which show evidence of positive selection and may contribute to variation of agronomic traits such as biotic resistance, seed composition, flowering and maturity time, organ size and final biomass. Approximately 80% of the pan-genome was present in all seven accessions (core), whereas the rest was dispensable and exhibited greater variation than the core genome, perhaps reflecting a role in adaptation to diverse environments. This work will facilitate the harnessing of untapped genetic diversity from wild soybean for enhancement of elite cultivars.}, } @article {pmid25201145, year = {2014}, author = {Sarovich, DS and Price, EP}, title = {SPANDx: a genomics pipeline for comparative analysis of large haploid whole genome re-sequencing datasets.}, journal = {BMC research notes}, volume = {7}, number = {}, pages = {618}, pmid = {25201145}, issn = {1756-0500}, mesh = {*Genome ; *Haploidy ; Phylogeny ; Polymorphism, Single Nucleotide ; }, abstract = {BACKGROUND: Next-generation sequencing (NGS) is now a commonplace tool for molecular characterisation of virtually any species of interest. Despite the ever-increasing use of NGS in laboratories worldwide, analysis of whole genome re-sequencing (WGS) datasets from start to finish remains nontrivial due to the fragmented nature of NGS software and the lack of experienced bioinformaticists in many research teams.

FINDINGS: We describe SPANDx (Synergised Pipeline for Analysis of NGS Data in Linux), a new tool for high-throughput comparative analysis of haploid WGS datasets comprising one through thousands of genomes. SPANDx consolidates several well-validated, open-source packages into a single tool, mitigating the need to learn and manipulate individual NGS programs. SPANDx incorporates BWA for alignment of raw NGS reads against a reference genome or pan-genome, followed by data filtering, variant calling and annotation using Picard, GATK, SAMtools and SnpEff. BEDTools has also been included for genetic locus presence/absence (P/A) determination to easily visualise the core and accessory genomes. Additional SPANDx features include construction of error-corrected single-nucleotide polymorphism (SNP) and insertion-deletion matrices, and P/A matrices, to enable user-friendly visualisation of genetic variants. The SNP matrices generated using VCFtools and GATK are directly importable into PAUP*, PHYLIP or RAxML for downstream phylogenetic analysis. SPANDx has been developed to handle NGS data from Illumina, Ion Personal Genome Machine (PGM) and 454 platforms, and we demonstrate that it has comparable performance across Illumina MiSeq/HiSeq2000 and Ion PGM data.

CONCLUSION: SPANDx is an all-in-one tool for comprehensive haploid WGS analysis. SPANDx is open source and is freely available at: http://sourceforge.net/projects/spandx/.}, } @article {pmid25189482, year = {2014}, author = {Brambilla, C and Laffaire, J and Lantuejoul, S and Moro-Sibilot, D and Mignotte, H and Arbib, F and Toffart, AC and Petel, F and Hainaut, P and Rousseaux, S and Khochbin, S and de Reyniès, A and Brambilla, E}, title = {Lung squamous cell carcinomas with basaloid histology represent a specific molecular entity.}, journal = {Clinical cancer research : an official journal of the American Association for Cancer Research}, volume = {20}, number = {22}, pages = {5777-5786}, doi = {10.1158/1078-0432.CCR-14-0459}, pmid = {25189482}, issn = {1557-3265}, mesh = {Adult ; Aged ; Aged, 80 and over ; Carcinoma, Squamous Cell/*genetics/mortality/*pathology ; Chromosome Aberrations ; Cluster Analysis ; DNA Copy Number Variations ; Gene Expression Regulation, Neoplastic ; Humans ; Lung Neoplasms/*genetics/mortality/*pathology ; Male ; Middle Aged ; Prognosis ; RNA, Messenger/genetics ; Signal Transduction ; Transcriptome ; }, abstract = {PURPOSE: The basaloid carcinoma (pure) and the (mixed) basaloid variant of lung squamous cell carcinoma (SCC) have a dismal prognosis but their underlying specific molecular characteristics remain obscure and no therapy has proven to be efficient.

EXPERIMENTAL DESIGN: To assess their molecular specificity among other lung SCCs we analyzed DNA copy number aberrations and mRNA expression pangenomic profiles of 93 SCCs, including 42 basaloid samples (24 pure, 18 mixed).

RESULTS: Supervised analyses reveal that pure basaloid tumors display a specific mRNA expression profile, encoding factors controlling the cell cycle, transcription, chromatin, and splicing, with prevalent expression in germline and stem cells, while genes related to squamous differentiation are underexpressed. From this signature, we derived a 2-genes (SOX4, IVL) immunohistochemistry-based predictor that discriminated basaloid tumors (pure and mixed) from non-basaloid tumors with 94% accuracy in an independent series. The pure basaloid tumors are also distinguished through unsupervised analyses. Using a centroid-based predictor, the corresponding molecular subtype was found in 8 independent public datasets (n = 58/533), and was shown to be associated with a very poor survival as compared with other SCCs (adjusted HR = 2.45; P = 0.000001).

CONCLUSION: This study enlightens the heterogeneity of SCCs that can be subclassified in mRNA expression subtypes. This study demonstrates for the first time that basaloid SCCs constitute a distinct histomolecular entity, which justifies its recognition and distinction from non-basaloid SCCs. In addition, their characteristic molecular profile highlights their intrinsic resistance to cytotoxic chemotherapy and could serve as a guide for targeted therapies.}, } @article {pmid25184130, year = {2014}, author = {Illakkiam, D and Shankar, M and Ponraj, P and Rajendhran, J and Gunasekaran, P}, title = {Genome Sequencing of a Mung Bean Plant Growth Promoting Strain of P. aeruginosa with Biocontrol Ability.}, journal = {International journal of genomics}, volume = {2014}, number = {}, pages = {123058}, pmid = {25184130}, issn = {2314-436X}, abstract = {Pseudomonas aeruginosa PGPR2 is a mung bean rhizosphere strain that produces secondary metabolites and hydrolytic enzymes contributing to excellent antifungal activity against Macrophomina phaseolina, one of the prevalent fungal pathogens of mung bean. Genome sequencing was performed using the Ion Torrent Personal Genome Machine generating 1,354,732 reads (6,772,433 sequenced bases) achieving ~25-fold coverage of the genome. Reference genome assembly using MIRA 3.4.0 yielded 198 contigs. The draft genome of PGPR2 encoded 6803 open reading frames, of which 5314 were genes with predicted functions, 1489 were genes of known functions, and 80 were RNA-coding genes. Strain specific and core genes of P. aeruginosa PGPR2 that are relevant to rhizospheric habitat were identified by pangenome analysis. Genes involved in plant growth promoting function such as synthesis of ACC deaminase, indole-3-acetic acid, trehalose, mineral scavenging siderophores, hydrogen cyanide, chitinases, acyl homoserine lactones, acetoin, 2,3-butanediol, and phytases were identified. 
In addition, niche-specific genes such as phosphate solubilising 3-phytase, adhesins, pathway-specific transcriptional regulators, a diguanylate cyclase involved in cellulose synthesis, a receptor for ferrienterochelin, a DEAD/DEAH-box helicase involved in stress tolerance, chemotaxis/motility determinants, an HtpX protease, and enzymes involved in the production of a chromanone derivative with potent antifungal activity were identified.}, } @article {pmid25181051, year = {2014}, author = {Ali, S and Steinmetz, G and Montillet, G and Perrard, MH and Loundou, A and Durand, P and Guichaoua, MR and Prat, O}, title = {Exposure to low-dose bisphenol A impairs meiosis in the rat seminiferous tubule culture model: a physiotoxicogenomic approach.}, journal = {PloS one}, volume = {9}, number = {9}, pages = {e106245}, pmid = {25181051}, issn = {1932-6203}, mesh = {Animals ; Benzhydryl Compounds/*pharmacology ; Cell Nucleus/drug effects ; Cells, Cultured ; Down-Regulation/drug effects ; Gene Expression Profiling ; Male ; Meiosis/*drug effects ; *Models, Biological ; Oligonucleotide Array Sequence Analysis ; Phenols/*pharmacology ; Rats, Sprague-Dawley ; Recombination, Genetic/genetics ; Reproducibility of Results ; Seminiferous Tubules/*cytology/drug effects/metabolism ; Signal Transduction/drug effects/genetics ; Spermatocytes/cytology/drug effects ; Synaptonemal Complex/drug effects/genetics ; *Toxicogenetics ; Transcriptome/genetics ; Up-Regulation/drug effects ; }, abstract = {BACKGROUND: Bisphenol A (BPA) is one of the most widespread chemicals in the world and is suspected of being responsible for male reproductive impairments. Nevertheless, its molecular mode of action on spermatogenesis is unclear. This work combines physiology and toxicogenomics to identify mechanisms by which BPA affects the timing of meiosis and induces germ-cell abnormalities.

METHODS: We used a rat seminiferous tubule culture model mimicking the in vivo adult rat situation. BPA (1 nM and 10 nM) was added to the culture medium. Transcriptomic and meiotic studies were performed on the same cultures at the same exposure times (days 8, 14, and 21). Transcriptomics was performed using pangenomic rat microarrays. Immunocytochemistry was conducted with an anti-SCP3 antibody.

RESULTS: The gene expression analysis showed that the total number of differentially expressed transcripts was time but not dose dependent. We focused on 120 genes directly involved in the first meiotic prophase, sustaining immunocytochemistry. Sixty-two genes were directly involved in pairing and recombination, some of them with high fold changes. Immunocytochemistry indicated alteration of meiotic progression in the presence of BPA, with increased leptotene and decreased diplotene spermatocyte percentages and partial meiotic arrest at the pachytene checkpoint. Morphological abnormalities were observed at all stages of the meiotic prophase. The prevalent abnormalities were total asynapsis and apoptosis. Transcriptomic analysis sustained immunocytological observations.

CONCLUSION: We showed that low doses of BPA alter numerous genes expression, especially those involved in the reproductive system, and severely impair crucial events of the meiotic prophase leading to partial arrest of meiosis in rat seminiferous tubule cultures.}, } @article {pmid25156331, year = {2014}, author = {Ogunremi, D and Devenish, J and Amoako, K and Kelly, H and Dupras, AA and Belanger, S and Wang, LR}, title = {High resolution assembly and characterization of genomes of Canadian isolates of Salmonella Enteritidis.}, journal = {BMC genomics}, volume = {15}, number = {1}, pages = {713}, pmid = {25156331}, issn = {1471-2164}, mesh = {Base Composition ; Chromosome Mapping ; Computational Biology/methods ; Evolution, Molecular ; Genome Size ; *Genome, Bacterial ; *Genomics/methods ; High-Throughput Nucleotide Sequencing ; Molecular Sequence Data ; Phylogeny ; Polymorphism, Single Nucleotide ; Salmonella enteritidis/*genetics ; }, abstract = {BACKGROUND: There is a need to characterize genomes of the foodborne pathogen, Salmonella enterica serovar Enteritidis (SE) and identify genetic information that could be ultimately deployed for differentiating strains of the organism, a need that is yet to be addressed mainly because of the high degree of clonality of the organism. In an effort to achieve the first characterization of the genomes of SE of Canadian origin, we carried out massively parallel sequencing of the nucleotide sequence of 11 SE isolates obtained from poultry production environments (n = 9), a clam and a chicken, assembled finished genomes and investigated diversity of the SE genome.

RESULTS: The median genome size was 4,678,683 bp. A total of 4,833 chromosomal genes defined the pan genome of our field SE isolates consisting of 4,600 genes present in all the genomes, i.e., core genome, and 233 genes absent in at least one genome (accessory genome). Genome diversity was demonstrable by the presence of 1,360 loci showing single nucleotide polymorphism (SNP) in the core genome which was used to portray the genetic distances by means of a phylogenetic tree for the SE isolates. The accessory genome consisted mostly of previously identified SE prophage sequences as well as two, apparently full-sized, novel prophages namely a 28 kb sequence provisionally designated as SE-OLF-10058 (3) prophage and a 43 kb sequence provisionally designated as SE-OLF-10012 prophage.

CONCLUSIONS: The number of SNPs identified in the relatively large core genome of SE is a reflection of substantial diversity that could be exploited for strain differentiation as shown by the development of an informative phylogenetic tree. Prophage sequences can also be exploited for SE strain differentiation and lineage tracking. This work has laid the ground work for further studies to develop a readily adoptable laboratory test for the subtyping of SE.}, } @article {pmid25156090, year = {2015}, author = {Hilker, R and Munder, A and Klockgether, J and Losada, PM and Chouvarine, P and Cramer, N and Davenport, CF and Dethlefsen, S and Fischer, S and Peng, H and Schönfelder, T and Türk, O and Wiehlmann, L and Wölbeling, F and Gulbins, E and Goesmann, A and Tümmler, B}, title = {Interclonal gradient of virulence in the Pseudomonas aeruginosa pangenome from disease and environment.}, journal = {Environmental microbiology}, volume = {17}, number = {1}, pages = {29-46}, doi = {10.1111/1462-2920.12606}, pmid = {25156090}, issn = {1462-2920}, mesh = {Animals ; Environmental Microbiology ; Female ; Genetic Variation ; *Genome, Bacterial ; Humans ; Lung Diseases/microbiology ; Mice ; Mice, Inbred C57BL ; Moths/microbiology ; Open Reading Frames ; Plant Diseases/microbiology ; Pseudomonas Infections/microbiology ; Pseudomonas aeruginosa/*genetics/isolation & purification/metabolism/*pathogenicity ; Virulence/genetics ; }, abstract = {The population genomics of Pseudomonas aeruginosa was analysed by genome sequencing of representative strains of the 15 most frequent clonal complexes in the P. aeruginosa population and of the five most common clones from the environment of which so far no isolate from a human infection has been detected. Gene annotation identified 5892-7187 open reading frames (ORFs; median 6381 ORFs) in the 20 6.4-7.4 Mbp large genomes. The P. 
aeruginosa pangenome consists of a conserved core of at least 4000 genes, a combinatorial accessory genome of a further 10 000 genes and 30 000 or more rare genes that are present in only a few strains or clonal complexes. Whole genome comparisons of single nucleotide polymorphism synteny indicated unrestricted gene flow between clonal complexes by recombination. Using standardized acute lettuce, Galleria mellonella and murine airway infection models the full spectrum of possible host responses to P. aeruginosa was observed with the 20 strains ranging from unimpaired health following infection to 100% lethality. Genome comparisons indicate that the differential genetic repertoire of clones maintains a habitat-independent gradient of virulence in the P. aeruginosa population.}, } @article {pmid25139901, year = {2014}, author = {Spagnoletti, M and Ceccarelli, D and Rieux, A and Fondi, M and Taviani, E and Fani, R and Colombo, MM and Colwell, RR and Balloux, F}, title = {Acquisition and evolution of SXT-R391 integrative conjugative elements in the seventh-pandemic Vibrio cholerae lineage.}, journal = {mBio}, volume = {5}, number = {4}, pages = {}, pmid = {25139901}, issn = {2150-7511}, support = {260801/ERC_/European Research Council/International ; 2R01 AI039129-11A2/AI/NIAID NIH HHS/United States ; }, mesh = {Bayes Theorem ; *Conjugation, Genetic ; *DNA Transposable Elements ; Databases, Genetic ; Drug Resistance, Multiple ; Evolution, Molecular ; Gammaproteobacteria/genetics ; Gene Transfer, Horizontal ; Genetic Variation ; Genome ; Genomics ; *Homologous Recombination ; Pandemics ; Phenotype ; Phylogeny ; Polymorphism, Single Nucleotide ; Vibrio cholerae/*genetics ; }, abstract = {UNLABELLED: SXT-R391 Integrative conjugative elements (ICEs) are self-transmissible mobile genetic elements able to confer multidrug resistance and other adaptive features to bacterial hosts, including Vibrio cholerae, the causative agent of cholera. 
ICEs are arranged in a mosaic genetic structure composed of a conserved backbone interspersed with variable DNA clusters located in conserved hot spots. In this study, we investigated ICE acquisition and subsequent microevolution in pandemic V. cholerae. Ninety-six ICEs were retrieved from publicly available sequence databases from V. cholerae clinical strains and were compared to a set of reference ICEs. Comparative genomics highlighted the existence of five main ICE groups with a distinct genetic makeup, exemplified by ICEVchInd5, ICEVchMoz10, SXT, ICEVchInd6, and ICEVchBan11. ICEVchInd5 (the most frequent element, represented by 70 of 96 elements analyzed) displayed no sequence rearrangements and was characterized by 46 single nucleotide polymorphisms (SNPs). SNP analysis revealed that recent inter-ICE homologous recombination between ICEVchInd5 and other ICEs circulating in gammaproteobacteria generated ICEVchMoz10, ICEVchInd6, and ICEVchBan11. Bayesian phylogenetic analyses indicated that ICEVchInd5 and SXT were independently acquired by the current pandemic V. cholerae O1 and O139 lineages, respectively, within a period of only a few years.

IMPORTANCE: SXT-R391 ICEs have been recognized as key vectors of antibiotic resistance in the seventh-pandemic lineage of V. cholerae, which remains a major cause of mortality and morbidity on a global scale. ICEs were acquired only recently in this clade and are acknowledged to be major contributors to horizontal gene transfer and the acquisition of new traits in bacterial species. We have reconstructed the temporal dynamics of SXT-R391 ICE acquisition and spread and have identified subsequent recombination events generating significant diversity in ICEs currently circulating among V. cholerae clinical strains. Our results showed that acquisition of SXT-R391 ICEs provided the V. cholerae seventh-pandemic lineage not only with a multidrug resistance phenotype but also with a powerful molecular tool for rapidly accessing the pan-genome of a large number of gammaproteobacteria.}, } @article {pmid25120263, year = {2014}, author = {Oliveira, PH and Touchon, M and Rocha, EP}, title = {The interplay of restriction-modification systems with mobile genetic elements and their prokaryotic hosts.}, journal = {Nucleic acids research}, volume = {42}, number = {16}, pages = {10618-10631}, pmid = {25120263}, issn = {1362-4962}, support = {281605/ERC_/European Research Council/International ; }, mesh = {CRISPR-Cas Systems ; DNA Restriction-Modification Enzymes/*genetics ; Evolution, Molecular ; Gene Transfer, Horizontal ; *Genome, Archaeal ; *Genome, Bacterial ; *Interspersed Repetitive Sequences ; Prophages/genetics ; }, abstract = {The roles of restriction-modification (R-M) systems in providing immunity against horizontal gene transfer (HGT) and in stabilizing mobile genetic elements (MGEs) have been much debated. However, few studies have precisely addressed the distribution of these systems in light of HGT, its mechanisms and its vectors. 
We analyzed the distribution of R-M systems in 2261 prokaryote genomes and found their frequency to be strongly dependent on the presence of MGEs, CRISPR-Cas systems, integrons and natural transformation. Yet R-M systems are rare in plasmids, in prophages and nearly absent from other phages. Their abundance depends on genome size for small genomes where it relates with HGT but saturates at two occurrences per genome. Chromosomal R-M systems might evolve under cycles of purifying and relaxed selection, where sequence conservation depends on the biochemical activity and complexity of the system and total gene loss is frequent. Surprisingly, analysis of 43 pan-genomes suggests that solitary R-M genes rarely arise from the degradation of R-M systems. Solitary genes are transferred by large MGEs, whereas complete systems are more frequently transferred autonomously or in small MGEs. Our results suggest means of testing the roles for R-M systems and their associations with MGEs.}, } @article {pmid25104553, year = {2014}, author = {Aherfi, S and La Scola, B and Pagnier, I and Raoult, D and Colson, P}, title = {The expanding family Marseilleviridae.}, journal = {Virology}, volume = {466-467}, number = {}, pages = {27-37}, doi = {10.1016/j.virol.2014.07.014}, pmid = {25104553}, issn = {1096-0341}, mesh = {Acanthamoeba/*virology ; Animals ; DNA Viruses/*classification/genetics/physiology/ultrastructure ; DNA, Viral/genetics ; Fresh Water/virology ; Genome, Viral/*genetics ; Genomics ; Humans ; Insecta/virology ; Phylogeny ; Virus Replication ; }, abstract = {The family Marseilleviridae encompasses giant viruses that replicate in free-living Acanthamoeba amoebae. Since the discovery of the founding member Marseillevirus in 2007, 7 new marseilleviruses have been observed, including 3 from environmental freshwater, one from a dipteran, and two from symptom-free humans. 
Marseilleviruses have ≈250-nm-large icosahedral capsids and 346-386-kb-long mosaic genomes that encode 444-497 predicted proteins. They share a small set of core genes with Mimivirus and other large and giant DNA viruses that compose a monophyletic group, first described in 2001. Comparative genomics analyses indicate that the family Marseilleviridae currently includes three lineages and a pan-genome composed of ≈600 genes. Antibodies against marseilleviruses and viral DNA have been observed in a significant proportion of asymptomatic individuals and in the blood and lymph nodes of a child with adenitis; these observations suggest that these giant viruses may be blood borne and question if they may be pathogenic in humans.}, } @article {pmid25051369, year = {2014}, author = {Hamouda, MA and Belhacene, N and Puissant, A and Colosetti, P and Robert, G and Jacquel, A and Mari, B and Auberger, P and Luciano, F}, title = {The small heat shock protein B8 (HSPB8) confers resistance to bortezomib by promoting autophagic removal of misfolded proteins in multiple myeloma cells.}, journal = {Oncotarget}, volume = {5}, number = {15}, pages = {6252-6266}, pmid = {25051369}, issn = {1949-2553}, mesh = {Antineoplastic Agents/*pharmacology ; Autophagy/drug effects ; Bortezomib/*pharmacology ; Cell Line, Tumor ; Drug Resistance, Neoplasm ; Heat-Shock Proteins/*metabolism ; Humans ; Molecular Chaperones ; Multiple Myeloma/*drug therapy/*metabolism/pathology ; Protein Folding ; Protein Serine-Threonine Kinases/*metabolism ; }, abstract = {Velcade is one of the inescapable drug to treat patient suffering from multiple myeloma (MM) and resistance to this drug represents a major drawback for patients. However, the mechanisms underlying velcade resistance remain incompletely understood. We derived several U266 MM cell clones that resist to velcade. U266-resistant cells were resistant to velcade-induced cell death but exhibited a similar sensitivity to various proapoptotic stimuli. 
Careful analysis of proteasomal subunits and proteasome enzymatic activities showed that neither the composition nor the activity of the proteasome was affected in velcade-resistant cells. Elimination of velcade-induced poly-ubiquitinated proteins and protein aggregates was drastically stimulated in the resistant cells and correlated with increased cell survival. Inhibition of the lysosomal activity in velcade-resistant cells resulted in an increase of cell aggregates and decreased survival, indicating that aggregates are eliminated through lysosomal degradation. In addition, pangenomic profiling of velcade-sensitive and resistant cells showed that the small heat shock protein HSPB8 was overexpressed in resistant cells. Finally, gain and loss of function experiments demonstrated that HSPB8 is a key factor for velcade resistance. In conclusion, HSPB8 plays an important role for the elimination of aggregates in velcade-resistant cells that contributes to their enhanced survival.}, } @article {pmid25032833, year = {2014}, author = {Kant, R and Rintahaka, J and Yu, X and Sigvart-Mattila, P and Paulin, L and Mecklin, JP and Saarela, M and Palva, A and von Ossowski, I}, title = {A comparative pan-genome perspective of niche-adaptable cell-surface protein phenotypes in Lactobacillus rhamnosus.}, journal = {PloS one}, volume = {9}, number = {7}, pages = {e102762}, pmid = {25032833}, issn = {1932-6203}, mesh = {Adaptation, Biological/genetics ; Bacterial Adhesion/genetics ; Bacterial Proteins/*genetics ; Genes, Bacterial/genetics ; Genetic Association Studies/methods ; Genetic Variation/genetics ; Genome, Bacterial/*genetics ; Lacticaseibacillus rhamnosus/*genetics ; Membrane Proteins/*genetics ; Operon/genetics ; Phenotype ; }, abstract = {Lactobacillus rhamnosus is a ubiquitously adaptable Gram-positive bacterium and as a typical commensal can be recovered from various microbe-accessible bodily orifices and cavities. 
Then again, other isolates are food-borne, with some of these having been long associated with naturally fermented cheeses and yogurts. Additionally, because of perceived health benefits to humans and animals, numerous L. rhamnosus strains have been selected for use as so-called probiotics and are often taken in the form of dietary supplements and functional foods. At the genome level, it is anticipated that certain genetic variances will have provided the niche-related phenotypes that augment the flexible adaptiveness of this species, thus enabling its strains to grow and survive in their respective host environments. For this present study, we considered it functionally informative to examine and catalogue the genotype-phenotype variation existing at the cell surface between different L. rhamnosus strains, with the presumption that this might be relatable to habitat preferences and ecological adaptability. Here, we conducted a pan-genomic study involving 13 genomes from L. rhamnosus isolates with various origins. In using a benchmark strain (gut-adapted L. rhamnosus GG) for our pan-genome comparison, we had focused our efforts on a detailed examination and description of gene products for certain functionally relevant surface-exposed proteins, each of which in effect might also play a part in niche adaptability among the other strains. Perhaps most significantly of the surface protein loci we had analyzed, it would appear that the spaCBA operon (known to encode SpaCBA-called pili having a mucoadhesive phenotype) is a genomic rarity and an uncommon occurrence in L. rhamnosus. However, for any of the so-piliated L. 
rhamnosus strains, they will likely possess an increased niche-specific fitness, which functionally might presumably be manifested by a protracted transient colonization of the gut mucosa or some similar microhabitat.}, } @article {pmid25012753, year = {2014}, author = {Mosquera-Rendón, J and Cárdenas-Brito, S and Pineda, JD and Corredor, M and Benítez-Páez, A}, title = {Evolutionary and sequence-based relationships in bacterial AdoMet-dependent non-coding RNA methyltransferases.}, journal = {BMC research notes}, volume = {7}, number = {}, pages = {440}, pmid = {25012753}, issn = {1756-0500}, mesh = {Amino Acid Sequence ; Bacteria/classification/*genetics ; Bacterial Proteins/chemistry/*genetics ; Base Sequence ; Epigenesis, Genetic ; Evolution, Molecular ; *Genome, Bacterial ; Methylation ; Methyltransferases/chemistry/*genetics ; Models, Molecular ; Molecular Sequence Data ; *Phylogeny ; *RNA Processing, Post-Transcriptional ; *RNA, Untranslated ; S-Adenosylmethionine/metabolism ; Sequence Alignment ; }, abstract = {BACKGROUND: RNA post-transcriptional modification is an exciting field of research that has evidenced this editing process as a sophisticated epigenetic mechanism to fine tune the ribosome function and to control gene expression. Although tRNA modifications seem to be more relevant for the ribosome function and cell physiology as a whole, some rRNA modifications have also been seen to play pivotal roles, essentially those located in central ribosome regions. RNA methylation at nucleobases and ribose moieties of nucleotides appear to frequently modulate its chemistry and structure. RNA methyltransferases comprise a superfamily of highly specialized enzymes that accomplish a wide variety of modifications. These enzymes exhibit a poor degree of sequence similarity in spite of using a common reaction cofactor and modifying the same substrate type.

RESULTS: Relationships and lineages of RNA methyltransferases have been extensively discussed, but no consensus has been reached. To shed light on this topic, we performed amino acid and codon-based sequence analyses to determine phylogenetic relationships and molecular evolution. We found that most Class I RNA MTases are evolutionarily related to protein and cofactor/vitamin biosynthesis methyltransferases. Additionally, we found that at least nine lineages explain the diversity of RNA MTases. We evidenced that RNA methyltransferases have a high content of polar and positively charged amino acids, which coincides with the electrochemistry of their substrates.

CONCLUSIONS: After studying almost 12,000 bacterial genomes and 2,000 patho-pangenomes, we revealed that molecular evolution of Class I methyltransferases matches the different rates of synonymous and non-synonymous substitutions along the coding region. Consequently, evolution on Class I methyltransferases selects against amino acid changes affecting the structure conformation.}, } @article {pmid25009843, year = {2014}, author = {Qin, QL and Xie, BB and Yu, Y and Shu, YL and Rong, JC and Zhang, YJ and Zhao, DL and Chen, XL and Zhang, XY and Chen, B and Zhou, BC and Zhang, YZ}, title = {Comparative genomics of the marine bacterial genus Glaciecola reveals the high degree of genomic diversity and genomic characteristic for cold adaptation.}, journal = {Environmental microbiology}, volume = {16}, number = {6}, pages = {1642-1653}, doi = {10.1111/1462-2920.12318}, pmid = {25009843}, issn = {1462-2920}, mesh = {Adaptation, Physiological/genetics ; Alteromonadaceae/*genetics ; Base Sequence ; Clustered Regularly Interspaced Short Palindromic Repeats ; Gene Transfer, Horizontal ; Genetic Variation ; *Genome, Bacterial ; Molecular Sequence Data ; Phylogeny ; Sequence Analysis, DNA ; }, abstract = {To what extent the genomes of different species belonging to one genus can be diverse and the relationship between genomic differentiation and environmental factor remain unclear for oceanic bacteria. With many new bacterial genera and species being isolated from marine environments, this question warrants attention. In this study, we sequenced all the type strains of the published species of Glaciecola, a recently defined cold-adapted genus with species from diverse marine locations, to study the genomic diversity and cold-adaptation strategy in this genus. The genome size diverged widely from 3.08 to 5.96 Mb, which can be explained by massive gene gain and loss events. Horizontal gene transfer and new gene emergence contributed substantially to the genome size expansion. 
The genus Glaciecola had an open pan-genome. Comparative genomic research indicated that species of the genus Glaciecola had high diversity in genome size, gene content and genetic relatedness. This may be prevalent in marine bacterial genera considering the dynamic and complex environments of the ocean. Species of Glaciecola had some common genomic features related to cold adaptation, which enable them to thrive and play a role in biogeochemical cycle in the cold marine environments.}, } @article {pmid24977706, year = {2014}, author = {Gao, XY and Zhi, XY and Li, HW and Klenk, HP and Li, WJ}, title = {Comparative genomics of the bacterial genus Streptococcus illuminates evolutionary implications of species groups.}, journal = {PloS one}, volume = {9}, number = {6}, pages = {e101229}, pmid = {24977706}, issn = {1932-6203}, mesh = {Base Composition/genetics ; *Biological Evolution ; Conserved Sequence/genetics ; Genes, Bacterial/genetics ; Genome Size/genetics ; *Genomics ; *Phylogeny ; Species Specificity ; Streptococcus/*genetics ; Virulence Factors/genetics ; }, abstract = {Members of the genus Streptococcus within the phylum Firmicutes are among the most diverse and significant zoonotic pathogens. This genus has gone through considerable taxonomic revision due to increasing improvements of chemotaxonomic approaches, DNA hybridization and 16S rRNA gene sequencing. It is proposed to place the majority of streptococci into "species groups". However, the evolutionary implications of species groups are not clear presently. We use comparative genomic approaches to yield a better understanding of the evolution of Streptococcus through genome dynamics, population structure, phylogenies and virulence factor distribution of species groups. Genome dynamics analyses indicate that the pan-genome size increases with the addition of newly sequenced strains, while the core genome size decreases with sequential addition at the genus level and species group level. 
Population structure analysis reveals two distinct lineages, one including Pyogenic, Bovis, Mutans and Salivarius groups, and the other including Mitis, Anginosus and Unknown groups. Phylogenetic dendrograms show that species within the same species group cluster together, and infer two main clades in accordance with population structure analysis. Distribution of streptococcal virulence factors has no obvious patterns among the species groups; however, the evolution of some common virulence factors is congruous with the evolution of species groups, according to phylogenetic inference. We suggest that the proposed streptococcal species groups are reasonable from the viewpoints of comparative genomics; evolution of the genus is congruent with the individual evolutionary trajectories of different species groups.}, } @article {pmid24976559, year = {2014}, author = {Kraft, B and Tegetmeyer, HE and Meier, D and Geelhoed, JS and Strous, M}, title = {Rapid succession of uncultured marine bacterial and archaeal populations in a denitrifying continuous culture.}, journal = {Environmental microbiology}, volume = {16}, number = {10}, pages = {3275-3286}, doi = {10.1111/1462-2920.12552}, pmid = {24976559}, issn = {1462-2920}, support = {242635/ERC_/European Research Council/International ; }, mesh = {Archaea/classification/genetics/isolation & purification/*metabolism ; Bacteria/classification/genetics/*metabolism ; *Denitrification ; Geologic Sediments/*microbiology ; Nitrite Reductases/genetics ; Oceans and Seas ; Phylogeny ; }, abstract = {Marine denitrification constitutes an important part of the global nitrogen cycle and the diversity, abundance and process rates of denitrifying microorganisms have been the focus of many studies. Still, there is little insight in the ecophysiology of marine denitrifying communities. 
In this study, a heterotrophic denitrifying community from sediments of a marine intertidal flat active in nitrogen cycling was selected in a chemostat and monitored over a period of 50 days. The chemostat enabled the maintenance of constant and well-defined experimental conditions over the time-course of the experiment. Analysis of the microbial community composition by automated ribosomal intergenic spacer analysis (ARISA), Illumina sequencing and catalyzed reporter deposition fluorescence in situ hybridization (CARD-FISH) revealed strong dynamics in community composition over time, while overall denitrification by the enrichment culture was stable. Members of the genera Arcobacter, Pseudomonas, Pseudovibrio, Rhodobacterales and of the phylum Bacteroidetes were identified as the dominant denitrifiers. Among the fermenting organisms co-enriched with the denitrifiers was a novel archaeon affiliated with the recently proposed DPANN-superphylum. The pan-genome of populations affiliated to Pseudovibrio encoded a NirK as well as a NirS nitrite reductase, indicating the rare co-occurrence of both evolutionary unrelated nitrite reductases within coexisting subpopulations.}, } @article {pmid24962689, year = {2014}, author = {Cluzeau, T and Dubois, A and Jacquel, A and Luciano, F and Renneville, A and Preudhomme, C and Karsenti, JM and Mounier, N and Rohrlich, P and Raynaud, S and Mari, B and Robert, G and Auberger, P}, title = {Phenotypic and genotypic characterization of azacitidine-sensitive and resistant SKM1 myeloid cell lines.}, journal = {Oncotarget}, volume = {5}, number = {12}, pages = {4384-4391}, pmid = {24962689}, issn = {1949-2553}, mesh = {Antimetabolites, Antineoplastic/*pharmacology ; Azacitidine/*pharmacology ; Cell Line, Tumor ; Down-Regulation ; Genotype ; Humans ; Myelodysplastic Syndromes/*metabolism/pathology ; Myeloid Cells/*drug effects/physiology ; Phenotype ; }, abstract = {In the present study, we provide a comparative phenotypic and genotypic 
analysis of azacitidine-sensitive and resistant SKM-1 cell lines. Morphologically, SKM1-R exhibited an increase in cell size that is accounted for by enhanced ploidy in a majority of cells as shown by cell cycle and karyotype analysis. No specific Single Nucleotide Polymorphism (SNP) alteration was found in SKM1-R cells compared to their SKM1-S counterpart. Comparative pangenomic profiling revealed the up-regulation of a panel of genes involved in cellular movement, cell death and survival and down-regulation of genes required for cell to cell signaling and free radical scavenging in SKM1-R cells. We also searched for mutations frequently associated with myelodysplastic syndromes (MDS) and found that both cell lines harbored mutations in TET2, ASXL1 and TP53. Collectively, our data show that despite their different morphological and phenotypic features, SKM1-S and SKM1-R cells exhibited similar genotypic characteristics. Finally, pangenomic profiling identifies new potential pathways to be targeted to circumvent AZA-resistance. 
In conclusion, SKM1-R cells represent a valuable tool for the validation of new therapeutic intervention in MDS.}, } @article {pmid24953095, year = {2014}, author = {Frandi, A and Jacquier, N and Théraulaz, L and Greub, G and Viollier, PH}, title = {FtsZ-independent septal recruitment and function of cell wall remodelling enzymes in chlamydial pathogens.}, journal = {Nature communications}, volume = {5}, number = {}, pages = {4200}, pmid = {24953095}, issn = {2041-1723}, mesh = {Amidohydrolases/chemistry/genetics/*metabolism ; Amino Acid Sequence ; Bacterial Proteins/chemistry/genetics/*metabolism ; Cell Division ; Cell Wall/*enzymology/genetics/metabolism ; Chlamydia/cytology/*enzymology/genetics/metabolism ; Cytoskeletal Proteins/genetics/metabolism ; Escherichia coli/enzymology/genetics/metabolism ; Molecular Sequence Data ; Peptidoglycan/metabolism ; Protein Structure, Tertiary ; Sequence Alignment ; }, abstract = {The nature and assembly of the chlamydial division septum is poorly defined due to the paucity of a detectable peptidoglycan (PG)-based cell wall, the inhibition of constriction by penicillin and the presence of coding sequences for cell wall precursor and remodelling enzymes in the reduced chlamydial (pan-)genome. Here we show that the chlamydial amidase (AmiA) is active and remodels PG in Escherichia coli. Moreover, forward genetics using an E. coli amidase mutant as entry point reveals that the chlamydial LysM-domain protein NlpD is active in an E. coli reporter strain for PG endopeptidase activity (ΔnlpI). Immunolocalization unveils NlpD as the first septal (cell-wall-binding) protein in Chlamydiae and we show that its septal sequestration depends on prior cell wall synthesis. 
Since AmiA assembles into peripheral clusters, trimming of a PG-like polymer or precursors occurs throughout the chlamydial envelope, while NlpD targets PG-like peptide crosslinks at the chlamydial septum during constriction.}, } @article {pmid24951835, year = {2014}, author = {Zou, QH and Li, RQ and Liu, GR and Liu, SL}, title = {Comparative genomic analysis between typhoidal and non-typhoidal Salmonella serovars reveals typhoid-specific protein families.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {26}, number = {}, pages = {295-302}, doi = {10.1016/j.meegid.2014.06.008}, pmid = {24951835}, issn = {1567-7257}, mesh = {Bacterial Proteins/*genetics ; Computational Biology ; Databases, Genetic ; *Genome, Bacterial ; *Genomics ; Humans ; *Multigene Family ; Open Reading Frames ; Phylogeny ; Salmonella/classification/*genetics/metabolism ; Salmonella Infections ; Salmonella typhi/classification/*genetics/metabolism ; Serogroup ; Typhoid Fever ; }, abstract = {BACKGROUND: The genus Salmonella contains more than 2600 serovars. While most cause a self-limiting gastroenteritis, four serovars, S. Typhi, S. Paratyphi A, B and C, elicit typhoid, a potentially fatal systemic infection. Because of the prevalence in certain regions, such as South Asia, and the disease severity of typhoidal Salmonella infections, comprehensive studies are needed to elucidate the pathogenesis of diseases caused by these typhoidal serovars.

RESULTS: We performed comparative genomic analyses on eight human typhoidal strains and 27 non-human typhoidal Salmonella strains to elucidate their evolutionary relationships and identify the genes specific to the four typhoidal serovars. Our results indicate that Salmonella may have an open pan-genome. A core-genome based phylogeny demonstrated that divergence between S. Paratyphi A and S. Typhi took place not long ago and S. Paratyphi B shared a recent common ancestor with S. Paratyphi C. Of great interest, the divergence between S. Paratyphi B and S. Paratyphi C was shown to be more recent than that between S. Paratyphi A and S. Typhi. Alignment and comparisons of the genomes identified unique complements of protein families to each of the typhoidal serovars. Most of these protein families are phage related and some are candidate virulence factors. Importantly, we found 88 protein families specific to two to three of the four typhoidal serovars. All but two of the 88 genes are present in S. Typhi, with a few in the three paratyphoidal serovars but none in the non-human typhoidal serovars. Most of these genes are predicted to encode hypothetical proteins and some are known to code for virulence factors such as Vi polysaccharide related proteins.

CONCLUSIONS: By comprehensive genomic comparisons, we identified protein families specific to the human typhoidal serovars, which will greatly facilitate investigations on typhoid pathogenesis.}, } @article {pmid24939058, year = {2014}, author = {Fumeaux, C and Radhakrishnan, SK and Ardissone, S and Théraulaz, L and Frandi, A and Martins, D and Nesper, J and Abel, S and Jenal, U and Viollier, PH}, title = {Cell cycle transition from S-phase to G1 in Caulobacter is mediated by ancestral virulence regulators.}, journal = {Nature communications}, volume = {5}, number = {}, pages = {4081}, pmid = {24939058}, issn = {2041-1723}, mesh = {Bacterial Proteins/genetics/*metabolism ; Base Sequence ; Caulobacter/*physiology ; Chromatin Immunoprecipitation ; Dimerization ; Electrophoretic Mobility Shift Assay ; G1 Phase/genetics/*physiology ; Gene Expression Regulation, Bacterial/genetics/*physiology ; Immunoblotting ; *Models, Biological ; Molecular Sequence Data ; Promoter Regions, Genetic/genetics ; Real-Time Polymerase Chain Reaction ; Repressor Proteins/genetics/*metabolism ; S Phase Cell Cycle Checkpoints/genetics/*physiology ; Sequence Analysis, DNA ; Species Specificity ; beta-Galactosidase ; }, abstract = {Zinc-finger domain transcriptional regulators regulate a myriad of functions in eukaryotes. Interestingly, ancestral versions (MucR) from Alpha-proteobacteria control bacterial virulence/symbiosis. Whether virulence regulators can also control cell cycle transcription is unknown. Here we report that MucR proteins implement a hitherto elusive primordial S→G1 transcriptional switch. After charting G1-specific promoters in the cell cycle model Caulobacter crescentus by comparative ChIP-seq, we use one such promoter as genetic proxy to unearth two MucR paralogs, MucR1/2, as constituents of a quadripartite and homeostatic regulatory module directing the S→G1 transcriptional switch. 
Surprisingly, MucR orthologues that regulate virulence and symbiosis gene transcription in Brucella, Agrobacterium or Sinorhizobium support this S→G1 switch in Caulobacter. Pan-genomic ChIP-seq analyses in Sinorhizobium and Caulobacter show that this module indeed targets orthologous genes. We propose that MucR proteins and possibly other virulence regulators primarily control bacterial cell cycle (G1-phase) transcription, rendering expression of target (virulence) genes periodic and in tune with the cell cycle.}, } @article {pmid24924907, year = {2014}, author = {Duan, C and Xiong, X and Qi, Y and Gong, W and Jiao, J and Wen, B}, title = {Genomic and comparative genomic analyses of Rickettsia heilongjiangensis provide insight into its evolution and pathogenesis.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {26}, number = {}, pages = {274-282}, doi = {10.1016/j.meegid.2014.05.028}, pmid = {24924907}, issn = {1567-7257}, mesh = {Bacterial Proteins/genetics ; Bacterial Secretion Systems/genetics ; Codon ; Drug Resistance, Bacterial/genetics ; *Evolution, Molecular ; Gene Order ; *Genome, Bacterial ; Genomic Islands ; *Genomics ; Humans ; Mutagenesis, Insertional ; Open Reading Frames ; Phylogeny ; Repetitive Sequences, Nucleic Acid ; Rickettsia/classification/drug effects/*genetics/pathogenicity ; Rickettsia Infections/*microbiology ; Virulence Factors/genetics ; }, abstract = {Rickettsia heilongjiangensis, the causative agent of far eastern spotted fever, is an obligate intracellular gram-negative bacterium that belongs to the spotted fever group rickettsiae. To understand the evolution and pathogenesis of R. heilongjiangensis, we analyzed its genome and compared it with other rickettsial genomes available in GenBank. The R. heilongjiangensis chromosome contains 1333 genes, including 1297 protein coding genes and 36 RNA coding genes. 
The genome also contains 121 pseudogenes, 54 insertion sequences, and 39 tandem repeats. Sixteen genes encoding the major components of the type IV secretion systems were identified in the R. heilongjiangensis genome. In total, 37 β-barrel outer membrane proteins were predicted in the genome, eight of which have been previously confirmed to be outer membrane proteins. In addition, 266 potential virulence factor genes, seven partially deleted antibiotic resistance genes, and a genomic island were identified in the genome. The codon usage in the genome is compatible with its low GC content, and the amino acid usage shows apparent bias. A comparative genomic analysis showed that R. heilongjiangensis and R. japonica share one unique fragment that may be a target sequence for a diagnostic assay. The orthologs of 37 genes of R. heilongjiangensis were found in pathogenic R. rickettsii str. Sheila Smith but not in non-pathogenic R. rickettsii str. Iowa, which may explain why R. heilongjiangensis is pathogenic. Pan-genome analysis showed that R. heilongjiangensis and 42 other rickettsiae strains share 693 core genes with a pan-genome size of 4837 genes. The pan-genome-based phylogeny showed that R. heilongjiangensis was closely related to R. 
japonica.}, } @article {pmid24923324, year = {2014}, author = {Deschamps, P and Zivanovic, Y and Moreira, D and Rodriguez-Valera, F and López-García, P}, title = {Pangenome evidence for extensive interdomain horizontal transfer affecting lineage core and shell genes in uncultured planktonic thaumarchaeota and euryarchaeota.}, journal = {Genome biology and evolution}, volume = {6}, number = {7}, pages = {1549-1563}, pmid = {24923324}, issn = {1759-6653}, mesh = {Archaea/*genetics ; Euryarchaeota/*genetics ; *Evolution, Molecular ; Gene Transfer, Horizontal/genetics ; *Genome ; Genomic Library ; *Phylogeny ; }, abstract = {Horizontal gene transfer (HGT) is an important force in evolution, which may lead, among other things, to the adaptation to new environments by the import of new metabolic functions. Recent studies based on phylogenetic analyses of a few genome fragments containing archaeal 16S rRNA genes and fosmid-end sequences from deep-sea metagenomic libraries have suggested that marine planktonic archaea could be affected by high HGT frequency. Likewise, a composite genome of an uncultured marine euryarchaeote showed high levels of gene sequence similarity to bacterial genes. In this work, we ask whether HGT is frequent and widespread in genomes of these marine archaea, and whether HGT is an ancient and/or recurrent phenomenon. To answer these questions, we sequenced 997 fosmid archaeal clones from metagenomic libraries of deep-Mediterranean waters (1,000 and 3,000 m depth) and built comprehensive pangenomes for planktonic Thaumarchaeota (Group I archaea) and Euryarchaeota belonging to the uncultured Groups II and III Euryarchaeota (GII/III-Euryarchaeota). Comparison with available reference genomes of Thaumarchaeota and a composite marine surface euryarchaeote genome allowed us to define sets of core, lineage-specific core, and shell gene ortholog clusters for the two archaeal lineages. 
Molecular phylogenetic analyses of all gene clusters showed that 23.9% of marine Thaumarchaeota genes and 29.7% of GII/III-Euryarchaeota genes had been horizontally acquired from bacteria. HGT is not only extensive and directional but also ongoing, with high HGT levels in lineage-specific core (ancient transfers) and shell (recent transfers) genes. Many of the acquired genes are related to metabolism and membrane biogenesis, suggesting an adaptive value for life in cold, oligotrophic oceans. We hypothesize that the acquisition of an important amount of foreign genes by the ancestors of these archaeal groups significantly contributed to their divergence and ecological success.}, } @article {pmid24887297, year = {2014}, author = {Gruel, N and Benhamo, V and Bhalshankar, J and Popova, T and Fréneaux, P and Arnould, L and Mariani, O and Stern, MH and Raynal, V and Sastre-Garau, X and Rouzier, R and Delattre, O and Vincent-Salomon, A}, title = {Polarity gene alterations in pure invasive micropapillary carcinomas of the breast.}, journal = {Breast cancer research : BCR}, volume = {16}, number = {3}, pages = {R46}, pmid = {24887297}, issn = {1465-542X}, mesh = {Axonemal Dyneins/genetics ; Base Sequence ; Breast/pathology ; Breast Neoplasms/*genetics/pathology ; Calmodulin-Binding Proteins/genetics ; Carcinoma, Ductal, Breast/*genetics/pathology ; Cell Polarity/*genetics ; Chaperonins ; Class I Phosphatidylinositol 3-Kinases ; Cytoskeletal Proteins/genetics ; DNA Copy Number Variations ; Exome/genetics ; Female ; Forkhead Box Protein O3 ; Forkhead Transcription Factors/biosynthesis/genetics ; Formins ; Gene Amplification/genetics ; Group II Chaperonins/genetics ; Humans ; Membrane Glycoproteins/genetics ; Membrane Proteins/biosynthesis/genetics ; Microfilament Proteins/biosynthesis ; Molecular Chaperones ; Mutation, Missense ; Neoplasm Invasiveness/*genetics ; Neoplasm Proteins/genetics ; Nuclear Proteins/biosynthesis ; Phosphatidylinositol 3-Kinases/genetics ; Protein 
Tyrosine Phosphatases, Non-Receptor/genetics ; RNA-Binding Proteins ; Receptor, ErbB-2/biosynthesis ; Receptors, Estrogen/biosynthesis ; Retrospective Studies ; Sequence Analysis, DNA ; Sequence Analysis, RNA ; Sequence Deletion/genetics ; Serine C-Palmitoyltransferase/genetics ; Tumor Suppressor Protein p53/genetics ; Ubiquitin-Protein Ligases ; }, abstract = {INTRODUCTION: Pure invasive micropapillary carcinoma (IMPC) is a special type of breast carcinoma characterised by clusters of cells presenting polarity abnormalities. The biological alterations underlying this pattern remain unknown.

METHODS: Pangenomic analysis (n=39), TP53 (n=43) and PIK3CA (n=41) sequencing in a series of IMPCs were performed. A subset of cases was also analysed with whole-exome sequencing (n=4) and RNA sequencing (n=6). Copy number variation profiles were compared with those of oestrogen receptors and grade-matched invasive ductal carcinomas (IDCs) of no special type.

RESULTS: Unsupervised analysis of genomic data distinguished two IMPC subsets: one (Sawtooth/8/16) exhibited a significant increase in 16p gains (71%), and the other (Firestorm/Amplifier) was characterised by a high frequency of 8q (35%), 17q (20% to 46%) and 20q (23% to 30%) amplifications and 17p loss (74%). TP53 mutations (10%) were more frequently identified in the amplifier subset, and PIK3CA mutations (4%) were detected in both subsets. Compared to IDC, IMPC exhibited specific loss of the 6q16-q22 region (45%), which is associated with downregulation of FOXO3 and SEC63 gene expression. SEC63 and FOXO3 missense mutations were identified in one case each (2%). Whole-exome sequencing combined with RNA sequencing of IMPC allowed us to identify somatic mutations in genes involved in polarity, DNAH9 and FMN2 (8% and 2%, respectively) or ciliogenesis, BBS12 and BBS9 (2% each) or genes coding for endoplasmic reticulum protein, HSP90B1 and SPTLC3 (2% each) and cytoskeleton, UBR4 and PTPN21 (2% each), regardless of the genomic subset. The intracellular biological function of the mutated genes identified by gene ontology analysis suggests a driving role in the clinicopathological characteristics of IMPC.

CONCLUSION: In our comprehensive molecular analysis of IMPC, we identified numerous genomic alterations without any recurrent fusion genes. Recurrent somatic mutations of genes participating in cellular polarity and shape suggest that they, together with other biological alterations (such as epigenetic modifications and stromal alterations), could contribute to the morphological pattern of IMPC. Though none of the individual abnormalities demonstrated specificity for IMPC, whether their combination in IMPC may have a cumulative effect that drives the abnormal polarity of IMPC needs to be examined further with in vitro experiments.}, } @article {pmid24886480, year = {2014}, author = {Sassi, M and Drancourt, M}, title = {Genome analysis reveals three genomospecies in Mycobacterium abscessus.}, journal = {BMC genomics}, volume = {15}, number = {1}, pages = {359}, pmid = {24886480}, issn = {1471-2164}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/genetics/metabolism ; Cluster Analysis ; Databases, Factual ; Drug Resistance, Bacterial ; Genetic Variation ; *Genome, Bacterial ; Genomics ; Host-Pathogen Interactions ; Mycobacterium/classification/drug effects/*genetics ; Phylogeny ; }, abstract = {BACKGROUND: Mycobacterium abscessus complex, the third most frequent mycobacterial complex responsible for community- and health care-associated infections in developed countries, comprises M. abscessus subsp. abscessus and M. abscessus subsp. bolletii previously referred to as Mycobacterium bolletii and Mycobacterium massiliense. The diversity of this group of opportunistic pathogens is poorly described.

RESULTS: In-depth analysis of 14 published M. abscessus complex genomes found a pan-genome of 6,153 proteins and core-genome of 3,947 (64.1%) proteins, indicating a non-conservative genome. Analysing the average percentage of amino-acid sequence identity (from 94.19% to 98.58%) discriminates three main clusters C1, C2 and C3: C1 comprises strains belonging to M. abscessus, C2 comprises strains belonging to M. massiliense and C3 comprises strains belonging to M. bolletii; and two sub-clusters in clusters C2 and C3. The phylogenomic network confirms these three clusters. The genome length (from 4.8 to 5.51-Mb) varies from 5.07-Mb in C1, 4.89-Mb in C2A, 5.01-Mb in C2B and 5.28-Mb in C3. The mean number of prophage regions (from 0 to 7) is 2 in C1; 1.33 in C2A; 3.5 in C2B and five in C3. A total of 36 genes are uniquely present in C1, 15 in C2 and 15 in C3. These genes could be used for the detection and identification of organisms in each cluster. Further, the mean number of host-interaction factors (including PE, PPE, LpqH, MCE, Yrbe and type VII secretion system ESX3 and ESX4) varies from 70 in cluster C1, 80 in cluster C2A, 74 in cluster C2B and 93 in clusters C3A and C3B. No significant differences in antibiotic resistance genes were observed between clusters, in contrast to previously reported in-vitro patterns of drug resistance. They encode both penicillin-binding proteins targeted by β-lactam antibiotics and an Ambler class A β-lactamase for which inhibitors exist.

CONCLUSIONS: Our comparative analysis indicates that M. abscessus complex comprises three genomospecies, corresponding to M. abscessus, M. bolletii, and M. massiliense. The genomics data here reported indicate differences in virulence of medical interest; and suggest targets for the refined detection and identification of M. abscessus.}, } @article {pmid24884520, year = {2014}, author = {De Maayer, P and Chan, WY and Rubagotti, E and Venter, SN and Toth, IK and Birch, PR and Coutinho, TA}, title = {Analysis of the Pantoea ananatis pan-genome reveals factors underlying its ability to colonize and interact with plant, insect and vertebrate hosts.}, journal = {BMC genomics}, volume = {15}, number = {1}, pages = {404}, pmid = {24884520}, issn = {1471-2164}, mesh = {Animals ; Bacterial Proteins/genetics/metabolism ; Enterobacteriaceae Infections/microbiology/*veterinary ; Gene-Environment Interaction ; *Genome, Bacterial ; Humans ; Insecta/microbiology ; Pantoea/classification/*genetics/*physiology ; Phylogeny ; Plant Diseases/*microbiology ; Plants/microbiology ; Vertebrates/microbiology ; }, abstract = {BACKGROUND: Pantoea ananatis is found in a wide range of natural environments, including water, soil, as part of the epi- and endophytic flora of various plant hosts, and in the insect gut. Some strains have proven effective as biological control agents and plant-growth promoters, while other strains have been implicated in diseases of a broad range of plant hosts and humans. By analysing the pan-genome of eight sequenced P. ananatis strains isolated from different sources we identified factors potentially underlying its ability to colonize and interact with hosts in both the plant and animal Kingdoms.

RESULTS: The pan-genome of the eight compared P. ananatis strains consisted of a core genome comprised of 3,876 protein coding sequences (CDSs) and a sizeable accessory genome consisting of 1,690 CDSs. We estimate that ~106 unique CDSs would be added to the pan-genome with each additional P. ananatis genome sequenced in the future. The accessory fraction is derived mainly from integrated prophages and codes mostly for proteins of unknown function. Comparison of the translated CDSs on the P. ananatis pan-genome with the proteins encoded on all sequenced bacterial genomes currently available revealed that P. ananatis carries a number of CDSs with orthologs restricted to bacteria associated with distinct hosts, namely plant-, animal- and insect-associated bacteria. These CDSs encode proteins with putative roles in transport and metabolism of carbohydrate and amino acid substrates, adherence to host tissues, protection against plant and animal defense mechanisms and the biosynthesis of potential pathogenicity determinants including insecticidal peptides, phytotoxins and type VI secretion system effectors.

CONCLUSIONS: P. ananatis has an 'open' pan-genome typical of bacterial species that colonize several different environments. The pan-genome incorporates a large number of genes encoding proteins that may enable P. ananatis to colonize, persist in and potentially cause disease symptoms in a wide range of plant and animal hosts.}, } @article {pmid24880341, year = {2014}, author = {Zhang, L and Chen, LH and Wan, H and Yang, R and Wang, Z and Feng, J and Yang, S and Jones, S and Wang, S and Zhou, W and Zhu, H and Killela, PJ and Zhang, J and Wu, Z and Li, G and Hao, S and Wang, Y and Webb, JB and Friedman, HS and Friedman, AH and McLendon, RE and He, Y and Reitman, ZJ and Bigner, DD and Yan, H}, title = {Exome sequencing identifies somatic gain-of-function PPM1D mutations in brainstem gliomas.}, journal = {Nature genetics}, volume = {46}, number = {7}, pages = {726-730}, pmid = {24880341}, issn = {1546-1718}, support = {P30 CA014236/CA/NCI NIH HHS/United States ; R01 CA140316/CA/NCI NIH HHS/United States ; T32 GM007184/GM/NIGMS NIH HHS/United States ; R01CA140316/CA/NCI NIH HHS/United States ; }, mesh = {Brain Stem Neoplasms/*genetics/mortality/pathology ; Cell Proliferation ; Cells, Cultured ; Checkpoint Kinase 2/metabolism ; DNA Copy Number Variations ; DNA Methylation ; Exome/*genetics ; Gene Expression Profiling ; Glioma/*genetics/mortality/pathology ; High-Throughput Nucleotide Sequencing ; Histones/metabolism ; Humans ; Immunoblotting ; Immunoenzyme Techniques ; Immunoprecipitation ; Mutation/*genetics ; Phosphoprotein Phosphatases/*genetics ; Prognosis ; Protein Phosphatase 2C ; Survival Rate ; Tumor Stem Cell Assay ; Tumor Suppressor Protein p53/metabolism ; }, abstract = {Gliomas arising in the brainstem and thalamus are devastating tumors that are difficult to surgically resect. To determine the genetic and epigenetic landscape of these tumors, we performed exomic sequencing of 14 brainstem gliomas (BSGs) and 12 thalamic gliomas. 
We also performed targeted mutational analysis of an additional 24 such tumors and genome-wide methylation profiling of 45 gliomas. This study led to the discovery of tumor-specific mutations in PPM1D, encoding wild-type p53-induced protein phosphatase 1D (WIP1), in 37.5% of the BSGs that harbored hallmark H3F3A mutations encoding p.Lys27Met substitutions. PPM1D mutations were mutually exclusive with TP53 mutations in BSG and attenuated p53 activation in vitro. PPM1D mutations were truncating alterations in exon 6 that enhanced the ability of PPM1D to suppress the activation of the DNA damage response checkpoint protein CHK2. These results define PPM1D as a frequent target of somatic mutation and as a potential therapeutic target in brainstem gliomas.}, } @article {pmid24818909, year = {2014}, author = {de Bentzmann, S and Bordi, C}, title = {Construction of Pseudomonas aeruginosa two-hybrid libraries for high-throughput assays.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {1149}, number = {}, pages = {225-241}, doi = {10.1007/978-1-4939-0473-0_19}, pmid = {24818909}, issn = {1940-6029}, mesh = {Cyclic AMP/biosynthesis ; Genes, Reporter ; Genome, Bacterial ; High-Throughput Screening Assays/*methods ; Plasmids/metabolism ; Protein Binding ; Pseudomonas aeruginosa/genetics/*metabolism ; Reproducibility of Results ; *Two-Hybrid System Techniques ; }, abstract = {In Pseudomonas aeruginosa, identification of new partners of a protein of interest could give precious clues to decipher a biological process in which this protein is involved. However, genes encoding for partners of a protein of interest are unknown and frequently scattered throughout the genome. We describe herein the construction and the use of pan-genomic bacterial two-hybrid libraries to identify new partners of a protein of interest encoded by P. 
aeruginosa.}, } @article {pmid24803571, year = {2014}, author = {Epstein, B and Sadowsky, MJ and Tiffin, P}, title = {Selection on horizontally transferred and duplicated genes in Sinorhizobium (Ensifer), the root-nodule symbionts of Medicago.}, journal = {Genome biology and evolution}, volume = {6}, number = {5}, pages = {1199-1209}, pmid = {24803571}, issn = {1759-6653}, mesh = {Gene Frequency ; *Gene Transfer, Horizontal ; *Genes, Duplicate ; Genetic Fitness ; Genetic Variation ; Genome, Bacterial ; Medicago/*microbiology ; Polymorphism, Single Nucleotide ; Root Nodules, Plant/*microbiology ; *Selection, Genetic ; Sinorhizobium meliloti/*genetics ; Symbiosis/genetics ; }, abstract = {Structural variation, including variation in gene copy number and presence or absence of genes, is a widespread and important source of genomic variation. We used whole-genome DNA sequences from 48 strains of Sinorhizobium (recently renamed Ensifer), including 20 strains of Sinorhizobium meliloti and 12 strains of S. medicae that were the focus of the analyses, to study the fitness effects of new structural variants created by duplication and horizontal gene transfer. We find that derived duplicated and horizontally transferred (HT) genes segregate at lower frequency than synonymous and nonsynonymous nucleotide variants in S. meliloti and S. medicae. Furthermore, the relative frequencies of different types of variants are more similar in S. medicae than in S. meliloti, the species with the larger effective population size. These results are consistent with the hypothesis that most duplications and HT genes have deleterious effects. Diversity of duplications, as measured by segregating duplicated genes per gene, is greater than nucleotide diversity, consistent with a high rate of duplication. 
Our results suggest that the vast majority of structural variants found among closely related bacterial strains are short-lived and unlikely to be involved in species-wide adaptation.}, } @article {pmid24803570, year = {2014}, author = {Teo, J and Tan, SY and Liu, Y and Tay, M and Ding, Y and Li, Y and Kjelleberg, S and Givskov, M and Lin, RT and Yang, L}, title = {Comparative genomic analysis of malaria mosquito vector-associated novel pathogen Elizabethkingia anophelis.}, journal = {Genome biology and evolution}, volume = {6}, number = {5}, pages = {1158-1165}, pmid = {24803570}, issn = {1759-6653}, mesh = {Animals ; Culicidae/genetics/microbiology ; Drug Resistance, Bacterial/genetics ; Flavobacteriaceae/*genetics/isolation & purification/*pathogenicity ; Flavobacteriaceae Infections/epidemiology/microbiology ; Genome, Bacterial ; Humans ; Insect Vectors/microbiology ; Intensive Care Units ; Molecular Sequence Data ; Phylogeny ; Virulence/genetics ; }, abstract = {Acquisition of Elizabethkingia infections in intensive care units (ICUs) has risen in the past decade. Treatment of Elizabethkingia infections is challenging due to the lack of effective therapeutic regimens, leading to a high mortality rate. Elizabethkingia infections have long been attributed to Elizabethkingia meningoseptica. Recently, we used whole-genome sequencing to reveal that E. anophelis is the pathogenic agent for an Elizabethkingia outbreak at two ICUs. We performed comparative genomic analysis of seven hospital-isolated E. anophelis strains with five available Elizabethkingia spp. genomes deposited in the National Center for Biotechnology Information Database. A pan-genomic approach was applied to identify the core- and pan-genome for the Elizabethkingia genus. We showed that unlike the hospital-isolated pathogen E. meningoseptica ATCC 12535 strain, the hospital-isolated E. anophelis strains have genome content and organization similar to the E. 
anophelis Ag1 and R26 strains isolated from the midgut microbiota of the malaria mosquito vector Anopheles gambiae. Both the core- and accessory genomes of Elizabethkingia spp. possess genes conferring antibiotic resistance and virulence. Our study highlights that E. anophelis is an emerging bacterial pathogen for hospital environments.}, } @article {pmid24773920, year = {2014}, author = {Grosso-Becerra, MV and Santos-Medellín, C and González-Valdez, A and Méndez, JL and Delgado, G and Morales-Espinosa, R and Servín-González, L and Alcaraz, LD and Soberón-Chávez, G}, title = {Pseudomonas aeruginosa clinical and environmental isolates constitute a single population with high phenotypic diversity.}, journal = {BMC genomics}, volume = {15}, number = {}, pages = {318}, pmid = {24773920}, issn = {1471-2164}, mesh = {Genome, Bacterial ; Molecular Sequence Data ; Phenotype ; Pseudomonas aeruginosa/genetics/*isolation & purification ; Virulence ; }, abstract = {BACKGROUND: Pseudomonas aeruginosa is an opportunistic pathogen with a high incidence of hospital infections that represents a threat to immune compromised patients. Genomic studies have shown that, in contrast to other pathogenic bacteria, clinical and environmental isolates do not show particular genomic differences. In addition, genetic variability of all the P. aeruginosa strains whose genomes have been sequenced is extremely low. This low genomic variability might be explained if clinical strains constitute a subpopulation of this bacterial species present in environments that are close to human populations, which preferentially produce virulence associated traits.

RESULTS: In this work, we sequenced the genomes and performed phenotypic descriptions for four non-human P. aeruginosa isolates collected from a plant, the ocean, a water-spring, and from dolphin stomach. We show that the four strains are phenotypically diverse and that this is not reflected in genomic variability, since their genomes are almost identical. Furthermore, we performed a detailed comparative genomic analysis of the four strains studied in this work with the thirteen previously reported P. aeruginosa genomes by means of describing their core and pan-genomes.

CONCLUSIONS: Contrary to what has been described for other bacteria we have found that the P. aeruginosa core genome is constituted by a high proportion of genes and that its pan-genome is thus relatively small. Considering the high degree of genomic conservation between isolates of P. aeruginosa from diverse environments, including human tissues, some implications for the treatment of infections are discussed. This work also represents a methodological contribution for the genomic study of P. aeruginosa, since we provide a database of the comparison of all the proteins encoded by the seventeen strains analyzed.}, } @article {pmid24766399, year = {2014}, author = {Pohl, S and Klockgether, J and Eckweiler, D and Khaledi, A and Schniederjans, M and Chouvarine, P and Tümmler, B and Häussler, S}, title = {The extensive set of accessory Pseudomonas aeruginosa genomic components.}, journal = {FEMS microbiology letters}, volume = {356}, number = {2}, pages = {235-241}, doi = {10.1111/1574-6968.12445}, pmid = {24766399}, issn = {1574-6968}, mesh = {Gene Expression Profiling ; Gene Transfer, Horizontal ; Genetic Variation ; *Genome, Bacterial ; *Interspersed Repetitive Sequences ; Pseudomonas Infections/microbiology ; Pseudomonas aeruginosa/classification/*genetics/isolation & purification ; }, abstract = {Up to 20% of the chromosomal Pseudomonas aeruginosa DNA belong to the so-called accessory genome. Its elements are specific for subgroups or even single strains and are likely acquired by horizontal gene transfer (HGT). Similarities of the accessory genomic elements to DNA from other bacterial species, mainly the DNA of γ- and β-proteobacteria, indicate a role of interspecies HGT. In this study, we analysed the expression of the accessory genome in 150 clinical P. aeruginosa isolates as uncovered by transcriptome sequencing and the presence of accessory genes in eleven additional isolates. Remarkably, despite the large number of P. 
aeruginosa strains that have been sequenced to date, we found new strain-specific compositions of accessory genomic elements and a high portion (10-20%) of genes without P. aeruginosa homologues. Although some genes were detected to be expressed/present in several isolates, individual patterns regarding the genes, their functions and the possible origin of the DNA were widespread among the tested strains. Our results demonstrate the unaltered potential to discover new traits within the P. aeruginosa population and underline that the P. aeruginosa pangenome is likely to increase with increasing sequence information.}, } @article {pmid24736785, year = {2014}, author = {Toro, N and Martínez-Rodríguez, L and Martínez-Abarca, F}, title = {Insights into the history of a bacterial group II intron remnant from the genomes of the nitrogen-fixing symbionts Sinorhizobium meliloti and Sinorhizobium medicae.}, journal = {Heredity}, volume = {113}, number = {4}, pages = {306-315}, pmid = {24736785}, issn = {1365-2540}, mesh = {Base Sequence ; *Evolution, Molecular ; *Genome, Bacterial ; *Introns ; Molecular Sequence Data ; Nitrogen Fixation ; Phylogeny ; Sinorhizobium/classification/*genetics/metabolism ; Sinorhizobium meliloti/classification/*genetics/metabolism ; Symbiosis ; }, abstract = {Group II introns are self-splicing catalytic RNAs that act as mobile retroelements. In bacteria, they are thought to be tolerated to some extent because they self-splice and home preferentially to sites outside of functional genes, generally within intergenic regions or in other mobile genetic elements, by mechanisms including the divergence of DNA target specificity to prevent target site saturation. RmInt1 is a mobile group II intron that is widespread in natural populations of Sinorhizobium meliloti and was first described in the GR4 strain. Like other bacterial group II introns, RmInt1 tends to evolve toward an inactive form by fragmentation, with loss of the 3' terminus. 
We identified genomic evidence of a fragmented intron closely related to RmInt1 buried in the genome of the extant S. meliloti/S. medicae species. By studying this intron, we obtained evidence for the occurrence of intron insertion before the divergence of ancient rhizobial species. This fragmented group II intron has thus existed for a long time and has provided sequence variation, on which selection can act, contributing to diverse genetic rearrangements, and to generate pan-genome divergence after strain differentiation. The data presented here suggest that fragmented group II introns within intergenic regions close to functionally important neighboring genes may have been microevolutionary forces driving adaptive evolution of these rhizobial species.}, } @article {pmid24733489, year = {2014}, author = {Islam, MA and Waller, AS and Hug, LA and Provart, NJ and Edwards, EA and Mahadevan, R}, title = {New insights into Dehalococcoides mccartyi metabolism from a reconstructed metabolic network-based systems-level analysis of D. mccartyi transcriptomes.}, journal = {PloS one}, volume = {9}, number = {4}, pages = {e94808}, pmid = {24733489}, issn = {1932-6203}, mesh = {Bacterial Proteins/genetics/metabolism ; Chloroflexi/*genetics/*metabolism ; Cluster Analysis ; Electron Transport ; Gene Expression Regulation, Bacterial ; Genes, Bacterial ; Hydrolases/metabolism ; Metabolic Networks and Pathways/*genetics ; Molecular Sequence Annotation ; Operon/genetics ; Principal Component Analysis ; Proteomics ; Reproducibility of Results ; *Systems Biology ; Transcriptome/*genetics ; }, abstract = {Organohalide respiration, mediated by Dehalococcoides mccartyi, is a useful bioremediation process that transforms ground water pollutants and known human carcinogens such as trichloroethene and vinyl chloride into benign ethenes. Successful application of this process depends on the fundamental understanding of the respiration and metabolism of D. mccartyi. 
Reductive dehalogenases, encoded by rdhA genes of these anaerobic bacteria, exclusively catalyze organohalide respiration and drive metabolism. To better elucidate D. mccartyi metabolism and physiology, we analyzed available transcriptomic data for a pure isolate (Dehalococcoides mccartyi strain 195) and a mixed microbial consortium (KB-1) using the previously developed pan-genome-scale reconstructed metabolic network of D. mccartyi. The transcriptomic data, together with available proteomic data helped confirm transcription and expression of the majority of genes in D. mccartyi genomes. A composite genome of two highly similar D. mccartyi strains (KB-1 Dhc) from the KB-1 metagenome sequence was constructed, and operon prediction was conducted for this composite genome and other single genomes. This operon analysis, together with the quality threshold clustering analysis of transcriptomic data helped generate experimentally testable hypotheses regarding the function of a number of hypothetical proteins and the poorly understood mechanism of energy conservation in D. mccartyi. We also identified functionally enriched important clusters (13 for strain 195 and 11 for KB-1 Dhc) of co-expressed metabolic genes using information from the reconstructed metabolic network. This analysis highlighted some metabolic genes and processes, including lipid metabolism, energy metabolism, and transport that potentially play important roles in organohalide respiration. 
Overall, this study shows the importance of an organism's metabolic reconstruction in analyzing various "omics" data to obtain improved understanding of the metabolism and physiology of the organism.}, } @article {pmid24684742, year = {2014}, author = {Lin, K and Zhang, N and Severing, EI and Nijveen, H and Cheng, F and Visser, RG and Wang, X and de Ridder, D and Bonnema, G}, title = {Beyond genomic variation--comparison and functional annotation of three Brassica rapa genomes: a turnip, a rapid cycling and a Chinese cabbage.}, journal = {BMC genomics}, volume = {15}, number = {1}, pages = {250}, pmid = {24684742}, issn = {1471-2164}, mesh = {Brassica/*genetics ; Chromosomes, Plant ; Computational Biology ; Evolution, Molecular ; Genetic Association Studies ; Genetic Markers ; *Genetic Variation ; *Genome, Plant ; Genomics/methods ; Genotype ; Molecular Sequence Annotation ; Multigene Family ; Phenotype ; }, abstract = {BACKGROUND: Brassica rapa is an economically important crop species. During its long breeding history, a large number of morphotypes have been generated, including leafy vegetables such as Chinese cabbage and pakchoi, turnip tuber crops and oil crops.

RESULTS: To investigate the genetic variation underlying this morphological variation, we re-sequenced, assembled and annotated the genomes of two B. rapa subspecies, turnip crops (turnip) and a rapid cycling. We then analysed the two resulting genomes together with the Chinese cabbage Chiifu reference genome to obtain an impression of the B. rapa pan-genome. The number of genes with protein-coding changes between the three genotypes was lower than that among different accessions of Arabidopsis thaliana, which can be explained by the smaller effective population size of B. rapa due to its domestication. Based on orthology to a number of non-brassica species, we estimated the date of divergence among the three B. rapa morphotypes at approximately 250,000 YA, far predating Brassica domestication (5,000-10,000 YA).

CONCLUSIONS: By analysing genes unique to turnip we found evidence for copy number differences in peroxidases, pointing to a role for the phenylpropanoid biosynthesis pathway in the generation of morphological variation. The estimated date of divergence among three B. rapa morphotypes implies that prior to domestication there was already considerably divergence among B. rapa genotypes. Our study thus provides two new B. rapa reference genomes, delivers a set of computer tools to analyse the resulting pan-genome and uses these to shed light on genetic drivers behind the rich morphological variation found in B. rapa.}, } @article {pmid24678308, year = {2014}, author = {Zhang, Y and Sievert, SM}, title = {Pan-genome analyses identify lineage- and niche-specific markers of evolution and adaptation in Epsilonproteobacteria.}, journal = {Frontiers in microbiology}, volume = {5}, number = {}, pages = {110}, pmid = {24678308}, issn = {1664-302X}, abstract = {The rapidly increasing availability of complete bacterial genomes has created new opportunities for reconstructing bacterial evolution, but it has also highlighted the difficulty to fully understand the genomic and functional variations occurring among different lineages. Using the class Epsilonproteobacteria as a case study, we investigated the composition, flexibility, and function of its pan-genomes. Models were constructed to extrapolate the expansion of pan-genomes at three different taxonomic levels. The results show that, for Epsilonproteobacteria the seemingly large genome variations among strains of the same species are less noticeable when compared with groups at higher taxonomic ranks, indicating that genome stability is imposed by the potential existence of taxonomic boundaries. 
The analyses of pan-genomes has also defined a set of universally conserved core genes, based on which a phylogenetic tree was constructed to confirm that thermophilic species from deep-sea hydrothermal vents represent the most ancient lineages of Epsilonproteobacteria. Moreover, by comparing the flexible genome of a chemoautotrophic deep-sea vent species to (1) genomes of species belonging to the same genus, but inhabiting different environments, and (2) genomes of other vent species, but belonging to different genera, we were able to delineate the relative importance of lineage-specific versus niche-specific genes. This result not only emphasizes the overall importance of phylogenetic proximity in shaping the variable part of the genome, but also highlights the adaptive functions of niche-specific genes. Overall, by modeling the expansion of pan-genomes and analyzing core and flexible genes, this study provides snapshots on how the complex processes of gene acquisition, conservation, and removal affect the evolution of different species, and contribute to the metabolic diversity and versatility of Epsilonproteobacteria.}, } @article {pmid24676150, year = {2014}, author = {Méric, G and Yahara, K and Mageiros, L and Pascoe, B and Maiden, MC and Jolley, KA and Sheppard, SK}, title = {A reference pan-genome approach to comparative bacterial genomics: identification of novel epidemiological markers in pathogenic Campylobacter.}, journal = {PloS one}, volume = {9}, number = {3}, pages = {e92798}, pmid = {24676150}, issn = {1932-6203}, support = {/WT_/Wellcome Trust/United Kingdom ; 087622/WT_/Wellcome Trust/United Kingdom ; G0801929/MRC_/Medical Research Council/United Kingdom ; /BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Campylobacter/classification/*genetics ; Campylobacter Infections/epidemiology/microbiology ; Campylobacter coli/genetics ; Campylobacter jejuni/genetics ; Genetic Variation ; *Genome, Bacterial ; 
*Genomics/methods ; Phylogeny ; Prevalence ; }, abstract = {The increasing availability of hundreds of whole bacterial genomes provides opportunities for enhanced understanding of the genes and alleles responsible for clinically important phenotypes and how they evolved. However, it is a significant challenge to develop easy-to-use and scalable methods for characterizing these large and complex data and relating it to disease epidemiology. Existing approaches typically focus on either homologous sequence variation in genes that are shared by all isolates, or non-homologous sequence variation--focusing on genes that are differentially present in the population. Here we present a comparative genomics approach that simultaneously approximates core and accessory genome variation in pathogen populations and apply it to pathogenic species in the genus Campylobacter. A total of 7 published Campylobacter jejuni and Campylobacter coli genomes were selected to represent diversity across these species, and a list of all loci that were present at least once was compiled. After filtering duplicates a 7-isolate reference pan-genome, of 3,933 loci, was defined. A core genome of 1,035 genes was ubiquitous in the sample accounting for 59% of the genes in each isolate (average genome size of 1.68 Mb). The accessory genome contained 2,792 genes. A Campylobacter population sample of 192 genomes was screened for the presence of reference pan-genome loci with gene presence defined as a BLAST match of ≥ 70% identity over ≥ 50% of the locus length--aligned using MUSCLE on a gene-by-gene basis. A total of 21 genes were present only in C. coli and 27 only in C. jejuni, providing information about functional differences associated with species and novel epidemiological markers for population genomic analyses. 
Homologs of these genes were found in several of the genomes used to define the pan-genome and, therefore, would not have been identified using a single reference strain approach.}, } @article {pmid24672511, year = {2014}, author = {Lukjancenko, O and Ussery, DW}, title = {Vibrio chromosome-specific families.}, journal = {Frontiers in microbiology}, volume = {5}, number = {}, pages = {73}, pmid = {24672511}, issn = {1664-302X}, abstract = {We have compared chromosome-specific genes in a set of 18 finished Vibrio genomes, and, in addition, also calculated the pan- and core-genomes from a data set of more than 250 draft Vibrio genome sequences. These genomes come from 9 known species and 2 unknown species. Within the finished chromosomes, we find a core set of 1269 encoded protein families for chromosome 1, and a core of 252 encoded protein families for chromosome 2. Many of these core proteins are also found in the draft genomes (although which chromosome they are located on is unknown.) Of the chromosome specific core protein families, 1169 and 153 are uniquely found in chromosomes 1 and 2, respectively. Gene ontology (GO) terms for each of the protein families were determined, and the different sets for each chromosome were compared. A total of 363 different "Molecular Function" GO categories were found for chromosome 1 specific protein families, and these include several broad activities: pyridoxine 5' phosphate synthetase, glucosylceramidase, heme transport, DNA ligase, amino acid binding, and ribosomal components; in contrast, chromosome 2 specific protein families have only 66 Molecular Function GO terms and include many membrane-associated activities, such as ion channels, transmembrane transporters, and electron transport chain proteins. Thus, it appears that whilst there are many "housekeeping systems" encoded in chromosome 1, there are far fewer core functions found in chromosome 2. 
However, the presence of many membrane-associated encoded proteins in chromosome 2 is surprising.}, } @article {pmid24651173, year = {2014}, author = {Xie, JB and Du, Z and Bai, L and Tian, C and Zhang, Y and Xie, JY and Wang, T and Liu, X and Chen, X and Cheng, Q and Chen, S and Li, J}, title = {Comparative genomic analysis of N2-fixing and non-N2-fixing Paenibacillus spp.: organization, evolution and expression of the nitrogen fixation genes.}, journal = {PLoS genetics}, volume = {10}, number = {3}, pages = {e1004231}, pmid = {24651173}, issn = {1553-7404}, mesh = {Binding Sites ; Escherichia coli/*genetics/metabolism ; Evolution, Molecular ; Gene Transfer, Horizontal/genetics ; *Genomics ; Multigene Family ; Nitrogen Fixation/*genetics/physiology ; Nitrogenase/genetics ; Paenibacillus/genetics/*metabolism ; Phylogeny ; Promoter Regions, Genetic ; }, abstract = {We provide here a comparative genome analysis of 31 strains within the genus Paenibacillus including 11 new genomic sequences of N2-fixing strains. The heterogeneity of the 31 genomes (15 N2-fixing and 16 non-N2-fixing Paenibacillus strains) was reflected in the large size of the shell genome, which makes up approximately 65.2% of the genes in pan genome. Large numbers of transposable elements might be related to the heterogeneity. We discovered that a minimal and compact nif cluster comprising nine genes nifB, nifH, nifD, nifK, nifE, nifN, nifX, hesA and nifV encoding Mo-nitrogenase is conserved in the 15 N2-fixing strains. The nif cluster is under control of a σ(70)-dependent promoter and possesses a GlnR/TnrA-binding site in the promoter. Suf system encoding [Fe-S] cluster is highly conserved in N2-fixing and non-N2-fixing strains. Furthermore, we demonstrate that the nif cluster enabled Escherichia coli JM109 to fix nitrogen. Phylogeny of the concatenated NifHDK sequences indicates that Paenibacillus and Frankia are sister groups. 
Phylogeny of the concatenated 275 single-copy core genes suggests that the ancestral Paenibacillus did not fix nitrogen. The N2-fixing Paenibacillus strains were generated by acquiring the nif cluster via horizontal gene transfer (HGT) from a source related to Frankia. During the history of evolution, the nif cluster was lost, producing some non-N2-fixing strains, and vnf encoding V-nitrogenase or anf encoding Fe-nitrogenase was acquired, causing further diversification of some strains. In addition, some N2-fixing strains have additional nif and nif-like genes which may result from gene duplications. The evolution of nitrogen fixation in Paenibacillus involves a mix of gain, loss, HGT and duplication of nif/anf/vnf genes. This study not only reveals the organization and distribution of nitrogen fixation genes in Paenibacillus, but also provides insight into the complex evolutionary history of nitrogen fixation.}, } @article {pmid24646409, year = {2014}, author = {Starkenburg, SR and Kwon, KJ and Jha, RK and McKay, C and Jacobs, M and Chertkov, O and Twary, S and Rocap, G and Cattolico, RA}, title = {A pangenomic analysis of the Nannochloropsis organellar genomes reveals novel genetic variations in key metabolic genes.}, journal = {BMC genomics}, volume = {15}, number = {}, pages = {212}, pmid = {24646409}, issn = {1471-2164}, mesh = {ATP Synthetase Complexes/chemistry/genetics/metabolism ; Amino Acid Sequence ; Chloroplasts/genetics ; *Genome ; Genome, Mitochondrial ; Mitochondria/genetics ; Molecular Sequence Annotation ; Molecular Sequence Data ; Multigene Family ; Protein Structure, Secondary ; Sequence Alignment ; Sequence Analysis, DNA ; Stramenopiles/*genetics ; Transcriptome ; }, abstract = {BACKGROUND: Microalgae in the genus Nannochloropsis are photosynthetic marine Eustigmatophytes of significant interest to the bioenergy and aquaculture sectors due to their ability to efficiently accumulate biomass and lipids for utilization in renewable transportation 
fuels, aquaculture feed, and other useful bioproducts. To better understand the genetic complement that drives the metabolic processes of these organisms, we present the assembly and comparative pangenomic analysis of the chloroplast and mitochondrial genomes from Nannochloropsis salina CCMP1776.

RESULTS: The chloroplast and mitochondrial genomes of N. salina are 98.4% and 97% identical to their counterparts in Nannochloropsis gaditana. Comparison of the Nannochloropsis pangenome to other algae within and outside of the same phyla revealed regions of significant genetic divergence in key genes that encode proteins needed for regulation of branched chain amino synthesis (acetohydroxyacid synthase), carbon fixation (RuBisCO activase), energy conservation (ATP synthase), protein synthesis and homeostasis (Clp protease, ribosome).

CONCLUSIONS: Many organellar gene modifications in Nannochloropsis are unique and deviate from conserved orthologs found across the tree of life. Implementation of secondary and tertiary structure prediction was crucial to functionally characterize many proteins and therefore should be implemented in automated annotation pipelines. The exceptional similarity of the N. salina and N. gaditana organellar genomes suggests that N. gaditana be reclassified as a strain of N. salina.}, } @article {pmid24626479, year = {2014}, author = {Kerr, JE and Abramian, JR and Dao, DH and Rigney, TW and Fritz, J and Pham, T and Gay, I and Parthasarathy, K and Wang, BY and Zhang, W and Tribble, GD}, title = {Genetic exchange of fimbrial alleles exemplifies the adaptive virulence strategy of Porphyromonas gingivalis.}, journal = {PloS one}, volume = {9}, number = {3}, pages = {e91696}, pmid = {24626479}, issn = {1932-6203}, support = {P30 CA016672/CA/NCI NIH HHS/United States ; R01 DE019634/DE/NIDCR NIH HHS/United States ; UL1 TR000371/TR/NCATS NIH HHS/United States ; DE-019634/DE/NIDCR NIH HHS/United States ; }, mesh = {Alleles ; Fimbriae Proteins/genetics/metabolism ; Fimbriae, Bacterial/*genetics ; *Gene Transfer, Horizontal ; Genetic Variation ; Genotype ; Humans ; Periodontitis/*genetics/microbiology/pathology ; Phenotype ; Porphyromonas gingivalis/*genetics/pathogenicity ; Virulence/genetics ; }, abstract = {Porphyromonas gingivalis is a gram-negative anaerobic bacterium, a member of the human oral microbiome, and a proposed "keystone" pathogen in the development of chronic periodontitis, an inflammatory disease of the gingiva. P. gingivalis is a genetically diverse species, and is able to exchange chromosomal DNA between strains by natural competence and conjugation. 
In this study, we investigate the role of horizontal DNA transfer as an adaptive process to modify behavior, using the major fimbriae as our model system, due to their critical role in mediating interactions with the host environment. We show that P. gingivalis is able to exchange fimbrial allele types I and IV into four distinct strain backgrounds via natural competence. In all recombinants, we detected a complete exchange of the entire fimA allele, and the rate of exchange varies between the different strain backgrounds. In addition, gene exchange within other regions of the fimbrial genetic locus was identified. To measure the biological implications of these allele swaps we compared three genotypes of fimA in an isogenic background, strain ATCC 33277. We demonstrate that exchange of fimbrial allele type results in profound phenotypic changes, including the quantity of fimbriae elaborated, membrane blebbing, auto-aggregation and other virulence-associated phenotypes. Replacement of the type I allele with either the type III or IV allele resulted in increased invasion of gingival fibroblast cells relative to the isogenic parent strain. While genetic variability is known to impact host-microbiome interactions, this is the first study to quantitatively assess the adaptive effect of exchanging genes within the pan genome cloud. 
This is significant as it presents a potential mechanism by which opportunistic pathogens may acquire the traits necessary to modify host-microbial interactions.}, } @article {pmid24625962, year = {2014}, author = {Richards, VP and Palmer, SR and Pavinski Bitar, PD and Qin, X and Weinstock, GM and Highlander, SK and Town, CD and Burne, RA and Stanhope, MJ}, title = {Phylogenomics and the dynamic genome evolution of the genus Streptococcus.}, journal = {Genome biology and evolution}, volume = {6}, number = {4}, pages = {741--753}, pmid = {24625962}, issn = {1759-6653}, support = {T90 DE021990/DE/NIDCR NIH HHS/United States ; R01 AI073368/AI/NIAID NIH HHS/United States ; AI073368/AI/NIAID NIH HHS/United States ; HHSN272200900007C/AI/NIAID NIH HHS/United States ; 272200900007C//PHS HHS/United States ; }, mesh = {Base Sequence ; *Evolution, Molecular ; Gene Transfer, Horizontal/*physiology ; Genome, Bacterial/*physiology ; Humans ; Molecular Sequence Data ; *Phylogeny ; Streptococcus/*genetics ; }, abstract = {The genus Streptococcus comprises important pathogens that have a severe impact on human health and are responsible for substantial economic losses to agriculture. Here, we utilize 46 Streptococcus genome sequences (44 species), including eight species sequenced here, to provide the first genomic level insight into the evolutionary history and genetic basis underlying the functional diversity of all major groups of this genus. Gene gain/loss analysis revealed a dynamic pattern of genome evolution characterized by an initial period of gene gain followed by a period of loss, as the major groups within the genus diversified. This was followed by a period of genome expansion associated with the origins of the present extant species. The pattern is concordant with an emerging view that genomes evolve through a dynamic process of expansion and streamlining. 
A large proportion of the pan-genome has experienced lateral gene transfer (LGT) with causative factors, such as relatedness and shared environment, operating over different evolutionary scales. Multiple gene ontology terms were significantly enriched for each group, and mapping terms onto the phylogeny showed that those corresponding to genes born on branches leading to the major groups represented approximately one-fifth of those enriched. Furthermore, despite the extensive LGT, several biochemical characteristics have been retained since group formation, suggesting genomic cohesiveness through time, and that these characteristics may be fundamental to each group. For example, proteolysis: mitis group; urea metabolism: salivarius group; carbohydrate metabolism: pyogenic group; and transcription regulation: bovis group.}, } @article {pmid24616837, year = {2014}, author = {Rodriguez-Valera, F and Mizuno, CM and Ghai, R}, title = {Tales from a thousand and one phages.}, journal = {Bacteriophage}, volume = {4}, number = {1}, pages = {e28265}, pmid = {24616837}, issn = {2159-7073}, abstract = {The sequencing of marine metagenomic fosmids led to the discovery of several new complete phage genomes. Among the 21 major sequence groups, 10 totally novel groups of marine phages could be identified. Some of these represent the first phages infecting large marine prokaryotic phyla, such as the Verrucomicrobia and the recently described Ca. Actinomarinales. Coming from a single deep photic zone sample the diversity of phages found is astonishing, and the comparison with a metavirome from the same location indicates that only 2% of the real diversity was recovered. 
In addition to this large macro-diversity, rich micro-diversity was also found, affecting host-recognition modules, mirroring the variation of cell surface components in their host marine microbes.}, } @article {pmid24592321, year = {2014}, author = {Tümmler, B and Wiehlmann, L and Klockgether, J and Cramer, N}, title = {Advances in understanding Pseudomonas.}, journal = {F1000prime reports}, volume = {6}, number = {}, pages = {9}, pmid = {24592321}, issn = {2051-7599}, abstract = {Pseudomonas aeruginosa, the type species of pseudomonads, is an opportunistic pathogen that colonizes a wide range of niches. Current genome sequencing projects are producing previously inconceivable detail about the population biology and evolution of P. aeruginosa. Its pan-genome has a larger genetic repertoire than the human genome, which explains the broad metabolic capabilities of P. aeruginosa and its ubiquitous distribution in aquatic habitats. P. aeruginosa may persist in the airways of individuals with cystic fibrosis for decades. The ongoing whole-genome analyses of serial isolates from cystic fibrosis patients provide the so far singular opportunity to monitor the microevolution of a bacterial pathogen during chronic infection over thousands of generations. Although the evolution in cystic fibrosis lungs is neutral overall, some pathoadaptive mutations are selected during the within-host evolutionary process. 
Even a single mutation may be sufficient to generate novel complex traits provided that predisposing mutational events have previously occurred in the clonal lineage.}, } @article {pmid24590676, year = {2014}, author = {Robins, WP and Mekalanos, JJ}, title = {Genomic science in understanding cholera outbreaks and evolution of Vibrio cholerae as a human pathogen.}, journal = {Current topics in microbiology and immunology}, volume = {379}, number = {}, pages = {211--229}, pmid = {24590676}, issn = {0070-217X}, support = {R01 GM068851/GM/NIGMS NIH HHS/United States ; R37 AI018045/AI/NIAID NIH HHS/United States ; T32 AI007061/AI/NIAID NIH HHS/United States ; }, mesh = {Cholera/*epidemiology ; *Disease Outbreaks ; *Evolution, Molecular ; *Genomics ; Humans ; Polymorphism, Single Nucleotide ; Vibrio cholerae/*genetics ; }, abstract = {Modern genomic and bioinformatic approaches have been applied to interrogate the V. cholerae genome, the role of genomic elements in cholera disease, and the origin, relatedness, and dissemination of epidemic strains. A universal attribute of choleragenic strains includes a repertoire of pathogenicity islands and virulence genes, namely the CTXϕ prophage and Toxin Co-regulated Pilus (TCP) in addition to other virulent genetic elements including those referred to as Seventh Pandemic Islands. During the last decade, the advent of Next Generation Sequencing (NGS) has provided highly resolved and often complete genomic sequences of epidemic isolates in addition to both clinical and environmental strains isolated from geographically unconnected regions. Genomic comparisons of these strains, as was completed during and following the Haitian outbreak in 2010, reveals that most epidemic strains appear closely related, regardless of region of origin. 
Non-O1 clinical or environmental strains may also possess some virulence islands, but phylogenic analysis of the core genome suggests they are more diverse and distantly related than those isolated during epidemics. Like Haiti, genomic studies that examine both the Vibrio core and pan-genome in addition to Single Nucleotide Polymorphisms (SNPs) conclude that a number of epidemics are caused by strains that closely resemble those in Asia, and often appear to originate there and then spread globally. The accumulation of SNPs in the epidemic strains over time can then be applied to better understand the evolution of the V. cholerae genome as an etiological agent.}, } @article {pmid24586485, year = {2014}, author = {Laksanalamai, P and Huang, B and Sabo, J and Burall, LS and Zhao, S and Bates, J and Datta, AR}, title = {Genomic characterization of novel Listeria monocytogenes serotype 4b variant strains.}, journal = {PloS one}, volume = {9}, number = {2}, pages = {e89024}, pmid = {24586485}, issn = {1932-6203}, mesh = {Australia ; Bacterial Typing Techniques ; *Genetic Variation ; *Genome, Bacterial ; Humans ; Listeria monocytogenes/*genetics/isolation & purification ; Listeriosis/microbiology ; Oligonucleotide Array Sequence Analysis ; Phylogeny ; Polymerase Chain Reaction/methods ; Serogroup ; }, abstract = {Over 90% of the human listeriosis cases are caused by Listeria monocytogenes serotypes 1/2a, 1/2b and 4b strains. As an alternative to antigen-antibody based serotyping, a PCR-based method for serogrouping has been developed and validated. In this communication, we report an in-depth analysis of five 4b variant strains, four clinical isolates from Australia and one environmental isolate from USA. Although these five strains were serotype 4b by classical serotyping method, the serogrouping PCR profiles of these strains show the presence of a 1/2a-3a specific amplicon in addition to the standard 4b-4d-4e specific amplicons. 
These strains were further analyzed by pulsed field gel electrophoresis, binary gene typing, multi-locus variable-number-tandem-repeat analysis and a high density pan-genomic Listeria microarray. Using these sub-typing results, the clinical isolates were grouped into two distinct genomic groups- one of which could be part of an unidentified outbreak. The microarray results when compared with our database of other 4b outbreak isolates indicated that the serotype 4b variant strains represent very different genotypic profiles than the known reported 4b outbreak strains representing major epidemic clones. The acquisition of serotype 1/2a gene clusters by the 4b variant strains appears to be independent in origin, spanning large areas of geographical and temporal space and may indicate predisposition of some 4b strains towards accepting DNA from related organisms.}, } @article {pmid24581150, year = {2014}, author = {Bottacini, F and O'Connell Motherway, M and Kuczynski, J and O'Connell, KJ and Serafini, F and Duranti, S and Milani, C and Turroni, F and Lugli, GA and Zomer, A and Zhurina, D and Riedel, C and Ventura, M and van Sinderen, D}, title = {Comparative genomics of the Bifidobacterium breve taxon.}, journal = {BMC genomics}, volume = {15}, number = {1}, pages = {170}, pmid = {24581150}, issn = {1471-2164}, mesh = {Bifidobacterium/classification/*genetics/metabolism ; Carbohydrate Metabolism ; Cluster Analysis ; Computational Biology ; DNA Transposable Elements ; Gene Order ; Genetic Association Studies ; Genetic Variation ; *Genome, Bacterial ; *Genomics ; High-Throughput Nucleotide Sequencing ; Metabolome ; Metabolomics ; Molecular Sequence Data ; Multigene Family ; Phylogeny ; }, abstract = {BACKGROUND: Bifidobacteria are commonly found as part of the microbiota of the gastrointestinal tract (GIT) of a broad range of hosts, where their presence is positively correlated with the host's health status. 
In this study, we assessed the genomes of thirteen representatives of Bifidobacterium breve, which is not only a frequently encountered component of the (adult and infant) human gut microbiota, but can also be isolated from human milk and vagina.

RESULTS: In silico analysis of genome sequences from thirteen B. breve strains isolated from different environments (infant and adult faeces, human milk, human vagina) shows that the genetic variability of this species principally consists of hypothetical genes and mobile elements, but, interestingly, also genes correlated with the adaptation to host environment and gut colonization. These latter genes specify the biosynthetic machinery for sortase-dependent pili and exopolysaccharide production, as well as genes that provide protection against invasion of foreign DNA (i.e. CRISPR loci and restriction/modification systems), and genes that encode enzymes responsible for carbohydrate fermentation. Gene-trait matching analysis showed clear correlations between known metabolic capabilities and characterized genes, and it also allowed the identification of a gene cluster involved in the utilization of the alcohol-sugar sorbitol.

CONCLUSIONS: Genome analysis of thirteen representatives of the B. breve species revealed that the deduced pan-genome exhibits an essentially close trend. For this reason our analyses suggest that this number of B. breve representatives is sufficient to fully describe the pan-genome of this species. Comparative genomics also facilitated the genetic explanation for differential carbon source utilization phenotypes previously observed in different strains of B. breve.}, } @article {pmid26355784, year = {2014}, author = {Sirén, J and Välimäki, N and Mäkinen, V}, title = {Indexing Graphs for Path Queries with Applications in Genome Research.}, journal = {IEEE/ACM transactions on computational biology and bioinformatics}, volume = {11}, number = {2}, pages = {375--388}, doi = {10.1109/TCBB.2013.2297101}, pmid = {26355784}, issn = {1557-9964}, mesh = {Databases, Genetic ; Genomics/*methods ; Humans ; Phylogeny ; Polymorphism, Single Nucleotide ; Sequence Alignment/*methods ; Sequence Analysis, DNA/methods ; }, abstract = {We propose a generic approach to replace the canonical sequence representation of genomes with graph representations, and study several applications of such extensions. We extend the Burrows-Wheeler transform (BWT) of strings to acyclic directed labeled graphs, to support path queries as an extension to substring searching. We develop, apply, and tailor this technique to a) read alignment on an extended BWT index of a graph representing pan-genome, i.e., reference genome and known variants of it; and b) split-read alignment on an extended BWT index of a splicing graph. Other possible applications include probe/primer design, alignments to assembly graphs, and alignments to phylogenetic tree of partial-order graphs. We report several experiments on the feasibility and applicability of the approach. 
Especially on highly-polymorphic genome regions our pan-genome index is making a significant improvement in alignment accuracy.}, } @article {pmid24565753, year = {2014}, author = {Settles, EW and Kink, JA and Talaat, A}, title = {Attenuated strains of Mycobacterium avium subspecies paratuberculosis as vaccine candidates against Johne's disease.}, journal = {Vaccine}, volume = {32}, number = {18}, pages = {2062--2069}, doi = {10.1016/j.vaccine.2014.02.010}, pmid = {24565753}, issn = {1873-2518}, mesh = {Animals ; Antibodies, Bacterial/blood ; Bacterial Vaccines/*immunology ; DNA Transposable Elements ; Immunity, Cellular ; Immunoglobulin G/blood ; Interferon-gamma/immunology ; Interleukin-17/immunology ; Mice ; Mice, Inbred C57BL ; Mutation ; Mycobacterium avium subsp. paratuberculosis/*classification/genetics ; Paratuberculosis/*prevention & control ; Vaccines, Attenuated/immunology ; Virulence Factors/genetics ; }, abstract = {Mycobacterium avium subspecies paratuberculosis (M. paratuberculosis) is the causative agent of Johne's disease in ruminants. Johne's disease has a severe economic impact on the dairy industry in the USA and worldwide. In an effort to combat this disease, we screened several transposon mutants that were attenuated in the murine model of paratuberculosis for the potential use as live attenuated vaccines. Using the murine model, two vaccine candidates (pgs1360, pgs3965 with mutations of fabG2_2 and umaA1, respectively) were at or below the limit of detection for tissue colonization suggesting their low level persistence and hence safety. Prior to challenge, both candidates induced a M. paratuberculosis-specific IFN-γ, an indication of eliciting cell-mediated immunity. Following challenge with a virulent strain of M. paratuberculosis, the two vaccine candidates significantly reduced bacterial colonization in organs with reduced histological scores compared to control animals. 
In addition, one of the vaccine candidates (pgs3965) also induced IL-17a, a cytokine associated with protective immunity in mycobacterial infection. Our analysis suggested that the pgs3965 vaccine candidate is a potential live-attenuated vaccine that could be tested further in ruminant models of paratuberculosis. The analysis also validated our screening strategy to identify effective vaccine candidates against intracellular pathogens.}, } @article {pmid24548794, year = {2014}, author = {Marroni, F and Pinosio, S and Morgante, M}, title = {Structural variation and genome complexity: is dispensable really dispensable?.}, journal = {Current opinion in plant biology}, volume = {18}, number = {}, pages = {31-36}, doi = {10.1016/j.pbi.2014.01.003}, pmid = {24548794}, issn = {1879-0356}, mesh = {Biological Evolution ; Gene Expression Regulation, Plant ; Genome, Plant/*genetics ; *Genomic Structural Variation ; Plants/genetics ; }, abstract = {Structural variants (SVs) such as copy number variants (CNVs) and presence/absence variants (PAVs) substantially contribute to genetic variation and have an important effect on phenotypic diversity. Since unbalanced SVs are by definition sequences present only in some individuals, they have therefore been referred to as dispensable genome and are not necessary for survival, even though they may provide an important contribution to phenotypic diversity within the species. However, some multi-copy sequences of the dispensable genomes (e.g., multigene families) may be needed in a given proportion by each individual, thus belonging to a conditionally dispensable portion of the pan-genome. Another interesting aspect reported by recent studies is that the rate at which SVs are formed might be influenced by the mating system and by common environmental stresses. 
In conclusion the dispensable genome plays an important role in genome evolution and in the complex interplay between the genome and the environment.}, } @article {pmid24515248, year = {2014}, author = {Choo, SW and Wee, WY and Ngeow, YF and Mitchell, W and Tan, JL and Wong, GJ and Zhao, Y and Xiao, J}, title = {Genomic reconnaissance of clinical isolates of emerging human pathogen Mycobacterium abscessus reveals high evolutionary potential.}, journal = {Scientific reports}, volume = {4}, number = {}, pages = {4061}, pmid = {24515248}, issn = {2045-2322}, mesh = {*Biological Evolution ; Databases, Genetic ; *Genome, Bacterial ; Humans ; Molecular Sequence Annotation ; Mycobacterium/classification/*genetics/isolation & purification ; Mycobacterium Infections/*microbiology/pathology ; Mycobacterium tuberculosis/classification/genetics ; Phylogeny ; RNA, Transfer/chemistry/metabolism ; Sequence Alignment ; Sequence Analysis, DNA ; Virulence Factors/genetics ; }, abstract = {Mycobacterium abscessus (Ma) is an emerging human pathogen that causes both soft tissue infections and systemic disease. We present the first comparative whole-genome study of Ma strains isolated from patients of wide geographical origin. We found a high proportion of accessory strain-specific genes indicating an open, non-conservative pan-genome structure, and clear evidence of rapid phage-mediated evolution. Although we found fewer virulence factors in Ma compared to M. tuberculosis, our data indicated that Ma evolves rapidly and therefore should be monitored closely for the acquisition of more pathogenic traits. 
This comparative study provides a better understanding of Ma and forms the basis for future functional work on this important pathogen.}, } @article {pmid24505344, year = {2014}, author = {Leekitcharoenphon, P and Nielsen, EM and Kaas, RS and Lund, O and Aarestrup, FM}, title = {Evaluation of whole genome sequencing for outbreak detection of Salmonella enterica.}, journal = {PloS one}, volume = {9}, number = {2}, pages = {e87991}, pmid = {24505344}, issn = {1932-6203}, mesh = {DNA, Bacterial/genetics ; Disease Outbreaks ; Genome, Bacterial/*genetics ; Genome-Wide Association Study/methods ; Phylogeny ; Polymorphism, Single Nucleotide/genetics ; Salmonella Infections/*epidemiology/*genetics ; Salmonella enterica/*genetics ; Sequence Analysis, DNA/methods ; }, abstract = {Salmonella enterica is a common cause of minor and large food borne outbreaks. To achieve successful and nearly 'real-time' monitoring and identification of outbreaks, reliable sub-typing is essential. Whole genome sequencing (WGS) shows great promises for using as a routine epidemiological typing tool. Here we evaluate WGS for typing of S. Typhimurium including different approaches for analyzing and comparing the data. A collection of 34 S. Typhimurium isolates was sequenced. This consisted of 18 isolates from six outbreaks and 16 epidemiologically unrelated background strains. In addition, 8 S. Enteritidis and 5 S. Derby were also sequenced and used for comparison. A number of different bioinformatics approaches were applied on the data; including pan-genome tree, k-mer tree, nucleotide difference tree and SNP tree. The outcome of each approach was evaluated in relation to the association of the isolates to specific outbreaks. The pan-genome tree clustered 65% of the S. Typhimurium isolates according to the pre-defined epidemiology, the k-mer tree 88%, the nucleotide difference tree 100% and the SNP tree 100% of the strains within S. Typhimurium. 
The resulting outcome of the four phylogenetic analyses were also compared to PFGE revealing that WGS typing achieved the greater performance than the traditional method. In conclusion, for S. Typhimurium, SNP analysis and nucleotide difference approach of WGS data seem to be the superior methods for epidemiological typing compared to other phylogenetic analytic approaches that may be used on WGS. These approaches were also superior to the more classical typing method, PFGE. Our study also indicates that WGS alone is insufficient to determine whether strains are related or un-related to outbreaks. This still requires the combination of epidemiological data and whole genome sequencing results.}, } @article {pmid24492180, year = {2014}, author = {Assié, G and Jouinot, A and Bertherat, J}, title = {The 'omics' of adrenocortical tumours for personalized medicine.}, journal = {Nature reviews. Endocrinology}, volume = {10}, number = {4}, pages = {215-228}, pmid = {24492180}, issn = {1759-5037}, mesh = {Adrenal Cortex Neoplasms/genetics/*pathology/physiopathology ; DNA Methylation ; Genome, Human ; Genomics ; Humans ; MicroRNAs/genetics ; Precision Medicine/*trends ; Recurrence ; Survival Analysis ; Transcriptome ; }, abstract = {Pan-genomic analyses of genetic and epigenetic alterations and gene expression profiles are providing important new insights into the pathogenesis and molecular classification of cancers. The technologies and methods used for these studies are rapidly diversifying and improving. The use of such methodologies for the analysis of adrenocortical tumours has revealed clear transcriptomic (mRNA and microRNA expression profiles), epigenomic (DNA methylation profiles) and genomic (DNA mutations and chromosomal alterations) differences between benign and malignant tumours. 
Interestingly, genomic studies of adrenal cancers have also identified subtypes of malignant tumours, which demonstrate distinct patterns of molecular alterations and are associated with different clinical outcomes. These discoveries have created the opportunity for classifying adrenocortical tumours on the basis of molecular analyses. Following these genomic studies, efforts to develop new molecular tools that improve diagnosis and prognostication of patients with adrenocortical tumours have also been made. This Review describes the progress that has been made towards classification of adrenocortical tumours to date based on key genomic approaches. In addition, the potential for the development and use of various molecular tools to personalize the management of patients with adrenocortical tumours is discussed.}, } @article {pmid24489782, year = {2014}, author = {Guo, M and Zhou, Q and Zhou, Y and Yang, L and Liu, T and Yang, J and Chen, Y and Su, L and Xu, J and Chen, J and Liu, F and Chen, J and Dai, W and Ni, P and Fang, C and Yang, R}, title = {Genomic evolution of 11 type strains within family Planctomycetaceae.}, journal = {PloS one}, volume = {9}, number = {1}, pages = {e86752}, pmid = {24489782}, issn = {1932-6203}, mesh = {*Biological Evolution ; *Genome, Bacterial ; Genomic Islands ; Metabolic Networks and Pathways ; Multigene Family ; *Phylogeny ; Planctomycetales/*classification/*genetics/metabolism/ultrastructure ; Plasmids ; }, abstract = {The species in family Planctomycetaceae are ideal groups for investigating the origin of eukaryotes. Their cells are divided by a lipidic intracytoplasmic membrane and they share a number of eukaryote-like molecular characteristics. However, their genomic structures, potential abilities, and evolutionary status are still unknown. In this study, we searched for common protein families and a core genome/pan genome based on 11 sequenced species in family Planctomycetaceae. 
Then, we constructed phylogenetic tree based on their 832 common protein families. We also annotated the 11 genomes using the Clusters of Orthologous Groups database. Moreover, we predicted and reconstructed their core/pan metabolic pathways using the KEGG (Kyoto Encyclopedia of Genes and Genomes) orthology system. Subsequently, we identified genomic islands (GIs) and structural variations (SVs) among the five complete genomes and we specifically investigated the integration of two Planctomycetaceae plasmids in all 11 genomes. The results indicate that Planctomycetaceae species share diverse genomic variations and unique genomic characteristics, as well as have huge potential for human applications.}, } @article {pmid24488960, year = {2014}, author = {Hirsch, CN and Foerster, JM and Johnson, JM and Sekhon, RS and Muttoni, G and Vaillancourt, B and Peñagaricano, F and Lindquist, E and Pedraza, MA and Barry, K and de Leon, N and Kaeppler, SM and Buell, CR}, title = {Insights into the maize pan-genome and pan-transcriptome.}, journal = {The Plant cell}, volume = {26}, number = {1}, pages = {121-135}, pmid = {24488960}, issn = {1532-298X}, mesh = {Chromosomes, Plant ; *Genome, Plant ; Linkage Disequilibrium ; Polymorphism, Single Nucleotide ; Sequence Alignment ; Sequence Analysis, RNA ; *Transcriptome ; Zea mays/*genetics ; }, abstract = {Genomes at the species level are dynamic, with genes present in every individual (core) and genes in a subset of individuals (dispensable) that collectively constitute the pan-genome. Using transcriptome sequencing of seedling RNA from 503 maize (Zea mays) inbred lines to characterize the maize pan-genome, we identified 8681 representative transcript assemblies (RTAs) with 16.4% expressed in all lines and 82.7% expressed in subsets of the lines. 
Interestingly, with linkage disequilibrium mapping, 76.7% of the RTAs with at least one single nucleotide polymorphism (SNP) could be mapped to a single genetic position, distributed primarily throughout the nonpericentromeric portion of the genome. Stepwise iterative clustering of RTAs suggests, within the context of the genotypes used in this study, that the maize genome is restricted and further sampling of seedling RNA within this germplasm base will result in minimal discovery. Genome-wide association studies based on SNPs and transcript abundance in the pan-genome revealed loci associated with the timing of the juvenile-to-adult vegetative and vegetative-to-reproductive developmental transitions, two traits important for fitness and adaptation. This study revealed the dynamic nature of the maize pan-genome and demonstrated that a substantial portion of variation may lie outside the single reference genome for a species.}, } @article {pmid24460813, year = {2014}, author = {Rokicki, J and Knox, D and Dowell, RD and Copley, SD}, title = {CodaChrome: a tool for the visualization of proteome conservation across all fully sequenced bacterial genomes.}, journal = {BMC genomics}, volume = {15}, number = {}, pages = {65}, pmid = {24460813}, issn = {1471-2164}, support = {R01 GM078554/GM/NIGMS NIH HHS/United States ; }, mesh = {Algorithms ; Bacteria/*genetics ; Databases, Genetic ; Enterococcus/genetics ; *Genome, Bacterial ; Helicobacter pylori/genetics ; Internet ; Proteome/*analysis/genetics ; Proteomics/*instrumentation ; RNA, Ribosomal, 16S/genetics ; *Software ; User-Computer Interface ; }, abstract = {BACKGROUND: The relationships between bacterial genomes are complicated by rampant horizontal gene transfer, varied selection pressures, acquisition of new genes, loss of genes, and divergence of genes, even in closely related lineages. 
As more and more bacterial genomes are sequenced, organizing and interpreting the incredible amount of relational information that connects them becomes increasingly difficult.

RESULTS: We have developed CodaChrome (http://www.sourceforge.com/p/codachrome), a one-versus-all proteome comparison tool that allows the user to visually investigate the relationship between a bacterial proteome of interest and the proteomes encoded by every other bacterial genome recorded in GenBank in a massive interactive heat map. This tool has allowed us to rapidly identify the most highly conserved proteins encoded in the bacterial pan-genome, fast-clock genes useful for subtyping of bacterial species, the evolutionary history of an indel in the Sphingobium lineage, and an example of horizontal gene transfer from a member of the genus Enterococcus to a recent ancestor of Helicobacter pylori.

CONCLUSION: CodaChrome is a user-friendly and powerful tool for simultaneously visualizing relationships between thousands of proteomes.}, } @article {pmid24420766, year = {2014}, author = {Zhao, Y and Jia, X and Yang, J and Ling, Y and Zhang, Z and Yu, J and Wu, J and Xiao, J}, title = {PanGP: a tool for quickly analyzing bacterial pan-genome profile.}, journal = {Bioinformatics (Oxford, England)}, volume = {30}, number = {9}, pages = {1297-1299}, pmid = {24420766}, issn = {1367-4811}, mesh = {Algorithms ; Bacteria/*genetics ; *Genome, Bacterial ; Genomics/*methods ; High-Throughput Nucleotide Sequencing/*methods ; Multigene Family ; Software ; }, abstract = {Pan-genome analyses have shed light on the dynamics and evolution of bacterial genome from the point of population. The explosive growth of bacterial genome sequence also brought an extremely big challenge to pan-genome profile analysis. We developed a tool, named PanGP, to complete pan-genome profile analysis for large-scale strains efficiently. PanGP has integrated two sampling algorithms, totally random (TR) and distance guide (DG). The DG algorithm drew sample strain combinations on the basis of genome diversity of bacterial population. 
The performance of these two algorithms have been evaluated on four bacteria populations with strain numbers varying from 30 to 200, and the DG algorithm exhibited overwhelming advantage on accuracy and stability than the TR algorithm.}, } @article {pmid24418910, year = {2014}, author = {Thilly, WG and Gostjeva, EV and Koledova, VV and Zukerberg, LR and Chung, D and Fomina, JN and Darroudi, F and Stollar, BD}, title = {Metakaryotic stem cell nuclei use pangenomic dsRNA/DNA intermediates in genome replication and segregation.}, journal = {Organogenesis}, volume = {10}, number = {1}, pages = {44-52}, pmid = {24418910}, issn = {1555-8592}, support = {P50 CA127003/CA/NCI NIH HHS/United States ; }, mesh = {Cell Line, Tumor ; Cell Nucleus/*genetics ; *Chromosome Segregation ; DNA/*metabolism ; *DNA Replication ; Fluorescein-5-isothiocyanate/chemistry ; *Genome ; Humans ; Immunohistochemistry ; Karyotype ; RNA/*metabolism ; Stem Cells/cytology ; }, abstract = {Bell shaped nuclei of metakaryotic cells double their DNA content during and after symmetric and asymmetric amitotic fissions rather than in the separate, pre-mitotic S-phase of eukaryotic cells. A parsimonious hypothesis was tested that the two anti-parallel strands of each chromatid DNA helix were first segregated as ssDNA-containing complexes into sister nuclei then copied to recreate a dsDNA genome. Metakaryotic nuclei that were treated during amitosis with RNase A and stained with acridine orange or fluorescent antibody to ssDNA revealed large amounts of ssDNA. Without RNase treatment metakaryotic nuclei in amitosis stained strongly with an antibody complex specific to dsRNA/DNA. Images of amitotic figures co-stained with dsRNA/DNA antibody and DAPI indicated that the entire interphase dsDNA genome (B-form helices) was transformed into two dsRNA/DNA genomes (A-form helices) that were segregated in the daughter cell nuclei then retransformed into dsDNA. 
As this process segregates DNA strands of opposite polarity in sister cells it hypothetically offers a sequential switching mechanism within the diverging stem cell lineages of development.}, } @article {pmid24415958, year = {2014}, author = {Wang, D and Ning, K and Li, J and Hu, J and Han, D and Wang, H and Zeng, X and Jing, X and Zhou, Q and Su, X and Chang, X and Wang, A and Wang, W and Jia, J and Wei, L and Xin, Y and Qiao, Y and Huang, R and Chen, J and Han, B and Yoon, K and Hill, RT and Zohar, Y and Chen, F and Hu, Q and Xu, J}, title = {Nannochloropsis genomes reveal evolution of microalgal oleaginous traits.}, journal = {PLoS genetics}, volume = {10}, number = {1}, pages = {e1004094}, pmid = {24415958}, issn = {1553-7404}, mesh = {Evolution, Molecular ; Gene Transfer, Horizontal ; Genetic Variation ; *Genome ; Microalgae/*genetics ; Molecular Sequence Annotation ; *Phylogeny ; Sequence Analysis, DNA ; Species Specificity ; Transcriptome ; Triglycerides/biosynthesis/*genetics ; }, abstract = {Oleaginous microalgae are promising feedstock for biofuels, yet the genetic diversity, origin and evolution of oleaginous traits remain largely unknown. Here we present a detailed phylogenomic analysis of five oleaginous Nannochloropsis species (a total of six strains) and one time-series transcriptome dataset for triacylglycerol (TAG) synthesis on one representative strain. Despite small genome sizes, high coding potential and relative paucity of mobile elements, the genomes feature small cores of ca. 2,700 protein-coding genes and a large pan-genome of >38,000 genes. The six genomes share key oleaginous traits, such as the enrichment of selected lipid biosynthesis genes and certain glycoside hydrolase genes that potentially shift carbon flux from chrysolaminaran to TAG synthesis. 
The eleven type II diacylglycerol acyltransferase genes (DGAT-2) in every strain, each expressed during TAG synthesis, likely originated from three ancient genomes, including the secondary endosymbiosis host and the engulfed green and red algae. Horizontal gene transfers were inferred in most lipid synthesis nodes with expanded gene doses and many glycoside hydrolase genes. Thus multiple genome pooling and horizontal genetic exchange, together with selective inheritance of lipid synthesis genes and species-specific gene loss, have led to the enormous genetic apparatus for oleaginousness and the wide genomic divergence among present-day Nannochloropsis. These findings have important implications in the screening and genetic engineering of microalgae for biofuels.}, } @article {pmid24410921, year = {2014}, author = {Cooper, KK and Mandrell, RE and Louie, JW and Korlach, J and Clark, TA and Parker, CT and Huynh, S and Chain, PS and Ahmed, S and Carter, MQ}, title = {Comparative genomics of enterohemorrhagic Escherichia coli O145:H28 demonstrates a common evolutionary lineage with Escherichia coli O157:H7.}, journal = {BMC genomics}, volume = {15}, number = {}, pages = {17}, pmid = {24410921}, issn = {1471-2164}, mesh = {*Biological Evolution ; Enterohemorrhagic Escherichia coli/classification/genetics/virology ; Escherichia coli/*classification/*genetics/virology ; Escherichia coli O157/*classification/genetics/virology ; Escherichia coli Proteins/genetics/metabolism ; *Genome, Bacterial ; Genomics ; Methyltransferases/genetics/metabolism ; Phylogeny ; Prophages/metabolism ; Serotyping ; Shiga Toxin/genetics ; Shigella/classification/genetics ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: Although serotype O157:H7 is the predominant enterohemorrhagic Escherichia coli (EHEC), outbreaks of non-O157 EHEC that cause severe foodborne illness, including hemolytic uremic syndrome have increased worldwide. 
In fact, non-O157 serotypes are now estimated to cause over half of all the Shiga toxin-producing Escherichia coli (STEC) cases, and outbreaks of non-O157 EHEC infections are frequently associated with serotypes O26, O45, O103, O111, O121, and O145. Currently, there are no complete genomes for O145 in public databases.

RESULTS: We determined the complete genome sequences of two O145 strains (EcO145), one linked to a US lettuce-associated outbreak (RM13514) and one to a Belgium ice-cream-associated outbreak (RM13516). Both strains contain one chromosome and two large plasmids, with genome sizes of 5,737,294 bp for RM13514 and 5,559,008 bp for RM13516. Comparative analysis of the two EcO145 genomes revealed a large core (5,173 genes) and a considerable amount of strain-specific genes. Additionally, the two EcO145 genomes display distinct chromosomal architecture, virulence gene profile, phylogenetic origin of Stx2a prophage, and methylation profile (methylome). Comparative analysis of EcO145 genomes to other completely sequenced STEC and other E. coli and Shigella genomes revealed that, unlike any other known non-O157 EHEC strain, EcO145 ascended from a common lineage with EcO157/EcO55. This evolutionary relationship was further supported by the pangenome analysis of the 10 EHEC strains. Of the 4,192 EHEC core genes, EcO145 shares more genes with EcO157 than with any other non-O157 EHEC strains.

CONCLUSIONS: Our data provide evidence that EcO145 and EcO157 evolved from a common lineage, but ultimately each serotype evolves via a lineage-independent nature to EHEC by acquisition of the core set of EHEC virulence factors, including the genes encoding Shiga toxin and the large virulence plasmid. The large variation between the two EcO145 genomes suggests a distinctive evolutionary path between the two outbreak strains. The distinct methylome between the two EcO145 strains is likely due to the presence of a BsuBI/PstI methyltransferase gene cassette in the Stx2a prophage of the strain RM13514, suggesting a role of horizontal gene transfer-mediated epigenetic alteration in the evolution of individual EHEC strains.}, } @article {pmid24391975, year = {2013}, author = {Ferrario, C and Ricci, G and Milani, C and Lugli, GA and Ventura, M and Eraclio, G and Borgo, F and Fortina, MG}, title = {Lactococcus garvieae: where is it from? A first approach to explore the evolutionary history of this emerging pathogen.}, journal = {PloS one}, volume = {8}, number = {12}, pages = {e84796}, pmid = {24391975}, issn = {1932-6203}, mesh = {Base Sequence ; *Biological Evolution ; Cluster Analysis ; Communicable Diseases, Emerging/*microbiology ; Computational Biology ; Gene Frequency ; *Genetic Variation ; Genetics, Population ; Genome, Bacterial/*genetics ; Humans ; Lactococcus/*genetics ; Molecular Sequence Data ; Multigene Family/genetics ; Multilocus Sequence Typing ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Species Specificity ; }, abstract = {The population structure and diversity of Lactococcus garvieae, an emerging pathogen of increasing clinical significance, was determined at both gene and genome level. Selected lactococcal isolates of various origins were analyzed by a multi locus sequence typing (MLST). This gene-based analysis was compared to genomic characteristics, estimated through the complete genome sequences available in database. 
The MLST identified two branches containing the majority of the strains and two branches bearing one strain each. One strain was particularly differentiated from the other L. garvieae strains, showing a significant genetic distance. The genomic characteristics, correlated to the MLST-based phylogeny, indicated that this "separated strain" appeared first and could be considered the evolutionary intermediate between Lactococcus lactis and L. garvieae main clusters. A preliminary genome analysis of L. garvieae indicated a pan-genome constituted of about 4100 genes, which included 1341 core genes and 2760 genes belonging to the dispensable genome. A total of 1491 Clusters of Orthologous Genes (COGs) were found to be specific to the 11 L. garvieae genomes, with the genome of the "separated strain" showing the highest presence of unique genes.}, } @article {pmid24391709, year = {2013}, author = {Bouchet, A and Sakakini, N and El Atifi, M and Le Clec'h, C and Brauer, E and Moisan, A and Deman, P and Rihet, P and Le Duc, G and Pelletier, L}, title = {Early gene expression analysis in 9L orthotopic tumor-bearing rats identifies immune modulation in molecular response to synchrotron microbeam radiation therapy.}, journal = {PloS one}, volume = {8}, number = {12}, pages = {e81874}, pmid = {24391709}, issn = {1932-6203}, mesh = {Adaptive Immunity/genetics/radiation effects ; Animals ; Brain/immunology/radiation effects ; Brain Neoplasms/*genetics/immunology/*radiotherapy ; Cell Line, Tumor ; Gene Expression/radiation effects ; Gene Expression Profiling ; Genes, MHC Class II/*radiation effects ; Glioma/*genetics/immunology/*radiotherapy ; Immunity, Innate/genetics/radiation effects ; Male ; Oligonucleotide Array Sequence Analysis ; Radiotherapy/methods ; Rats ; Rats, Inbred F344 ; Signal Transduction/immunology/radiation effects ; Synchrotrons ; }, abstract = {Synchrotron Microbeam Radiation Therapy (MRT) relies on the spatial fractionation of the synchrotron photon beam into 
parallel micro-beams applying several hundred of grays in their paths. Several works have reported the therapeutic interest of the radiotherapy modality at preclinical level, but biological mechanisms responsible for the described efficacy are not fully understood to date. The aim of this study was to identify the early transcriptomic responses of normal brain and glioma tissue in rats after MRT irradiation (400Gy). The transcriptomic analysis of similarly irradiated normal brain and tumor tissues was performed 6 hours after irradiation of 9L orthotopically tumor-bearing rats. Pangenomic analysis revealed 1012 overexpressed and 497 repressed genes in the irradiated contralateral normal tissue and 344 induced and 210 repressed genes in tumor tissue. These genes were grouped in a total of 135 canonical pathways. More than half were common to both tissues with a predominance for immunity or inflammation (64 and 67% of genes for normal and tumor tissues, respectively). Several pathways involving HMGB1, toll-like receptors, C-type lectins and CD36 may serve as a link between biochemical changes triggered by irradiation and inflammation and immunological challenge. Most immune cell populations were involved: macrophages, dendritic cells, natural killer, T and B lymphocytes. Among them, our results highlighted the involvement of Th17 cell population, recently described in tumor. The immune response was regulated by a large network of mediators comprising growth factors, cytokines, lymphokines.
In conclusion, early response to MRT is mainly based on inflammation and immunity which appear therefore as major contributors to MRT efficacy.}, } @article {pmid24387194, year = {2014}, author = {Benedict, MN and Henriksen, JR and Metcalf, WW and Whitaker, RJ and Price, ND}, title = {ITEP: an integrated toolkit for exploration of microbial pan-genomes.}, journal = {BMC genomics}, volume = {15}, number = {}, pages = {8}, pmid = {24387194}, issn = {1471-2164}, mesh = {Algorithms ; Bacterial Proteins/genetics/metabolism ; Clostridium/classification/genetics ; Cluster Analysis ; Databases, Genetic ; Genome, Bacterial/*genetics ; Genomics/*methods ; Genotype ; Internet ; Metabolic Networks and Pathways/genetics ; Phenotype ; Phylogeny ; Ribosomal Proteins/genetics/metabolism ; *Software ; User-Computer Interface ; }, abstract = {BACKGROUND: Comparative genomics is a powerful approach for studying variation in physiological traits as well as the evolution and ecology of microorganisms. Recent technological advances have enabled sequencing large numbers of related genomes in a single project, requiring computational tools for their integrated analysis. In particular, accurate annotations and identification of gene presence and absence are critical for understanding and modeling the cellular physiology of newly sequenced genomes. Although many tools are available to compare the gene contents of related genomes, new tools are necessary to enable close examination and curation of protein families from large numbers of closely related organisms, to integrate curation with the analysis of gain and loss, and to generate metabolic networks linking the annotations to observed phenotypes.

RESULTS: We have developed ITEP, an Integrated Toolkit for Exploration of microbial Pan-genomes, to curate protein families, compute similarities to externally-defined domains, analyze gene gain and loss, and generate draft metabolic networks from one or more curated reference network reconstructions in groups of related microbial species among which the combination of core and variable genes constitute their "pan-genomes". The ITEP toolkit consists of: (1) a series of modular command-line scripts for identification, comparison, curation, and analysis of protein families and their distribution across many genomes; (2) a set of Python libraries for programmatic access to the same data; and (3) pre-packaged scripts to perform common analysis workflows on a collection of genomes. ITEP's capabilities include de novo protein family prediction, ortholog detection, analysis of functional domains, identification of core and variable genes and gene regions, sequence alignments and tree generation, annotation curation, and the integration of cross-genome analysis and metabolic networks for study of metabolic network evolution.

CONCLUSIONS: ITEP is a powerful, flexible toolkit for generation and curation of protein families. ITEP's modular design allows for straightforward extension as analysis methods and tools evolve. By integrating comparative genomics with the development of draft metabolic networks, ITEP harnesses the power of comparative genomics to build confidence in links between genotype and phenotype and helps disambiguate gene annotations when they are evaluated in both evolutionary and metabolic network contexts.}, } @article {pmid24386196, year = {2013}, author = {Van den Bogert, B and Boekhorst, J and Herrmann, R and Smid, EJ and Zoetendal, EG and Kleerebezem, M}, title = {Comparative genomics analysis of Streptococcus isolates from the human small intestine reveals their adaptation to a highly dynamic ecosystem.}, journal = {PloS one}, volume = {8}, number = {12}, pages = {e83418}, pmid = {24386196}, issn = {1932-6203}, mesh = {*Adaptation, Physiological ; Amino Acids/metabolism ; Carbohydrate Metabolism ; Carbon/metabolism ; Gene-Environment Interaction ; Genome, Bacterial ; Humans ; Intestine, Small/*microbiology/*physiology ; *Metagenomics ; Phylogeny ; Pyruvic Acid/metabolism ; Sequence Analysis, DNA ; Streptococcus/*classification/*genetics/isolation & purification/metabolism ; Vitamins/metabolism ; }, abstract = {The human small-intestinal microbiota is characterised by relatively large and dynamic Streptococcus populations. In this study, genome sequences of small-intestinal streptococci from S. mitis, S. bovis, and S. salivarius species-groups were determined and compared with those from 58 Streptococcus strains in public databases. The Streptococcus pangenome consists of 12,403 orthologous groups of which 574 are shared among all sequenced streptococci and are defined as the Streptococcus core genome. 
Genome mining of the small-intestinal streptococci focused on functions playing an important role in the interaction of these streptococci in the small-intestinal ecosystem, including natural competence and nutrient-transport and metabolism. Analysis of the small-intestinal Streptococcus genomes predicts a high capacity to synthesize amino acids and various vitamins as well as substantial divergence in their carbohydrate transport and metabolic capacities, which is in agreement with observed physiological differences between these Streptococcus strains. Gene-specific PCR-strategies enabled evaluation of conservation of Streptococcus populations in intestinal samples from different human individuals, revealing that the S. salivarius strains were frequently detected in the small-intestine microbiota, supporting the representative value of the genomes provided in this study. Finally, the Streptococcus genomes allow prediction of the effect of dietary substances on Streptococcus population dynamics in the human small-intestine.}, } @article {pmid25780505, year = {2014}, author = {Zehr, ES and Bayles, DO and Boatwright, WD and Tabatabai, LB and Register, KB}, title = {Non-contiguous finished genome sequence of Ornithobacterium rhinotracheale strain H06-030791.}, journal = {Standards in genomic sciences}, volume = {9}, number = {}, pages = {14}, pmid = {25780505}, issn = {1944-3277}, abstract = {The Gram-negative, pleomorphic, rod-shaped bacterium Ornithobacterium rhinotracheale is a cause of pneumonia and airsacculitis in poultry. It is a member of the family Flavobacteriaceae of the phylum "Bacteroidetes". O. rhinotracheale strain H06-030791 was isolated from the lung of a turkey in North Carolina in 2006. Its genome consists of a circular chromosome of 2,319,034 bp in length with a total of 2243 protein-coding genes and nine RNA genes. Genome sequences are available for two additional strains of O. 
rhinotracheale, isolated in 1988 and 1995, the latter described in a companion genome report in this issue of SIGS. The genome sequence of O. rhinotracheale strain H06-030791, a more contemporary isolate, will be of value in establishing core and pan-genomes for O. rhinotracheale and elucidating its evolutionary history.}, } @article {pmid24375107, year = {2014}, author = {Lu, W and Wise, MJ and Tay, CY and Windsor, HM and Marshall, BJ and Peacock, C and Perkins, T}, title = {Comparative analysis of the full genome of Helicobacter pylori isolate Sahul64 identifies genes of high divergence.}, journal = {Journal of bacteriology}, volume = {196}, number = {5}, pages = {1073--1083}, pmid = {24375107}, issn = {1098-5530}, mesh = {Adaptation, Physiological ; Bacterial Proteins/genetics/metabolism ; Chromosome Mapping ; Chromosomes, Bacterial/genetics ; Female ; Gene Expression Regulation, Bacterial/*physiology ; *Genetic Variation ; Genome, Bacterial ; Helicobacter Infections/epidemiology/microbiology ; Helicobacter pylori/*classification/*genetics ; Humans ; Native Hawaiian or Other Pacific Islander ; Phylogeny ; Species Specificity ; }, abstract = {Isolates of Helicobacter pylori can be classified phylogeographically. High genetic diversity and rapid microevolution are a hallmark of H. pylori genomes, a phenomenon that is proposed to play a functional role in persistence and colonization of diverse human populations. To provide further genomic evidence in the lineage of H. pylori and to further characterize diverse strains of this pathogen in different human populations, we report the finished genome sequence of Sahul64, an H. pylori strain isolated from an indigenous Australian. Our analysis identified genes that were highly divergent compared to the 38 publically available genomes, which include genes involved in the biosynthesis and modification of lipopolysaccharide, putative prophage genes, restriction modification components, and hypothetical genes. 
Furthermore, the virulence-associated vacA locus is a pseudogene and the cag pathogenicity island (cagPAI) is not present. However, the genome does contain a gene cluster associated with pathogenicity, including dupA. Our analysis found that with the addition of Sahul64 to the 38 genomes, the core genome content of H. pylori is reduced by approximately 14% (∼170 genes) and the pan-genome has expanded from 2,070 to 2,238 genes. We have identified three putative horizontally acquired regions, including one that is likely to have been acquired from the closely related Helicobacter cetorum prior to speciation. Our results suggest that Sahul64, with the absence of cagPAI, highly divergent cell envelope proteins, and a predicted nontransportable VacA protein, could be more highly adapted to ancient indigenous Australian people but with lower virulence potential compared to other sequenced and cagPAI-positive H. pylori strains.}, } @article {pmid24363272, year = {2014}, author = {Choudhry, H and Schödel, J and Oikonomopoulos, S and Camps, C and Grampp, S and Harris, AL and Ratcliffe, PJ and Ragoussis, J and Mole, DR}, title = {Extensive regulation of the non-coding transcriptome by hypoxia: role of HIF in releasing paused RNApol2.}, journal = {EMBO reports}, volume = {15}, number = {1}, pages = {70--76}, pmid = {24363272}, issn = {1469-3178}, support = {078333/Z/05/Z/WT_/Wellcome Trust/United Kingdom ; 088182/Z/09/Z/WT_/Wellcome Trust/United Kingdom ; A16016/CRUK_/Cancer Research UK/United Kingdom ; WT091857MA/WT_/Wellcome Trust/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Basic Helix-Loop-Helix Transcription Factors/*physiology ; Cell Hypoxia ; *Gene Expression Regulation ; Humans ; Hypoxia-Inducible Factor 1, alpha Subunit/*physiology ; MCF-7 Cells ; Promoter Regions, Genetic ; Protein Binding ; RNA Polymerase II/*metabolism ; RNA, Messenger/genetics/metabolism ; RNA, Untranslated/genetics/metabolism ; Transcription, Genetic ; *Transcriptome ; }, 
abstract = {Hypoxia is central to both ischaemic and neoplastic diseases. However, the non-coding transcriptional response to hypoxia is largely uncharacterized. We undertook integrated genomic analyses of both non-coding and coding transcripts using massively parallel sequencing and interfaced this data with pan-genomic analyses of hypoxia-inducible factor (HIF) and RNApol2 binding in hypoxic cells. These analyses revealed that all classes of RNA are profoundly regulated by hypoxia and implicated HIF as a major direct regulator of both the non-coding and coding transcriptome, acting predominantly through release of pre-bound promoter-paused RNApol2. These findings indicate that the transcriptional response to hypoxia is substantially more extensive than previously considered.}, } @article {pmid24293219, year = {2014}, author = {Yoosuf, N and Pagnier, I and Fournous, G and Robert, C and La Scola, B and Raoult, D and Colson, P}, title = {Complete genome sequence of Courdo11 virus, a member of the family Mimiviridae.}, journal = {Virus genes}, volume = {48}, number = {2}, pages = {218--223}, pmid = {24293219}, issn = {1572-994X}, mesh = {DNA, Viral/genetics ; *Genome, Viral ; Microscopy, Electron ; Mimiviridae/classification/*genetics/ultrastructure ; Phylogeny ; RNA, Transfer/genetics ; }, abstract = {Giant viruses of amoebae were discovered 10 years ago and led to the description of two new viral families: Mimiviridae and Marseilleviridae. These viruses exhibit remarkable features, including large capsids and genomes that are similar in size to those of small bacteria and their large genetic repertoires include genes that are unique among viruses. The family Mimiviridae has grown during the past decade since the discovery of its initial member, Mimivirus, and continues to expand. Here, we describe the genome of a new giant virus that infects Acanthamoeba spp., Courdo11 virus, isolated in 2010 by inoculating Acanthamoeba spp. 
with freshwater collected from a river in southeastern France. The Courdo11 virus genome is a double stranded DNA molecule composed of 1,245,674 nucleotides. The comparative analyses of Courdo11 virus with the genomes of other giant viruses showed that it belongs to lineage C of mimiviruses of amoebae, being most closely related to Megavirus chilensis and LBA 111, the first mimivirus isolated from a human. Major characteristics of the M. chilensis genome were identified in the Courdo11 virus genome, found to encode three more tRNAs. Genomic architecture comparisons mirrored previous findings that showed conservation of collinear regions in the middle part of the genome and diversity towards the extremities. Finally, fourteen ORFans were identified in the Courdo11 virus genome, suggesting that the pan-genome of mimiviruses of amoeba might reach a plateau.}, } @article {pmid24277855, year = {2013}, author = {Monk, JM and Charusanti, P and Aziz, RK and Lerman, JA and Premyodhin, N and Orth, JD and Feist, AM and Palsson, BØ}, title = {Genome-scale metabolic reconstructions of multiple Escherichia coli strains highlight strain-specific adaptations to nutritional environments.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {110}, number = {50}, pages = {20338--20343}, pmid = {24277855}, issn = {1091-6490}, support = {R01 GM057089/GM/NIGMS NIH HHS/United States ; R01 GM098105/GM/NIGMS NIH HHS/United States ; 1R01GM057089/GM/NIGMS NIH HHS/United States ; 1R01GM098105/GM/NIGMS NIH HHS/United States ; }, mesh = {Adaptation, Biological/*genetics ; Computational Biology ; Decision Trees ; Escherichia coli/*genetics/physiology ; Genes, Bacterial/genetics ; *Genetic Variation ; Genome, Bacterial/*genetics ; Metabolic Networks and Pathways/*genetics ; Models, Genetic ; Nutritional Physiological Phenomena/*genetics ; Phylogeny ; Shigella/genetics ; Species Specificity ; Systems Biology ; }, abstract = {Genome-scale models 
(GEMs) of metabolism were constructed for 55 fully sequenced Escherichia coli and Shigella strains. The GEMs enable a systems approach to characterizing the pan and core metabolic capabilities of the E. coli species. The majority of pan metabolic content was found to consist of alternate catabolic pathways for unique nutrient sources. The GEMs were then used to systematically analyze growth capabilities in more than 650 different growth-supporting environments. The results show that unique strain-specific metabolic capabilities correspond to pathotypes and environmental niches. Twelve of the GEMs were used to predict growth on six differentiating nutrients, and the predictions were found to agree with 80% of experimental outcomes. Additionally, GEMs were used to predict strain-specific auxotrophies. Twelve of the strains modeled were predicted to be auxotrophic for vitamins niacin (vitamin B3), thiamin (vitamin B1), or folate (vitamin B9). Six of the strains modeled have lost biosynthetic pathways for essential amino acids methionine, tryptophan, or leucine. Genome-scale analysis of multiple strains of a species can thus be used to define the metabolic essence of a microbial species and delineate growth differences that shed light on the adaptation process to a particular microenvironment.}, } @article {pmid24251075, year = {2013}, author = {Daifuku, T and Yoshida, T and Sako, Y}, title = {Genome variation in the hyperthermophilic archaeon Aeropyrum.}, journal = {Mobile genetic elements}, volume = {3}, number = {5}, pages = {e26833}, pmid = {24251075}, issn = {2159-2543}, abstract = {Aeropyrum spp are aerobic, heterotrophic, and hyperthermophilic marine archaea. There are two closely related Aeropyrum species, Aeropyrum camini and Aeropyrum pernix, which are isolated from geographically distinct locations. Recently, we compared their genome sequences to determine their genomic variation. 
They possess highly conserved small genomes, reflecting their close relationship. The entire genome similarity may result from their survival strategies in adapting to extreme environmental conditions. Meanwhile, synteny disruptions were observed in some regions including clustered regularly interspaced short palindromic repeats elements. Further, the largest portion of their non-orthologous genes were genes in the two proviral regions of A. pernix (Aeropyrum pernix spindle-shaped virus 1 and Aeropyrum pernix ovoid virus 1) or ORFans considered to be derived from viruses. Our data shows that genomic diversification of Aeropyrum spp may be substantially induced by viruses. This suggests that Aeropyrum spp may have a large pan-genome that can be extended by viruses, while each of the species shares a highly conserved small genome specializing for extreme environments.}, } @article {pmid24250794, year = {2013}, author = {Acuña, LG and Cárdenas, JP and Covarrubias, PC and Haristoy, JJ and Flores, R and Nuñez, H and Riadi, G and Shmaryahu, A and Valdés, J and Dopson, M and Rawlings, DE and Banfield, JF and Holmes, DS and Quatrini, R}, title = {Architecture and gene repertoire of the flexible genome of the extreme acidophile Acidithiobacillus caldus.}, journal = {PloS one}, volume = {8}, number = {11}, pages = {e78237}, pmid = {24250794}, issn = {1932-6203}, mesh = {Acidithiobacillus/*genetics/metabolism ; Bacterial Proteins/genetics ; Computational Biology ; Conjugation, Genetic ; DNA Transposable Elements/*genetics ; *Genome, Bacterial ; Plasmids/genetics ; Sulfur/*metabolism ; }, abstract = {BACKGROUND: Acidithiobacillus caldus is a sulfur oxidizing extreme acidophile and the only known mesothermophile within the Acidithiobacillales. As such, it is one of the preferred microbes for mineral bioprocessing at moderately high temperatures. In this study, we explore the genomic diversity of A. 
caldus strains using a combination of bioinformatic and experimental techniques, thus contributing first insights into the elucidation of the species pangenome.

PRINCIPAL FINDINGS: Comparative sequence analysis of A. caldus ATCC 51756 and SM-1 indicate that, despite sharing a conserved and highly syntenic genomic core, both strains have unique gene complements encompassing nearly 20% of their respective genomes. The differential gene complement of each strain is distributed between the chromosomal compartment, one megaplasmid and a variable number of smaller plasmids, and is directly associated to a diverse pool of mobile genetic elements (MGE). These include integrative conjugative and mobilizable elements, genomic islands and insertion sequences. Some of the accessory functions associated to these MGEs have been linked previously to the flexible gene pool in microorganisms inhabiting completely different econiches. Yet, others had not been unambiguously mapped to the flexible gene pool prior to this report and clearly reflect strain-specific adaption to local environmental conditions.

SIGNIFICANCE: For many years, and because of DNA instability at low pH and recurrent failure to genetically transform acidophilic bacteria, gene transfer in acidic environments was considered negligible. Findings presented herein imply that a more or less conserved pool of actively excising MGEs occurs in the A. caldus population and point to a greater frequency of gene exchange in this econiche than previously recognized. Also, the data suggest that these elements endow the species with capacities to withstand the diverse abiotic and biotic stresses of natural environments, in particular those associated with its extreme econiche.}, } @article {pmid24231161, year = {2014}, author = {Orlandini, V and Maida, I and Fondi, M and Perrin, E and Papaleo, MC and Bosi, E and de Pascale, D and Tutino, ML and Michaud, L and Lo Giudice, A and Fani, R}, title = {Genomic analysis of three sponge-associated Arthrobacter Antarctic strains, inhibiting the growth of Burkholderia cepacia complex bacteria by synthesizing volatile organic compounds.}, journal = {Microbiological research}, volume = {169}, number = {7--8}, pages = {593--601}, doi = {10.1016/j.micres.2013.09.018}, pmid = {24231161}, issn = {1618-0623}, mesh = {Animals ; Arthrobacter/*genetics/isolation & purification/*metabolism ; Burkholderia cepacia complex/*drug effects/*growth & development ; Genome, Bacterial ; Genomics ; Phylogeny ; Porifera/*microbiology ; Volatile Organic Compounds/metabolism/*pharmacology ; }, abstract = {In this work we analyzed the ability of three Arthrobacter strains (namely TB23, TB26 and CAL618), which were isolated from the Antarctic sponges Haliclonissa verrucosa and Lyssodendrix nobilis, to specifically inhibit the growth of a panel of 40 Burkholderia cepacia complex strains, representing a major cause of infections in patients that are affected by Cystic Fibrosis. 
The inhibitory activity was due to the synthesis of antimicrobial compounds, very likely volatile organic compounds (VOCs), and was partially dependent on the growth media that were used for Antarctic strains growth. The phylogenetic analysis revealed that two of them (i.e. CAL 618 and TB23) were very close and very likely belonged to the same Arthrobacter species, whereas the strain TB26 was placed in a distant branch. The genome of the strains TB26 and CAL618 was also sequenced and compared with that of the strain TB23. The analysis revealed that TB23 and CAL618 shared more genomic properties (GC content, genome size, number of genes) than with TB26. Since the three strains exhibited very similar inhibition pattern vs Bcc strains, it is quite possible that genes involved in the biosynthesis of antimicrobial compounds very likely belong to the core genome.}, } @article {pmid24215651, year = {2013}, author = {Liu, G and Zhang, W and Lu, C}, title = {Comparative genomics analysis of Streptococcus agalactiae reveals that isolates from cultured tilapia in China are closely related to the human strain A909.}, journal = {BMC genomics}, volume = {14}, number = {}, pages = {775}, pmid = {24215651}, issn = {1471-2164}, mesh = {Animals ; Cattle ; China ; Fish Diseases/*genetics/microbiology ; Genome, Bacterial ; Humans ; Phylogeny ; Streptococcal Infections/*genetics/pathology ; Streptococcus agalactiae/*genetics/isolation & purification/pathogenicity ; Tilapia/genetics/*microbiology ; }, abstract = {BACKGROUND: Streptococcus agalactiae, also referred to as Group B Streptococcus (GBS), is a frequent resident of the rectovaginal tract in humans, and a major cause of neonatal infection. In addition, S. agalactiae is a known fish pathogen, which compromises food safety and represents a zoonotic hazard. The complete genome sequence of the piscine S. 
agalactiae isolate GD201008-001 was compared with 14 other piscine, human and bovine strains to explore their virulence determinants, evolutionary relationships and the genetic basis of host tropism in S. agalactiae.

RESULTS: The pan-genome of S. agalactiae is open and its size increases with the addition of newly sequenced genomes. The core genes shared by all isolates account for 50 ~ 70% of any single genome. The Chinese piscine isolates GD201008-001 and ZQ0910 are phylogenetically distinct from the Latin American piscine isolates SA20-06 and STIR-CD-17, but are closely related to the human strain A909, in the context of the clustered regularly interspaced short palindromic repeats (CRISPRs), prophage, virulence-associated genes and phylogenetic relationships. We identified a unique 10 kb gene locus in Chinese piscine strains.

CONCLUSIONS: Isolates from cultured tilapia in China have a close genomic relationship with the human strain A909. Our findings provide insight into the pathogenesis and host-associated genome content of piscine S. agalactiae isolated in China.}, } @article {pmid24205326, year = {2013}, author = {Linville, JL and Rodriguez, M and Land, M and Syed, MH and Engle, NL and Tschaplinski, TJ and Mielenz, JR and Cox, CD}, title = {Industrial robustness: understanding the mechanism of tolerance for the Populus hydrolysate-tolerant mutant strain of Clostridium thermocellum.}, journal = {PloS one}, volume = {8}, number = {10}, pages = {e78829}, pmid = {24205326}, issn = {1932-6203}, mesh = {Carbon Dioxide/metabolism ; Cellulose/metabolism ; Clostridium thermocellum/drug effects/*genetics/growth & development/*physiology ; Fermentation/drug effects ; Gene Expression Regulation, Bacterial/drug effects ; Genomics ; Hydrogen/metabolism ; Hydrolysis ; *Mutation ; Populus/*metabolism ; Sequence Analysis, DNA ; Species Specificity ; }, abstract = {BACKGROUND: An industrially robust microorganism that can efficiently degrade and convert lignocellulosic biomass into ethanol and next-generation fuels is required to economically produce future sustainable liquid transportation fuels. The anaerobic, thermophilic, cellulolytic bacterium Clostridium thermocellum is a candidate microorganism for such conversions but it, like many bacteria, is sensitive to potential toxic inhibitors developed in the liquid hydrolysate produced during biomass processing. Microbial processes leading to tolerance of these inhibitory compounds found in the pretreated biomass hydrolysate are likely complex and involve multiple genes.

In this study, we developed a 17.5% v/v Populus hydrolysate tolerant mutant strain of C. thermocellum by directed evolution. The genome of the wild type strain, six intermediate population samples and seven single colony isolates were sequenced to elucidate the mechanism of tolerance. Analysis of the 224 putative mutations revealed 73 high confidence mutations. A longitudinal analysis of the intermediate population samples, a pan-genomic analysis of the isolates, and a hotspot analysis revealed 24 core genes common to all seven isolates and 8 hotspots. Genetic mutations were matched with the observed phenotype through comparison of RNA expression levels during fermentation by the wild type strain and mutant isolate 6 in various concentrations of Populus hydrolysate (0%, 10%, and 17.5% v/v).

CONCLUSION/SIGNIFICANCE: The findings suggest that there are multiple mutations responsible for the Populus hydrolysate tolerant phenotype resulting in several simultaneous mechanisms of action, including increases in cellular repair, and altered energy metabolism. To date, this study provides the most comprehensive elucidation of the mechanism of tolerance to a pretreated biomass hydrolysate by C. thermocellum. These findings make important contributions to the development of industrially robust strains of consolidated bioprocessing microorganisms.}, } @article {pmid24204279, year = {2013}, author = {Danchin, EG and Arguel, MJ and Campan-Fournier, A and Perfus-Barbeoch, L and Magliano, M and Rosso, MN and Da Rocha, M and Da Silva, C and Nottet, N and Labadie, K and Guy, J and Artiguenave, F and Abad, P}, title = {Identification of novel target genes for safer and more specific control of root-knot nematodes from a pan-genome mining.}, journal = {PLoS pathogens}, volume = {9}, number = {10}, pages = {e1003745}, pmid = {24204279}, issn = {1553-7374}, mesh = {Animals ; Genes, Helminth/*physiology ; Genome-Wide Association Study ; Humans ; Plant Diseases/*parasitology ; RNA Interference ; Tylenchoidea/*genetics/metabolism ; }, abstract = {Root-knot nematodes are globally the most aggressive and damaging plant-parasitic nematodes. Chemical nematicides have so far constituted the most efficient control measures against these agricultural pests. Because of their toxicity for the environment and danger for human health, these nematicides have now been banned from use. Consequently, new and more specific control means, safe for the environment and human health, are urgently needed to avoid worldwide proliferation of these devastating plant-parasites. 
Mining the genomes of root-knot nematodes through an evolutionary and comparative genomics approach, we identified and analyzed 15,952 nematode genes conserved in genomes of plant-damaging species but absent from non target genomes of chordates, plants, annelids, insect pollinators and mollusks. Functional annotation of the corresponding proteins revealed a relative abundance of putative transcription factors in this parasite-specific set compared to whole proteomes of root-knot nematodes. This may point to important and specific regulators of genes involved in parasitism. Because these nematodes are known to secrete effector proteins in planta, essential for parasitism, we searched and identified 993 such effector-like proteins absent from non-target species. Aiming at identifying novel targets for the development of future control methods, we biologically tested the effect of inactivation of the corresponding genes through RNA interference. A total of 15 novel effector-like proteins and one putative transcription factor compatible with the design of siRNAs were present as non-redundant genes and had transcriptional support in the model root-knot nematode Meloidogyne incognita. Infestation assays with siRNA-treated M. incognita on tomato plants showed significant and reproducible reduction of the infestation for 12 of the 16 tested genes compared to control nematodes. 
These 12 novel genes, showing efficient reduction of parasitism when silenced, constitute promising targets for the development of more specific and safer control means.}, } @article {pmid24203705, year = {2014}, author = {Huang, K and Brady, A and Mahurkar, A and White, O and Gevers, D and Huttenhower, C and Segata, N}, title = {MetaRef: a pan-genomic database for comparative and community microbial genomics.}, journal = {Nucleic acids research}, volume = {42}, number = {Database issue}, pages = {D617--D624}, pmid = {24203705}, issn = {1362-4962}, support = {P30 DK043351/DK/NIDDK NIH HHS/United States ; U54HG004969/HG/NHGRI NIH HHS/United States ; R01HG005969/HG/NHGRI NIH HHS/United States ; HHSN272200900018C/AI/NIAID NIH HHS/United States ; }, mesh = {Archaea/classification ; Bacteria/classification ; *Databases, Genetic ; *Genome, Archaeal ; *Genome, Bacterial ; Genomics ; Internet ; Metagenomics ; Microbiota ; Molecular Sequence Annotation ; Multigene Family ; Phylogeny ; }, abstract = {Microbial genome sequencing is one of the longest-standing areas of biological database development, but high-throughput, low-cost technologies have increased its throughput to an unprecedented number of new genomes per year. Several thousand microbial genomes are now available, necessitating new approaches to organizing information on gene function, phylogeny and microbial taxonomy to facilitate downstream biological interpretation. MetaRef, available at http://metaref.org, is a novel online resource systematically cataloguing a comprehensive pan-genome of all microbial clades with sequenced isolates. It organizes currently available draft and finished bacterial and archaeal genomes into quality-controlled clades, reports all core and pan gene families at multiple levels in the resulting taxonomy, and it annotates families' conservation, phylogeny and consensus functional information. 
MetaRef also provides a comprehensive non-redundant reference gene catalogue for metagenomic studies, including the abundance and prevalence of all gene families in the >700 shotgun metagenomic samples of the Human Microbiome Project. This constitutes a systematic mapping of clade-specific microbial functions within the healthy human microbiome across multiple body sites and can be used as reference for identifying potential functional biomarkers in disease-associate microbiomes. MetaRef provides all information both as an online browsable resource and as downloadable sequences and tabular data files that can be used for subsequent offline studies.}, } @article {pmid24157882, year = {2013}, author = {Pagnier, I and Reteno, DG and Saadi, H and Boughalmi, M and Gaia, M and Slimani, M and Ngounga, T and Bekliz, M and Colson, P and Raoult, D and La Scola, B}, title = {A decade of improvements in Mimiviridae and Marseilleviridae isolation from amoeba.}, journal = {Intervirology}, volume = {56}, number = {6}, pages = {354--363}, doi = {10.1159/000354556}, pmid = {24157882}, issn = {1423-0100}, mesh = {Amoeba/*virology ; DNA Viruses/*classification/*isolation & purification ; High-Throughput Screening Assays/methods ; Specimen Handling/methods ; Virology/*methods ; }, abstract = {Since the isolation of the first giant virus, the Mimivirus, by T.J. Rowbotham in a cooling tower in Bradford, UK, and after its characterisation by our group in 2003, we have continued to develop novel strategies to isolate additional strains. By first focusing on cooling towers using our original time-consuming procedure, we were able to isolate a new lineage of giant virus called Marseillevirus and a new Mimivirus strain called Mamavirus. 
In the following years, we have accumulated the world's largest unique collection of giant viruses by improving the use of antibiotic combinations to avoid bacterial contamination of amoeba, developing strategies of preliminary screening of samples by molecular methods, and using a high-throughput isolation method developed by our group. Based on the inoculation of nearly 7,000 samples, our collection currently contains 43 strains of Mimiviridae (14 in lineage A, 6 in lineage B, and 23 in lineage C) and 17 strains of Marseilleviridae isolated from various environments, including 3 of human origin. This study details the procedures used to build this collection and paves the way for the high-throughput isolation of new isolates to improve the record of giant virus distribution in the environment and the determination of their pangenome.}, } @article {pmid24141123, year = {2014}, author = {Alvarez, L and Bricio, C and Blesa, A and Hidalgo, A and Berenguer, J}, title = {Transferable denitrification capability of Thermus thermophilus.}, journal = {Applied and environmental microbiology}, volume = {80}, number = {1}, pages = {19--28}, pmid = {24141123}, issn = {1098-5336}, mesh = {Conjugation, Genetic ; *Denitrification ; Electron Transport ; *Gene Transfer, Horizontal ; Metabolic Networks and Pathways/*genetics ; Nitrates/metabolism ; Nitric Oxide/metabolism ; Nitrites/metabolism ; Nitrogen Oxides/metabolism ; *Plasmids ; Thermus thermophilus/*genetics/*metabolism ; }, abstract = {Laboratory-adapted strains of Thermus spp. have been shown to require oxygen for growth, including the model strains T. thermophilus HB27 and HB8. In contrast, many isolates of this species that have not been intensively grown under laboratory conditions keep the capability to grow anaerobically with one or more electron acceptors. The use of nitrogen oxides, especially nitrate, as electron acceptors is one of the most widespread capabilities among these facultative strains. 
In this process, nitrate is reduced to nitrite by a reductase (Nar) that also functions as electron transporter toward nitrite and nitric oxide reductases when nitrate is scarce, effectively replacing respiratory complex III. In many T. thermophilus denitrificant strains, most electrons for Nar are provided by a new class of NADH dehydrogenase (Nrc). The ability to reduce nitrite to NO and subsequently to N2O by the corresponding Nir and Nor reductases is also strain specific. The genes encoding the capabilities for nitrate (nar) and nitrite (nir and nor) respiration are easily transferred between T. thermophilus strains by natural competence or by a conjugation-like process and may be easily lost upon continuous growth under aerobic conditions. The reason for this instability is apparently related to the fact that these metabolic capabilities are encoded in gene cluster islands, which are delimited by insertion sequences and integrated within highly variable regions of easily transferable extrachromosomal elements. Together with the chromosomal genes, these plasmid-associated genetic islands constitute the extended pangenome of T. 
thermophilus that provides this species with an enhanced capability to adapt to changing environments.}, } @article {pmid24141120, year = {2014}, author = {Kim, EB and Marco, ML}, title = {Nonclinical and clinical Enterococcus faecium strains, but not Enterococcus faecalis strains, have distinct structural and functional genomic features.}, journal = {Applied and environmental microbiology}, volume = {80}, number = {1}, pages = {154--165}, pmid = {24141120}, issn = {1098-5336}, mesh = {Adaptation, Biological ; Animals ; Enterococcus faecalis/*genetics/isolation & purification ; Enterococcus faecium/*genetics/isolation & purification ; *Environmental Microbiology ; Genes, Bacterial ; *Genome, Bacterial ; Genomics ; Gram-Positive Bacterial Infections/*microbiology ; Interspersed Repetitive Sequences ; Synteny ; }, abstract = {Certain strains of Enterococcus faecium and Enterococcus faecalis contribute beneficially to animal health and food production, while others are associated with nosocomial infections. To determine whether there are structural and functional genomic features that are distinct between nonclinical (NC) and clinical (CL) strains of those species, we analyzed the genomes of 31 E. faecium and 38 E. faecalis strains. Hierarchical clustering of 7,017 orthologs found in the E. faecium pangenome revealed that NC strains clustered into two clades and are distinct from CL strains. NC E. faecium genomes are significantly smaller than CL genomes, and this difference was partly explained by significantly fewer mobile genetic elements (ME), virulence factors (VF), and antibiotic resistance (AR) genes. E. faecium ortholog comparisons identified 68 and 153 genes that are enriched for NC and CL strains, respectively. Proximity analysis showed that CL-enriched loci, and not NC-enriched loci, are more frequently colocalized on the genome with ME. In CL genomes, AR genes are also colocalized with ME, and VF are more frequently associated with CL-enriched loci. 
Genes in 23 functional groups are also differentially enriched between NC and CL E. faecium genomes. In contrast, differences were not observed between NC and CL E. faecalis genomes despite their having larger genomes than E. faecium. Our findings show that unlike E. faecalis, NC and CL E. faecium strains are equipped with distinct structural and functional genomic features indicative of adaptation to different environments.}, } @article {pmid24130670, year = {2013}, author = {Dam, B and Dam, S and Blom, J and Liesack, W}, title = {Genome analysis coupled with physiological studies reveals a diverse nitrogen metabolism in Methylocystis sp. strain SC2.}, journal = {PloS one}, volume = {8}, number = {10}, pages = {e74767}, pmid = {24130670}, issn = {1932-6203}, mesh = {Base Sequence ; Genome, Bacterial/genetics ; Methylocystaceae/genetics/*metabolism ; Molecular Sequence Data ; Nitrogen/*metabolism ; Phylogeny ; }, abstract = {BACKGROUND: Methylocystis sp. strain SC2 can adapt to a wide range of methane concentrations. This is due to the presence of two isozymes of particulate methane monooxygenase exhibiting different methane oxidation kinetics. To gain insight into the underlying genetic information, its genome was sequenced and found to comprise a 3.77 Mb chromosome and two large plasmids.

PRINCIPAL FINDINGS: We report important features of the strain SC2 genome. Its sequence is compared with those of seven other methanotroph genomes, comprising members of the Alphaproteobacteria, Gammaproteobacteria, and Verrucomicrobia. While the pan-genome of all eight methanotroph genomes totals 19,358 CDS, only 154 CDS are shared. The number of core genes increased with phylogenetic relatedness: 328 CDS for proteobacterial methanotrophs and 1,853 CDS for the three alphaproteobacterial Methylocystaceae members, Methylocystis sp. strain SC2 and strain Rockwell, and Methylosinus trichosporium OB3b. The comparative study was coupled with physiological experiments to verify that strain SC2 has diverse nitrogen metabolism capabilities. In correspondence to a full complement of 34 genes involved in N2 fixation, strain SC2 was found to grow with atmospheric N2 as the sole nitrogen source, preferably at low oxygen concentrations. Denitrification-mediated accumulation of 0.7 nmol (30)N2/hr/mg dry weight of cells under anoxic conditions was detected by tracer analysis. N2 production is related to the activities of plasmid-borne nitric oxide and nitrous oxide reductases.

CONCLUSIONS/PERSPECTIVES: Presence of a complete denitrification pathway in strain SC2, including the plasmid-encoded nosRZDFYX operon, is unique among known methanotrophs. However, the exact ecophysiological role of this pathway still needs to be elucidated. Detoxification of toxic nitrogen compounds and energy conservation under oxygen-limiting conditions are among the possible roles. Relevant features that may stimulate further research are, for example, absence of CRISPR/Cas systems in strain SC2, high number of iron acquisition systems in strain OB3b, and large number of transposases in strain Rockwell.}, } @article {pmid24112474, year = {2013}, author = {Mongodin, EF and Casjens, SR and Bruno, JF and Xu, Y and Drabek, EF and Riley, DR and Cantarel, BL and Pagan, PE and Hernandez, YA and Vargas, LC and Dunn, JJ and Schutzer, SE and Fraser, CM and Qiu, WG and Luft, BJ}, title = {Inter- and intra-specific pan-genomes of Borrelia burgdorferi sensu lato: genome stability and adaptive radiation.}, journal = {BMC genomics}, volume = {14}, number = {}, pages = {693}, pmid = {24112474}, issn = {1471-2164}, support = {SC1 AI107955/AI/NIAID NIH HHS/United States ; AI47553/AI/NIAID NIH HHS/United States ; N01-AI30071/AI/NIAID NIH HHS/United States ; MD007599/MD/NIMHD NIH HHS/United States ; AI107955/AI/NIAID NIH HHS/United States ; AI49003/AI/NIAID NIH HHS/United States ; AI37256/AI/NIAID NIH HHS/United States ; AI074825/AI/NIAID NIH HHS/United States ; }, mesh = {Borrelia burgdorferi Group/*genetics ; Chromosomes, Bacterial/genetics ; Evolution, Molecular ; *Genome, Bacterial ; *Genomic Instability ; Humans ; Lyme Disease/microbiology ; Models, Genetic ; Open Reading Frames ; Phylogeny ; Phylogeography ; Plasmids/genetics ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; Species Specificity ; }, abstract = {BACKGROUND: Lyme disease is caused by spirochete bacteria from the Borrelia burgdorferi sensu lato (B. burgdorferi s.l.) species complex. 
To reconstruct the evolution of B. burgdorferi s.l. and identify the genomic basis of its human virulence, we compared the genomes of 23 B. burgdorferi s.l. isolates from Europe and the United States, including B. burgdorferi sensu stricto (B. burgdorferi s.s., 14 isolates), B. afzelii (2), B. garinii (2), B. "bavariensis" (1), B. spielmanii (1), B. valaisiana (1), B. bissettii (1), and B. "finlandensis" (1).

RESULTS: Robust B. burgdorferi s.s. and B. burgdorferi s.l. phylogenies were obtained using genome-wide single-nucleotide polymorphisms, despite recombination. Phylogeny-based pan-genome analysis showed that the rate of gene acquisition was higher between species than within species, suggesting adaptive speciation. Strong positive natural selection drives the sequence evolution of lipoproteins, including chromosomally-encoded genes 0102 and 0404, cp26-encoded ospC and b08, and lp54-encoded dbpA, a07, a22, a33, a53, a65. Computer simulations predicted rapid adaptive radiation of genomic groups as population size increases.

CONCLUSIONS: Intra- and inter-specific pan-genome sizes of B. burgdorferi s.l. expand linearly with phylogenetic diversity. Yet gene-acquisition rates in B. burgdorferi s.l. are among the lowest in bacterial pathogens, resulting in high genome stability and few lineage-specific genes. Genome adaptation of B. burgdorferi s.l. is driven predominantly by copy-number and sequence variations of lipoprotein genes. New genomic groups are likely to emerge if the current trend of B. burgdorferi s.l. population expansion continues.}, } @article {pmid24096415, year = {2013}, author = {Contreras-Moreira, B and Vinuesa, P}, title = {GET_HOMOLOGUES, a versatile software package for scalable and robust microbial pangenome analysis.}, journal = {Applied and environmental microbiology}, volume = {79}, number = {24}, pages = {7696-7701}, pmid = {24096415}, issn = {1098-5336}, mesh = {Computational Biology/*methods ; Genomics/*methods ; *Software ; Streptococcus/genetics ; }, abstract = {GET_HOMOLOGUES is an open-source software package that builds on popular orthology-calling approaches making highly customizable and detailed pangenome analyses of microorganisms accessible to nonbioinformaticians. It can cluster homologous gene families using the bidirectional best-hit, COGtriangles, or OrthoMCL clustering algorithms. Clustering stringency can be adjusted by scanning the domain composition of proteins using the HMMER3 package, by imposing desired pairwise alignment coverage cutoffs, or by selecting only syntenic genes. The resulting homologous gene families can be made even more robust by computing consensus clusters from those generated by any combination of the clustering algorithms and filtering criteria. Auxiliary scripts make the construction, interrogation, and graphical display of core genome and pangenome sets easy to perform. 
Exponential and binomial mixture models can be fitted to the data to estimate theoretical core genome and pangenome sizes, and high-quality graphics can be generated. Furthermore, pangenome trees can be easily computed and basic comparative genomics performed to identify lineage-specific genes or gene family expansions. The software is designed to take advantage of modern multiprocessor personal computers as well as computer clusters to parallelize time-consuming tasks. To demonstrate some of these capabilities, we survey a set of 50 Streptococcus genomes annotated in the Orthologous Matrix (OMA) browser as a benchmark case. The package can be downloaded at http://www.eead.csic.es/compbio/soft/gethoms.php and http://maya.ccg.unam.mx/soft/gethoms.php.}, } @article {pmid24069314, year = {2013}, author = {Liu, WY and Wong, CF and Chung, KM and Jiang, JW and Leung, FC}, title = {Comparative genome analysis of Enterobacter cloacae.}, journal = {PloS one}, volume = {8}, number = {9}, pages = {e74487}, pmid = {24069314}, issn = {1932-6203}, mesh = {Antibiosis/genetics ; Bacterial Secretion Systems ; Chitinases/genetics ; Computational Biology/methods ; Enterobacter cloacae/classification/*genetics/physiology ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics ; Pantoea/genetics ; Phylogeny ; Sequence Analysis, DNA ; Virulence Factors/genetics ; }, abstract = {The Enterobacter cloacae species includes an extremely diverse group of bacteria that are associated with plants, soil and humans. Publication of the complete genome sequence of the plant growth-promoting endophytic E. cloacae subsp. cloacae ENHKU01 provided an opportunity to perform the first comparative genome analysis between strains of this dynamic species. Examination of the pan-genome of E. cloacae showed that the conserved core genome retains the general physiological and survival genes of the species, while genomic factors in plasmids and variable regions determine the virulence of the human pathogenic E. 
cloacae strain; additionally, the diversity of fimbriae contributes to variation in colonization and host determination of different E. cloacae strains. Comparative genome analysis further illustrated that E. cloacae strains possess multiple mechanisms for antagonistic action against other microorganisms, which involve the production of siderophores and various antimicrobial compounds, such as bacteriocins, chitinases and antibiotic resistance proteins. The presence of Type VI secretion systems is expected to provide further fitness advantages for E. cloacae in microbial competition, thus allowing it to survive in different environments. Competition assays were performed to support our observations in genomic analysis, where E. cloacae subsp. cloacae ENHKU01 demonstrated antagonistic activities against a wide range of plant pathogenic fungal and bacterial species.}, } @article {pmid24065654, year = {2013}, author = {Santos, AR and Barbosa, E and Fiaux, K and Zurita-Turk, M and Chaitankar, V and Kamapantula, B and Abdelzaher, A and Ghosh, P and Tiwari, S and Barve, N and Jain, N and Barh, D and Silva, A and Miyoshi, A and Azevedo, V}, title = {PANNOTATOR: an automated tool for annotation of pan-genomes.}, journal = {Genetics and molecular research : GMR}, volume = {12}, number = {3}, pages = {2982-2989}, doi = {10.4238/2013.August.16.2}, pmid = {24065654}, issn = {1676-5680}, mesh = {Databases, Genetic ; Genome, Bacterial/*genetics ; High-Throughput Nucleotide Sequencing ; *Molecular Sequence Annotation ; *Software ; }, abstract = {Due to next-generation sequence technologies, sequencing of bacterial genomes is no longer one of the main bottlenecks in bacterial research and the number of new genomes deposited in public databases continues to increase at an accelerating rate. Among these new genomes, several belong to the same species and were generated for pan-genomic studies. 
A pan-genomic study allows investigation of strain phenotypic differences based on genotypic differences. Along with a need for good assembly quality, it is also fundamental to guarantee good functional genome annotation of the different strains. In order to ensure quality and standards for functional genome annotation among different strains, we developed and made available PANNOTATOR (http://bnet.egr.vcu.edu/iioab/agenote.php), a web-based automated pipeline for the annotation of closely related and well-suited genomes for pan-genome studies, aiming at reducing the manual work to generate reports and corrections of various genome strains. PANNOTATOR achieved 98 and 76% correctness for gene name and function, respectively, as a result of an annotation transfer, with a similarity cut-off of 70%, compared with a gold standard annotation for the same species. These results surpassed the RAST and BASys software by 41 and 21% and 66 and 17% for gene name and function annotation, respectively, when there were reliable genome annotations of closely related species. 
PANNOTATOR provides fast and reliable pan-genome annotation; thereby allowing us to maintain the research focus on the main genotype differences between strains.}, } @article {pmid24034426, year = {2013}, author = {Koren, S and Harhay, GP and Smith, TP and Bono, JL and Harhay, DM and Mcvey, SD and Radune, D and Bergman, NH and Phillippy, AM}, title = {Reducing assembly complexity of microbial genomes with single-molecule sequencing.}, journal = {Genome biology}, volume = {14}, number = {9}, pages = {R101}, pmid = {24034426}, issn = {1474-760X}, mesh = {Algorithms ; Base Sequence ; Contig Mapping/*methods/statistics & numerical data ; Escherichia coli/genetics ; Francisella tularensis/genetics ; Genome Size ; *Genome, Archaeal ; *Genome, Bacterial ; Genomic Library ; Mannheimia haemolytica/genetics ; Molecular Sequence Data ; Salmonella enterica/genetics ; Sequence Analysis, DNA/economics/*methods/statistics & numerical data ; *Software ; }, abstract = {BACKGROUND: The short reads output by first- and second-generation DNA sequencing instruments cannot completely reconstruct microbial chromosomes. Therefore, most genomes have been left unfinished due to the significant resources required to manually close gaps in draft assemblies. Third-generation, single-molecule sequencing addresses this problem by greatly increasing sequencing read length, which simplifies the assembly problem.

RESULTS: To measure the benefit of single-molecule sequencing on microbial genome assembly, we sequenced and assembled the genomes of six bacteria and analyzed the repeat complexity of 2,267 complete bacteria and archaea. Our results indicate that the majority of known bacterial and archaeal genomes can be assembled without gaps, at finished-grade quality, using a single PacBio RS sequencing library. These single-library assemblies are also more accurate than typical short-read assemblies and hybrid assemblies of short and long reads.

CONCLUSIONS: Automated assembly of long, single-molecule sequencing data reduces the cost of microbial finishing to $1,000 for most genomes, and future advances in this technology are expected to drive the cost lower. This is expected to increase the number of completed genomes, improve the quality of microbial genome databases, and enable high-fidelity, population-scale studies of pan-genomes and chromosomal organization.}, } @article {pmid23950996, year = {2013}, author = {Humbert, JF and Barbe, V and Latifi, A and Gugger, M and Calteau, A and Coursin, T and Lajus, A and Castelli, V and Oztas, S and Samson, G and Longin, C and Medigue, C and de Marsac, NT}, title = {A tribute to disorder in the genome of the bloom-forming freshwater cyanobacterium Microcystis aeruginosa.}, journal = {PloS one}, volume = {8}, number = {8}, pages = {e70747}, pmid = {23950996}, issn = {1932-6203}, mesh = {Base Composition ; Computational Biology/methods ; Ecosystem ; Fresh Water/*microbiology ; Gene Order ; Gene Transfer, Horizontal ; Genome Size ; *Genome, Bacterial ; Microcystis/classification/*genetics/metabolism ; Multigene Family ; Phylogeny ; Repetitive Sequences, Nucleic Acid ; *Water Microbiology ; }, abstract = {Microcystis aeruginosa is one of the most common bloom-forming cyanobacteria in freshwater ecosystems worldwide. This species produces numerous secondary metabolites, including microcystins, which are harmful to human health. We sequenced the genomes of ten strains of M. aeruginosa in order to explore the genomic basis of their ability to occupy varied environments and proliferate. Our findings show that M. aeruginosa genomes are characterized by having a large open pangenome, and that each genome contains similar proportions of core and flexible genes. By comparing the GC content of each gene to the mean value of the whole genome, we estimated that in each genome, around 11% of the genes seem to result from recent horizontal gene transfer events. 
Moreover, several large gene clusters resulting from HGT (up to 19 kb) have been found, illustrating the ability of this species to integrate such large DNA molecules. It appeared also that all M. aeruginosa displays a large genomic plasticity, which is characterized by a high proportion of repeat sequences and by low synteny values between the strains. Finally, we identified 13 secondary metabolite gene clusters, including three new putative clusters. When comparing the genomes of Microcystis and Prochlorococcus, one of the dominant picocyanobacteria living in marine ecosystems, our findings show that they are characterized by having almost opposite evolutionary strategies, both of which have led to ecological success in their respective environments.}, } @article {pmid23940551, year = {2013}, author = {Zhou, Y and Bu, L and Guo, M and Zhou, C and Wang, Y and Chen, L and Liu, J}, title = {Comprehensive genomic characterization of Campylobacter genus reveals some underlying mechanisms for its genomic diversification.}, journal = {PloS one}, volume = {8}, number = {8}, pages = {e70241}, pmid = {23940551}, issn = {1932-6203}, mesh = {Campylobacter jejuni/*genetics ; Gene Transfer, Horizontal/genetics ; Genetic Variation/genetics ; Genome, Bacterial/*genetics ; Phylogeny ; }, abstract = {Campylobacter species are phenotypically diverse in many aspects including host habitats and pathogenicities, which demands comprehensive characterization of the entire Campylobacter genus to study their underlying genetic diversification. Up to now, 34 Campylobacter strains have been sequenced and published in public databases, providing good opportunity to systemically analyze their genomic diversities. In this study, we first conducted genomic characterization, which includes genome-wide alignments, pan-genome analysis, and phylogenetic identification, to depict the genetic diversity of Campylobacter genus. 
Afterward, we improved the tetranucleotide usage pattern-based naïve Bayesian classifier to identify the abnormal composition fragments (ACFs, fragments with significantly different tetranucleotide frequency profiles from its genomic tetranucleotide frequency profiles) including horizontal gene transfers (HGTs) to explore the mechanisms for the genetic diversity of this organism. Finally, we analyzed the HGTs transferred via bacteriophage transductions. To our knowledge, this study is the first to use single nucleotide polymorphism information to construct a reliable microevolution phylogeny of 21 Campylobacter jejuni strains. Combined with the phylogeny of all the collected Campylobacter species based on genome-wide core gene information, comprehensive phylogenetic inference of all 34 Campylobacter organisms was determined. It was found that C. jejuni harbors a high fraction of ACFs possibly through intraspecies recombination, whereas other Campylobacter members possess numerous ACFs possibly via intragenus recombination. Furthermore, some Campylobacter strains have undergone significant ancient viral integration during their evolution process. The improved method is a powerful tool for bacterial genomic analysis. 
Moreover, the findings would provide useful information for future research on Campylobacter genus.}, } @article {pmid23919351, year = {2013}, author = {Paul, JH and Hollander, D and Coble, P and Daly, KL and Murasko, S and English, D and Basso, J and Delaney, J and McDaniel, L and Kovach, CW}, title = {Toxicity and mutagenicity of Gulf of Mexico waters during and after the Deepwater Horizon oil spill.}, journal = {Environmental science \& technology}, volume = {47}, number = {17}, pages = {9651-9659}, doi = {10.1021/es401761h}, pmid = {23919351}, issn = {1520-5851}, mesh = {Aliivibrio fischeri/drug effects ; Biodiversity ; Dinoflagellida/drug effects ; Environmental Monitoring ; Escherichia coli/drug effects/genetics ; Geologic Sediments/*chemistry ; Gulf of Mexico ; Hydrocarbons/*toxicity ; Petroleum Pollution/*analysis ; Phytoplankton/drug effects/physiology ; Spectrometry, Fluorescence ; Water Pollutants, Chemical/*toxicity ; }, abstract = {The Deepwater Horizon oil spill is unparalleled among environmental hydrocarbon releases, because of the tremendous volume of oil, the additional contamination by dispersant, and the oceanic depth at which this release occurred. Here, we present data on general toxicity and mutagenicity of upper water column waters and, to a lesser degree, sediment porewater of the Northeastern Gulf of Mexico (NEGOM) and west Florida shelf (WFS) at the time of the Deepwater Horizon oil spill in 2010 and thereafter. During a research cruise in August 2010, analysis of water collected in the NEGOM indicated that samples of 3 of 14 (21%) stations were toxic to bacteria based on the Microtox assay, 4 of 13 (34%) were toxic to phytoplankton via the QwikLite assay, and 6 of 14 (43%) showed DNA damaging activity using the λ-Microscreen Prophage induction assay. The Microtox and Microscreen assays indicated that the degree of toxicity was correlated to total petroleum hydrocarbon concentration. 
Long-term monitoring of stations on the NEGOM and the WFS was undertaken by 8 and 6 cruises to these areas, respectively. Microtox toxicity was nearly totally absent by December 2010 in the Northeastern Gulf of Mexico (3 of 8 cruises with one positive station). In contrast, QwikLite toxicity assay yielded positives at each cruise, often at multiple stations or depths, indicating the greater sensitivity of the QwikLite assay to environmental factors. The Microscreen mutagenicity assays indicated that certain water column samples overlying the WFS were mutagenic at least 1.5 years after capping the Macondo well. Similarly, sediment porewater samples taken from 1000, 1200, and 1400 m from the slope off the WFS in June 2011 were also highly genotoxic. Our observations are consistent with a portion of the dispersed oil from the Macondo well area advecting to the southeast and upwelling onto the WFS, although other explanations exist. Organisms in contact with these waters might experience DNA damage that could lead to mutation and heritable alterations to the community pangenome. Such mutagenic interactions might not become apparent in higher organisms for years.}, } @article {pmid23912978, year = {2013}, author = {Aherfi, S and Pagnier, I and Fournous, G and Raoult, D and La Scola, B and Colson, P}, title = {Complete genome sequence of Cannes 8 virus, a new member of the proposed family "Marseilleviridae".}, journal = {Virus genes}, volume = {47}, number = {3}, pages = {550-555}, pmid = {23912978}, issn = {1572-994X}, mesh = {Base Sequence ; France ; Fresh Water/*virology ; *Genome, Viral ; Humans ; Molecular Sequence Data ; Open Reading Frames ; Phylogeny ; RNA Virus Infections/virology ; RNA Viruses/*classification/genetics/*isolation & purification ; }, abstract = {Marseillevirus is a giant virus that was isolated in 2007 by culturing water collected from a cooling tower in Paris, France, on Acanthamoeba polyphaga. 
Since then, five other marseilleviruses have been detected in environmental or human samples. The genomes of two of the six marseilleviruses have been described in detail. We describe herein the genome of Cannes 8 virus, a new member of the proposed family "Marseilleviridae." Cannes 8 virus was isolated from water collected from a cooling tower in Cannes in southeastern France. Its genome is a circular double-stranded DNA molecule with 374,041 base pairs, larger than the Marseillevirus and Lausannevirus genomes. This genome harbors 484 open reading frames predicted to encode proteins with sizes ranging from 50 to 1,537 amino acids, among which 380 (79%) and 272 (56%) are bona fide orthologs of Marseillevirus and Lausannevirus proteins, respectively. In addition, 407 and 336 predicted proteins have significant hits against Marseillevirus and Lausannevirus proteins, respectively, and 294 proteins are shared by all three marseilleviruses. The Cannes 8 virus genome has a high level of collinearity (for 96% of orthologs) with the Marseillevirus genome. About two-thirds of the Cannes 8 virus gene repertoire is composed of family ORFans. 
The description and annotation of the genomes of new marseilleviruses that will undoubtedly be recovered from environmental or clinical samples will be helpful to increase our knowledge of the pan-genome of the family "Marseilleviridae."}, } @article {pmid23894338, year = {2013}, author = {Smokvina, T and Wels, M and Polka, J and Chervaux, C and Brisse, S and Boekhorst, J and van Hylckama Vlieg, JE and Siezen, RJ}, title = {Lactobacillus paracasei comparative genomics: towards species pan-genome definition and exploitation of diversity.}, journal = {PloS one}, volume = {8}, number = {7}, pages = {e68731}, pmid = {23894338}, issn = {1932-6203}, mesh = {Carbohydrate Metabolism/genetics ; Cluster Analysis ; Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; Fatty Acids/metabolism ; Gene Order ; Gene Transfer, Horizontal ; *Genetic Variation ; *Genome, Bacterial ; *Genomics ; Lactobacillus/classification/*genetics/metabolism ; Molecular Sequence Annotation ; Phylogeny ; Plasmids/genetics ; }, abstract = {Lactobacillus paracasei is a member of the normal human and animal gut microbiota and is used extensively in the food industry in starter cultures for dairy products or as probiotics. With the development of low-cost, high-throughput sequencing techniques it has become feasible to sequence many different strains of one species and to determine its "pan-genome". We have sequenced the genomes of 34 different L. paracasei strains, and performed a comparative genomics analysis. We analysed genome synteny and content, focussing on the pan-genome, core genome and variable genome. Each genome was shown to contain around 2800-3100 protein-coding genes, and comparative analysis identified over 4200 ortholog groups that comprise the pan-genome of this species, of which about 1800 ortholog groups make up the conserved core. 
Several factors previously associated with host-microbe interactions such as pili, cell-envelope proteinase, hydrolases p40 and p75 or the capacity to produce short branched-chain fatty acids (bkd operon) are part of the L. paracasei core genome present in all analysed strains. The variome consists mainly of hypothetical proteins, phages, plasmids, transposon/conjugative elements, and known functions such as sugar metabolism, cell-surface proteins, transporters, CRISPR-associated proteins, and EPS biosynthesis proteins. An enormous variety and variability of sugar utilization gene cassettes were identified, with each strain harbouring between 25 and 53 cassettes, reflecting the high adaptability of L. paracasei to different niches. A phylogenomic tree was constructed based on total genome contents, and together with an analysis of horizontal gene transfer events we conclude that evolution of these L. paracasei strains is complex and not always related to niche adaptation. The results of this genome content comparison were used, together with high-throughput growth experiments on various carbohydrates, to perform gene-trait matching analysis, in order to link the distribution pattern of a specific phenotype to the presence/absence of specific sets of genes.}, } @article {pmid23868134, year = {2013}, author = {Zubair, S and de Villiers, EP and Younan, M and Andersson, G and Tettelin, H and Riley, DR and Jores, J and Bongcam-Rudloff, E and Bishop, RP}, title = {Genome Sequences of Two Pathogenic Streptococcus agalactiae Isolates from the One-Humped Camel Camelus dromedarius.}, journal = {Genome announcements}, volume = {1}, number = {4}, pages = {}, pmid = {23868134}, issn = {2169-8287}, abstract = {Streptococcus agalactiae causes a range of clinical syndromes in camels (Camelus dromedarius). We report the genome sequences of two S. agalactiae isolates that induce abscesses in Kenyan camels. These genomes provide novel data on the composition of the S. 
agalactiae "pan genome" and reveal the presence of multiple genomic islands.}, } @article {pmid23865481, year = {2013}, author = {Virag, P and Fischer-Fodor, E and Perde-Schrepler, M and Brie, I and Tatomir, C and Balacescu, L and Berindan-Neagoe, I and Victor, B and Balacescu, O}, title = {Oxaliplatin induces different cellular and molecular chemoresistance patterns in colorectal cancer cell lines of identical origins.}, journal = {BMC genomics}, volume = {14}, number = {}, pages = {480}, pmid = {23865481}, issn = {1471-2164}, mesh = {Antineoplastic Agents/*pharmacology ; Cell Line, Tumor ; Cell Movement/drug effects/genetics ; Colorectal Neoplasms/*pathology ; Cross-Linking Reagents/*pharmacology ; *Drug Resistance, Neoplasm/genetics ; Humans ; Neoplasm Invasiveness ; Organoplatinum Compounds/*pharmacology ; Oxaliplatin ; Phenotype ; Reproducibility of Results ; Time Factors ; Transcriptome/drug effects ; }, abstract = {BACKGROUND: Cancer cells frequently adopt cellular and molecular alterations and acquire resistance to cytostatic drugs. Chemotherapy with oxaliplatin is among the leading treatments for colorectal cancer with a response rate of 50%, inducing intrastrand cross-links on the DNA. Despite this drug's efficiency, resistance develops in nearly all metastatic patients. Chemoresistance being of crucial importance for the drug's clinical efficiency, this study aimed to contribute to the identification and description of some cellular and molecular alterations induced by prolonged oxaliplatin therapy. Resistance to oxaliplatin was induced in Colo320 (Colo320R) and HT-29 (HT-29R) colorectal adenocarcinoma cell lines by exposing the cells to increasing concentrations of the drug. Alterations in morphology, cytotoxicity, DNA cross-links formation and gene expression profiles were assessed in the parental and resistant variants with microscopy, MTT, alkaline comet and pangenomic microarray assays, respectively.

RESULTS: Morphology analysis revealed epithelial-to-mesenchymal transition in the resistant vs parental cells suggesting alterations of the cells' adhesion complexes, through which they acquire increased invasiveness and adherence. Cytotoxicity measurements demonstrated resistance to oxaliplatin in both cell lines; Colo320 being more sensitive than HT-29 to this drug (P < 0.001). The treatment with oxaliplatin caused major DNA cross-links in both parental cell lines; in Colo320R small amounts of DNA cross-links were still detectable, while in HT-29R they were not. We identified 441 differentially expressed genes in Colo320R and 613 in HT-29R as compared to their parental counterparts (at least 1.5-fold up- or down-regulation, p < 0.05). More disrupted functions and pathways were detected in HT-29R cell line than in Colo320R, involving genes responsible for apoptosis inhibition, cellular proliferation and epithelial-to-mesenchymal transition. Several upstream regulators were detected as activated in HT-29R cell line, but not in Colo320R.

CONCLUSIONS: Our findings revealed a more resistant phenotype in HT-29R as compared to Colo320R and different cellular and molecular chemoresistance patterns induced by prolonged treatment with oxaliplatin in cell lines with identical origins (colorectal adenocarcinomas).}, } @article {pmid23820394, year = {2013}, author = {Katz, LS and Petkau, A and Beaulaurier, J and Tyler, S and Antonova, ES and Turnsek, MA and Guo, Y and Wang, S and Paxinos, EE and Orata, F and Gladney, LM and Stroika, S and Folster, JP and Rowe, L and Freeman, MM and Knox, N and Frace, M and Boncy, J and Graham, M and Hammer, BK and Boucher, Y and Bashir, A and Hanage, WP and Van Domselaar, G and Tarr, CL}, title = {Evolutionary dynamics of Vibrio cholerae O1 following a single-source introduction to Haiti.}, journal = {mBio}, volume = {4}, number = {4}, pages = {}, pmid = {23820394}, issn = {2150-7511}, support = {T32 AI007610/AI/NIAID NIH HHS/United States ; U54 GM088558/GM/NIGMS NIH HHS/United States ; U54GM088558/GM/NIGMS NIH HHS/United States ; }, mesh = {Cholera/*epidemiology/*microbiology ; DNA, Bacterial/chemistry/genetics ; *Epidemics ; *Evolution, Molecular ; Gene Order ; *Genome, Bacterial ; Haiti/epidemiology ; Humans ; Mutation ; Sequence Analysis, DNA ; Vibrio cholerae O1/classification/*genetics/*isolation & purification ; }, abstract = {UNLABELLED: Prior to the epidemic that emerged in Haiti in October of 2010, cholera had not been documented in this country. After its introduction, a strain of Vibrio cholerae O1 spread rapidly throughout Haiti, where it caused over 600,000 cases of disease and >7,500 deaths in the first two years of the epidemic. We applied whole-genome sequencing to a temporal series of V. cholerae isolates from Haiti to gain insight into the mode and tempo of evolution in this isolated population of V. cholerae O1. 
Phylogenetic and Bayesian analyses supported the hypothesis that all isolates in the sample set diverged from a common ancestor within a time frame that is consistent with epidemiological observations. A pangenome analysis showed nearly homogeneous genomic content, with no evidence of gene acquisition among Haiti isolates. Nine nearly closed genomes assembled from continuous-long-read data showed evidence of genome rearrangements and supported the observation of no gene acquisition among isolates. Thus, intrinsic mutational processes can account for virtually all of the observed genetic polymorphism, with no demonstrable contribution from horizontal gene transfer (HGT). Consistent with this, the 12 Haiti isolates tested by laboratory HGT assays were severely impaired for transformation, although unlike previously characterized noncompetent V. cholerae isolates, each expressed hapR and possessed a functional quorum-sensing system. Continued monitoring of V. cholerae in Haiti will illuminate the processes influencing the origin and fate of genome variants, which will facilitate interpretation of genetic variation in future epidemics.

IMPORTANCE: Vibrio cholerae is the cause of substantial morbidity and mortality worldwide, with over three million cases of disease each year. An understanding of the mode and rate of evolutionary change is critical for proper interpretation of genome sequence data and attribution of outbreak sources. The Haiti epidemic provides an unprecedented opportunity to study an isolated, single-source outbreak of Vibrio cholerae O1 over an established time frame. By using multiple approaches to assay genetic variation, we found no evidence that the Haiti strain has acquired any genes by horizontal gene transfer, an observation that led us to discover that it is also poorly transformable. We have found no evidence that environmental strains have played a role in the evolution of the outbreak strain.}, } @article {pmid23805886, year = {2013}, author = {Song, L and Wang, W and Conrads, G and Rheinberg, A and Sztajer, H and Reck, M and Wagner-Döbler, I and Zeng, AP}, title = {Genetic variability of mutans streptococci revealed by wide whole-genome sequencing.}, journal = {BMC genomics}, volume = {14}, number = {}, pages = {430}, pmid = {23805886}, issn = {1471-2164}, mesh = {Amino Acid Sequence ; Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/chemistry/genetics/metabolism ; Chromosome Aberrations ; Drug Resistance, Bacterial/genetics ; Evolution, Molecular ; *Genetic Variation ; Genome, Bacterial/genetics ; *Genomics ; Metabolic Networks and Pathways/genetics ; Molecular Sequence Annotation ; Molecular Sequence Data ; Oxidative Stress/genetics ; *Sequence Analysis ; Streptococcus mutans/drug effects/*genetics/metabolism ; }, abstract = {BACKGROUND: Mutans streptococci are a group of bacteria significantly contributing to tooth decay. Their genetic variability is however still not well understood.

RESULTS: Genomes of 6 clinical S. mutans isolates of different origins, one isolate of S. sobrinus (DSM 20742) and one isolate of S. ratti (DSM 20564) were sequenced and comparatively analyzed. Genome alignment revealed a mosaic-like structure of genome arrangement. Genes related to pathogenicity are found to have high variations among the strains, whereas genes for oxidative stress resistance are well conserved, indicating the importance of this trait in the dental biofilm community. Analysis of genome-scale metabolic networks revealed significant differences in 42 pathways. A striking dissimilarity is the unique presence of two lactate oxidases in S. sobrinus DSM 20742, probably indicating an unusual capability of this strain in producing H2O2 and expanding its ecological niche. In addition, lactate oxidases may form with other enzymes a novel energetic pathway in S. sobrinus DSM 20742 that can remedy its deficiency in citrate utilization pathway. Using 67 S. mutans genomes currently available including the strains sequenced in this study, we estimated the theoretical core genome size of S. mutans, and performed modeling of S. mutans pan-genome by applying different fitting models. An "open" pan-genome was inferred.

CONCLUSIONS: The comparative genome analyses revealed diversities in the mutans streptococci group, especially with respect to the virulence related genes and metabolic pathways. The results are helpful for better understanding the evolution and adaptive mechanisms of these oral pathogen microorganisms and for combating them.}, } @article {pmid23760826, year = {2012}, author = {Urbanczyk, H and Furukawa, T and Yamamoto, Y and Dunlap, PV}, title = {Natural replacement of vertically inherited lux-rib genes of Photobacterium aquimaris by horizontally acquired homologues.}, journal = {Environmental microbiology reports}, volume = {4}, number = {4}, pages = {412-416}, doi = {10.1111/j.1758-2229.2012.00355.x}, pmid = {23760826}, issn = {1758-2229}, abstract = {We report here the first instance of a complete replacement of vertically inherited luminescence genes by horizontally acquired homologues. Different strains of Photobacterium aquimaris contain homologues of the lux-rib genes that have a different evolutionary history. Strain BS1 from the Black Sea contains a vertically inherited lux-rib operon, which presumably arose in the ancestor of this species, whereas the type strain NBRC 104633(T) , from Sagami Bay, lacks the vertically inherited lux-rib operon and instead carries a complete and functional lux-rib operon acquired horizontally from a bacterium related to Photobacterium mandapamensis. The results indicate that the horizontal acquisition of the lux genes expanded the pan-genome of P. 
aquimaris, but it did not influence the phylogenetic divergence of this species.}, } @article {pmid23760476, year = {2013}, author = {Read, BA and Kegel, J and Klute, MJ and Kuo, A and Lefebvre, SC and Maumus, F and Mayer, C and Miller, J and Monier, A and Salamov, A and Young, J and Aguilar, M and Claverie, JM and Frickenhaus, S and Gonzalez, K and Herman, EK and Lin, YC and Napier, J and Ogata, H and Sarno, AF and Shmutz, J and Schroeder, D and de Vargas, C and Verret, F and von Dassow, P and Valentin, K and Van de Peer, Y and Wheeler, G and Dacks, JB and Delwiche, CF and Dyhrman, ST and Glöckner, G and John, U and Richards, T and Worden, AZ and Zhang, X and Grigoriev, IV}, title = {Pan genome of the phytoplankton Emiliania underpins its global distribution.}, journal = {Nature}, volume = {499}, number = {7457}, pages = {209-213}, pmid = {23760476}, issn = {1476-4687}, mesh = {Calcification, Physiologic ; Calcium/metabolism ; Carbonic Anhydrases/genetics/metabolism ; Ecosystem ; Genome/*genetics ; Haptophyta/classification/*genetics/*isolation & purification/metabolism ; Oceans and Seas ; Phylogeny ; Phytoplankton/*genetics ; Proteome/genetics ; Seawater ; }, abstract = {Coccolithophores have influenced the global climate for over 200 million years. These marine phytoplankton can account for 20 per cent of total carbon fixation in some systems. They form blooms that can occupy hundreds of thousands of square kilometres and are distinguished by their elegantly sculpted calcium carbonate exoskeletons (coccoliths), rendering them visible from space. Although coccolithophores export carbon in the form of organic matter and calcite to the sea floor, they also release CO2 in the calcification process. Hence, they have a complex influence on the carbon cycle, driving either CO2 production or uptake, sequestration and export to the deep ocean. 
Here we report the first haptophyte reference genome, from the coccolithophore Emiliania huxleyi strain CCMP1516, and sequences from 13 additional isolates. Our analyses reveal a pan genome (core genes plus genes distributed variably between strains) probably supported by an atypical complement of repetitive sequence in the genome. Comparisons across strains demonstrate that E. huxleyi, which has long been considered a single species, harbours extensive genome variability reflected in different metabolic repertoires. Genome variability within this species complex seems to underpin its capacity both to thrive in habitats ranging from the equator to the subarctic and to form large-scale episodic blooms under a wide variety of environmental conditions.}, } @article {pmid23749121, year = {2013}, author = {Li, X and Hu, Y and Gong, J and Zhang, L and Wang, G}, title = {Comparative genome characterization of Achromobacter members reveals potential genetic determinants facilitating the adaptation to a pathogenic lifestyle.}, journal = {Applied microbiology and biotechnology}, volume = {97}, number = {14}, pages = {6413-6425}, doi = {10.1007/s00253-013-5018-3}, pmid = {23749121}, issn = {1432-0614}, mesh = {Achromobacter/classification/*genetics/isolation & purification/pathogenicity ; *Adaptation, Physiological ; Biological Evolution ; Cystic Fibrosis/microbiology ; Genome Size ; *Genome, Bacterial ; Humans ; Phylogeny ; Soil Microbiology ; Species Specificity ; Virulence Factors/genetics ; }, abstract = {Members of the Achromobacter genus are Gram-negative bacteria including both environmental and clinical isolates, which are increasingly recovered from patients with cystic fibrosis (CF) as emerging pathogens. To better understand the features of the genus and its potential pathogenic mechanisms, six available Achromobacter genomes were compared in this study. 
The results revealed that: (1) Achromobacter had a pan-genome size of 10,750 genes with 3,398 core genes and a similar global classification of protein functions; (2) the Achromobacter genomes underwent a relatively low recombination that introduced nearly twice nucleotide substitutions less than the point mutation in genome evolution; (3) phylogenomic analysis based on 436 conserved proteins and average nucleotide identity both indicated that the Achromobacter genus had the closest relationship to the human/animal pathogen Bordetella rather than to Alcaligenes. The entire group of Achromobacter clustered with Bordetella in phylogeny, strongly suggesting a common origin, which therefore highlighted the potentially pathogenic nature of Achromobacter from the phylogenetic perspective, and (4) the CF clinical isolate possessed markedly unique genomic features discriminated from the environmental isolate and was equipped with numerous factors that facilitate its adaptation to a pathogenic lifestyle, such as a type III secretion system, a "polysaccharide island" (36.0 kb) of capsular/cellulose synthesis, adhesion-related proteins, alcaligin biogenesis, and several putative toxins. This study provided the first comprehensive genomic comparative analysis for Achromobacter, revealed information to better understand this far less-known genus on the genomic scale, and, importantly, identified potential virulence factors of the Achromobacter pathogen.}, } @article {pmid23724777, year = {2013}, author = {Grim, CJ and Kotewicz, ML and Power, KA and Gopinath, G and Franco, AA and Jarvis, KG and Yan, QQ and Jackson, SA and Sathyamoorthy, V and Hu, L and Pagotto, F and Iversen, C and Lehner, A and Stephan, R and Fanning, S and Tall, BD}, title = {Pan-genome analysis of the emerging foodborne pathogen Cronobacter spp. 
suggests a species-level bidirectional divergence driven by niche adaptation.}, journal = {BMC genomics}, volume = {14}, number = {}, pages = {366}, pmid = {23724777}, issn = {1471-2164}, mesh = {Adaptation, Physiological/*genetics ; Cronobacter/*genetics/*physiology ; Evolution, Molecular ; *Food Microbiology ; Genome, Bacterial/genetics ; *Genomics ; Molecular Sequence Data ; Phylogeny ; Species Specificity ; }, abstract = {BACKGROUND: Members of the genus Cronobacter are causes of rare but severe illness in neonates and preterm infants following the ingestion of contaminated infant formula. Seven species have been described and two of the species genomes were subsequently published. In this study, we performed comparative genomics on eight strains of Cronobacter, including six that we sequenced (representing six of the seven species) and two previously published, closed genomes.

RESULTS: We identified and characterized the features associated with the core and pan genome of the genus Cronobacter in an attempt to understand the evolution of these bacteria and the genetic content of each species. We identified 84 genomic regions that are present in two or more Cronobacter genomes, along with 45 unique genomic regions. Many potentially horizontally transferred genes, such as lysogenic prophages, were also identified. Most notable among these were several type six secretion system gene clusters, transposons that carried tellurium, copper and/or silver resistance genes, and a novel integrative conjugative element.

CONCLUSIONS: Cronobacter have diverged into two clusters, one consisting of C. dublinensis and C. muytjensii (Cdub-Cmuy) and the other comprised of C. sakazakii, C. malonaticus, C. universalis, and C. turicensis, (Csak-Cmal-Cuni-Ctur) from the most recent common ancestral species. While several genetic determinants for plant-association and human virulence could be found in the core genome of Cronobacter, the four Cdub-Cmuy clade genomes contained several accessory genomic regions important for survival in a plant-associated environmental niche, while the Csak-Cmal-Cuni-Ctur clade genomes harbored numerous virulence-related genetic traits.}, } @article {pmid23723974, year = {2013}, author = {Lefeuvre, P and Cellier, G and Remenant, B and Chiroleu, F and Prior, P}, title = {Constraints on genome dynamics revealed from gene distribution among the Ralstonia solanacearum species.}, journal = {PloS one}, volume = {8}, number = {5}, pages = {e63155}, pmid = {23723974}, issn = {1932-6203}, mesh = {Base Sequence ; DNA Probes/metabolism ; Gene Transfer, Horizontal/*genetics ; Genes, Bacterial/*genetics ; Phylogeny ; Ralstonia solanacearum/*genetics ; }, abstract = {Because it is suspected that gene content may partly explain host adaptation and ecology of pathogenic bacteria, it is important to study factors affecting genome composition and its evolution. While recent genomic advances have revealed extremely large pan-genomes for some bacterial species, it remains difficult to predict to what extent gene pool is accessible within or transferable between populations. As genomes bear imprints of the history of the organisms, gene distribution pattern analyses should provide insights into the forces and factors at play in the shaping and maintaining of bacterial genomes. In this study, we revisited the data obtained from a previous CGH microarrays analysis in order to assess the genomic plasticity of the R. solanacearum species complex. 
Gene distribution analyses demonstrated the remarkably dispersed genome of R. solanacearum with more than half of the genes being accessory. From the reconstruction of the ancestral genomes compositions, we were able to infer the number of gene gain and loss events along the phylogeny. Analyses of gene movement patterns reveal that factors associated with gene function, genomic localization and ecology delineate gene flow patterns. While the chromosome displayed lower rates of movement, the megaplasmid was clearly associated with hot-spots of gene gain and loss. Gene function was also confirmed to be an essential factor in gene gain and loss dynamics with significant differences in movement patterns between different COG categories. Finally, analyses of gene distribution highlighted possible highways of horizontal gene transfer. Due to sampling and design bias, we can only speculate on factors at play in this gene movement dynamic. Further studies examining precise conditions that favor gene transfer would provide invaluable insights in the fate of bacteria, species delineation and the emergence of successful pathogens.}, } @article {pmid23698251, year = {2013}, author = {Deberne, M and Levy, A and Mondini, M and Dessen, P and Vivet, S and Supiramaniam, A and Vozenin, MC and Deutsch, E}, title = {The combination of the antiviral agent cidofovir and anti-EGFR antibody cetuximab exerts an antiproliferative effect on HPV-positive cervical cancer cell lines' in-vitro and in-vivo xenografts.}, journal = {Anti-cancer drugs}, volume = {24}, number = {6}, pages = {599-608}, doi = {10.1097/CAD.0b013e3283612a71}, pmid = {23698251}, issn = {1473-5741}, mesh = {Animals ; Antibodies, Monoclonal, Humanized/administration & dosage/*pharmacology ; Antineoplastic Agents/administration & dosage/*pharmacology ; Antiviral Agents/administration & dosage/*pharmacology ; Cell Cycle/drug effects ; Cell Line, Tumor ; Cetuximab ; Cidofovir ; Cytosine/administration & dosage/*analogs & 
derivatives/pharmacology ; Dose-Response Relationship, Drug ; Drug Synergism ; Female ; Gene Expression Regulation, Neoplastic ; Humans ; Mice ; Mice, Nude ; Organophosphonates/administration & dosage/*pharmacology ; Papillomaviridae/*drug effects ; Uterine Cervical Neoplasms/*drug therapy ; }, abstract = {Cervical carcinoma remains a leading cause of female mortality worldwide and over 90% of these tumors contain the human papillomavirus (HPV) genome. Cross-talk between the epidermal growth factor receptor and HPV has been reported and is implicated in tumor progression. The combination of the antiviral compound cidofovir (Cd) with the monoclonal antibody antiepidermal growth factor receptor cetuximab (Cx) was evaluated. HPV-positive (HeLa and Me180) and HPV-negative (C33A, H460 and A549) human cancer cell lines were incubated with Cd (1-10 μg/ml) and/or Cx (10 or 50 μg/ml). The antitumor effect of the combination was assessed in vitro using a clonogenic survival assay, cell cycle analysis, and phospho-H2AX level. Tumor growth delay was assayed in vivo using xenograft models. A pan-genomic analysis was carried out to identify the genes expressed differentially in untreated HeLa HPV-positive cells versus cells treated by the Cd-Cx combination. The Cd-Cx combination inhibited proliferation in all the cell lines tested. The association of Cd and Cx exerted a synergistic activity on HPV-positive but not on HPV-negative cell lines. The combination delayed tumor growth of HPV-positive tumors in vivo; however, no efficacy was reported on HPV-negative C33A xenografts nor on cell lines treated by single-drug therapy. The combination induced an S-phase arrest associated with an enhanced level of the double-strand break in Me180 and HeLa cell lines. Gene profiling assays showed a significant differential modulation of genes in HeLa cell lines treated with the combination involving the EGR-1 transcription factor. 
The current data support a synergistic antiproliferative action of the Cd-Cx combination on HPV-related cervical tumors.}, } @article {pmid23675451, year = {2013}, author = {Guy, L and Jernberg, C and Arvén Norling, J and Ivarsson, S and Hedenström, I and Melefors, Ö and Liljedahl, U and Engstrand, L and Andersson, SG}, title = {Adaptive mutations and replacements of virulence traits in the Escherichia coli O104:H4 outbreak population.}, journal = {PloS one}, volume = {8}, number = {5}, pages = {e63027}, pmid = {23675451}, issn = {1932-6203}, mesh = {Adaptation, Biological/*genetics ; Adhesins, Bacterial/genetics ; Anti-Bacterial Agents/pharmacology ; Disease Outbreaks ; Drug Resistance, Bacterial/genetics ; Enterohemorrhagic Escherichia coli/classification/drug effects/*genetics/*pathogenicity ; Escherichia coli Infections/epidemiology ; Gene Order ; Genes, Bacterial ; Genome, Bacterial ; Hemolytic-Uremic Syndrome/epidemiology ; Humans ; *Mutation ; Phylogeny ; Plasmids/genetics ; Polymorphism, Single Nucleotide ; Prophages/genetics ; *Quantitative Trait Loci ; Recombination, Genetic ; Shiga Toxin/genetics ; Tellurium/pharmacology ; Tunisia ; Virulence/*genetics ; }, abstract = {The sequencing of highly virulent Escherichia coli O104:H4 strains isolated during the outbreak of bloody diarrhea and hemolytic uremic syndrome in Europe in 2011 revealed a genome that contained a Shiga toxin encoding prophage and a plasmid encoding enteroaggregative fimbriae. Here, we present the draft genome sequence of a strain isolated in Sweden from a patient who had travelled to Tunisia in 2010 (E112/10) and was found to differ from the outbreak strains by only 38 SNPs in non-repetitive regions, 16 of which were mapped to the branch to the outbreak strain. We identified putatively adaptive mutations in genes for transporters, outer surface proteins and enzymes involved in the metabolism of carbohydrates. 
A comparative analysis with other historical strains showed that E112/10 contained Shiga toxin prophage genes of the same genotype as the outbreak strain, while these genes have been replaced by a different genotype in two otherwise very closely related strains isolated in the Republic of Georgia in 2009. We also present the genome sequences of two enteroaggregative E. coli strains affiliated with phylogroup A (C43/90 and C48/93) that contain the agg genes for the AAF/I-type fimbriae characteristic of the outbreak population. Interestingly, C43/90 also contained a tet/mer antibiotic resistance island that was nearly identical in sequence to that of the outbreak strain, while the corresponding island in the Georgian strains was most similar to E. coli strains of other serotypes. We conclude that the pan-genome of the outbreak population is shared with strains of the A phylogroup and that its evolutionary history is littered with gene replacement events, including most recently independent acquisitions of antibiotic resistance genes in the outbreak strains and its nearest neighbors. 
The results are summarized in a refined evolutionary model for the emergence of the O104:H4 outbreak population.}, } @article {pmid23653265, year = {2013}, author = {Khan, A and Asif, H and Studholme, DJ and Khan, IA and Azim, MK}, title = {Genome characterization of a novel Burkholderia cepacia complex genomovar isolated from dieback affected mango orchards.}, journal = {World journal of microbiology & biotechnology}, volume = {29}, number = {11}, pages = {2033-2044}, pmid = {23653265}, issn = {1573-0972}, mesh = {Bacterial Outer Membrane Proteins/chemistry ; Bacterial Proteins/chemistry ; Bacterial Typing Techniques ; Burkholderia cepacia complex/*genetics/*isolation & purification/metabolism ; Evolution, Molecular ; *Genome, Bacterial ; Genomics/methods ; Gram-Negative Bacteria/genetics/isolation & purification ; Haemophilus influenzae/genetics/metabolism ; High-Throughput Nucleotide Sequencing ; Mangifera/*microbiology ; Models, Molecular ; Molecular Sequence Annotation ; Phylogeny ; Plant Diseases/*microbiology ; Protein Structure, Secondary ; Sequence Alignment ; Serine Endopeptidases/chemistry ; Virulence Factors/genetics/metabolism ; }, abstract = {We characterized the genome of the antibiotic resistant, caseinolytic and non-hemolytic Burkholderia sp. strain TJI49, isolated from mango trees (Mangifera indica L.) with dieback disease. This isolate produced severe disease symptoms on the indicator plants. Next generation DNA sequencing and short-read assembly generated the 60X deep 7,631,934 nucleotide draft genome of Burkholderia sp. TJI49 which comprised three chromosomes and at least one mega plasmid. Genome annotation studies revealed a total 8,992 genes, out of which 8,940 were protein coding genes. Comparative genomics and phylogenetics identified Burkholderia sp. TJI49 as a distinct species of Burkholderia cepacia complex (BCC), closely related to B. multivorans ATCC17616. 
Genome-wide sequence alignment of this isolate with replicons of BCC members showed conservation of core function genes but considerable variations in accessory genes. Subsystem-based gene annotation identified the active presence of wide spread colonization island and type VI secretion system in Burkholderia sp. TJI49. Sequence comparisons revealed (a) 28 novel ORFs that have no database matches and (b) 23 ORFs with orthologues in species other than Burkholderia, indicating horizontal gene transfer events. Fold recognition of novel ORFs identified genes encoding pertactin autotransporter-like proteins (a constituent of type V secretion system) and Hap adhesion-like proteins (involved in cell-cell adhesion) in the genome of Burkholderia sp. TJI49. The genomic characterization of this isolate provided additional information related to the 'pan-genome' of Burkholderia species.}, } @article {pmid23645200, year = {2013}, author = {Milani, C and Duranti, S and Lugli, GA and Bottacini, F and Strati, F and Arioli, S and Foroni, E and Turroni, F and van Sinderen, D and Ventura, M}, title = {Comparative genomics of Bifidobacterium animalis subsp. lactis reveals a strict monophyletic bifidobacterial taxon.}, journal = {Applied and environmental microbiology}, volume = {79}, number = {14}, pages = {4304-4315}, pmid = {23645200}, issn = {1098-5336}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/chemistry/genetics/metabolism ; Bifidobacterium/*classification/drug effects/*genetics/metabolism ; *Genome, Bacterial ; Molecular Sequence Data ; Phylogeny ; Polymerase Chain Reaction ; Proteome ; Sequence Analysis, DNA ; Tetracycline/pharmacology ; Tetracycline Resistance ; }, abstract = {Strains of Bifidobacterium animalis subsp. lactis are extensively exploited by the food industry as health-promoting bacteria, although the genetic variability of members belonging to this taxon has so far not received much scientific attention. 
In this article, we describe the complete genetic makeup of the B. animalis subsp. lactis Bl12 genome and discuss the genetic relatedness of this strain with other sequenced strains belonging to this taxon. Moreover, a detailed comparative genomic analysis of B. animalis subsp. lactis genomes was performed, which revealed a closely related and isogenic nature of all currently available B. animalis subsp. lactis strains, thus strongly suggesting a closed pan-genome structure of this bacterial group.}, } @article {pmid23637636, year = {2013}, author = {Zhou, Z and McCann, A and Litrup, E and Murphy, R and Cormican, M and Fanning, S and Brown, D and Guttman, DS and Brisse, S and Achtman, M}, title = {Neutral genomic microevolution of a recently emerged pathogen, Salmonella enterica serovar Agona.}, journal = {PLoS genetics}, volume = {9}, number = {4}, pages = {e1003471}, pmid = {23637636}, issn = {1553-7404}, support = {//Wellcome Trust/United Kingdom ; }, mesh = {*DNA, Bacterial/genetics ; Disease Outbreaks ; Electrophoresis, Gel, Pulsed-Field ; Genomics ; Humans ; Salmonella Infections ; Salmonella enterica/genetics ; *Serogroup ; }, abstract = {Salmonella enterica serovar Agona has caused multiple food-borne outbreaks of gastroenteritis since it was first isolated in 1952. We analyzed the genomes of 73 isolates from global sources, comparing five distinct outbreaks with sporadic infections as well as food contamination and the environment. Agona consists of three lineages with minimal mutational diversity: only 846 single nucleotide polymorphisms (SNPs) have accumulated in the non-repetitive, core genome since Agona evolved in 1932 and subsequently underwent a major population expansion in the 1960s. Homologous recombination with other serovars of S. enterica imported 42 recombinational tracts (360 kb) in 5/143 nodes within the genealogy, which resulted in 3,164 additional SNPs. 
In contrast to this paucity of genetic diversity, Agona is highly diverse according to pulsed-field gel electrophoresis (PFGE), which is used to assign isolates to outbreaks. PFGE diversity reflects a highly dynamic accessory genome associated with the gain or loss (indels) of 51 bacteriophages, 10 plasmids, and 6 integrative conjugational elements (ICE/IMEs), but did not correlate uniquely with outbreaks. Unlike the core genome, indels occurred repeatedly in independent nodes (homoplasies), resulting in inaccurate PFGE genealogies. The accessory genome contained only few cargo genes relevant to infection, other than antibiotic resistance. Thus, most of the genetic diversity within this recently emerged pathogen reflects changes in the accessory genome, or is due to recombination, but these changes seemed to reflect neutral processes rather than Darwinian selection. Each outbreak was caused by an independent clade, without universal, outbreak-associated genomic features, and none of the variable genes in the pan-genome seemed to be associated with an ability to cause outbreaks.}, } @article {pmid23631911, year = {2013}, author = {Tomida, S and Nguyen, L and Chiu, BH and Liu, J and Sodergren, E and Weinstock, GM and Li, H}, title = {Pan-genome and comparative genome analyses of Propionibacterium acnes reveal its genomic diversity in the healthy and diseased human skin microbiome.}, journal = {mBio}, volume = {4}, number = {3}, pages = {e00003-13}, pmid = {23631911}, issn = {2150-7511}, support = {UH2AR057503/AR/NIAMS NIH HHS/United States ; R01GM099530/GM/NIGMS NIH HHS/United States ; U54 HG004968/HG/NHGRI NIH HHS/United States ; UH2 AR057503/AR/NIAMS NIH HHS/United States ; U54HG004968/HG/NHGRI NIH HHS/United States ; R01 GM099530/GM/NIGMS NIH HHS/United States ; }, mesh = {Acne Vulgaris/*microbiology ; Cluster Analysis ; Genes, Bacterial ; *Genetic Variation ; *Genome, Bacterial ; Gram-Positive Bacterial Infections/*microbiology ; Humans ; Molecular Sequence Data 
; Phylogeny ; Propionibacterium acnes/*classification/*genetics/isolation & purification ; Sequence Analysis, DNA ; Skin/*microbiology ; Synteny ; }, abstract = {UNLABELLED: Propionibacterium acnes constitutes a major part of the skin microbiome and contributes to human health. However, it has also been implicated as a pathogenic factor in several diseases, including acne, one of the most common skin diseases. Its pathogenic role, however, remains elusive. To better understand the genetic landscape and diversity of the organism and its role in human health and disease, we performed a comparative genome analysis of 82 P. acnes strains, 69 of which were sequenced by our group. This collection covers all known P. acnes lineages, including types IA, IB, II, and III. Our analysis demonstrated that although the P. acnes pan-genome is open, it is relatively small and expands slowly. The core regions, shared by all the sequenced genomes, accounted for 88% of the average genome. Comparative genome analysis showed that within each lineage, the strains isolated from the same individuals were more closely related than the ones isolated from different individuals, suggesting that clonal expansions occurred within each individual microbiome. We also identified the genetic elements specific to each lineage. Differences in harboring these elements may explain the phenotypic and functional differences of P. acnes in functioning as a commensal in healthy skin and as a pathogen in diseases. Our findings of the differences among P. acnes strains at the genome level underscore the importance of identifying the human microbiome variations at the strain level in understanding its association with diseases and provide insight into novel and personalized therapeutic approaches for P. acnes-related diseases.

IMPORTANCE: Propionibacterium acnes is a major human skin bacterium. It plays an important role in maintaining skin health. However, it has also been hypothesized to be a pathogenic factor in several diseases, including acne, a common skin disease affecting 85% of teenagers. To understand whether different strains have different virulent properties and thus play different roles in health and diseases, we compared the genomes of 82 P. acnes strains, most of which were isolated from acne or healthy skin. We identified lineage-specific genetic elements that may explain the phenotypic and functional differences of P. acnes as a commensal in health and as a pathogen in diseases. By analyzing a large number of sequenced strains, we provided an improved understanding of the genetic landscape and diversity of the organism at the strain level and at the molecular level that can be further applied in the development of new and personalized therapies.}, } @article {pmid23626855, year = {2013}, author = {Chung, WC and Chen, LL and Lo, WS and Lin, CP and Kuo, CH}, title = {Comparative analysis of the peanut witches'-broom phytoplasma genome reveals horizontal transfer of potential mobile units and effectors.}, journal = {PloS one}, volume = {8}, number = {4}, pages = {e62770}, pmid = {23626855}, issn = {1932-6203}, mesh = {Arachis/microbiology ; Cluster Analysis ; *DNA Transposable Elements ; *Gene Transfer, Horizontal ; *Genome, Bacterial ; Metabolic Networks and Pathways ; Molecular Sequence Annotation ; Molecular Sequence Data ; Open Reading Frames ; Phylogeny ; Phytoplasma/classification/*genetics/metabolism ; Plant Diseases/microbiology ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Phytoplasmas are a group of bacteria that are associated with hundreds of plant diseases. 
Due to their economic importance and the difficulties involved in the experimental study of these obligate pathogens, genome sequencing and comparative analysis have been utilized as powerful tools to understand phytoplasma biology. To date, four complete phytoplasma genome sequences have been published. However, these four strains represent limited phylogenetic diversity. In this study, we report the shotgun sequencing and evolutionary analysis of a peanut witches'-broom (PnWB) phytoplasma genome. The availability of this genome provides the first representative of the 16SrII group and substantially improves the taxon sampling to investigate genome evolution. The draft genome assembly contains 13 chromosomal contigs with a total size of 562,473 bp, covering ∼90% of the chromosome. Additionally, a complete plasmid sequence is included. Comparisons among the five available phytoplasma genomes reveal the differentiations in gene content and metabolic capacity. Notably, phylogenetic inferences of the potential mobile units (PMUs) in these genomes indicate that horizontal transfer may have occurred between divergent phytoplasma lineages. Because many effectors are associated with PMUs, the horizontal transfer of these transposon-like elements can contribute to the adaptation and diversification of these pathogens. In summary, the findings from this study highlight the importance of improving taxon sampling when investigating genome evolution. Moreover, the currently available sequences are inadequate to fully characterize the pan-genome of phytoplasmas. 
Future genome sequencing efforts to expand phylogenetic diversity are essential in improving our understanding of phytoplasma evolution.}, } @article {pmid23626695, year = {2013}, author = {Li, HW and Zhi, XY and Yao, JC and Zhou, Y and Tang, SK and Klenk, HP and Zhao, J and Li, WJ}, title = {Comparative genomic analysis of the genus Nocardiopsis provides new insights into its genetic mechanisms of environmental adaptability.}, journal = {PloS one}, volume = {8}, number = {4}, pages = {e61528}, pmid = {23626695}, issn = {1932-6203}, mesh = {Actinobacteria/classification/*genetics ; Adaptation, Physiological/*genetics ; Bacterial Proteins/*genetics ; Carrier Proteins/genetics ; Chromosome Mapping ; *Chromosomes, Bacterial ; Comparative Genomic Hybridization ; Evolution, Molecular ; *Gene Expression Regulation, Bacterial ; Genetic Speciation ; *Genome, Bacterial ; Genomics ; Multigene Family ; Phylogeny ; }, abstract = {The genus Nocardiopsis, a widespread group in phylum Actinobacteria, has received much attention owing to its ecological versatility, pathogenicity, and ability to produce a rich array of bioactive metabolites. Its high environmental adaptability might be attributable to its genome dynamics, which can be estimated through comparative genomic analysis targeting microorganisms with close phylogenetic relationships but different phenotypes. To shed light on speciation, gene content evolution, and environmental adaptation in these unique actinobacteria, we sequenced draft genomes for 16 representative species of the genus and compared them with that of the type species N. dassonvillei subsp. dassonvillei DSM 43111(T). The core genome of 1,993 orthologous and paralogous gene clusters was identified, and the pan-genomic reservoir was found not only to accommodate more than 22,000 genes, but also to be open. 
The top ten paralogous genes in terms of copy number could be assigned to three functional categories: transcription regulators, transporters, and synthases related to bioactive metabolites. Based on phylogenomic reconstruction, we inferred past evolutionary events, such as gene gains and losses, and identified a list of clade-specific genes implicated in environmental adaptation. These results provided insights into the genetic causes of environmental adaptability in this cosmopolitan actinobacterial group and the contributions made by its inherent features, including genome dynamics and the constituents of core and accessory proteins.}, } @article {pmid23613838, year = {2013}, author = {Palmer, SR and Miller, JH and Abranches, J and Zeng, L and Lefebure, T and Richards, VP and Lemos, JA and Stanhope, MJ and Burne, RA}, title = {Phenotypic heterogeneity of genomically-diverse isolates of Streptococcus mutans.}, journal = {PloS one}, volume = {8}, number = {4}, pages = {e61358}, pmid = {23613838}, issn = {1932-6203}, support = {R01 AI073368/AI/NIAID NIH HHS/United States ; R01 DE012236/DE/NIDCR NIH HHS/United States ; R01 DE013239/DE/NIDCR NIH HHS/United States ; T90 DE021990/DE/NIDCR NIH HHS/United States ; }, mesh = {Bacterial Proteins/genetics ; Genome, Bacterial/*genetics ; Streptococcus mutans/classification/*genetics ; }, abstract = {High coverage, whole genome shotgun (WGS) sequencing of 57 geographically- and genetically-diverse isolates of Streptococcus mutans from individuals of known dental caries status was recently completed. Of the 57 sequenced strains, fifteen isolates were selected based primarily on differences in gene content and phenotypic characteristics known to affect virulence and compared with the reference strain UA159. A high degree of variability in these properties was observed between strains, with a broad spectrum of sensitivities to low pH, oxidative stress (air and paraquat) and exposure to competence stimulating peptide (CSP). 
Significant differences in autolytic behavior and in biofilm development in glucose or sucrose were also observed. Natural genetic competence varied among isolates, and this was correlated with the presence or absence of competence genes, comCDE and comX, and to bacteriocins. In general, strains that lacked the ability to become competent possessed fewer genes for bacteriocins and immunity proteins or contained polymorphic variants of these genes. WGS sequence analysis of the pan-genome revealed, for the first time, components of a Type VII secretion system in several S. mutans strains, as well as two putative ORFs that encode possible collagen binding proteins located upstream of the cnm gene, which is associated with host cell invasiveness. The virulence of these particular strains was assessed in a wax-worm model. This is the first study to combine a comprehensive analysis of key virulence-related phenotypes with extensive genomic analysis of a pathogen that evolved closely with humans. Our analysis highlights the phenotypic diversity of S. mutans isolates and indicates that the species has evolved a variety of adaptive strategies to persist in the human oral cavity and, when conditions are favorable, to initiate disease.}, } @article {pmid23606480, year = {2013}, author = {Qu, PH and Chen, SY and Scholz, HC and Busse, HJ and Gu, Q and Kämpfer, P and Foster, JT and Glaeser, SP and Chen, C and Yang, ZC}, title = {Francisella guangzhouensis sp. 
nov., isolated from air-conditioning systems.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {63}, number = {Pt 10}, pages = {3628-3635}, doi = {10.1099/ijs.0.049916-0}, pmid = {23606480}, issn = {1466-5034}, mesh = {Air Conditioning ; Bacterial Typing Techniques ; China ; Cysteine/metabolism ; DNA, Bacterial/genetics ; Fatty Acids/analysis ; Francisella/*classification/genetics/isolation & purification ; Genes, Bacterial ; Molecular Sequence Data ; Phospholipids/analysis ; *Phylogeny ; Polyamines/analysis ; RNA, Ribosomal, 16S/genetics ; RNA, Ribosomal, 23S/genetics ; Sequence Analysis, DNA ; Ubiquinone/analysis ; *Water Microbiology ; }, abstract = {Four strains (08HL01032(T), 09HG994, 10HP82-6 and 10HL1960) were isolated from water of air-conditioning systems of various cooling towers in Guangzhou city, China. Cells were Gram-stain-negative coccobacilli without flagella, catalase-positive and oxidase-negative, showing no reduction of nitrate, no hydrolysis of urea and no production of H2S. Growth was characteristically enhanced in the presence of l-cysteine, which was consistent with the properties of members of the genus Francisella. The quinone system was composed of ubiquinone Q-8 with minor amounts of Q-9. The polar lipid profile consisted of the predominant lipids phosphatidylethanolamine, diphosphatidylglycerol, phosphatidylglycerol, phosphatidylcholine, two unidentified phospholipids (PL2, PL3), an unidentified aminophospholipid and an unidentified glycolipid (GL2). The polyamine pattern consisted of the major compounds spermidine, cadaverine and spermine. The major cellular fatty acids were C10 : 0, C14 : 0, C16 : 0, C18 : 1ω9c and C18 : 1 3-OH. A draft whole-genome sequence of the proposed type strain 08HL01032(T) was generated. 
Comparative sequence analysis of the complete 16S and 23S rRNA genes confirmed affiliation to the genus Francisella, with 95 % sequence identity to the closest relatives in the database, the type strains of Francisella philomiragia and Francisella noatunensis subsp. orientalis. Full-length deduced amino acid sequences of various housekeeping genes, recA, gyrB, groEL, dnaK, rpoA, rpoB, rpoD, rpoH, fopA and sdhA, exhibited similarities of 67-92 % to strains of other species of the genus Francisella. Strains 08HL01032(T), 09HG994, 10HP82-6 and 10HL1960 exhibited highly similar pan-genome PCR profiles. Both the phenotypic and molecular data support the conclusion that the four strains belong to the genus Francisella but exhibit considerable divergence from all recognized Francisella species. Therefore, we propose the name Francisella guangzhouensis sp. nov., with the type strain 08HL01032(T) (= CCUG 60119(T) = NCTC 13503(T)).}, } @article {pmid23585535, year = {2013}, author = {Gordienko, EN and Kazanov, MD and Gelfand, MS}, title = {Evolution of pan-genomes of Escherichia coli, Shigella spp., and Salmonella enterica.}, journal = {Journal of bacteriology}, volume = {195}, number = {12}, pages = {2786-2792}, pmid = {23585535}, issn = {1098-5530}, mesh = {Computational Biology/methods ; Escherichia coli/*genetics ; *Evolution, Molecular ; *Genome, Bacterial ; Phylogeny ; Salmonella enterica/*genetics ; Shigella/*genetics ; }, abstract = {Multiple sequencing of genomes belonging to a bacterial species allows one to analyze and compare statistics and dynamics of the gene complements of species, their pan-genomes. Here, we analyzed multiple genomes of Escherichia coli, Shigella spp., and Salmonella enterica. We demonstrate that the distribution of the number of genomes harboring a gene is well approximated by a sum of two power functions, describing frequent genes (present in many strains) and rare genes (present in few strains). 
The virtual absence of Shigella-specific genes not present in E. coli genomes confirms previous observations that Shigella is not an independent genus. While the pan-genome size is increasing with each new strain, the number of genes present in a fixed fraction of strains stabilizes quickly. For instance, slightly fewer than 4,000 genes are present in at least half of any group of E. coli genomes. Comparison of S. enterica and E. coli pan-genomes revealed the existence of a common periphery, that is, genes present in some but not all strains of both species. Analysis of phylogenetic trees demonstrates that rare genes from the periphery likely evolve under horizontal transfer, whereas frequent periphery genes may have been inherited from the periphery genome of the common ancestor.}, } @article {pmid23563954, year = {2013}, author = {Aylward, FO and McDonald, BR and Adams, SM and Valenzuela, A and Schmidt, RA and Goodwin, LA and Woyke, T and Currie, CR and Suen, G and Poulsen, M}, title = {Comparison of 26 sphingomonad genomes reveals diverse environmental adaptations and biodegradative capabilities.}, journal = {Applied and environmental microbiology}, volume = {79}, number = {12}, pages = {3724-3733}, pmid = {23563954}, issn = {1098-5336}, mesh = {Adaptation, Biological/*genetics ; Amino Acid Sequence ; Animals ; Base Sequence ; Biodegradation, Environmental ; Cluster Analysis ; *Environment ; Genome, Bacterial/*genetics ; Isoptera/*microbiology ; Models, Genetic ; Molecular Sequence Annotation ; Molecular Sequence Data ; Open Reading Frames/genetics ; *Phylogeny ; Puerto Rico ; Sequence Analysis, DNA ; South Africa ; Sphingomonadaceae/enzymology/*genetics/*metabolism ; }, abstract = {Sphingomonads comprise a physiologically versatile group within the Alphaproteobacteria that includes strains of interest for biotechnology, human health, and environmental nutrient cycling. 
In this study, we compared 26 sphingomonad genome sequences to gain insight into their ecology, metabolic versatility, and environmental adaptations. Our multilocus phylogenetic and average amino acid identity (AAI) analyses confirm that Sphingomonas, Sphingobium, Sphingopyxis, and Novosphingobium are well-resolved monophyletic groups with the exception of Sphingomonas sp. strain SKA58, which we propose belongs to the genus Sphingobium. Our pan-genomic analysis of sphingomonads reveals numerous species-specific open reading frames (ORFs) but few signatures of genus-specific cores. The organization and coding potential of the sphingomonad genomes appear to be highly variable, and plasmid-mediated gene transfer and chromosome-plasmid recombination, together with prophage- and transposon-mediated rearrangements, appear to play prominent roles in the genome evolution of this group. We find that many of the sphingomonad genomes encode numerous oxygenases and glycoside hydrolases, which are likely responsible for their ability to degrade various recalcitrant aromatic compounds and polysaccharides, respectively. Many of these enzymes are encoded on megaplasmids, suggesting that they may be readily transferred between species. We also identified enzymes putatively used for the catabolism of sulfonate and nitroaromatic compounds in many of the genomes, suggesting that plant-based compounds or chemical contaminants may be sources of nitrogen and sulfur. Many of these sphingomonads appear to be adapted to oligotrophic environments, but several contain genomic features indicative of host associations. 
Our work provides a basis for understanding the ecological strategies employed by sphingomonads and their role in environmental nutrient cycling.}, } @article {pmid23541411, year = {2014}, author = {Grumann, D and Nübel, U and Bröker, BM}, title = {Staphylococcus aureus toxins--their functions and genetics.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {21}, number = {}, pages = {583-592}, doi = {10.1016/j.meegid.2013.03.013}, pmid = {23541411}, issn = {1567-7257}, mesh = {Bacterial Toxins/*genetics ; DNA, Bacterial ; Evolution, Molecular ; Humans ; *Interspersed Repetitive Sequences ; Phylogeny ; Staphylococcal Infections/microbiology ; Staphylococcus aureus/classification/*genetics/physiology ; }, abstract = {The outcome of encounters between Staphylococcus (S.) aureus and its human host ranges from life-threatening infection through allergic reactions to symptom-free colonization. The pan-genome of this bacterial species encodes numerous toxins, known or strongly suspected to cause specific diseases or symptoms. Three toxin families are the focus of this review, namely (i) pore-forming toxins, (ii) exfoliative toxins and (iii) superantigens. The majority of toxin-encoding genes are located on mobile genetic elements (MGEs), resulting in a pronounced heterogeneity in the endowment with toxin genes of individual S. aureus strains. Recent population genomic analyses have provided a framework for an improved understanding of the temporal and spatial scales of the motility of MGEs and their associated toxin genes. The distribution of toxin genes among clonal lineages within the species S. aureus is not random, and phylogenetic (sub-)lineages within clonal complexes feature characteristic toxin signatures. When studying pathogenesis, this lineage association, which is caused by the clonal nature of S. 
aureus makes it difficult to discriminate effects of specific toxins from contributions of the genetic background and/or other associated genetic factors.}, } @article {pmid23541197, year = {2013}, author = {Gopinath, G and Hari, K and Jain, R and Mammel, MK and Kothary, MH and Franco, AA and Grim, CJ and Jarvis, KG and Sathyamoorthy, V and Hu, L and Datta, AR and Patel, IR and Jackson, SA and Gangiredla, J and Kotewicz, ML and LeClerc, JE and Wekell, M and McCardell, BA and Solomotis, MD and Tall, BD}, title = {The Pathogen-annotated Tracking Resource Network (PATRN) system: a web-based resource to aid food safety, regulatory science, and investigations of foodborne pathogens and disease.}, journal = {Food microbiology}, volume = {34}, number = {2}, pages = {303-318}, doi = {10.1016/j.fm.2013.01.001}, pmid = {23541197}, issn = {1095-9998}, mesh = {Bacteria/classification/*genetics/isolation & purification ; Data Mining ; Database Management Systems/*instrumentation ; Food Microbiology ; Food Safety/*methods ; Foodborne Diseases/*microbiology/prevention & control ; Humans ; Information Dissemination ; Information Services/*instrumentation ; *Internet ; }, abstract = {Investigation of foodborne diseases requires the capture and analysis of time-sensitive information on microbial pathogens that is derived from multiple analytical methods and sources. The web-based Pathogen-annotated Tracking Resource Network (PATRN) system (www.patrn.net) was developed to address the data aggregation, analysis, and communication needs important to the global food safety community for the investigation of foodborne disease. PATRN incorporates a standard vocabulary for describing isolate metadata and provides a representational schema for a prototypic data exchange standard using a novel data loading wizard for aggregation of assay and attribution information. 
PATRN currently houses expert-curated, high-quality "foundational datasets" consisting of published experimental results from conventional assays and next generation analysis platforms for isolates of Escherichia coli, Listeria monocytogenes, and Salmonella, Shigella, Vibrio and Cronobacter species. A suite of computational tools for data mining, clustering, and graphical representation is available. Within PATRN, the public curated data repository is complemented by a secure private workspace for user-driven analyses, and for sharing data among collaborators. To demonstrate the data curation, loading wizard features, and analytical capabilities of PATRN, three use-case scenarios are presented. Use-case scenario one is a comparison of the distribution and prevalence of plasmid-encoded virulence factor genes among 249 Cronobacter strains with attributes similar to those of nine Cronobacter isolates from recent cases obtained between March and October, 2010-2011. To highlight PATRN's data management and trend finding tools, analysis of datasets, stored in PATRN as part of an ongoing surveillance project to identify the predominant molecular serogroups among Cronobacter sakazakii isolates observed in the USA, is shown. Use-case scenario two demonstrates the secure workspace available for private users to upload and analyze sensitive data, and for collating cross-platform datasets to identify and validate congruent datapoints. SNP datasets from WGS assemblies and pan-genome microarrays are analyzed in a combinatorial fashion to determine relatedness of 33 Salmonella enterica strains to six strains collected as part of an outbreak investigation. Use-case scenario three utilizes published surveillance results that describe the incidence and sources of O157:H7 E. coli isolates associated with a produce pre-harvest surveillance study that occurred during 2002-2006. 
In summary, PATRN is a web-based integrated platform containing tools for the management, analysis and visualization of data about foodborne pathogens.}, } @article {pmid23528645, year = {2013}, author = {Fondi, M and Rizzi, E and Emiliani, G and Orlandini, V and Berna, L and Papaleo, MC and Perrin, E and Maida, I and Corti, G and De Bellis, G and Baldi, F and Dijkshoorn, L and Vaneechoutte, M and Fani, R}, title = {The genome sequence of the hydrocarbon-degrading Acinetobacter venetianus VE-C3.}, journal = {Research in microbiology}, volume = {164}, number = {5}, pages = {439-449}, doi = {10.1016/j.resmic.2013.03.003}, pmid = {23528645}, issn = {1769-7123}, mesh = {Acinetobacter/classification/*genetics/isolation & purification/metabolism ; Cluster Analysis ; DNA, Bacterial/*chemistry/*genetics ; Gene Order ; *Genome, Bacterial ; Hydrocarbons/metabolism ; Italy ; Metabolic Networks and Pathways/genetics ; Molecular Sequence Data ; Phylogeny ; Seawater/microbiology ; *Sequence Analysis, DNA ; }, abstract = {Here we report the genome sequence of Acinetobacter venetianus VE-C3, a strain isolated from the Venice Lagoon and known to be able to degrade n-alkanes. Post sequencing analyses revealed that this strain is relatively distantly related to the other Acinetobacter strains completely sequenced so far as shown by phylogenetic analysis and pangenome analysis (1285 genes shared with all the other Acinetobacter genomes sequenced so far). A. venetianus VE-C3 possesses a wide range of determinants whose molecular functions are probably related to the survival in a strongly impacted ecological niche. Among them, genes probably involved in the metabolism of long-chain n-alkanes and in the resistance to toxic metals (e.g. arsenic, cadmium, cobalt and zinc) were found. Genes belonging to these processes were found both on the chromosome and on plasmids. Also, our analysis documented one of the possible genetic bases underlying the strategy adopted by A. 
venetianus VE-C3 for the adhesion to oil fuel droplets, which could account for the differences existing in this process with other A. venetianus strains. Finally, the presence of a number of DNA mobilization-related genes (i.e. transposases, integrases, resolvases) strongly suggests an important role played by horizontal gene transfer in shaping the genome of A. venetianus VE-C3 and in its adaptation to its special ecological niche.}, } @article {pmid23493677, year = {2013}, author = {Fitzsimons, MS and Novotny, M and Lo, CC and Dichosa, AE and Yee-Greenbaum, JL and Snook, JP and Gu, W and Chertkov, O and Davenport, KW and McMurry, K and Reitenga, KG and Daughton, AR and He, J and Johnson, SL and Gleasner, CD and Wills, PL and Parson-Quintana, B and Chain, PS and Detter, JC and Lasken, RS and Han, CS}, title = {Nearly finished genomes produced using gel microdroplet culturing reveal substantial intraspecies genomic diversity within the human microbiome.}, journal = {Genome research}, volume = {23}, number = {5}, pages = {878-888}, pmid = {23493677}, issn = {1549-5469}, support = {HHSN272200900007C/AI/NIAID NIH HHS/United States ; R01 HG003647/HG/NHGRI NIH HHS/United States ; U54 AI084844/AI/NIAID NIH HHS/United States ; U54 AI-084844-01/AI/NIAID NIH HHS/United States ; }, mesh = {Bacteria/*genetics ; *Genetic Variation ; Genome, Bacterial/*genetics ; Genomics ; Humans ; *Microbiota ; Polymerase Chain Reaction ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA/methods ; }, abstract = {The majority of microbial genomic diversity remains unexplored. This is largely due to our inability to culture most microorganisms in isolation, which is a prerequisite for traditional genome sequencing. Single-cell sequencing has allowed researchers to circumvent this limitation. DNA is amplified directly from a single cell using the whole-genome amplification technique of multiple displacement amplification (MDA). 
However, MDA from a single chromosome copy suffers from amplification bias and a large loss of specificity from even very small amounts of DNA contamination, which makes assembling a genome difficult and completely finishing a genome impossible except in extraordinary circumstances. Gel microdrop cultivation allows culturing of a diverse microbial community and provides hundreds to thousands of genetically identical cells as input for an MDA reaction. We demonstrate the utility of this approach by comparing sequencing results of gel microdroplets and single cells following MDA. Bias is reduced in the MDA reaction and genome sequencing, and assembly is greatly improved when using gel microdroplets. We acquired multiple near-complete genomes for two bacterial species from human oral and stool microbiome samples. A significant amount of genome diversity, including single nucleotide polymorphisms and genome recombination, is discovered. Gel microdroplets offer a powerful and high-throughput technology for assembling whole genomes from complex samples and for probing the pan-genome of naturally occurring populations.}, } @article {pmid23459509, year = {2013}, author = {Wiles, TJ and Norton, JP and Smith, SN and Lewis, AJ and Mobley, HL and Casjens, SR and Mulvey, MA}, title = {A phyletically rare gene promotes the niche-specific fitness of an E. 
coli pathogen during bacteremia.}, journal = {PLoS pathogens}, volume = {9}, number = {2}, pages = {e1003175}, pmid = {23459509}, issn = {1553-7374}, support = {R01 DK094777/DK/NIDDK NIH HHS/United States ; R01 AI095647/AI/NIAID NIH HHS/United States ; T32-GM007464/GM/NIGMS NIH HHS/United States ; AI095647/AI/NIAID NIH HHS/United States ; AI043363/AI/NIAID NIH HHS/United States ; AI090369/AI/NIAID NIH HHS/United States ; AI088086/AI/NIAID NIH HHS/United States ; R01 AI043363/AI/NIAID NIH HHS/United States ; R56 AI074825/AI/NIAID NIH HHS/United States ; AI074825/AI/NIAID NIH HHS/United States ; R21 AI088086/AI/NIAID NIH HHS/United States ; T32 AI055434/AI/NIAID NIH HHS/United States ; R21 AI090369/AI/NIAID NIH HHS/United States ; DK094777/DK/NIDDK NIH HHS/United States ; T32 GM007464/GM/NIGMS NIH HHS/United States ; R56 AI043363/AI/NIAID NIH HHS/United States ; R01 AI074825/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Biological Evolution ; Disease Models, Animal ; Embryo, Nonmammalian/metabolism/microbiology ; Escherichia coli Infections/genetics/*microbiology ; Escherichia coli Proteins/*physiology ; Female ; Gene Expression Regulation, Bacterial ; *Genetic Fitness ; Genome, Bacterial ; Genomic Islands ; Host-Pathogen Interactions ; Mice ; Mice, Inbred CBA/microbiology ; Phylogeny ; Urinary Tract Infections/genetics/*microbiology ; Uropathogenic Escherichia coli/*pathogenicity ; Virulence/*genetics ; Zebrafish/genetics/*microbiology ; }, abstract = {In bacteria, laterally acquired genes are often concentrated within chromosomal regions known as genomic islands. Using a recently developed zebrafish infection model, we set out to identify unique factors encoded within genomic islands that contribute to the fitness and virulence of a reference urosepsis isolate-extraintestinal pathogenic Escherichia coli strain CFT073. 
By screening a series of deletion mutants, we discovered a previously uncharacterized gene, neaT, that is conditionally required by the pathogen during systemic infections. In vitro assays indicate that neaT can limit bacterial interactions with host phagocytes and alter the aggregative properties of CFT073. The neaT gene is localized within an integrated P2-like bacteriophage in CFT073, but was rarely found within other proteobacterial genomes. Sequence-based analyses revealed that neaT homologues are present, but discordantly conserved, within a phyletically diverse set of bacterial species. In CFT073, neaT appears to be unameliorated, having an exceptionally A+T-rich composition along with a notably altered codon bias. These data suggest that neaT was recently brought into the proteobacterial pan-genome from an extra-phyletic source. Interestingly, even in G+C-poor genomes, as found within the Firmicutes lineage, neaT-like genes are often unameliorated. Sequence-level features of neaT homologues challenge the common supposition that the A+T-rich nature of many recently acquired genes reflects the nucleotide composition of their genomes of origin. 
In total, these findings highlight the complexity of the evolutionary forces that can affect the acquisition, utilization, and assimilation of rare genes that promote the niche-dependent fitness and virulence of a bacterial pathogen.}, } @article {pmid23438674, year = {2013}, author = {Kapur, RP and Cole, B and Zhang, M and Lin, J and Fligner, CL}, title = {Placental mesenchymal dysplasia and fetal renal-hepatic-pancreatic dysplasia: androgenetic-biparental mosaicism and pathogenesis of an autosomal recessive disorder.}, journal = {Pediatric and developmental pathology : the official journal of the Society for Pediatric Pathology and the Paediatric Pathology Society}, volume = {16}, number = {3}, pages = {191-200}, doi = {10.2350/12-12-1281-OA.1}, pmid = {23438674}, issn = {1093-5266}, mesh = {Chromosome Disorders/*genetics/pathology ; Female ; Fetal Diseases/*genetics/pathology ; Heterotaxy Syndrome/*genetics/pathology ; Humans ; Laser Capture Microdissection ; Male ; Mesoderm/pathology ; Mosaicism ; Placenta/*pathology ; Pregnancy ; Uniparental Disomy/*genetics/pathology ; }, abstract = {Androgenetic-biparental mosaicism (ABM) denotes an embryo in which a subset of cells contains a diploid chromosomal complement derived entirely from the father. Such embryos have a high incidence of placental mesenchymal dysplasia (PMD) and paternal imprinting disorders because the androgenetic cells have pangenomic paternal uniparental disomy. Uniparental disomy also poses a theoretical risk for paternally transmitted autosomal recessive disorders, if both chromosomes of each autosomal pair are identical (isodisomy). We present the first example of a recessive disorder, renal-hepatic-pancreatic dysplasia, in a pregnancy complicated by PMD and ABM. 
Androgenetic-biparental mosaicism was demonstrated in fetal DNA, extracted from multiple organs, by quantitative polymerase chain reaction-based methods that detected allelic imbalances at the differentially methylated SNRPN locus (chromosome 15); polymorphic short tandem repeat microsatellite markers located on chromosomes 4, 7, 8, 13, 18, and 21; and single nucleotide polymorphisms on chromosomes 1 and 19. Laser capture microdissection was performed to isolate specific placental and renal cell populations and document selective enrichment of androgenetic cells in the stroma of PMD and the epithelium of renal cysts. Mutational analysis of coding sequences did not reveal any mutations in NPHP3, a ciliopathy gene implicated in some cases of renal-hepatic-pancreatic dysplasia. Nonetheless, the fetal phenotype and laser capture data support the model of a paternally transmitted autosomal recessive disorder, which occurred because of ABM.}, } @article {pmid23431003, year = {2013}, author = {Galardini, M and Pini, F and Bazzicalupo, M and Biondi, EG and Mengoni, A}, title = {Replicon-dependent bacterial genome evolution: the case of Sinorhizobium meliloti.}, journal = {Genome biology and evolution}, volume = {5}, number = {3}, pages = {542-558}, pmid = {23431003}, issn = {1759-6653}, mesh = {Bacterial Proteins/genetics ; Base Sequence ; *Evolution, Molecular ; *Genome, Bacterial ; Molecular Sequence Data ; Phylogeny ; *Replicon ; Selection, Genetic ; Sinorhizobium meliloti/classification/*genetics ; }, abstract = {Many bacterial species, such as the alphaproteobacterium Sinorhizobium meliloti, are characterized by open pangenomes and contain multipartite genomes consisting of a chromosome and other large-sized replicons, such as chromids, megaplasmids, and plasmids. The evolutionary forces in both functional and structural aspects that shape the pangenome of species with multipartite genomes are still poorly understood. Therefore, we sequenced the genomes of 10 new S. 
meliloti strains, which were analyzed together with four additional publicly available genomic sequences. Results indicated that the three main replicons present in these strains (a chromosome, a chromid, and a megaplasmid) partly show replicon-specific behaviors related to strain differentiation. In particular, the pSymB chromid was shown to be a hot spot for positively selected genes, and, unexpectedly, genes resident in the pSymB chromid were also found to be more widespread in distant taxa than those located in the other replicons. Moreover, through the exploitation of a DNA proximity network, a series of conserved "DNA backbones" were found to shape the evolution of the genome structure, with the rest of the genome experiencing rearrangements. The presented data allow depicting a scenario where the pSymB chromid has a distinctive role in intraspecies differentiation and in evolution through positive selection, whereas the pSymA megaplasmid mostly contributes to structural fluidity and to the emergence of new functions, indicating a specific evolutionary role for each replicon in the pangenome evolution.}, } @article {pmid23409014, year = {2013}, author = {Mann, RA and Smits, TH and Bühlmann, A and Blom, J and Goesmann, A and Frey, JE and Plummer, KM and Beer, SV and Luck, J and Duffy, B and Rodoni, B}, title = {Comparative genomics of 12 strains of Erwinia amylovora identifies a pan-genome with a large conserved core.}, journal = {PloS one}, volume = {8}, number = {2}, pages = {e55644}, pmid = {23409014}, issn = {1932-6203}, mesh = {DNA, Bacterial/genetics ; Erwinia amylovora/classification/*genetics ; *Genome, Bacterial ; Phylogeny ; Species Specificity ; }, abstract = {The plant pathogen Erwinia amylovora can be divided into two host-specific groupings: strains infecting a broad range of hosts within the Rosaceae subfamily Spiraeoideae (e.g., Malus, Pyrus, Crataegus, Sorbus) and strains infecting Rubus (raspberries and blackberries). 
Comparative genomic analysis of 12 strains representing distinct populations (e.g., geographic, temporal, host origin) of E. amylovora was used to describe the pan-genome of this major pathogen. The pan-genome contains 5751 coding sequences and is highly conserved relative to other phytopathogenic bacteria comprising on average 89\% conserved, core genes. The chromosomes of Spiraeoideae-infecting strains were highly homogeneous, while greater genetic diversity was observed between Spiraeoideae- and Rubus-infecting strains (and among individual Rubus-infecting strains), the majority of which was attributed to variable genomic islands. Based on genomic distance scores and phylogenetic analysis, the Rubus-infecting strain ATCC BAA-2158 was genetically more closely related to the Spiraeoideae-infecting strains of E. amylovora than it was to the other Rubus-infecting strains. Analysis of the accessory genomes of Spiraeoideae- and Rubus-infecting strains has identified putative host-specific determinants including variation in the effector protein HopX1$_{\mathrm{Ea}}$ and a putative secondary metabolite pathway only present in Rubus-infecting strains.}, } @article {pmid23386843, year = {2013}, author = {Gillings, MR}, title = {Evolutionary consequences of antibiotic use for the resistome, mobilome and microbial pangenome.}, journal = {Frontiers in microbiology}, volume = {4}, number = {}, pages = {4}, pmid = {23386843}, issn = {1664-302X}, abstract = {The widespread use and abuse of antibiotic therapy has evolutionary and ecological consequences, some of which are only just beginning to be examined. One well known consequence is the fixation of mutations and lateral gene transfer (LGT) events that confer antibiotic resistance. Sequential selection events, driven by different classes of antibiotics, have resulted in the assembly of diverse resistance determinants and mobile DNAs into novel genetic elements of ever-growing complexity and flexibility. 
These novel plasmids, integrons, and genomic islands have now become fixed at high frequency in diverse cell lineages by human antibiotic use. Consequently they can be regarded as xenogenetic pollutants, analogous to xenobiotic compounds, but with the critical distinction that they replicate rather than degrade when released to pollute natural environments. Antibiotics themselves must also be regarded as pollutants, since human production overwhelms natural synthesis, and a major proportion of ingested antibiotic is excreted unchanged into waste streams. Such antibiotic pollutants have non-target effects, raising the general rates of mutation, recombination, and LGT in all the microbiome, and simultaneously providing the selective force to fix such changes. This has the consequence of recruiting more genes into the resistome and mobilome, and of increasing the overlap between these two components of microbial genomes. Thus the human use and environmental release of antibiotics is having second order effects on the microbial world, because these small molecules act as drivers of bacterial evolution. 
Continued pollution with both xenogenetic elements and the selective agents that fix such elements in populations has potentially adverse consequences for human welfare.}, } @article {pmid23342011, year = {2013}, author = {Soares, SC and Silva, A and Trost, E and Blom, J and Ramos, R and Carneiro, A and Ali, A and Santos, AR and Pinto, AC and Diniz, C and Barbosa, EG and Dorella, FA and Aburjaile, F and Rocha, FS and Nascimento, KK and Guimarães, LC and Almeida, S and Hassan, SS and Bakhtiar, SM and Pereira, UP and Abreu, VA and Schneider, MP and Miyoshi, A and Tauch, A and Azevedo, V}, title = {The pan-genome of the animal pathogen Corynebacterium pseudotuberculosis reveals differences in genome plasticity between the biovar ovis and equi strains.}, journal = {PloS one}, volume = {8}, number = {1}, pages = {e53818}, pmid = {23342011}, issn = {1932-6203}, mesh = {Animals ; Corynebacterium/*genetics ; Gene Deletion ; Genes, Bacterial/genetics ; Genetic Variation ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Multigene Family/genetics ; Species Specificity ; Virulence Factors/genetics ; }, abstract = {Corynebacterium pseudotuberculosis is a facultative intracellular pathogen and the causative agent of several infectious and contagious chronic diseases, including caseous lymphadenitis, ulcerative lymphangitis, mastitis, and edematous skin disease, in a broad spectrum of hosts. In addition, Corynebacterium pseudotuberculosis infections pose a rising worldwide economic problem in ruminants. The complete genome sequences of 15 C. pseudotuberculosis strains isolated from different hosts and countries were comparatively analyzed using a pan-genomic strategy. Phylogenomic, pan-genomic, core genomic, and singleton analyses revealed close relationships among pathogenic corynebacteria, the clonal-like behavior of C. pseudotuberculosis and slow increases in the sizes of pan-genomes. 
According to extrapolations based on the pan-genomes, core genomes and singletons, the C. pseudotuberculosis biovar ovis shows a more clonal-like behavior than the C. pseudotuberculosis biovar equi. Most of the variable genes of the biovar ovis strains were acquired in a block through horizontal gene transfer and are highly conserved, whereas the biovar equi strains contain great variability, both intra- and inter-biovar, in the 16 detected pathogenicity islands (PAIs). With respect to the gene content of the PAIs, the most interesting finding is the high similarity of the pilus genes in the biovar ovis strains compared with the great variability of these genes in the biovar equi strains. Concluding, the polymerization of complete pilus structures in biovar ovis could be responsible for a remarkable ability of these strains to spread throughout host tissues and penetrate cells to live intracellularly, in contrast with the biovar equi, which rarely attacks visceral organs. Intracellularly, the biovar ovis strains are expected to have less contact with other organisms than the biovar equi strains, thereby explaining the significant clonal-like behavior of the biovar ovis strains.}, } @article {pmid23339658, year = {2013}, author = {Kuenne, C and Billion, A and Mraheil, MA and Strittmatter, A and Daniel, R and Goesmann, A and Barbuddhe, S and Hain, T and Chakraborty, T}, title = {Reassessment of the Listeria monocytogenes pan-genome reveals dynamic integration hotspots and mobile genetic elements as major components of the accessory genome.}, journal = {BMC genomics}, volume = {14}, number = {}, pages = {47}, pmid = {23339658}, issn = {1471-2164}, mesh = {Adaptation, Physiological/genetics ; Animals ; Conserved Sequence ; DNA Transposable Elements/genetics ; Evolution, Molecular ; Genetic Markers/genetics ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Genomics ; Humans ; Internet ; Interspersed Repetitive Sequences/*genetics ; Inverted Repeat 
Sequences/genetics ; Listeria monocytogenes/*genetics/pathogenicity/physiology/virology ; Models, Genetic ; Phylogeny ; Prophages/physiology ; RNA, Small Untranslated/genetics ; Rabbits ; Species Specificity ; }, abstract = {BACKGROUND: Listeria monocytogenes is an important food-borne pathogen and model organism for host-pathogen interaction, thus representing an invaluable target considering research on the forces governing the evolution of such microbes. The diversity of this species has not been exhaustively explored yet, as previous efforts have focused on analyses of serotypes primarily implicated in human listeriosis. We conducted complete genome sequencing of 11 strains employing 454 GS FLX technology, thereby achieving full coverage of all serotypes including the first complete strains of serotypes 1/2b, 3c, 3b, 4c, 4d, and 4e. These were comparatively analyzed in conjunction with publicly available data and assessed for pathogenicity in the Galleria mellonella insect model.

RESULTS: The species pan-genome of L. monocytogenes is highly stable but open, suggesting an ability to adapt to new niches by generating or including new genetic information. The majority of gene-scale differences represented by the accessory genome resulted from nine hyper variable hotspots, a similar number of different prophages, three transposons (Tn916, Tn554, IS3-like), and two mobilizable islands. Only a subset of strains showed CRISPR/Cas bacteriophage resistance systems of different subtypes, suggesting a supplementary function in maintenance of chromosomal stability. Multiple phylogenetic branches of the genus Listeria imply long common histories of strains of each lineage as revealed by a SNP-based core genome tree highlighting the impact of small mutations for the evolution of species L. monocytogenes. Frequent loss or truncation of genes described to be vital for virulence or pathogenicity was confirmed as a recurring pattern, especially for strains belonging to lineages III and II. New candidate genes implicated in virulence function were predicted based on functional domains and phylogenetic distribution. A comparative analysis of small regulatory RNA candidates supports observations of a differential distribution of trans-encoded RNA, hinting at a diverse range of adaptations and regulatory impact.

CONCLUSIONS: This study determined commonly occurring hyper variable hotspots and mobile elements as primary effectors of quantitative gene-scale evolution of species L. monocytogenes, while gene decay and SNPs seem to represent major factors influencing long-term evolution. The discovery of common and disparately distributed genes considering lineages, serogroups, serotypes and strains of species L. monocytogenes will assist in diagnostic, phylogenetic and functional research, supported by the comparative genomic GECO-LisDB analysis server (http://bioinfo.mikrobio.med.uni-giessen.de/geco2lisdb).}, } @article {pmid23324384, year = {2013}, author = {Schödel, J and Mole, DR and Ratcliffe, PJ}, title = {Pan-genomic binding of hypoxia-inducible transcription factors.}, journal = {Biological chemistry}, volume = {394}, number = {4}, pages = {507-517}, doi = {10.1515/hsz-2012-0351}, pmid = {23324384}, issn = {1437-4315}, mesh = {Animals ; Chromatin Immunoprecipitation ; Humans ; Hypoxia/*metabolism ; Hypoxia-Inducible Factor 1/metabolism ; Models, Biological ; Transcription, Genetic/genetics ; }, abstract = {Hypoxia-inducible transcription factors (HIFs) mediate the cellular response to hypoxia. HIF-DNA binding triggers a transcriptional program that acts to both restore oxygen homeostasis and adapt cells to low oxygen availability. In this context, HIF is centrally involved in many physiologic and pathophysiological processes such as development, high altitude adaptation, ischemic disease, inflammation, and cancer. The recent development of chromatin immunoprecipitation coupled to genome-wide DNA sequence analysis allows the position and extent of HIF binding to DNA to be characterized across the entire genome and correlated with genetic, epigenetic, and transcriptional analyses. 
This review summarizes recent pan-genomic analyses of HIF binding and HIF-dependent transcriptional regulation.}, } @article {pmid23320838, year = {2013}, author = {Labrie, SJ and Frois-Moniz, K and Osburne, MS and Kelly, L and Roggensack, SE and Sullivan, MB and Gearin, G and Zeng, Q and Fitzgerald, M and Henn, MR and Chisholm, SW}, title = {Genomes of marine cyanopodoviruses reveal multiple origins of diversity.}, journal = {Environmental microbiology}, volume = {15}, number = {5}, pages = {1356-1376}, doi = {10.1111/1462-2920.12053}, pmid = {23320838}, issn = {1462-2920}, mesh = {Cyanobacteria/*virology ; DNA-Directed DNA Polymerase/genetics ; *Genetic Variation ; Genome, Viral/*genetics ; Genomic Islands/genetics ; Metagenomics ; Oceans and Seas ; *Phylogeny ; Podoviridae/*classification/*genetics ; Prochlorococcus/virology ; Seawater/*microbiology ; Sequence Alignment ; Synechococcus/virology ; }, abstract = {The marine cyanobacteria Prochlorococcus and Synechococcus are highly abundant in the global oceans, as are the cyanophage with which they co-evolve. While genomic analyses have been relatively extensive for cyanomyoviruses, only three cyanopodoviruses isolated on marine cyanobacteria have been sequenced. Here we present nine new cyanopodovirus genomes, and analyse them in the context of the broader group. The genomes range from 42.2 to 47.7 kb, with G+C contents consistent with those of their hosts. They share 12 core genes, and the pan-genome is not close to being fully sampled. The genomes contain three variable island regions, with the most hypervariable genes concentrated at one end of the genome. Concatenated core-gene phylogeny clusters all but one of the phage into three distinct groups (MPP-A and two discrete clades within MPP-B). The outlier, P-RSP2, has the smallest genome and lacks RNA polymerase, a hallmark of the Autographivirinae subfamily. 
The phage in group MPP-B contain photosynthesis and carbon metabolism associated genes, while group MPP-A and the outlier P-RSP2 do not, suggesting different constraints on their lytic cycles. Four of the phage encode integrases and three have a host integration signature. Metagenomic analyses reveal that cyanopodoviruses may be more abundant in the oceans than previously thought.}, } @article {pmid23319441, year = {2013}, author = {Jones, N and Bonnet, F and Sfar, S and Lafitte, M and Lafon, D and Sierankowski, G and Brouste, V and Banneau, G and Tunon de Lara, C and Debled, M and MacGrogan, G and Longy, M and Sevenet, N}, title = {Comprehensive analysis of PTEN status in breast carcinomas.}, journal = {International journal of cancer}, volume = {133}, number = {2}, pages = {323-334}, doi = {10.1002/ijc.28021}, pmid = {23319441}, issn = {1097-0215}, mesh = {Alleles ; Carcinoma, Ductal, Breast/*genetics/*metabolism ; Chromosomal Instability ; Chromosomes/ultrastructure ; Class I Phosphatidylinositol 3-Kinases ; DNA Mutational Analysis ; Female ; *Gene Expression Regulation, Neoplastic ; Humans ; Immunohistochemistry/methods ; In Situ Hybridization, Fluorescence ; Lymph Nodes/pathology ; PTEN Phosphohydrolase/*genetics ; Phosphatidylinositol 3-Kinases/metabolism ; Point Mutation ; Prognosis ; Receptor, ErbB-2/metabolism ; Receptors, Estrogen/metabolism ; Tumor Suppressor Protein p53/metabolism ; }, abstract = {PTEN plays a well-established role in the negative regulation of the PI3K pathway, which is frequently activated in several cancer types, including breast cancer. A nuclear function in the maintenance of chromosomal stability has been proposed for PTEN but is yet to be clearly defined. In order to improve understanding of the role of PTEN in mammary tumorigenesis in terms of a possible gene dosage effect, its PI3K pathway function and its association with p53, we undertook comprehensive analysis of PTEN status in 135 sporadic invasive ductal carcinomas. 
Four PTEN status groups were defined; complete loss (19/135, 14\%), reduced copy number (19/135, 14\%), normal (86/135, 64\%) and complex (11/135, 8\%). Whereas the PTEN complete loss status was significantly associated with estrogen receptor (ER) negativity (p=0.006) and in particular the basal-like phenotype (p<0.0001), a reduced PTEN copy number was not associated with hormone receptor status or a particular breast cancer subtype. Overall, PI3K pathway alteration was suggested to be involved in 59\% (79/134) of tumors as assessed by human epidermal growth factor receptor 2 overexpression, PIK3CA mutation or a complete loss of PTEN. A complex PTEN status was identified in a tumor subgroup which displayed a specific, complex DNA profile at the PTEN locus with a strikingly similar highly rearranged pan-genomic profile. All of these tumors had relapsed and were associated with a poorer prognosis in the context of node negative disease (p=$1.4 \times 10^{-13}$) thus may represent a tumor subgroup with a common molecular alteration which could be targeted to improve clinical outcome.}, } @article {pmid23315380, year = {2013}, author = {Lobkovsky, AE and Wolf, YI and Koonin, EV}, title = {Gene frequency distributions reject a neutral model of genome evolution.}, journal = {Genome biology and evolution}, volume = {5}, number = {1}, pages = {233-242}, pmid = {23315380}, issn = {1759-6653}, mesh = {Archaea/genetics ; Bacteria/genetics ; *Evolution, Molecular ; *Gene Frequency ; *Genome, Archaeal ; *Genome, Bacterial ; *Models, Genetic ; Selection, Genetic ; }, abstract = {Evolution of prokaryotes involves extensive loss and gain of genes, which lead to substantial differences in the gene repertoires even among closely related organisms. Through a wide range of phylogenetic depths, gene frequency distributions in prokaryotic pangenomes bear a characteristic, asymmetrical U-shape, with a core of (nearly) universal genes, a "shell" of moderately common genes, and a "cloud" of rare genes. 
We employ mathematical modeling to investigate evolutionary processes that might underlie this universal pattern. Gene frequency distributions for almost 400 groups of 10 bacterial or archaeal species each over a broad range of evolutionary distances were fit to steady-state, infinite allele models based on the distribution of gene replacement rates and the phylogenetic tree relating the species in each group. The fits of the theoretical frequency distributions to the empirical ones yield model parameters and estimates of the goodness of fit. Using the Akaike Information Criterion, we show that the neutral model of genome evolution, with the same replacement rate for all genes, can be confidently rejected. Of the three tested models with purifying selection, the one in which the distribution of replacement rates is derived from a stochastic population model with additive per-gene fitness yields the best fits to the data. The selection strength estimated from the fits declines with evolutionary divergence while staying well outside the neutral regime. These findings indicate that, unlike some other universal distributions of genomic variables, for example, the distribution of paralogous gene family membership, the gene frequency distribution is substantially affected by selection.}, } @article {pmid25580216, year = {2013}, author = {Marin, MA and Vicente, AC}, title = {Architecture of the superintegron in Vibrio cholerae: identification of core and unique genes.}, journal = {F1000Research}, volume = {2}, number = {}, pages = {63}, pmid = {25580216}, issn = {2046-1402}, abstract = {BACKGROUND: Vibrio cholerae, the etiologic agent of cholera, is indigenous to aquatic environments. The V. cholerae genome consists of two chromosomes; the smallest of these harbors a large gene capture and excision system called the superintegron (SI), of \textasciitilde{}120 kbp. 
The flexible nature of the SI that results from gene cassette capture, deletion and rearrangement is thought to make it a hotspot of V. cholerae diversity, but beyond the basic structure it is not clear if there is a core genome in the SI and if so how it is structured. The aim of this study was to explore the core genome structure and the differences in gene content among strains of V. cholerae.

METHODS: From the complete genomes of seven V. cholerae and one Vibrio mimicus representative strains, we recovered the SI sequences based on the locations of the structural gene IntI4 and the V. cholerae repeats. Analysis of the pangenome, including cluster analysis of functional genes, pangenome profile analysis, genetic variation analysis of functional genes, strain evolution analysis and function enrichment analysis of gene clusters, was performed using a pangenome analysis pipeline in addition to the R scripts, splitsTree4 and genoPlotR.

RESULTS AND CONCLUSIONS: Here, we reveal the genetic architecture of the V. cholerae SI. It contains eight core genes when V. mimicus is included and 21 core genes when only V. cholerae strains are considered; many of them are present in several copies. The V. cholerae SI has an open pangenome, which means that V. cholerae may be able to import new gene cassettes to SI. The set of dispensable SI genes is influenced by the niche and type species. The core genes are distributed along the SI, apparently without a position effect.}, } @article {pmid23241446, year = {2012}, author = {Wolf, YI and Makarova, KS and Yutin, N and Koonin, EV}, title = {Updated clusters of orthologous genes for Archaea: a complex ancestor of the Archaea and the byways of horizontal gene transfer.}, journal = {Biology direct}, volume = {7}, number = {}, pages = {46}, pmid = {23241446}, issn = {1745-6150}, support = {//Intramural NIH HHS/United States ; }, mesh = {Archaea/*genetics ; Archaeal Proteins/genetics ; Cluster Analysis ; Databases, Protein ; *Evolution, Molecular ; *Gene Transfer, Horizontal ; Genes, Archaeal ; *Genome, Archaeal ; Genomics ; Multigene Family ; Phylogeny ; Sequence Alignment ; }, abstract = {BACKGROUND: Collections of Clusters of Orthologous Genes (COGs) provide indispensable tools for comparative genomic analysis, evolutionary reconstruction and functional annotation of new genomes. Initially, COGs were made for all complete genomes of cellular life forms that were available at the time. However, with the accumulation of thousands of complete genomes, construction of a comprehensive COG set has become extremely computationally demanding and prone to error propagation, necessitating the switch to taxon-specific COG collections. Previously, we reported the collection of COGs for 41 genomes of Archaea (arCOGs). Here we present a major update of the arCOGs and describe evolutionary reconstructions to reveal general trends in the evolution of Archaea.

RESULTS: The updated version of the arCOG database incorporates 91\% of the pangenome of 120 archaea (251,032 protein-coding genes altogether) into 10,335 arCOGs. Using this new set of arCOGs, we performed maximum likelihood reconstruction of the genome content of archaeal ancestral forms and gene gain and loss events in archaeal evolution. This reconstruction shows that the last Common Ancestor of the extant Archaea was an organism of greater complexity than most of the extant archaea, probably with over 2,500 protein-coding genes. The subsequent evolution of almost all archaeal lineages was apparently dominated by gene loss resulting in genome streamlining. Overall, in the evolution of Archaea as well as a representative set of bacteria that was similarly analyzed for comparison, gene losses are estimated to outnumber gene gains at least 4 to 1. Analysis of specific patterns of gene gain in Archaea shows that, although some groups, in particular Halobacteria, acquire substantially more genes than others, on the whole, gene exchange between major groups of Archaea appears to be largely random, with no major 'highways' of horizontal gene transfer.

CONCLUSIONS: The updated collection of arCOGs is expected to become a key resource for comparative genomics, evolutionary reconstruction and functional annotation of new archaeal genomes. Given that, in spite of the major increase in the number of genomes, the conserved core of archaeal genes appears to be stabilizing, the major evolutionary trends revealed here have a chance to stand the test of time.

REVIEWERS: This article was reviewed by (for complete reviews see the Reviewers' Reports section): Dr. PLG, Prof. PF, Dr. PL (nominated by Prof. JPG).}, } @article {pmid23236030, year = {2013}, author = {Dieterich, K and Quijano-Roy, S and Monnier, N and Zhou, J and Fauré, J and Smirnow, DA and Carlier, R and Laroche, C and Marcorelles, P and Mercier, S and Mégarbané, A and Odent, S and Romero, N and Sternberg, D and Marty, I and Estournet, B and Jouk, PS and Melki, J and Lunardi, J}, title = {The neuronal endopeptidase ECEL1 is associated with a distinct form of recessive distal arthrogryposis.}, journal = {Human molecular genetics}, volume = {22}, number = {8}, pages = {1483-1492}, doi = {10.1093/hmg/dds514}, pmid = {23236030}, issn = {1460-2083}, mesh = {Animals ; Arthrogryposis/embryology/*genetics/pathology ; Central Nervous System/pathology ; Chromosome Mapping ; Consanguinity ; Embryonic Development/*genetics ; Genes, Recessive ; Genetic Linkage ; Homozygote ; Humans ; Metalloendopeptidases/*genetics ; Mice ; Motor Neurons/pathology ; Mutation ; Pedigree ; Phenotype ; }, abstract = {Distal arthrogryposis (DA) is a heterogeneous subgroup of arthrogryposis multiplex congenita (AMC), a large family of disorders characterized by multiple congenital joint limitations due to reduced fetal movements. DA is mainly characterized by contractures afflicting especially the distal extremities without overt muscular or neurological signs. Although a limited number of genes mostly implicated in the contractile apparatus have been identified in DA, most patients failed to show mutations in currently known genes. Using a pangenomic approach, we demonstrated linkage of DA to chromosome 2q37 in two consanguineous families and the endothelin-converting enzyme like 1 (ECEL1) gene present in this region was associated with DA. 
Screening of a panel of 20 families with non-specific DA identified seven homozygous or compound heterozygous mutations of ECEL1 in a total of six families. Mutations resulted mostly in the absence of protein. ECEL1 is a neuronal endopeptidase predominantly expressed in the central nervous system and brain structures during fetal life in mice and human. ECEL1 plays a major role in intramuscular axonal branching of motor neurons in skeletal muscle during embryogenesis. A detailed review of clinical findings of DA patients with ECEL1 mutations revealed a homogeneous and recognizable phenotype characterized by limited knee flexion, flexed third to fifth fingers and severe muscle atrophy predominant on lower limbs and tongue that suggested a common pathogenic mechanism. We described a new and homogenous phenotype of DA associated with ECEL1 that resulted in symptoms involving rather the peripheral than the central nervous system and suggesting a developmental dysfunction.}, } @article {pmid23228887, year = {2013}, author = {Cornejo, OE and Lefébure, T and Bitar, PD and Lang, P and Richards, VP and Eilertson, K and Do, T and Beighton, D and Zeng, L and Ahn, SJ and Burne, RA and Siepel, A and Bustamante, CD and Stanhope, MJ}, title = {Evolutionary and population genomics of the cavity causing bacteria Streptococcus mutans.}, journal = {Molecular biology and evolution}, volume = {30}, number = {4}, pages = {881-893}, pmid = {23228887}, issn = {1537-1719}, support = {R01 AI073368/AI/NIAID NIH HHS/United States ; AI073368/AI/NIAID NIH HHS/United States ; }, mesh = {Adaptation, Biological/genetics ; Carbohydrate Metabolism/genetics ; Dental Caries/microbiology ; *Evolution, Molecular ; Gene Frequency ; Genome, Bacterial ; Humans ; Likelihood Functions ; Linkage Disequilibrium ; *Metagenomics ; Models, Genetic ; Polymorphism, Single Nucleotide ; Recombination, Genetic ; Selection, Genetic ; Streptococcus mutans/*genetics ; }, abstract = {Streptococcus mutans is widely 
recognized as one of the key etiological agents of human dental caries. Despite its role in this important disease, our present knowledge of gene content variability across the species and its relationship to adaptation is minimal. Estimates of its demographic history are not available. In this study, we generated genome sequences of 57 S. mutans isolates, as well as representative strains of the most closely related species to S. mutans (S. ratti, S. macaccae, and S. criceti), to identify the overall structure and potential adaptive features of the dispensable and core components of the genome. We also performed population genetic analyses on the core genome of the species aimed at understanding the demographic history, and impact of selection shaping its genetic variation. The maximum gene content divergence among strains was approximately 23\%, with the majority of strains diverging by 5-15\%. The core genome consisted of 1,490 genes and the pan-genome approximately 3,296. Maximum likelihood analysis of the synonymous site frequency spectrum (SFS) suggested that the S. mutans population started expanding exponentially approximately 10,000 years ago (95\% confidence interval [CI]: 3,268-14,344 years ago), coincidental with the onset of human agriculture. Analysis of the replacement SFS indicated that a majority of these substitutions are under strong negative selection, and the remainder evolved neutrally. A set of 14 genes was identified as being under positive selection, most of which were involved in either sugar metabolism or acid tolerance. Analysis of the core genome suggested that among 73 genes present in all isolates of S. mutans but absent in other species of the mutans taxonomic group, the majority can be associated with metabolic processes that could have contributed to the successful adaptation of S. 
mutans to its new niche, the human mouth, and with the dietary changes that accompanied the origin of agriculture.}, } @article {pmid23194436, year = {2013}, author = {Huang, Y and Kittichotirat, W and Mayer, MP and Hall, R and Bumgarner, R and Chen, C}, title = {Comparative genomic hybridization and transcriptome analysis with a pan-genome microarray reveal distinctions between JP2 and non-JP2 genotypes of Aggregatibacter actinomycetemcomitans.}, journal = {Molecular oral microbiology}, volume = {28}, number = {1}, pages = {1-17}, pmid = {23194436}, issn = {2041-1014}, support = {R01 DE012212/DE/NIDCR NIH HHS/United States ; R01 DE12212/DE/NIDCR NIH HHS/United States ; }, mesh = {Adolescent ; Adult ; Aggregatibacter actinomycetemcomitans/*genetics/pathogenicity ; Bacterial Outer Membrane Proteins/genetics ; Bacterial Toxins/genetics ; Child ; Chromosome Mapping ; Comparative Genomic Hybridization/*methods ; Exotoxins/genetics ; Gene Expression Profiling/*methods ; Gene Expression Regulation, Bacterial/genetics ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Genotype ; Humans ; Microarray Analysis/*methods ; Middle Aged ; Multigene Family/genetics ; Nitrate Reductase/genetics ; Operon/genetics ; Promoter Regions, Genetic/genetics ; Sensitivity and Specificity ; Serotyping ; Transcriptome/genetics ; Virulence/genetics ; Young Adult ; }, abstract = {It was postulated that the highly virulent JP2 genotype of Aggregatibacter actinomycetemcomitans may possess a constellation of distinct virulence determinants not found in non-JP2 genotypes. This study compared the genome content and the transcriptome of the serotype b JP2 genotype and the closely related serotype b non-JP2 genotype of A. actinomycetemcomitans. A custom-designed pan-genomic microarray of A. actinomycetemcomitans was constructed and validated against a panel of 11 sequenced reference strains. 
The microarray was subsequently used for comparative genomic hybridization of serotype b strains of JP2 (six strains) and non-JP2 (six strains) genotypes, and for transcriptome analysis of strains of JP2 (three strains) and non-JP2 (two strains). Two JP2-specific and two non-JP2-specific genomic islands were identified. In one instance, distinct genomic islands were found to be inserted into the same locus among strains of different genotypes. Transcriptome analysis identified five operons, including the leukotoxin operon, to have at least two genes with an expression ratio of 2 or greater between genotypes. Two of the differentially expressed operons were members of the membrane-bound nitrate reductase system (nap operon) and the Tol-Pal system of gram-negative bacterial species. This study is the first to demonstrate the differences in the full genome content and gene expression between A. actinomycetemcomitans strains of JP2 and non-JP2 genotypes. The information is essential for designing hypothesis-driven experiments to examine the pathogenic mechanisms of A. actinomycetemcomitans.}, } @article {pmid23189144, year = {2012}, author = {Nogueira, T and Touchon, M and Rocha, EP}, title = {Rapid evolution of the sequences and gene repertoires of secreted proteins in bacteria.}, journal = {PloS one}, volume = {7}, number = {11}, pages = {e49403}, pmid = {23189144}, issn = {1932-6203}, mesh = {Bacteria/*genetics/metabolism ; Bacterial Outer Membrane Proteins/genetics ; Bacterial Proteins/*chemistry/*genetics/metabolism ; DNA Transposable Elements ; *Evolution, Molecular ; Genome, Bacterial ; Multigene Family ; Protein Transport ; Virulence Factors ; }, abstract = {Proteins secreted to the extracellular environment or to the periphery of the cell envelope, the secretome, play essential roles in foraging, antagonistic and mutualistic interactions. 
We hypothesize that arms races, genetic conflicts and varying selective pressures should lead to the rapid change of sequences and gene repertoires of the secretome. The analysis of 42 bacterial pan-genomes shows that secreted, and especially extracellular proteins, are predominantly encoded in the accessory genome, i.e. among genes not ubiquitous within the clade. Genes encoding outer membrane proteins might engage more frequently in intra-chromosomal gene conversion because they are more often in multi-genic families. The gene sequences encoding the secretome evolve faster than the rest of the genome and in particular at non-synonymous positions. Cell wall proteins in Firmicutes evolve particularly fast when compared with outer membrane proteins of Proteobacteria. Virulence factors are over-represented in the secretome, notably in outer membrane proteins, but cell localization explains more of the variance in substitution rates and gene repertoires than sequence homology to known virulence factors. Accordingly, the repertoires and sequences of the genes encoding the secretome change fast in the clades of obligatory and facultative pathogens and also in the clades of mutualists and free-living bacteria. Our study shows that cell localization shapes genome evolution. In agreement with our hypothesis, the repertoires and the sequences of genes encoding secreted proteins evolve fast. 
The particularly rapid change of extracellular proteins suggests that these public goods are key players in bacterial adaptation.}, } @article {pmid23127486, year = {2013}, author = {Zarrilli, R and Pournaras, S and Giannouli, M and Tsakris, A}, title = {Global evolution of multidrug-resistant Acinetobacter baumannii clonal lineages.}, journal = {International journal of antimicrobial agents}, volume = {41}, number = {1}, pages = {11-19}, doi = {10.1016/j.ijantimicag.2012.09.008}, pmid = {23127486}, issn = {1872-7913}, mesh = {Acinetobacter Infections/*epidemiology/microbiology ; Acinetobacter baumannii/*classification/drug effects/genetics/*isolation & purification ; Cross Infection/epidemiology/microbiology ; *Drug Resistance, Multiple, Bacterial ; Evolution, Molecular ; Genotype ; Global Health ; Humans ; Molecular Epidemiology ; *Molecular Typing ; }, abstract = {The rapid expansion of Acinetobacter baumannii clinical isolates exhibiting resistance to carbapenems and most or all available antibiotics during the last decade is a worrying evolution. The apparent predominance of a few successful multidrug-resistant lineages worldwide underlines the importance of elucidating the mode of spread and the epidemiology of A. baumannii isolates in single hospitals, at a country-wide level and on a global scale. The evolutionary advantage of the dominant clonal lineages relies on the capability of the A. baumannii pangenome to incorporate resistance determinants. In particular, the simultaneous presence of divergent strains of the international clone II and their increasing prevalence in international hospitals further support the ongoing adaptation of this lineage to the hospital environment. Indeed, genomic and genetic studies have elucidated the role of mobile genetic elements in the transfer of antibiotic resistance genes and substantiate the rate of genetic alterations associated with acquisition in A. 
baumannii of various resistance genes, including OXA- and metallo-β-lactamase-type carbapenemase genes. The significance of single nucleotide polymorphisms and transposon mutagenesis in the evolution of A. baumannii has been also documented. Establishment of a network of reference laboratories in different countries would generate a more complete picture and a fuller understanding of the importance of high-risk A. baumannii clones in the international dissemination of antibiotic resistance.}, } @article {pmid23115039, year = {2013}, author = {Kansal, R and Rasko, DA and Sahl, JW and Munson, GP and Roy, K and Luo, Q and Sheikh, A and Kuhne, KJ and Fleckenstein, JM}, title = {Transcriptional modulation of enterotoxigenic Escherichia coli virulence genes in response to epithelial cell interactions.}, journal = {Infection and immunity}, volume = {81}, number = {1}, pages = {259-270}, pmid = {23115039}, issn = {1098-5522}, support = {I01 BX001469/BX/BLRD VA/United States ; R01 AI089894/AI/NIAID NIH HHS/United States ; 1R01AI089894-01/AI/NIAID NIH HHS/United States ; }, mesh = {Adhesins, Bacterial/genetics/immunology/metabolism ; Antigens, Surface/genetics/immunology/metabolism ; Caco-2 Cells ; Cell Line, Tumor ; Cyclic GMP/analogs & derivatives/genetics/immunology/metabolism ; DNA-Binding Proteins/genetics/immunology/metabolism ; Enterotoxigenic Escherichia coli/*genetics/immunology/*pathogenicity ; Epithelial Cells/*immunology/metabolism/*microbiology ; Escherichia coli Infections/genetics/immunology/metabolism/microbiology ; Escherichia coli Proteins/genetics/immunology/metabolism ; Gene Expression/genetics/immunology ; Host-Pathogen Interactions/genetics/immunology ; Humans ; Intestinal Mucosa/metabolism ; Intestines/immunology/microbiology ; Promoter Regions, Genetic/genetics/immunology ; Receptors, Cyclic AMP/genetics/immunology/metabolism ; Transcription, Genetic/genetics/immunology ; Transcriptome/genetics/immunology ; Virulence ; }, abstract = {Enterotoxigenic 
Escherichia coli (ETEC) strains are a leading cause of morbidity and mortality due to diarrheal illness in developing countries. There is currently no effective vaccine against these important pathogens. Because genes modulated by pathogen-host interactions potentially encode putative vaccine targets, we investigated changes in gene expression and surface morphology of ETEC upon interaction with intestinal epithelial cells in vitro. Pan-genome microarrays, quantitative reverse transcriptase PCR (qRT-PCR), and transcriptional reporter fusions of selected promoters were used to study changes in ETEC transcriptomes. Flow cytometry, immunofluorescence microscopy, and scanning electron microscopy were used to investigate alterations in surface antigen expression and morphology following pathogen-host interactions. Following host cell contact, genes for motility, adhesion, toxin production, immunodominant peptides, and key regulatory molecules, including cyclic AMP (cAMP) receptor protein (CRP) and c-di-GMP, were substantially modulated. These changes were accompanied by visible changes in both ETEC architecture and the expression of surface antigens, including a novel highly conserved adhesin molecule, EaeH. The studies reported here suggest that pathogen-host interactions are finely orchestrated by ETEC and are characterized by coordinated responses involving the sequential deployment of multiple virulence molecules. 
Elucidation of the molecular details of these interactions could highlight novel strategies for development of vaccines for these important pathogens.}, } @article {pmid23114024, year = {2012}, author = {Kaas, RS and Friis, C and Ussery, DW and Aarestrup, FM}, title = {Estimating variation within the genes and inferring the phylogeny of 186 sequenced diverse Escherichia coli genomes.}, journal = {BMC genomics}, volume = {13}, number = {}, pages = {577}, pmid = {23114024}, issn = {1471-2164}, mesh = {*Bacterial Typing Techniques ; Base Sequence ; Chromosome Mapping ; Escherichia coli/*genetics ; Genetic Variation ; Genome, Bacterial/*genetics ; Genomics ; Multigene Family ; *Multilocus Sequence Typing ; Phylogeny ; Sequence Alignment ; Sequence Analysis, DNA ; Shigella/genetics ; }, abstract = {BACKGROUND: Escherichia coli exists in commensal and pathogenic forms. By measuring the variation of individual genes across more than a hundred sequenced genomes, gene variation can be studied in detail, including the number of mutations found for any given gene. This knowledge will be useful for creating better phylogenies, for determination of molecular clocks and for improved typing techniques.

RESULTS: We find 3,051 gene clusters/families present in at least 95% of the genomes and 1,702 gene clusters present in 100% of the genomes. The former 'soft core' of about 3,000 gene families is perhaps more biologically relevant, especially considering that many of these genome sequences are draft quality. The E. coli pan-genome for this set of isolates contains 16,373 gene clusters. A core-gene tree, based on alignment and a pan-genome tree based on gene presence/absence, maps the relatedness of the 186 sequenced E. coli genomes. The core-gene tree displays high confidence and divides the E. coli strains into the observed MLST type clades and also separates defined phylotypes.

CONCLUSION: The results of comparing a large and diverse E. coli dataset support the theory that reliable and good resolution phylogenies can be inferred from the core-genome. The results further suggest that the resolution at the isolate level may, subsequently be improved by targeting more variable genes. The use of whole genome sequencing will make it possible to eliminate, or at least reduce, the need for several typing steps used in traditional epidemiology.}, } @article {pmid23095951, year = {2012}, author = {Santos, AR and Carneiro, A and Gala-García, A and Pinto, A and Barh, D and Barbosa, E and Aburjaile, F and Dorella, F and Rocha, F and Guimarães, L and Zurita-Turk, M and Ramos, R and Almeida, S and Soares, S and Pereira, U and Abreu, VC and Silva, A and Miyoshi, A and Azevedo, V}, title = {The Corynebacterium pseudotuberculosis in silico predicted pan-exoproteome.}, journal = {BMC genomics}, volume = {13 Suppl 5}, number = {Suppl 5}, pages = {S6}, pmid = {23095951}, issn = {1471-2164}, mesh = {Bacterial Vaccines/genetics ; Corynebacterium pseudotuberculosis/*genetics ; Genes/*genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; Membrane Proteins/genetics ; Proteome/*genetics ; Software ; }, abstract = {BACKGROUND: Pan-genomic studies aim, for instance, at defining the core, dispensable and unique genes within a species. A pan-genomics study for vaccine design tries to assess the best candidates for a vaccine against a specific pathogen. In this context, rather than studying genes predicted to be exported in a single genome, with pan-genomics it is possible to study genes present in different strains within the same species, such as virulence factors. The target organism of this pan-genomic work here presented is Corynebacterium pseudotuberculosis, the etiologic agent of caseous lymphadenitis (CLA) in goat and sheep, which causes significant economic losses in those herds around the world. 
Currently, only a few antigens against CLA are known as being the basis of commercial and still ineffective vaccines. In this regard, the here presented work analyses, in silico, five C. pseudotuberculosis genomes and gathers data to predict common exported proteins in all five genomes. These candidates were also compared to two recent C. pseudotuberculosis in vitro exoproteome results.

RESULTS: The complete genomes of five C. pseudotuberculosis strains (1002, C231, I19, FRC41 and PAT10) were submitted to pan-genomics analysis, yielding 306, 59 and 12 gene sets, respectively, representing the core, dispensable and unique in silico predicted exported pan-genomes. These sets bear 150 genes classified as secreted (SEC) and 227 as potentially surface exposed (PSE). Our findings suggest that the main C. pseudotuberculosis in vitro exoproteome could be greater, appended by a fraction of the 35 proteins formerly predicted as making part of the variant in vitro exoproteome. These genomes were manually curated for correct methionine initiation and redeposited with a total of 1885 homogenized genes.

CONCLUSIONS: The in silico prediction of exported proteins has allowed to define a list of putative vaccine candidate genes present in all five complete C. pseudotuberculosis genomes. Moreover, it has also been possible to define the in silico predicted dispensable and unique C. pseudotuberculosis exported proteins. These results provide in silico evidence to further guide experiments in the areas of vaccines, diagnosis and drugs. The work here presented is the first whole C. pseudotuberculosis in silico predicted pan-exoproteome completed till today.}, } @article {pmid23082106, year = {2012}, author = {Williams, TM and Loman, NJ and Ebruke, C and Musher, DM and Adegbola, RA and Pallen, MJ and Weinstock, GM and Antonio, M}, title = {Genome analysis of a highly virulent serotype 1 strain of Streptococcus pneumoniae from West Africa.}, journal = {PloS one}, volume = {7}, number = {10}, pages = {e26742}, pmid = {23082106}, issn = {1932-6203}, support = {MC_U190074190/MRC_/Medical Research Council/United Kingdom ; U54 HG004968/HG/NHGRI NIH HHS/United States ; U54HG003079/HG/NHGRI NIH HHS/United States ; U54HG004968/HG/NHGRI NIH HHS/United States ; MC_U190081991/MRC_/Medical Research Council/United Kingdom ; U54 HG003079/HG/NHGRI NIH HHS/United States ; }, mesh = {Africa, Western ; Animals ; Bacterial Proteins/genetics/metabolism ; Chromosomes, Bacterial/genetics ; Conserved Sequence/genetics ; Disease Models, Animal ; Female ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Humans ; Mice ; Polymorphism, Single Nucleotide/genetics ; Prophages/genetics ; Serotyping ; Streptococcal Vaccines/immunology ; Streptococcus pneumoniae/classification/*genetics/isolation & purification/*pathogenicity ; Survival Analysis ; Virulence/genetics ; Virulence Factors/metabolism ; }, abstract = {Streptococcus pneumoniae is a leading cause of pneumonia, meningitis, and bacteremia, estimated to cause 2 million deaths annually. 
The majority of pneumococcal mortality occurs in developing countries, with serotype 1 a leading cause in these areas. To begin to better understand the larger impact that serotype 1 strains have in developing countries, we characterized virulence and genetic content of PNI0373, a serotype 1 strain from a diseased patient in The Gambia. PNI0373 and another African serotype 1 strain showed high virulence in a mouse intraperitoneal challenge model, with 20% survival at a dose of 1 cfu. The PNI0373 genome sequence was similar in structure to other pneumococci, with the exception of a 100 kb inversion. PNI0373 showed only 15 lineage specific CDS when compared to the pan-genome of pneumococcus. However analysis of non-core orthologs of pneumococcal genomes, showed serotype 1 strains to be closely related. Three regions were found to be serotype 1 associated and likely products of horizontal gene transfer. A detailed inventory of known virulence factors showed that some functions associated with colonization were absent, consistent with the observation that carriage of this highly virulent serotype is unusual. The African serotype 1 strains thus appear to be closely related to each other and different from other pneumococci despite similar genetic content.}, } @article {pmid23077583, year = {2012}, author = {Manzano-Marín, A and Lamelas, A and Moya, A and Latorre, A}, title = {Comparative genomics of Serratia spp.: two paths towards endosymbiotic life.}, journal = {PloS one}, volume = {7}, number = {10}, pages = {e47274}, pmid = {23077583}, issn = {1932-6203}, mesh = {Animals ; Aphids/microbiology ; Buchnera/*genetics/growth & development ; Evolution, Molecular ; *Genome, Bacterial ; Genomics ; Phylogeny ; Serratia/*genetics/growth & development ; Symbiosis/*genetics ; }, abstract = {Symbiosis is a widespread phenomenon in nature, in which insects show a great number of these associations. 
Buchnera aphidicola, the obligate endosymbiont of aphids, coexists in some species with another intracellular bacterium, Serratia symbiotica. Of particular interest is the case of the cedar aphid Cinara cedri, where B. aphidicola BCc and S. symbiotica SCc need each other to fulfil their symbiotic role with the insect. Moreover, various features seem to indicate that S. symbiotica SCc is closer to an obligate endosymbiont than to other facultative S. symbiotica, such as the one described for the aphid Acyrthosiphon pisum (S. symbiotica SAp). This work is based on the comparative genomics of five strains of Serratia, three free-living and two endosymbiotic ones (one facultative and one obligate) which should allow us to dissect the genome reduction taking place in the adaptive process to an intracellular life-style. Using a pan-genome approach, we have identified shared and strain-specific genes from both endosymbiotic strains and gained insight into the different genetic reduction both S. symbiotica have undergone. We have identified both retained and reduced functional categories in S. symbiotica compared to the Free-Living Serratia (FLS) that seem to be related with its endosymbiotic role in their specific host-symbiont systems. By means of a phylogenomic reconstruction we have solved the position of both endosymbionts with confidence, established the probable insect-pathogen origin of the symbiotic clade as well as the high amino-acid substitution rate in S. symbiotica SCc. Finally, we were able to quantify the minimal number of rearrangements suffered in the endosymbiotic lineages and reconstruct a minimal rearrangement phylogeny. All these findings provide important evidence for the existence of at least two distinctive S. 
symbiotica lineages that are characterized by different rearrangements, gene content, genome size and branch lengths.}, } @article {pmid23051057, year = {2012}, author = {Park, J and Zhang, Y and Buboltz, AM and Zhang, X and Schuster, SC and Ahuja, U and Liu, M and Miller, JF and Sebaihia, M and Bentley, SD and Parkhill, J and Harvill, ET}, title = {Comparative genomics of the classical Bordetella subspecies: the evolution and exchange of virulence-associated diversity amongst closely related pathogens.}, journal = {BMC genomics}, volume = {13}, number = {}, pages = {545}, pmid = {23051057}, issn = {1471-2164}, support = {R01 AI053075/AI/NIAID NIH HHS/United States ; R01 GM083113/GM/NIGMS NIH HHS/United States ; 5R01GM083113/GM/NIGMS NIH HHS/United States ; 098051/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Biological Evolution ; Bordetella/classification/*genetics/*pathogenicity ; Chromosome Mapping ; *Gene Transfer, Horizontal ; Genetic Variation ; *Genome, Bacterial ; Genome-Wide Association Study ; Genomics ; Host Specificity ; Humans ; O Antigens/*genetics ; Pertussis Toxin/*genetics ; Phylogeny ; Polymorphism, Single Nucleotide ; Sheep ; Species Specificity ; Virulence ; Virulence Factors, Bordetella/*genetics ; }, abstract = {BACKGROUND: The classical Bordetella subspecies are phylogenetically closely related, yet differ in some of the most interesting and important characteristics of pathogens, such as host range, virulence and persistence. The compelling picture from previous comparisons of the three sequenced genomes was of genome degradation, with substantial loss of genome content (up to 24%) associated with adaptation to humans.

RESULTS: For a more comprehensive picture of lineage evolution, we employed comparative genomic and phylogenomic analyses using seven additional diverse, newly sequenced Bordetella isolates. Genome-wide single nucleotide polymorphism (SNP) analysis supports a reevaluation of the phylogenetic relationships between the classical Bordetella subspecies, and suggests a closer link between ovine and human B. parapertussis lineages than has been previously proposed. Comparative analyses of genome content revealed that only 50% of the pan-genome is conserved in all strains, reflecting substantial diversity of genome content in these closely related pathogens that may relate to their different host ranges, virulence and persistence characteristics. Strikingly, these analyses suggest possible horizontal gene transfer (HGT) events in multiple loci encoding virulence factors, including O-antigen and pertussis toxin (Ptx). Segments of the pertussis toxin locus (ptx) and its secretion system locus (ptl) appear to have been acquired by the classical Bordetella subspecies and are divergent in different lineages, suggesting functional divergence in the classical Bordetellae.

CONCLUSIONS: Together, these observations, especially in key virulence factors, reveal that multiple mechanisms, such as point mutations, gain or loss of genes, as well as HGTs, contribute to the substantial phenotypic diversity of these versatile subspecies in various hosts.}, } @article {pmid23049847, year = {2012}, author = {Wong, YF and Wilson, PD and Unwin, RJ and Norman, JT and Arno, M and Hendry, BM and Xu, Q}, title = {Retinoic acid receptor-dependent, cell-autonomous, endogenous retinoic acid signaling and its target genes in mouse collecting duct cells.}, journal = {PloS one}, volume = {7}, number = {9}, pages = {e45725}, pmid = {23049847}, issn = {1932-6203}, mesh = {Animals ; Cadherins/metabolism ; Cell Lineage ; Epithelial Cells/cytology ; Gene Expression Profiling ; Immunohistochemistry/methods ; Inflammation ; Kidney/embryology ; Kidney Tubules, Collecting/*metabolism ; Mice ; Models, Biological ; Oligonucleotide Array Sequence Analysis ; Pilot Projects ; Receptors, Retinoic Acid/*metabolism ; Response Elements ; Signal Transduction ; Tretinoin/metabolism ; Ureter/embryology ; Vitamin A/metabolism ; }, abstract = {BACKGROUND: Vitamin A is necessary for kidney development and has also been linked to regulation of solute and water homeostasis and to protection against kidney stone disease, infection, inflammation, and scarring. Most functions of vitamin A are mediated by its main active form, all-trans retinoic acid (tRA), which binds retinoic acid receptors (RARs) to modulate gene expression. We and others have recently reported that renal tRA/RAR activity is confined to the ureteric bud (UB) and collecting duct (CD) cell lineage, suggesting that endogenous tRA/RARs primarily act through regulating gene expression in these cells in embryonic and adult kidney, respectively.

To explore target genes of endogenous tRA/RARs, we employed the mIMCD-3 mouse inner medullary CD cell line, which is a model of CD principal cells and exhibits constitutive tRA/RAR activity as CD principal cells do in vivo. Combining antagonism of RARs, inhibition of tRA synthesis, exposure to exogenous tRA, and gene expression profiling techniques, we have identified 125 genes as candidate targets and validated 20 genes that were highly regulated (Dhrs3, Sprr1a, and Ppbp were the top three). Endogenous tRA/RARs were more important in maintaining, rather than suppressing, constitutive gene expression. Although many identified genes were expressed in UBs and/or CDs, their exact functions in this cell lineage are still poorly defined. Nevertheless, gene ontology analysis suggests that these genes are involved in kidney development, renal functioning, and regulation of tRA signaling.

CONCLUSIONS/SIGNIFICANCE: A rigorous approach to defining target genes for endogenous tRA/RARs has been established. At the pan-genomic level, genes regulated by endogenous tRA/RARs in a CD cell line have been catalogued for the first time. Such a catalogue will guide further studies on molecular mediators of endogenous tRA/RARs during kidney development and in relation to renal defects associated with vitamin A deficiency.}, } @article {pmid23035931, year = {2013}, author = {Busby, B and Kristensen, DM and Koonin, EV}, title = {Contribution of phage-derived genomic islands to the virulence of facultative bacterial pathogens.}, journal = {Environmental microbiology}, volume = {15}, number = {2}, pages = {307-312}, pmid = {23035931}, issn = {1462-2920}, support = {Z01 LM000073-12/ImNIH/Intramural NIH HHS/United States ; Z99 LM999999/ImNIH/Intramural NIH HHS/United States ; }, mesh = {Bacteria/*genetics/*pathogenicity ; Biological Evolution ; Genome, Bacterial/*genetics ; Genomic Islands/*genetics ; Prophages/*genetics ; Virulence/*genetics ; Virulence Factors/genetics ; }, abstract = {Facultative pathogens have extremely dynamic pan-genomes, to a large extent derived from bacteriophages and other mobile elements. We developed a simple approach to identify phage-derived genomic islands and apply it to show that pathogens from diverse bacterial genera are significantly enriched in clustered phage-derived genes compared with related benign strains. 
These findings show that genome expansion by integration of prophages containing virulence factors is a major route of evolution of facultative bacterial pathogens.}, } @article {pmid23035691, year = {2012}, author = {Broadbent, JR and Neeno-Eckwall, EC and Stahl, B and Tandee, K and Cai, H and Morovic, W and Horvath, P and Heidenreich, J and Perna, NT and Barrangou, R and Steele, JL}, title = {Analysis of the Lactobacillus casei supragenome and its influence in species evolution and lifestyle adaptation.}, journal = {BMC genomics}, volume = {13}, number = {}, pages = {533}, pmid = {23035691}, issn = {1471-2164}, mesh = {Adaptation, Physiological/*genetics ; *Biological Evolution ; Cluster Analysis ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Genomic Islands ; Lacticaseibacillus casei/*genetics ; Phylogeny ; }, abstract = {BACKGROUND: The broad ecological distribution of L. casei makes it an insightful subject for research on genome evolution and lifestyle adaptation. To explore evolutionary mechanisms that determine genomic diversity of L. casei, we performed comparative analysis of 17 L. casei genomes representing strains collected from dairy, plant, and human sources.

RESULTS: Differences in L. casei genome inventory revealed an open pan-genome comprised of 1,715 core and 4,220 accessory genes. Extrapolation of pan-genome data indicates L. casei has a supragenome approximately 3.2 times larger than the average genome of individual strains. Evidence suggests horizontal gene transfer from other bacterial species, particularly lactobacilli, has been important in adaptation of L. casei to new habitats and lifestyles, but evolution of dairy niche specialists also appears to involve gene decay.

CONCLUSIONS: Genome diversity in L. casei has evolved through gene acquisition and decay. Acquisition of foreign genomic islands likely confers a fitness benefit in specific habitats, notably plant-associated niches. Loss of unnecessary ancestral traits in strains collected from bacterial-ripened cheeses supports the hypothesis that gene decay contributes to enhanced fitness in that niche. This study gives the first evidence for a L. casei supragenome and provides valuable insights into mechanisms for genome evolution and lifestyle adaptation of this ecologically flexible and industrially important lactic acid bacterium. Additionally, our data confirm the Distributed Genome Hypothesis extends to non-pathogenic, ecologically flexible species like L. casei.}, } @article {pmid23029440, year = {2012}, author = {Sangwan, N and Lata, P and Dwivedi, V and Singh, A and Niharika, N and Kaur, J and Anand, S and Malhotra, J and Jindal, S and Nigam, A and Lal, D and Dua, A and Saxena, A and Garg, N and Verma, M and Kaur, J and Mukherjee, U and Gilbert, JA and Dowd, SE and Raman, R and Khurana, P and Khurana, JP and Lal, R}, title = {Comparative metagenomic analysis of soil microbial communities across three hexachlorocyclohexane contamination levels.}, journal = {PloS one}, volume = {7}, number = {9}, pages = {e46219}, pmid = {23029440}, issn = {1932-6203}, mesh = {Archaea/classification/*genetics/metabolism ; Bacteria/classification/*genetics/metabolism ; Bacterial Proteins/genetics ; Biodegradation, Environmental ; Chemotaxis/genetics ; Fusarium/*genetics/metabolism ; Gene Transfer, Horizontal ; Genes, Bacterial ; Hexachlorocyclohexane/*metabolism ; Lyases/genetics ; *Metagenomics ; Microbial Consortia/*genetics ; Plasmids/genetics ; RNA, Ribosomal, 16S/classification/genetics ; Sequence Analysis, DNA ; *Soil Microbiology ; Soil Pollutants/*metabolism ; }, abstract = {This paper presents the characterization of the microbial community responsible for the in-situ 
bioremediation of hexachlorocyclohexane (HCH). Microbial community structure and function was analyzed using 16S rRNA amplicon and shotgun metagenomic sequencing methods for three sets of soil samples. The three samples were collected from a HCH-dumpsite (450 mg HCH/g soil) and comprised of a HCH/soil ratio of 0.45, 0.0007, and 0.00003, respectively. Certain bacterial; (Chromohalobacter, Marinimicrobium, Idiomarina, Salinosphaera, Halomonas, Sphingopyxis, Novosphingobium, Sphingomonas and Pseudomonas), archaeal; (Halobacterium, Haloarcula and Halorhabdus) and fungal (Fusarium) genera were found to be more abundant in the soil sample from the HCH-dumpsite. Consistent with the phylogenetic shift, the dumpsite also exhibited a relatively higher abundance of genes coding for chemotaxis/motility, chloroaromatic and HCH degradation (lin genes). Reassembly of a draft pangenome of Chromohalobacter salaxigenes sp. (∼8X coverage) and 3 plasmids (pISP3, pISP4 and pLB1; 13X coverage) containing lin genes/clusters also provides an evidence for the horizontal transfer of HCH catabolism genes.}, } @article {pmid23028950, year = {2012}, author = {Liang, W and Zhao, Y and Chen, C and Cui, X and Yu, J and Xiao, J and Kan, B}, title = {Pan-genomic analysis provides insights into the genomic variation and evolution of Salmonella Paratyphi A.}, journal = {PloS one}, volume = {7}, number = {9}, pages = {e45346}, pmid = {23028950}, issn = {1932-6203}, mesh = {*Evolution, Molecular ; Genome, Bacterial/*genetics ; Genomics/*methods ; Multigene Family/genetics ; Phylogeny ; Polymorphism, Single Nucleotide/genetics ; Salmonella paratyphi A/*genetics ; }, abstract = {Salmonella Paratyphi A (S. Paratyphi A) is a highly adapted, human-specific pathogen that causes paratyphoid fever. Cases of paratyphoid fever have recently been increasing, and the disease is becoming a major public health concern, especially in Eastern and Southern Asia. To investigate the genomic variation and evolution of S. 
Paratyphi A, a pan-genomic analysis was performed on five newly sequenced S. Paratyphi A strains and two other reference strains. A whole genome comparison revealed that the seven genomes are collinear and that their organization is highly conserved. The high rate of substitutions in part of the core genome indicates that there are frequent homologous recombination events. Based on the changes in the pan-genome size and cluster number (both in the core functional genes and core pseudogenes), it can be inferred that the sharply increasing number of pseudogene clusters may have strong correlation with the inactivation of functional genes, and indicates that the S. Paratyphi A genome is being degraded.}, } @article {pmid22984541, year = {2012}, author = {Petroli, CD and Sansaloni, CP and Carling, J and Steane, DA and Vaillancourt, RE and Myburg, AA and da Silva, OB and Pappas, GJ and Kilian, A and Grattapaglia, D}, title = {Genomic characterization of DArT markers based on high-density linkage analysis and physical mapping to the Eucalyptus genome.}, journal = {PloS one}, volume = {7}, number = {9}, pages = {e44684}, pmid = {22984541}, issn = {1932-6203}, mesh = {Chromosome Mapping/*methods ; Chromosomes, Plant ; Cost-Benefit Analysis ; DNA, Plant/genetics ; Eucalyptus/*genetics ; Genetic Linkage ; *Genetic Markers ; Genome, Plant ; Genomics ; Genotype ; Microsatellite Repeats/genetics ; Models, Genetic ; Oligonucleotide Array Sequence Analysis/*methods ; Sequence Analysis, DNA/methods ; }, abstract = {Diversity Arrays Technology (DArT) provides a robust, high throughput, cost-effective method to query thousands of sequence polymorphisms in a single assay. Despite the extensive use of this genotyping platform for numerous plant species, little is known regarding the sequence attributes and genome-wide distribution of DArT markers. 
We investigated the genomic properties of the 7,680 DArT marker probes of a Eucalyptus array, by sequencing them, constructing a high density linkage map and carrying out detailed physical mapping analyses to the Eucalyptus grandis reference genome. A consensus linkage map with 2,274 DArT markers anchored to 210 microsatellites and a framework map, with improved support for ordering, displayed extensive collinearity with the genome sequence. Only 1.4 Mbp of the 75 Mbp of still unplaced scaffold sequence was captured by 45 linkage mapped but physically unaligned markers to the 11 main Eucalyptus pseudochromosomes, providing compelling evidence for the quality and completeness of the current Eucalyptus genome assembly. A highly significant correspondence was found between the locations of DArT markers and predicted gene models, while most of the 89 DArT probes unaligned to the genome correspond to sequences likely absent in E. grandis, consistent with the pan-genomic feature of this multi-Eucalyptus species DArT array. These comprehensive linkage-to-physical mapping analyses provide novel data regarding the genomic attributes of DArT markers in plant genomes in general and for Eucalyptus in particular. DArT markers preferentially target the gene space and display a largely homogeneous distribution across the genome, thereby providing superb coverage for mapping and genome-wide applications in breeding and diversity studies. 
Data reported on these ubiquitous properties of DArT markers will be particularly valuable to researchers working on less-studied crop species who already count on DArT genotyping arrays but for which no reference genome is yet available to allow such detailed characterization.}, } @article {pmid22976801, year = {2012}, author = {Schleiermacher, G and Mosseri, V and London, WB and Maris, JM and Brodeur, GM and Attiyeh, E and Haber, M and Khan, J and Nakagawara, A and Speleman, F and Noguera, R and Tonini, GP and Fischer, M and Ambros, I and Monclair, T and Matthay, KK and Ambros, P and Cohn, SL and Pearson, AD}, title = {Segmental chromosomal alterations have prognostic impact in neuroblastoma: a report from the INRG project.}, journal = {British journal of cancer}, volume = {107}, number = {8}, pages = {1418-1422}, pmid = {22976801}, issn = {1532-1827}, support = {R01 CA039771/CA/NCI NIH HHS/United States ; }, mesh = {Chromosome Aberrations ; Chromosomes, Human, Pair 11/genetics ; Chromosomes, Human, Pair 17/genetics ; Humans ; Infant ; N-Myc Proto-Oncogene Protein ; Neuroblastoma/*genetics ; Nuclear Proteins/*genetics ; Oncogene Proteins/*genetics ; Prognosis ; Retrospective Studies ; Survival Analysis ; }, abstract = {BACKGROUND: In the INRG dataset, the hypothesis that any segmental chromosomal alteration might be of prognostic impact in neuroblastoma without MYCN amplification (MNA) was tested.

METHODS: The presence of any segmental chromosomal alteration (chromosome 1p deletion, 11q deletion and/or chromosome 17q gain) defined a segmental genomic profile. Only tumours with a confirmed unaltered status for all three chromosome arms were considered as having no segmental chromosomal alterations.

RESULTS: Among the 8800 patients in the INRG database, a genomic type could be attributed for 505 patients without MNA: 397 cases had a segmental genomic type, whereas 108 cases had an absence of any segmental alteration. A segmental genomic type was more frequent in patients >18 months and in stage 4 disease (P<0.0001). In univariate analysis, 11q deletion, 17q gain and a segmental genomic type were associated with a poorer event-free survival (EFS) (P<0.0001, P=0.0002 and P<0.0001, respectively). In multivariate analysis modelling EFS, the parameters age, stage and a segmental genomic type were retained in the model, whereas the individual genetic markers were not (P<0.0001 and RR=2.56; P=0.0002 and RR=1.8; P=0.01 and RR=1.7, respectively).

CONCLUSION: A segmental genomic profile, rather than the single genetic markers, adds prognostic information to the clinical markers age and stage in neuroblastoma patients without MNA, underlining the importance of pangenomic studies.}, } @article {pmid22975152, year = {2013}, author = {Garrigues, C and Johansen, E and Crittenden, R}, title = {Pangenomics--an avenue to improved industrial starter cultures and probiotics.}, journal = {Current opinion in biotechnology}, volume = {24}, number = {2}, pages = {187-191}, doi = {10.1016/j.copbio.2012.08.009}, pmid = {22975152}, issn = {1879-0429}, mesh = {Bacteria/*genetics/*growth & development ; Bioreactors/microbiology ; Dairy Products/microbiology ; Fermentation ; *Food Microbiology ; Food Technology/*methods/standards ; Genome, Bacterial/*genetics ; *Genomics ; Host Specificity ; Humans ; *Probiotics ; }, abstract = {With the dramatic reductions in the cost and time involved in DNA sequencing, a new approach to characterisation of bacteria is emerging. It is based on a comparison of complete genome sequences of a number of members of the same species (pangenomics). Pangenomics opens an array of new opportunities for understanding and improving industrial starter cultures and probiotics. 
These include understanding the formation of texture and flavour in dairy products, understanding the functionality of probiotics as well as providing information that can be used for strain screening, strain improvement, safety assessments and process improvements.}, } @article {pmid22972410, year = {2013}, author = {Yeremenko, N and Noordenbos, T and Cantaert, T and van Tok, M and van de Sande, M and Cañete, JD and Tak, PP and Baeten, D}, title = {Disease-specific and inflammation-independent stromal alterations in spondylarthritis synovitis.}, journal = {Arthritis and rheumatism}, volume = {65}, number = {1}, pages = {174-185}, doi = {10.1002/art.37704}, pmid = {22972410}, issn = {1529-0131}, mesh = {Adult ; Aged ; Arthritis, Rheumatoid/*genetics/pathology ; Cell Culture Techniques ; Female ; Flow Cytometry ; Fluorescent Antibody Technique ; *Gene Expression ; Humans ; Immunohistochemistry ; Male ; Microarray Analysis ; Middle Aged ; Oligonucleotide Array Sequence Analysis ; Real-Time Polymerase Chain Reaction ; Spondylarthritis/*genetics/metabolism/pathology ; Stromal Cells/pathology ; Synovial Membrane/*metabolism/pathology ; Synovitis/*genetics/metabolism/pathology ; }, abstract = {OBJECTIVE: The molecular processes driving the distinct patterns of synovial inflammation and tissue remodeling in spondylarthritis (SpA) as compared to rheumatoid arthritis (RA) remain largely unknown. Therefore, we aimed to identify novel and unsuspected disease-specific pathways in SpA by a systematic and unbiased synovial gene expression analysis.

METHODS: Differentially expressed genes were identified by pan-genomic microarray and confirmed by quantitative polymerase chain reaction and immunohistochemical analyses of synovial tissue biopsy samples from patients with SpA (n=63), RA (n=28), and gout (n=9). The effect of inflammation on gene expression was assessed by stimulating fibroblast-like synoviocytes (FLS) with synovial fluid and by analysis of synovial tissue samples at weeks 0 and 12 of etanercept treatment.

RESULTS: Using very stringent statistical thresholds, microarray analysis identified 64 up-regulated transcripts in patients with SpA synovitis as compared to those with RA synovitis. Pathway analysis revealed a robust myogene signature in this gene set. The myogene signature was technically and biologically reproducible, was specific for SpA, and was independent of disease duration, treatment, and SpA subtype (nonpsoriatic versus psoriatic). Synovial tissue staining identified the myogene expressing cells as vimentin-positive, prolyl 4-hydroxylase β-positive, CD90+, and CD146+ mesenchymal cells that were significantly overrepresented in the intimal lining layer and synovial sublining of inflamed SpA synovium. Neither in vitro exposure to synovial fluid from inflamed SpA joints nor in vivo blockade of tumor necrosis factor modulated the SpA-specific myogene signature.

CONCLUSION: These data identify a novel and disease-specific myogene signature in SpA synovitis. The fact that this stromal alteration appeared not to be downstream of local inflammation warrants further analysis of its functional role in the pathogenesis of the disease.}, } @article {pmid22961851, year = {2012}, author = {Borges, V and Nunes, A and Ferreira, R and Borrego, MJ and Gomes, JP}, title = {Directional evolution of Chlamydia trachomatis towards niche-specific adaptation.}, journal = {Journal of bacteriology}, volume = {194}, number = {22}, pages = {6143-6153}, pmid = {22961851}, issn = {1098-5530}, mesh = {Adaptation, Physiological/*genetics ; *Biological Evolution ; Chlamydia Infections/microbiology ; Chlamydia trachomatis/*genetics/*metabolism ; Chromosome Mapping ; Chromosomes, Bacterial ; Gene Expression Regulation, Bacterial/physiology ; Genetic Variation ; Genome, Bacterial ; Host-Pathogen Interactions ; Humans ; Leukocytes, Mononuclear/microbiology ; Mucous Membrane/cytology/microbiology ; Selection, Genetic ; }, abstract = {On behalf of the host-pathogen "arms race," a cutting-edge approach for elucidating genotype-phenotype relationships relies on the identification of positively selected loci involved in pathoadaptation. We studied the obligate intracellular bacterium Chlamydia trachomatis, for which same-species strains display a nearly identical core and pan genome, while presenting a wide range of tissue tropism and ecological success. We sought to evaluate the evolutionary patterns underlying species separation (divergence) and C. trachomatis serovar radiation (polymorphism) and to establish genotype-phenotype associations. 
By analyzing 60 Chlamydia strains, we detected traces of Muller's ratchet as a result of speciation and identified positively selected genes and codons hypothetically involved in the infection of different human cell types (e.g., columnar epithelial cells of ocular or genital mucosae and mononuclear phagocytes) and also events likely driving pathogenic and ecological success dissimilarities. In general, these genes code for proteins involved in immune response elicitation, proteolysis, and the subversion of host-cell functions, and also for proteins with unknown function(s). Several genes are potentially involved in more than one adaptive process, suggesting multiple functions or a distinct modus operandi for a specific function, and thus should be considered as crucial research targets. In addition, six of the nine genes encoding the putative antigen/adhesin polymorphic membrane proteins seem to be under positive selection along specific serovars, which sustains an essential biological role of this extra-large paralogue family in chlamydial pathobiology. 
This study provides insight into how evolutionary inferences illuminate ecological processes such as adaptation to different niches, pathogenicity, or ecological success driven by arms races.}, } @article {pmid22960579, year = {2013}, author = {Bell, JA and Jerome, JP and Plovanich-Jones, AE and Smith, EJ and Gettings, JR and Kim, HY and Landgraf, JR and Lefébure, T and Kopper, JJ and Rathinam, VA and St Charles, JL and Buffa, BA and Brooks, AP and Poe, SA and Eaton, KA and Stanhope, MJ and Mansfield, LS}, title = {Outcome of infection of C57BL/6 IL-10(-/-) mice with Campylobacter jejuni strains is correlated with genome content of open reading frames up- and down-regulated in vivo.}, journal = {Microbial pathogenesis}, volume = {54}, number = {}, pages = {1-19}, pmid = {22960579}, issn = {1096-1208}, support = {N01AI30058/AI/NIAID NIH HHS/United States ; U19 AI090872/AI/NIAID NIH HHS/United States ; N01-AI-30058/AI/NIAID NIH HHS/United States ; 1U19-AI-09087/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Campylobacter Infections/*immunology/microbiology/*pathology ; Campylobacter jejuni/classification/genetics/*immunology/*pathogenicity ; Female ; Gene Expression ; Genotype ; Interleukin-10/*deficiency/genetics ; Male ; Mice ; Mice, Inbred C57BL ; Mice, Knockout ; Multilocus Sequence Typing ; *Open Reading Frames ; Virulence ; Virulence Factors/*genetics/metabolism ; }, abstract = {Human Campylobacter jejuni infection can result in an asymptomatic carrier state, watery or bloody diarrhea, bacteremia, meningitis, or autoimmune neurological sequelae. Infection outcomes of C57BL/6 IL-10(-/-) mice orally infected with twenty-two phylogenetically diverse C. jejuni strains were evaluated to correlate colonization and disease phenotypes with genetic composition of the strains. Variation between strains was observed in colonization, timing of development of clinical signs, and occurrence of enteric lesions. Five pathotypes of C. 
jejuni in C57BL/6 IL-10(-/-) mice were delineated: little or no colonization, colonization without disease, colonization with enteritis, colonization with hemorrhagic enteritis, and colonization with neurological signs with or without enteritis. Virulence gene content of ten sequenced strains was compared in silico; virulence gene content of twelve additional strains was compared using a C. jejuni pan-genome microarray. Neither total nor virulence gene content predicted pathotype; nor was pathotype correlated with multilocus sequence type. Each strain was unique with regard to absences of known virulence-related loci and/or possession of point mutations and indels, including phase variation, in virulence-related genes. An experiment in C. jejuni 11168-infected germ-free mice showed that expression levels of ninety open reading frames (ORFs) were significantly up- or down-regulated in the mouse cecum at least two-fold compared to in vitro growth. Genomic content of these ninety C. jejuni 11168 ORFs was significantly correlated with the capacity to colonize and cause enteritis in C57BL/6 IL-10(-/-) mice. Differences in gene expression levels and patterns are thus an important determinant of pathotype in C. 
jejuni strains in this mouse model.}, } @article {pmid22934638, year = {2012}, author = {Syvanen, M}, title = {Evolutionary implications of horizontal gene transfer.}, journal = {Annual review of genetics}, volume = {46}, number = {}, pages = {341-358}, doi = {10.1146/annurev-genet-110711-155529}, pmid = {22934638}, issn = {1545-2948}, mesh = {Animals ; Bacteria/classification/*genetics ; Computational Biology ; DNA Transposable Elements ; Eukaryota/classification/*genetics ; *Evolution, Molecular ; Gene Flow ; *Gene Transfer, Horizontal ; *Genome, Bacterial ; Genomic Islands ; Mutation ; Phylogeny ; Reproduction/genetics ; Selection, Genetic ; }, abstract = {The flow of genes between different species represents a form of genetic variation whose implications have not been fully appreciated. Here I examine some key findings on the extent of horizontal gene transfer (HGT) revealed by comparative genome analysis and their theoretical implications. In theoretical terms, HGT affects ideas pertaining to the tree of life, the notion of a last universal common ancestor, and the biological unities, as well as the rules of taxonomic nomenclature. This review discusses the emergence of the eukaryotic cell and the occurrence of HGT among metazoan phyla involving both transposable elements and structural genes for normal housekeeping functions. I also discuss the bacterial pangenome, which provides an important case study on the permeability of species boundaries. 
An interesting observation about bdelloid rotifers and their reversion to asexual reproduction as it pertains to HGT is included.}, } @article {pmid22929624, year = {2012}, author = {Zakham, F and Aouane, O and Ussery, D and Benjouad, A and Ennaji, MM}, title = {Computational genomics-proteomics and Phylogeny analysis of twenty one mycobacterial genomes (Tuberculosis & non Tuberculosis strains).}, journal = {Microbial informatics and experimentation}, volume = {2}, number = {1}, pages = {7}, pmid = {22929624}, issn = {2042-5783}, abstract = {BACKGROUND: The genus Mycobacterium comprises different species, among them the most contagious and infectious bacteria. The members of the complex Mycobacterium tuberculosis are the most virulent microorganisms that have killed human and other mammals since millennia. Additionally, with the many different mycobacterial sequences available, there is a crucial need for the visualization and the simplification of their data. In this present study, we aim to highlight a comparative genome, proteome and phylogeny analysis between twenty-one mycobacterial (Tuberculosis and non tuberculosis) strains using a set of computational and bioinformatics tools (Pan and Core genome plotting, BLAST matrix and phylogeny analysis).

RESULTS: Considerably the result of pan and core genome Plotting demonstrated that less than 1250 Mycobacterium gene families are conserved across all species, and a total set of about 20,000 gene families within the Mycobacterium pan-genome of twenty one mycobacterial genomes. Viewing the BLAST matrix a high similarity was found among the species of the complex Mycobacterium tuberculosis and less conservation is found with other slow growing pathogenic mycobacteria. Phylogeny analysis based on both protein conservation, as well as rRNA clearly resolve known relationships between slow growing mycobacteria.

CONCLUSION: Mycobacteria include important pathogenic species for human and animals and the Mycobacterium tuberculosis complex is the most cause of death of the humankind. The comparative genome analysis could provide a new insight for better controlling and preventing these diseases.}, } @article {pmid22923516, year = {2012}, author = {Sati, S and Ghosh, S and Jain, V and Scaria, V and Sengupta, S}, title = {Genome-wide analysis reveals distinct patterns of epigenetic features in long non-coding RNA loci.}, journal = {Nucleic acids research}, volume = {40}, number = {20}, pages = {10018-10031}, pmid = {22923516}, issn = {1362-4962}, mesh = {CpG Islands ; DNA Methylation ; *Epigenesis, Genetic ; Genetic Loci ; Genome, Human ; Histones/metabolism ; Humans ; Proteins/genetics ; RNA, Long Noncoding/*genetics ; Transcription Initiation Site ; }, abstract = {A major fraction of the transcriptome of higher organisms comprised an extensive repertoire of long non-coding RNA (lncRNA) which express in a cell type and development stage-specific manner. While lncRNAs are a proven component of epigenetic gene expression modulation, epigenetic regulation of lncRNA itself remains poorly understood. Here we have analysed pan-genomic DNA methylation and histone modification marks (H3K4me3, H3K9me3, H3K27me3 and H3K36me3) associated with transcription start site (TSS) of lncRNA in four different cell types and three different tissue types representing various cellular stages. We observe that histone marks associated with active transcription H3K4me3 and H3K36me3 along with the repressive histone mark H3K27me3 have similar distribution pattern around TSS irrespective of cell types. Also, the density of these marks correlates well with expression of protein-coding and lncRNA genes. In contrast, the lncRNA genes harbour higher methylation density around TSS than protein-coding genes regardless of their expression status. 
Furthermore, we found that DNA methylation along with the other repressive histone mark H3K9me3 does not seem to play a role in lncRNA expression. Thus, our observation suggests that epigenetic regulation of lncRNA shares common features with mRNA except the role of DNA methylation which is markedly dissimilar.}, } @article {pmid22904089, year = {2012}, author = {Fouts, DE and Brinkac, L and Beck, E and Inman, J and Sutton, G}, title = {PanOCT: automated clustering of orthologs using conserved gene neighborhood for pan-genomic analysis of bacterial strains and closely related species.}, journal = {Nucleic acids research}, volume = {40}, number = {22}, pages = {e172}, pmid = {22904089}, issn = {1362-4962}, support = {N01-AI30071/AI/NIAID NIH HHS/United States ; }, mesh = {Bacteria/classification ; Bacterial Proteins/classification/*genetics ; Cluster Analysis ; *Genes, Bacterial ; *Genome, Bacterial ; Genomics/methods ; *Software ; }, abstract = {Pan-genome ortholog clustering tool (PanOCT) is a tool for pan-genomic analysis of closely related prokaryotic species or strains. PanOCT uses conserved gene neighborhood information to separate recently diverged paralogs into orthologous clusters where homology-only clustering methods cannot. The results from PanOCT and three commonly used graph-based ortholog-finding programs were compared using a set of four publicly available strains of the same bacterial species. All four methods agreed on ∼70% of the clusters and ∼86% of the proteins. 
The clusters that did not agree were inspected for evidence of correctness resulting in 85 high-confidence manually curated clusters that were used to compare all four methods.}, } @article {pmid22895163, year = {2013}, author = {Malmstrom, RR and Rodrigue, S and Huang, KH and Kelly, L and Kern, SE and Thompson, A and Roggensack, S and Berube, PM and Henn, MR and Chisholm, SW}, title = {Ecology of uncultured Prochlorococcus clades revealed through single-cell genomics and biogeographic analysis.}, journal = {The ISME journal}, volume = {7}, number = {1}, pages = {184-198}, pmid = {22895163}, issn = {1751-7370}, mesh = {Bacteriophages/genetics ; Genetic Variation ; Genomics/*methods ; Iron/metabolism ; Metagenomics/methods ; Pacific Ocean ; *Phylogeny ; Prochlorococcus/*classification/genetics/metabolism/virology ; Seawater/chemistry/*microbiology ; Siderophores/metabolism ; Single-Cell Analysis/*methods ; }, abstract = {Prochlorococcus is the numerically dominant photosynthetic organism throughout much of the world's oceans, yet little is known about the ecology and genetic diversity of populations inhabiting tropical waters. To help close this gap, we examined natural Prochlorococcus communities in the tropical Pacific Ocean using a single-cell whole-genome amplification and sequencing. Analysis of the gene content of just 10 single cells from these waters added 394 new genes to the Prochlorococcus pan-genome--that is, genes never before seen in a Prochlorococcus cell. Analysis of marker genes, including the ribosomal internal transcribed sequence, from dozens of individual cells revealed several representatives from two uncultivated clades of Prochlorococcus previously identified as HNLC1 and HNLC2. While the HNLC clades can dominate Prochlorococcus communities under certain conditions, their overall geographic distribution was highly restricted compared with other clades of Prochlorococcus. 
In the Atlantic and Pacific oceans, these clades were only found in warm waters with low Fe and high inorganic P levels. Genomic analysis suggests that at least one of these clades thrives in low Fe environments by scavenging organic-bound Fe, a process previously unknown in Prochlorococcus. Furthermore, the capacity to utilize organic-bound Fe appears to have been acquired horizontally and may be exchanged among other clades of Prochlorococcus. Finally, one of the single Prochlorococcus cells sequenced contained a partial genome of what appears to be a prophage integrated into the genome.}, } @article {pmid22890137, year = {2012}, author = {Ali, A and Soares, SC and Santos, AR and Guimarães, LC and Barbosa, E and Almeida, SS and Abreu, VA and Carneiro, AR and Ramos, RT and Bakhtiar, SM and Hassan, SS and Ussery, DW and On, S and Silva, A and Schneider, MP and Lage, AP and Miyoshi, A and Azevedo, V}, title = {Campylobacter fetus subspecies: comparative genomics and prediction of potential virulence targets.}, journal = {Gene}, volume = {508}, number = {2}, pages = {145-156}, doi = {10.1016/j.gene.2012.07.070}, pmid = {22890137}, issn = {1879-0038}, mesh = {Animals ; Campylobacter Infections/microbiology ; Campylobacter fetus/*classification/*genetics/pathogenicity ; Cattle ; DNA, Bacterial/genetics ; *Genes, Bacterial ; *Genome, Bacterial ; Genomic Islands/*genetics ; Humans ; Phylogeny ; Sequence Analysis, DNA ; Species Specificity ; Virulence/*genetics ; Virulence Factors/*genetics ; }, abstract = {The genus Campylobacter contains pathogens causing a wide range of diseases, targeting both humans and animals. Among them, the Campylobacter fetus subspecies fetus and venerealis deserve special attention, as they are the etiological agents of human bacterial gastroenteritis and bovine genital campylobacteriosis, respectively. 
We compare the whole genomes of both subspecies to get insights into genomic architecture, phylogenetic relationships, genome conservation and core virulence factors. Pan-genomic approach was applied to identify the core- and pan-genome for both C. fetus subspecies and members of the genus. The C. fetus subspecies conserved (76%) proteome were then analyzed for their subcellular localization and protein functions in biological processes. Furthermore, with pathogenomic strategies, unique candidate regions in the genomes and several potential core-virulence factors were identified. The potential candidate factors identified for attenuation and/or subunit vaccine development against C. fetus subspecies contain: nucleoside diphosphate kinase (Ndk), type IV secretion systems (T4SS), outer membrane proteins (OMP), substrate binding proteins CjaA and CjaC, surface array proteins, sap gene, and cytolethal distending toxin (CDT). Significantly, many of those genes were found in genomic regions with signals of horizontal gene transfer and, therefore, predicted as putative pathogenicity islands. We found CRISPR loci and dam genes in an island specific for C. fetus subsp. fetus, and T4SS and sap genes in an island specific for C. fetus subsp. venerealis. 
The genomic variations and potential core and unique virulence factors characterized in this study would lead to better insight into the species virulence and to more efficient use of the candidates for antibiotic, drug and vaccine development.}, } @article {pmid22887652, year = {2012}, author = {Pethick, FE and Lainson, AF and Yaga, R and Flockhart, A and Smith, DG and Donachie, W and Cerdeira, LT and Silva, A and Bol, E and Lopes, TS and Barbosa, MS and Pinto, AC and dos Santos, AR and Soares, SC and Almeida, SS and Guimaraes, LC and Aburjaile, FF and Abreu, VA and Ribeiro, D and Fiaux, KK and Diniz, CA and Barbosa, EG and Pereira, UP and Hassan, SS and Ali, A and Bakhtiar, SM and Dorella, FA and Carneiro, AR and Ramos, RT and Rocha, FS and Schneider, MP and Miyoshi, A and Azevedo, V and Fontaine, MC}, title = {Complete genome sequences of Corynebacterium pseudotuberculosis strains 3/99-5 and 42/02-A, isolated from sheep in Scotland and Australia, respectively.}, journal = {Journal of bacteriology}, volume = {194}, number = {17}, pages = {4736-4737}, pmid = {22887652}, issn = {1098-5530}, mesh = {Animals ; Australia ; Base Sequence ; Chromosome Mapping ; Corynebacterium Infections/microbiology/*veterinary ; Corynebacterium pseudotuberculosis/classification/*genetics/isolation & purification ; *Genome, Bacterial ; Lymphadenitis/microbiology/veterinary ; Molecular Sequence Data ; Scotland ; Sequence Analysis, DNA ; Sheep/microbiology ; Sheep Diseases/*microbiology ; }, abstract = {Here, we report the whole-genome sequences of two ovine-pathogenic Corynebacterium pseudotuberculosis isolates: strain 3/99-5, which represents the first C. pseudotuberculosis genome originating from the United Kingdom, and 42/02-A, the second from Australia. 
These genome sequences will contribute to the objective of determining the global pan-genome of this bacterium.}, } @article {pmid22882611, year = {2013}, author = {Davenport, CF and Tümmler, B}, title = {Advances in computational analysis of metagenome sequences.}, journal = {Environmental microbiology}, volume = {15}, number = {1}, pages = {1-5}, doi = {10.1111/j.1462-2920.2012.02843.x}, pmid = {22882611}, issn = {1462-2920}, mesh = {Bacteria/classification/*genetics ; Chromosome Mapping ; Metagenome/*genetics ; *Metagenomics ; Microbiology/*trends ; Sequence Analysis, DNA ; }, abstract = {Second-generation sequencing technologies are revolutionizing the study of metagenomes. Whole-genome shotgun sequencing of metagenomic DNA may become an attractive alternative to the current widely used ribosomal RNA gene studies. Large data sets of short sequence reads are mapped onto a custom microbial reference sequence. If a bacterial pangenome of completely sequenced genomes is taken as a reference, the output consists of the distribution of bacterial taxa in and bacterial gene contents of the metagenome. The relative abundance of functional categories and of individual pathways and fitness traits encoded by the metagenomic gene pool provides insight into habitat-specific features of the microbial community. Polymorphic sites in sequence reads may resolve the number and abundance of individual clonal complexes of dominant species in the polymicrobial community. These SNPs and de novo mutations may be exploited to trace the spatiotemporal spread of clones and the emergence of novel traits such as fitness or resistance determinants. 
In conclusion, massively parallel sequencing of metagenomic DNA allows deep insights into the composition and the genetic repertoire of polymicrobial communities.}, } @article {pmid22863143, year = {2012}, author = {Borneman, AR and McCarthy, JM and Chambers, PJ and Bartowsky, EJ}, title = {Comparative analysis of the Oenococcus oeni pan genome reveals genetic diversity in industrially-relevant pathways.}, journal = {BMC genomics}, volume = {13}, number = {}, pages = {373}, pmid = {22863143}, issn = {1471-2164}, mesh = {Genetic Variation/*genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; Oenococcus/*genetics ; }, abstract = {BACKGROUND: Oenococcus oeni, a member of the lactic acid bacteria, is one of a limited number of microorganisms that not only survive, but actively proliferate in wine. It is also unusual as, unlike the majority of bacteria present in wine, it is beneficial to wine quality rather than causing spoilage. These benefits are realised primarily through catalysing malolactic fermentation, but also through imparting other positive sensory properties. However, many of these industrially-important secondary attributes have been shown to be strain-dependent and their genetic basis is yet to be determined.

RESULTS: In order to investigate the scale and scope of genetic variation in O. oeni, we have performed whole-genome sequencing on eleven strains of this bacterium, bringing the total number of strains for which genome sequences are available to fourteen. While any single strain of O. oeni was shown to contain around 1800 protein-coding genes, in-depth comparative annotation based on genomic synteny and protein orthology identified over 2800 orthologous open reading frames that comprise the pan genome of this species, and less than 1200 genes that make up the conserved genomic core present in all of the strains. The expansion of the pan genome relative to the coding potential of individual strains was shown to be due to the varied presence and location of multiple distinct bacteriophage sequences and also in various metabolic functions with potential impacts on the industrial performance of this species, including cell wall exopolysaccharide biosynthesis, sugar transport and utilisation and amino acid biosynthesis.

CONCLUSIONS: By providing a large cohort of sequenced strains, this study provides a broad insight into the genetic variation present within O. oeni. This data is vital to understanding and harnessing the phenotypic variation present in this economically-important species.}, } @article {pmid22851513, year = {2012}, author = {Dröge, J and McHardy, AC}, title = {Taxonomic binning of metagenome samples generated by next-generation sequencing technologies.}, journal = {Briefings in bioinformatics}, volume = {13}, number = {6}, pages = {646-655}, doi = {10.1093/bib/bbs031}, pmid = {22851513}, issn = {1477-4054}, mesh = {Binding Sites ; Cluster Analysis ; *Metagenome ; Metagenomics ; Sequence Analysis, DNA/*methods ; }, abstract = {Metagenome research uses random shotgun sequencing of microbial community DNA to study the genetic sequences of its members without cultivation. This development has been strongly supported by improvements in sequencing technologies, which have rendered sequencing cheaper than before. As a consequence, downstream computational analysis of metagenome sequence samples is now faced with large amounts of complex data. One of the essential steps in metagenome analysis is reconstruction of draft genomes for populations of a community or of draft 'pan-genomes' for higher level clades. 'Taxonomic binning' corresponds to the process of assigning a taxonomic identifier to sequence fragments, based on information such as sequence similarity, sequence composition or read coverage. This is used for draft genome reconstruction, if sequencing coverage is insufficient for reconstruction based on assembly information alone. 
Subsequent functional and metabolic annotation of draft genomes allows a genome-level analysis of novel uncultured microbial species and even inference of their cultivation requirements.}, } @article {pmid22830599, year = {2012}, author = {Conlan, S and Mijares, LA and {NISC Comparative Sequencing Program} and Becker, J and Blakesley, RW and Bouffard, GG and Brooks, S and Coleman, H and Gupta, J and Gurson, N and Park, M and Schmidt, B and Thomas, PJ and Otto, M and Kong, HH and Murray, PR and Segre, JA}, title = {Staphylococcus epidermidis pan-genome sequence analysis reveals diversity of skin commensal and hospital infection-associated isolates.}, journal = {Genome biology}, volume = {13}, number = {7}, pages = {R64}, pmid = {22830599}, issn = {1474-760X}, support = {1UH2AR057504-01/AR/NIAMS NIH HHS/United States ; 4UH3 AR057504-02/AR/NIAMS NIH HHS/United States ; //Intramural NIH HHS/United States ; }, mesh = {Catheter-Related Infections/*microbiology ; Cross Infection/*microbiology ; Drug Resistance, Bacterial ; Evolution, Molecular ; Genetic Variation ; Genome, Bacterial ; Humans ; Molecular Sequence Data ; Molecular Typing ; Phylogeny ; Sequence Analysis, DNA/*methods ; Skin/*microbiology ; Staphylococcus epidermidis/*classification/*genetics/isolation & purification ; }, abstract = {BACKGROUND: While Staphylococcus epidermidis is commonly isolated from healthy human skin, it is also the most frequent cause of nosocomial infections on indwelling medical devices. Despite its importance, few genome sequences existed and the most frequent hospital-associated lineage, ST2, had not been fully sequenced.

RESULTS: We cultivated 71 commensal S. epidermidis isolates from 15 skin sites and compared them with 28 nosocomial isolates from venous catheters and blood cultures. We produced 21 commensal and 9 nosocomial draft genomes, and annotated and compared their gene content, phylogenetic relatedness and biochemical functions. The commensal strains had an open pan-genome with 80% core genes and 20% variable genes. The variable genome was characterized by an overabundance of transposable elements, transcription factors and transporters. Biochemical diversity, as assayed by antibiotic resistance and in vitro biofilm formation, demonstrated the varied phenotypic consequences of this genomic diversity. The nosocomial isolates exhibited both large-scale rearrangements and single-nucleotide variation. We showed that S. epidermidis genomes separate into two phylogenetic groups, one consisting only of commensals. The formate dehydrogenase gene, present only in commensals, is a discriminatory marker between the two groups.

CONCLUSIONS: Commensal skin S. epidermidis have an open pan-genome and show considerable diversity between isolates, even when derived from a single individual or body site. For ST2, the most common nosocomial lineage, we detect variation between three independent isolates sequenced. Finally, phylogenetic analyses revealed a previously unrecognized group of S. epidermidis strains characterized by reduced virulence and formate dehydrogenase, which we propose as a clinical molecular marker.}, } @article {pmid22825724, year = {2012}, author = {Talagas, M and Marcorelles, P and Uguen, A and Redon, S and Quintin-Roué, I and Costa, S and Férec, C and Morel, F and Hieu, PD and De Braekeleer, M}, title = {Identification of a novel population in high-grade oligodendroglial tumors not deleted on 1p/19q using array CGH.}, journal = {Journal of neuro-oncology}, volume = {109}, number = {2}, pages = {405-413}, pmid = {22825724}, issn = {1573-7373}, mesh = {Brain Neoplasms/*diagnosis/*genetics ; Cell Cycle Proteins ; Chromosome Aberrations ; Chromosomes, Human, Pair 1/*genetics ; Chromosomes, Human, Pair 19/*genetics ; Class II Phosphatidylinositol 3-Kinases ; Cyclin-Dependent Kinase 4/genetics ; Cytidine Deaminase/genetics ; Female ; Gene Expression Profiling ; Humans ; In Situ Hybridization, Fluorescence ; *Loss of Heterozygosity ; Male ; Minor Histocompatibility Antigens ; Nuclear Proteins/genetics ; Oligodendroglioma/diagnosis/*genetics ; Oligonucleotide Array Sequence Analysis ; Phosphatidylinositol 3-Kinases/genetics ; Proto-Oncogene Proteins/genetics ; Proto-Oncogene Proteins c-mdm2/genetics ; }, abstract = {Oligodendroglial tumors (ODTs) are primary tumors of the central nervous system that show recurrent codeletion of whole chromosome arms 1p and 19q. Non-1p/19q-deleted high-grade ODTs can present other genetic aberrations, CDKN2A deletion (9p21.3), EGFR amplification (7p11.2) and/or chromosome 10 loss, which are associated with a poor prognosis. 
The identification of these abnormalities allowed drafting a histo-molecular classification. The aim of this study was to precisely identify, using array CGH, the genomic hallmarks of these tumors, particularly those that are not deleted on 1p/19q. We studied 14 formalin-fixed paraffin-embedded high-grade ODTs using pangenomic oligonucleotide array CGH with an average resolution of 22.3 kb. The 1p/19q codeletion was found in five anaplastic oligodendrogliomas. The three genomic aberrations carrying a poor prognosis were found, most often associated, in five out of nine tumors not deleted on 1p/19q. In addition, four recurrent copy number alterations, involving genes that participate to cell growth and cycle, were found to be strongly associated in five tumors not deleted on 1p/19q: gain or amplification at 1q32.1 (MDM4, PIK3C2B genes), 12q14.1 (CDK4 gene), 12q14.3-q15 (MDM2 gene) and homozygous deletion at 22q13.1 (APOBEC3B gene). MDM2, MDM4, CDK4 and PIK3C2B are known for potentially being amplified or overexpressed in high-grade gliomas. However, the involvement of APOBEC3B, coding for mRNA edition enzyme, is described here for the first time. Our results show a strong association between these four alterations. 
Therefore, this can open a perspective for a novel subgroup in high-grade ODTs not deleted on 1p/19q.}, } @article {pmid22809637, year = {2012}, author = {El Gharniti, F and Dols-Lafargue, M and Bon, E and Claisse, O and Miot-Sertier, C and Lonvaud, A and Le Marrec, C}, title = {IS30 elements are mediators of genetic diversity in Oenococcus oeni.}, journal = {International journal of food microbiology}, volume = {158}, number = {1}, pages = {14-22}, doi = {10.1016/j.ijfoodmicro.2012.06.009}, pmid = {22809637}, issn = {1879-3460}, mesh = {Base Sequence ; Computer Simulation ; DNA Primers/genetics ; DNA Transposable Elements/*genetics ; DNA, Bacterial/genetics ; Fermentation ; *Genetic Variation ; Genomics ; Genotype ; Multilocus Sequence Typing ; Oenococcus/*genetics ; Phenotype ; Wine/microbiology ; }, abstract = {Oenococcus oeni is responsible for the malolactic fermentation of wines. Genomic diversity has been recently established in the species and extensive attention is now being given to the genomic bases of strain-specific differences. We explored the role of insertion sequences (IS), which are considered as driving forces for novel genotypic and phenotypic variants in prokaryotes. The present study focuses on members of the IS30 family, which are widespread among lactic acid bacteria. An in silico analysis of the three available genomes of O. oeni in combination with the use of an inverse PCR strategy targeting conserved IS30-related sequences indicated the presence of seven IS30 copies in the pangenome of O. oeni. A primer designed to anneal to the conserved 3' end of the IS30 element was paired with each of the seven primers selected to bind to unique sequences upstream of each of the seven mobile elements identified. The study presents an overview of the abundance, and the genomic environment of IS30 elements in the O. 
oeni pangenome and shows that the two existing genetic sub-populations previously described in the species through multilocus sequence typing analysis (MLST) differ in their IS30 content. Possible IS30 impacts on bacterial adaptation are discussed.}, } @article {pmid22792073, year = {2012}, author = {Loper, JE and Hassan, KA and Mavrodi, DV and Davis, EW and Lim, CK and Shaffer, BT and Elbourne, LD and Stockwell, VO and Hartney, SL and Breakwell, K and Henkels, MD and Tetu, SG and Rangel, LI and Kidarsa, TA and Wilson, NL and van de Mortel, JE and Song, C and Blumhagen, R and Radune, D and Hostetler, JB and Brinkac, LM and Durkin, AS and Kluepfel, DA and Wechter, WP and Anderson, AJ and Kim, YC and Pierson, LS and Pierson, EA and Lindow, SE and Kobayashi, DY and Raaijmakers, JM and Weller, DM and Thomashow, LS and Allen, AE and Paulsen, IT}, title = {Comparative genomics of plant-associated Pseudomonas spp.: insights into diversity and inheritance of traits involved in multitrophic interactions.}, journal = {PLoS genetics}, volume = {8}, number = {7}, pages = {e1002784}, pmid = {22792073}, issn = {1553-7404}, mesh = {Animals ; Bacterial Proteins/genetics ; Bacterial Toxins/genetics ; Bacteriocins/genetics ; Genetic Heterogeneity ; Genetic Variation ; *Genome, Bacterial ; Host-Pathogen Interactions/genetics ; Insecta/genetics ; Multigene Family ; Phylogeny ; Plant Diseases/genetics/microbiology ; *Plants/genetics/microbiology ; Pseudomonas fluorescens/*genetics/*metabolism ; Repetitive Sequences, Nucleic Acid/genetics ; Resorcinols/metabolism ; *Sequence Analysis, DNA ; }, abstract = {We provide here a comparative genome analysis of ten strains within the Pseudomonas fluorescens group including seven new genomic sequences. These strains exhibit a diverse spectrum of traits involved in biological control and other multitrophic interactions with plants, microbes, and insects. 
Multilocus sequence analysis placed the strains in three sub-clades, which was reinforced by high levels of synteny, size of core genomes, and relatedness of orthologous genes between strains within a sub-clade. The heterogeneity of the P. fluorescens group was reflected in the large size of its pan-genome, which makes up approximately 54% of the pan-genome of the genus as a whole, and a core genome representing only 45-52% of the genome of any individual strain. We discovered genes for traits that were not known previously in the strains, including genes for the biosynthesis of the siderophores achromobactin and pseudomonine and the antibiotic 2-hexyl-5-propyl-alkylresorcinol; novel bacteriocins; type II, III, and VI secretion systems; and insect toxins. Certain gene clusters, such as those for two type III secretion systems, are present only in specific sub-clades, suggesting vertical inheritance. Almost all of the genes associated with multitrophic interactions map to genomic regions present in only a subset of the strains or unique to a specific strain. To explore the evolutionary origin of these genes, we mapped their distributions relative to the locations of mobile genetic elements and repetitive extragenic palindromic (REP) elements in each genome. The mobile genetic elements and many strain-specific genes fall into regions devoid of REP elements (i.e., REP deserts) and regions displaying atypical tri-nucleotide composition, possibly indicating relatively recent acquisition of these loci. Collectively, the results of this study highlight the enormous heterogeneity of the P. 
fluorescens group and the importance of the variable genome in tailoring individual strains to their specific lifestyles and functional repertoire.}, } @article {pmid22759432, year = {2012}, author = {Zhou, Z and Gu, J and Li, YQ and Wang, Y}, title = {Genome plasticity and systems evolution in Streptomyces.}, journal = {BMC bioinformatics}, volume = {13 Suppl 10}, number = {Suppl 10}, pages = {S8}, pmid = {22759432}, issn = {1471-2105}, support = {SC1 GM081068/GM/NIGMS NIH HHS/United States ; AI067543/AI/NIAID NIH HHS/United States ; GM081068/GM/NIGMS NIH HHS/United States ; RR013646/RR/NCRR NIH HHS/United States ; SC1 GM100806/GM/NIGMS NIH HHS/United States ; G12 MD007591/MD/NIMHD NIH HHS/United States ; G12 RR013646/RR/NCRR NIH HHS/United States ; AI080579/AI/NIAID NIH HHS/United States ; SC1 AI080579/AI/NIAID NIH HHS/United States ; }, mesh = {DNA, Bacterial/genetics ; Evolution, Molecular ; Gene Duplication ; Gene Transfer, Horizontal ; *Genome, Bacterial ; Molecular Sequence Annotation ; Multigene Family ; Phylogeny ; Sequence Analysis, DNA/*methods ; Streptomyces/*genetics ; }, abstract = {BACKGROUND: Streptomycetes are filamentous soil-dwelling bacteria. They are best known as the producers of a great variety of natural products such as antibiotics, antifungals, antiparasitics, and anticancer agents and the decomposers of organic substances for carbon recycling. They are also model organisms for the studies of gene regulatory networks, morphological differentiation, and stress response. The availability of sets of genomes from closely related Streptomyces strains makes it possible to assess the mechanisms underlying genome plasticity and systems adaptation.

RESULTS: We present the results of a comprehensive analysis of the genomes of five Streptomyces species with distinct phenotypes. These streptomycetes have a pan-genome comprised of 17,362 orthologous families which includes 3,096 components in the core genome, 5,066 components in the dispensable genome, and 9,200 components that are uniquely present in only one species. The core genome makes up about 33%-45% of each genome repertoire. It contains important genes for Streptomyces biology including those involved in gene regulation, secretion, secondary metabolism and morphological differentiation. Abundant duplicate genes have been identified, with 4%-11% of the whole genomes composed of lineage-specific expansions (LSEs), suggesting that frequent gene duplication or lateral gene transfer events play a role in shaping the genome diversification within this genus. Two patterns of expansion, single gene expansion and chromosome block expansion are observed, representing different scales of duplication.

CONCLUSIONS: Our results provide a catalog of genome components and their potential functional roles in gene regulatory networks and metabolic networks. The core genome components reveal the minimum requirement for streptomycetes to sustain a successful lifecycle in the soil environment, reflecting the effects of both genome evolution and environmental stress acting upon the expressed phenotypes. A better understanding of the LSE gene families will, on the other hand, bring a wealth of new insights into the mechanisms underlying strain-specific phenotypes, such as the production of novel antibiotics, pathogenesis, and adaptive response to environmental challenges.}, } @article {pmid22752048, year = {2012}, author = {Collins, RE and Higgs, PG}, title = {Testing the infinitely many genes model for the evolution of the bacterial core genome and pangenome.}, journal = {Molecular biology and evolution}, volume = {29}, number = {11}, pages = {3413-3425}, doi = {10.1093/molbev/mss163}, pmid = {22752048}, issn = {1537-1719}, mesh = {*Evolution, Molecular ; Gene Frequency/genetics ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; *Models, Genetic ; Multigene Family/genetics ; Phylogeny ; }, abstract = {When groups of related bacterial genomes are compared, the number of core genes found in all genomes is usually much less than the mean genome size, whereas the size of the pangenome (the set of genes found on at least one of the genomes) is much larger than the mean size of one genome. We analyze 172 complete genomes of Bacilli and compare the properties of the pangenomes and core genomes of monophyletic subsets taken from this group. We then assess the capabilities of several evolutionary models to predict these properties. The infinitely many genes (IMG) model is based on the assumption that each new gene can arise only once. The predictions of the model depend on the shape of the evolutionary tree that underlies the divergence of the genomes. 
We calculate results for coalescent trees, star trees, and arbitrary phylogenetic trees of predefined fixed branch length. On a star tree, the pangenome size increases linearly with the number of genomes, as has been suggested in some previous studies, whereas on a coalescent tree, it increases logarithmically. The coalescent tree gives a better fit to the data, for all the examples we consider. In some cases, a fixed phylogenetic tree proved better than the coalescent tree at reproducing structure in the gene frequency spectrum, but little improvement was gained in predictions of the core and pangenome sizes. Most of the data are well explained by a model with three classes of gene: an essential class that is found in all genomes, a slow class whose rate of origination and deletion is slow compared with the time of divergence of the genomes, and a fast class showing rapid origination and deletion. Although the majority of genes originating in a genome are in the fast class, these genes are not retained for long periods, and the majority of genes present in a genome are in the slow or essential classes. In general, we show that the IMG model is useful for comparison with experimental genome data both for species level and widely divergent taxonomic groups. 
Software implementing the described formulae is provided at http://github.com/rec3141/pangenome.}, } @article {pmid22732061, year = {2012}, author = {Petersen, J and Brinkmann, H and Bunk, B and Michael, V and Päuker, O and Pradella, S}, title = {Think pink: photosynthesis, plasmids and the Roseobacter clade.}, journal = {Environmental microbiology}, volume = {14}, number = {10}, pages = {2661-2672}, doi = {10.1111/j.1462-2920.2012.02806.x}, pmid = {22732061}, issn = {1462-2920}, mesh = {DNA Replication ; Gene Order ; Molecular Sequence Data ; Operon/genetics ; Photosynthesis/*genetics ; *Phylogeny ; Pigments, Biological ; Plasmids/*genetics ; Replicon/genetics ; Roseobacter/*classification/*genetics/metabolism ; }, abstract = {Aerobic anoxygenic photosynthesis providing additional ATP for a photoheterotrophic lifestyle is characteristic for several representatives of the marine Roseobacter clade. The patchy distribution of photosynthesis gene clusters (PGCs) within this lineage probably results from horizontal transfers and this explanation is supported by two cases of plasmid-located PGCs. In this study sequencing of the three Sulfitobacter guttiformis plasmids (pSG4, pSG53, pSG118) was initiated with the objective to analyse the 118 kb-sized photosynthetic replicon, but our annotation revealed several additional important traits including key genes of the primary metabolism. The comparison of the two photosynthesis plasmids from S. guttiformis and Roseobacter litoralis showed that their replication modules are located at precisely the same position within the 45 kb-sized PGC. However, comprehensive phylogenetic analyses of the non-homologous replicases (RepB-III, DnaA-like I) and the two ParAB partitioning proteins unequivocally document an independent origin of their extrachromosomal replicons. 
The analogous positioning within the two photosynthesis super-operons can be explained by a two-step recombination scenario and seems to be the ultimate result of stabilizing selection. Our exemplary analyses of 'pink' plasmids document that chromosomal outsourcing is a common phenomenon in the Roseobacter clade and subsequent horizontal exchanges offer rapid access to the marine pan-genome.}, } @article {pmid22721844, year = {2012}, author = {Zeller, P and Quenault, H and Huguet, A and Blanchard, Y and Fessard, V}, title = {Transcriptomic comparison of cyanotoxin variants in a human intestinal model revealed major differences in oxidative stress response: effects of MC-RR and MC-LR on Caco-2 cells.}, journal = {Ecotoxicology and environmental safety}, volume = {82}, number = {}, pages = {13-21}, doi = {10.1016/j.ecoenv.2012.05.001}, pmid = {22721844}, issn = {1090-2414}, mesh = {Caco-2 Cells ; Gene Expression Profiling ; Gene Expression Regulation/*drug effects ; Humans ; Marine Toxins ; Microcystins/*toxicity ; Oxidative Stress/*drug effects ; }, abstract = {Microcystins (MCs) are cyclic hepatotoxins produced by various species of cyanobacteria. Their structure includes two variable amino acids (AA) giving rise to more than 90 MC variants, however most of the studies to date have focused on the most toxic variant: microcystin LR (MC-LR). Ingestion is the major route of human exposure to MCs and several in vivo studies have demonstrated macroscopic effects on the gastro-intestinal tract. However, little information exists concerning the pathways affected by MC variants on intestinal cells. In the current study, we have investigated the effects of MC-RR and MC-LR on the human intestinal cell line Caco-2 using a non-selective method and compared their response at the pangenomic scale. The cells were incubated for 4h or 24h with a range of non-toxic concentrations of MC-RR or MC-LR. Minimal effects were observed after short term exposures (4h) to either MC variant. 
In contrast, dose dependent modulations of gene transcription levels were observed with MC-RR and MC-LR after 24h. The transcriptomic profiles induced by MC-RR were quite similar to those induced by MC-LR, suggestive of a largely common mechanism of toxicity. However, changes in total gene expression were more pronounced following exposure to MC-LR compared to MC-RR, as revealed by functional annotation. MC-LR affected two principal pathways, the oxidative stress response and cell cycle regulation, which did not elicit significant alteration following MC-RR exposure. This work is the first comparative description of the effects of MC-LR and MC-RR in a human intestinal cell model at the pangenomic scale. It has allowed us to propose differences in the mechanism of toxicity for MC-RR and MC-LR. These results illustrate that taking into account the toxicity of MC variants remains a key point for risk assessment.}, } @article {pmid22712577, year = {2012}, author = {Didelot, X and Méric, G and Falush, D and Darling, AE}, title = {Impact of homologous and non-homologous recombination in the genomic evolution of Escherichia coli.}, journal = {BMC genomics}, volume = {13}, number = {}, pages = {256}, pmid = {22712577}, issn = {1471-2164}, support = {087646/Z/08/Z//Wellcome Trust/United Kingdom ; G0800778//Department of Health/United Kingdom ; //Biotechnology and Biological Sciences Research Council/United Kingdom ; //Medical Research Council/United Kingdom ; }, mesh = {Biological Evolution ; Databases, Genetic ; Escherichia coli/classification/*genetics ; Genome, Bacterial/*genetics ; Homologous Recombination/*genetics ; Phylogeny ; }, abstract = {BACKGROUND: Escherichia coli is an important species of bacteria that can live as a harmless inhabitant of the guts of many animals, as a pathogen causing life-threatening conditions or freely in the non-host environment. 
This diversity of lifestyles has made it a particular focus of interest for studies of genetic variation, mainly with the aim to understand how a commensal can become a deadly pathogen. Many whole genomes of E. coli have been fully sequenced in the past few years, which offer helpful data to help understand how this important species evolved.

RESULTS: We compared 27 whole genomes encompassing four phylogroups of Escherichia coli (A, B1, B2 and E). From the core-genome we established the clonal relationships between the isolates as well as the role played by homologous recombination during their evolution from a common ancestor. We found strong evidence for sexual isolation between three lineages (A+B1, B2, E), which could be explained by the ecological structuring of E. coli and may represent on-going speciation. We identified three hotspots of homologous recombination, one of which had not been previously described and contains the aroC gene, involved in the essential shikimate metabolic pathway. We also described the role played by non-homologous recombination in the pan-genome, and showed that this process was highly heterogeneous. Our analyses revealed in particular that the genomes of three enterohaemorrhagic (EHEC) strains within phylogroup B1 have converged from originally separate backgrounds as a result of both homologous and non-homologous recombination.

CONCLUSIONS: Recombination is an important force shaping the genomic evolution and diversification of E. coli, both by replacing fragments of genes with an homologous sequence and also by introducing new genes. In this study, several non-random patterns of these events were identified which correlated with important changes in the lifestyle of the bacteria, and therefore provide additional evidence to explain the relationship between genomic variation and ecological adaptation.}, } @article {pmid22702646, year = {2012}, author = {Girdhani, S and Lamont, C and Hahnfeldt, P and Abdollahi, A and Hlatky, L}, title = {Proton irradiation suppresses angiogenic genes and impairs cell invasion and tumor growth.}, journal = {Radiation research}, volume = {178}, number = {1}, pages = {33-45}, doi = {10.1667/rr2724.1}, pmid = {22702646}, issn = {1938-5404}, mesh = {Cell Movement/*radiation effects ; Cell Proliferation/radiation effects ; Cell Survival/radiation effects ; Cells, Cultured ; Gene Expression/radiation effects ; Humans ; Interleukin-8/genetics ; Neoplasm Invasiveness ; Neoplasms/blood supply/pathology/*radiotherapy ; Neovascularization, Pathologic/genetics ; Neovascularization, Physiologic ; *Proton Therapy ; Vascular Endothelial Growth Factor A/genetics ; }, abstract = {The energy deposition characteristics of proton radiation have attracted considerable attention in light of its implications for carcinogenesis risk in space travel, as well as for application to cancer treatment. In space, it is the principal component of the galactic cosmic radiation to which astronauts will be exposed. For treatment, an increasing number of proton facilities are being established to exploit the physical advantages of this radiation type. However, the possibility that there may also be biologically based advantages to proton exposure has not been considered in either context. 
We demonstrate here that high-energy proton irradiation can inhibit expression of major pro-angiogenic factors and multiple angiogenesis-associated processes, including invasion and endothelial cell proliferation, which is prominent in cancer progression. Dose-dependent suppression of angiogenic signaling was demonstrated for both cancer and nontransformed cells. Pan-genomic microarray analysis and RT-PCR revealed that post-irradiation (0.5, 1.0 and 2.0 Gy), critical pro-angiogenic signaling factors including: vascular endothelial growth factor (VEGF), interleukin 6 and 8 (IL-6, IL-8) and hypoxia-inducible factor-1 alpha (HIF-1A), were significantly downregulated. Co-culture studies demonstrated that endothelial cell proliferation and invasion were inhibited by culturing with irradiated cancer or fibroblast cells, which suggests that proton irradiation may, in addition to direct action, contribute to angiogenesis suppression through modulation of paracrine signalings from targeted cells. Addition of recombinant IL-8 or VEGF partially restored these functions in vitro, while in vivo, an attenuated tumor growth rate was demonstrated for proton-irradiated human lung cancer cells. 
Taken together, these findings provide novel pre-clinical evidence that proton irradiation may, in addition to its physical targeting advantages, have important biological ramifications that should be a consideration in the optimization of proton therapy.}, } @article {pmid22701679, year = {2012}, author = {Yue, M and Rankin, SC and Blanchet, RT and Nulton, JD and Edwards, RA and Schifferli, DM}, title = {Diversification of the Salmonella fimbriae: a model of macro- and microevolution.}, journal = {PloS one}, volume = {7}, number = {6}, pages = {e38596}, pmid = {22701679}, issn = {1932-6203}, support = {/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Adaptation, Biological/genetics ; Cluster Analysis ; Computational Biology ; *Evolution, Molecular ; Fimbriae Proteins/chemistry/*genetics ; Fimbriae, Bacterial/*genetics ; Gene Duplication/genetics ; *Genetic Variation ; Genome, Bacterial/*genetics ; Genomics ; *Models, Genetic ; Models, Molecular ; Multigene Family/*genetics ; Phylogeny ; Salmonella/classification/*genetics ; Species Specificity ; }, abstract = {Bacteria of the genus Salmonella comprise a large and evolutionary related population of zoonotic pathogens that can infect mammals, including humans and domestic animals, birds, reptiles and amphibians. Salmonella carries a plethora of virulence genes, including fimbrial adhesins, some of them known to participate in mammalian or avian host colonization. Each type of fimbria has its structural subunit and biogenesis genes encoded by one fimbrial gene cluster (FGC). The accumulation of new genomic information offered a timely opportunity to better evaluate the number and types of FGCs in the Salmonella pangenome, to test the use of current classifications based on phylogeny, and to infer potential correlations between FGC evolution in various Salmonella serovars and host niches. This study focused on the FGCs of the currently deciphered 90 genomes and 60 plasmids of Salmonella. 
The analysis highlighted a fimbriome consisting of 35 different FGCs, of which 16 were new, each strain carrying between 5 and 14 FGCs. The Salmonella fimbriome was extremely diverse with FGC representatives in 8 out of 9 previously categorized fimbrial clades and subclades. Phylogenetic analysis of Salmonella suggested macroevolutionary shifts detectable by extensive FGC deletion and acquisition. In addition, microevolutionary drifts were best depicted by the high level of allelic variation in predicted or known adhesins, such as the type 1 fimbrial adhesin FimH for which 67 different natural alleles were identified in S. enterica subsp. I. Together with strain-specific collections of FGCs, allelic variation among adhesins attested to the pathoadaptive evolution of Salmonella towards specific hosts and tissues, potentially modulating host range, strain virulence, disease progression, and transmission efficiency. Further understanding of how each Salmonella strain utilizes its panel of FGCs and specific adhesin alleles for survival and infection will support the development of new approaches for the control of Salmonellosis.}, } @article {pmid22698749, year = {2012}, author = {Fu, J and Qin, QW}, title = {[Pan-genomics analysis of 30 Escherichia coli genomes].}, journal = {Yi chuan = Hereditas}, volume = {34}, number = {6}, pages = {765-772}, doi = {10.3724/sp.j.1005.2012.00765}, pmid = {22698749}, issn = {0253-9772}, mesh = {Escherichia coli/*genetics ; *Evolution, Molecular ; *Genome, Bacterial ; Genomics/methods ; }, abstract = {A pan-genome describes the full complement of genes in species. It is a superset of all the genes in all the individuals of a species, which is composed of a 'core genome' containing genes present in all individuals, and a 'dispensable genome' containing genes present only in some individuals and individual-specific genes. 
From pan-genome sight, 30 finished genomes from Escherichia coli were employed to analyze their gene and genome compositions and evaluation in this study. The results indicated that the core genes accounted for about 50% of the total number of genes, while about 146 strain-specific genes existed in each strain tested. The data suggests that the E. coli pan-genome is vast, and unique genes will continue to be identified when more E. coli genomes are sequenced. After analyzing relationships of the gene conservation, GC content and selection pressure in different strains tested, we found that more conserved genes had a narrow range of GC content, and they also bear more selection pressure. These results will be helpful for better understanding of the evolution profile of E. coli genome, and the dynamic changes of its gene compositions. The E. coli pan-genome provides useful information for prevention and control of the diseases caused by pathogenic E. coli, and also provides a paradigm for the large-scale analysis of pathogenic bacteria genomes.}, } @article {pmid22650444, year = {2012}, author = {Fisichella, M and Berenguer, F and Steinmetz, G and Auffan, M and Rose, J and Prat, O}, title = {Intestinal toxicity evaluation of TiO2 degraded surface-treated nanoparticles: a combined physico-chemical and toxicogenomics approach in caco-2 cells.}, journal = {Particle and fibre toxicology}, volume = {9}, number = {}, pages = {18}, pmid = {22650444}, issn = {1743-8977}, mesh = {Caco-2 Cells ; Coated Materials, Biocompatible/chemistry/*toxicity ; Enterocytes/*drug effects/ultrastructure ; Environmental Pollutants/chemistry/*toxicity ; Gene Expression/drug effects ; Gene Expression Profiling ; Humans ; Metal Nanoparticles/chemistry/*toxicity ; Microscopy, Electron ; Oxidative Stress/drug effects ; Reactive Oxygen Species/metabolism ; Superoxides/metabolism/toxicity ; Surface Properties ; Titanium/chemistry/*toxicity ; Toxicogenetics ; }, abstract = {BACKGROUND: 
Titanium dioxide (TiO2) nanoparticles (NPs) are widely used due to their specific properties, like UV filters in sunscreen. In that particular case TiO2 NPs are surface modified to avoid photocatalytic effects. These surface-treated nanoparticles (STNPs) spread in the environment and might release NPs as degradation residues. Indeed, degradation by the environment (exposure to UV, water and air contact …) will occur and could profoundly alter the physicochemical properties of STNPs such as chemistry, size, shape, surface structure and dispersion that are important parameters for toxicity. Although the toxicity of surface unmodified TiO2 NPs has been documented, nothing was done about degraded TiO2 STNPs which are the most likely to be encountered in environment. The superoxide production by aged STNPs suspensions was tested and compared to surface unmodified TiO2 NPs. We investigated the possible toxicity of commercialized STNPs, degraded by environmental conditions, on human intestinal epithelial cells. STNPs sizes and shape were characterized and viability tests were performed on Caco-2 cells exposed to STNPs. The exposed cells were imaged with SEM and STNPs internalization was researched by TEM. Gene expression microarray analyses were performed to look for potential changes in cellular functions.

RESULTS: The production of reactive oxygen species was detected with surface unmodified TiO2 NPs but not with STNPs or their residues. Through three different toxicity assays, the STNPs tested, which have a strong tendency to aggregate in complex media, showed no toxic effect in Caco-2 cells after exposures to STNPs up to 100 μg/mL over 4 h, 24 h and 72 h. The cell morphology remained intact, attested by SEM, and internalization of STNPs was not seen by TEM. Moreover, gene expression analysis using pangenomic oligomicroarrays (4x 44000 genes) did not show any change versus unexposed cells after exposure to 10 μg/mL, which is much higher than potential environmental concentrations.

CONCLUSIONS: TiO2 STNPs, degraded or not, are not harmful to Caco-2 cells and are unlikely to penetrate the body via oral route. It is likely that the strong persistence of the aluminium hydroxide layer surrounding these nanoparticles protects the cells from a direct contact with the potentially phototoxic TiO2 core.}, } @article {pmid22649452, year = {2012}, author = {Miyauchi, E and Toh, H and Nakano, A and Tanabe, S and Morita, H}, title = {Comparative Genomic Analysis of Lactococcus garvieae Strains Isolated from Different Sources Reveals Candidate Virulence Genes.}, journal = {International journal of microbiology}, volume = {2012}, number = {}, pages = {728276}, pmid = {22649452}, issn = {1687-9198}, abstract = {Lactococcus garvieae is a major pathogen for fish. Two complete (ATCC 49156 and Lg2) and three draft (UNIUD074, 8831, and 21881) genome sequences of L. garvieae have recently been released. We here present the results of a comparative genomic analysis of these fish and human isolates of L. garvieae. The pangenome comprised 1,542 core and 1,378 dispensable genes. The sequenced L. garvieae strains shared most of the possible virulence genes, but the capsule gene cluster was found only in fish-pathogenic strain Lg2. The absence of the capsule gene cluster in other nonpathogenic strains isolated from mastitis and vegetable was also confirmed by PCR. The fish and human isolates of L. garvieae contained the specific two and four adhesin genes, respectively, indicating that these adhesion proteins may be involved in the host specificity differences of L. garvieae. The discoveries revealed by the pangenomic analysis may provide significant insights into the biology of L. 
garvieae.}, } @article {pmid22636774, year = {2012}, author = {Blumer-Schuette, SE and Giannone, RJ and Zurawski, JV and Ozdemir, I and Ma, Q and Yin, Y and Xu, Y and Kataeva, I and Poole, FL and Adams, MW and Hamilton-Brehm, SD and Elkins, JG and Larimer, FW and Land, ML and Hauser, LJ and Cottingham, RW and Hettich, RL and Kelly, RM}, title = {Caldicellulosiruptor core and pangenomes reveal determinants for noncellulosomal thermophilic deconstruction of plant biomass.}, journal = {Journal of bacteriology}, volume = {194}, number = {15}, pages = {4015-4028}, pmid = {22636774}, issn = {1098-5530}, mesh = {Adhesins, Bacterial/analysis/genetics ; *Biomass ; *Carbohydrate Metabolism ; Cellulases/analysis/genetics ; Cellulose/*metabolism ; Genetic Variation ; Genome, Bacterial ; Gram-Positive Bacteria/enzymology/*genetics ; Metabolic Networks and Pathways/*genetics ; Plants/*chemistry ; Proteome/analysis ; }, abstract = {Extremely thermophilic bacteria of the genus Caldicellulosiruptor utilize carbohydrate components of plant cell walls, including cellulose and hemicellulose, facilitated by a diverse set of glycoside hydrolases (GHs). From a biofuel perspective, this capability is crucial for deconstruction of plant biomass into fermentable sugars. While all species from the genus grow on xylan and acid-pretreated switchgrass, growth on crystalline cellulose is variable. The basis for this variability was examined using microbiological, genomic, and proteomic analyses of eight globally diverse Caldicellulosiruptor species. The open Caldicellulosiruptor pangenome (4,009 open reading frames [ORFs]) encodes 106 GHs, representing 43 GH families, but only 26 GHs from 17 families are included in the core (noncellulosic) genome (1,543 ORFs). Differentiating the strongly cellulolytic Caldicellulosiruptor species from the others is a specific genomic locus that encodes multidomain cellulases from GH families 9 and 48, which are associated with cellulose-binding modules. 
This locus also encodes a novel adhesin associated with type IV pili, which was identified in the exoproteome bound to crystalline cellulose. Taking into account the core genomes, pangenomes, and individual genomes, the ancestral Caldicellulosiruptor was likely cellulolytic and evolved, in some cases, into species that lost the ability to degrade crystalline cellulose while maintaining the capacity to hydrolyze amorphous cellulose and hemicellulose.}, } @article {pmid22626276, year = {2012}, author = {Edeline, J and Mottier, S and Vigneau, C and Jouan, F and Perrin, C and Zerrouki, S and Fergelot, P and Patard, JJ and Rioux-Leclercq, N}, title = {Description of 2 angiogenic phenotypes in clear cell renal cell carcinoma.}, journal = {Human pathology}, volume = {43}, number = {11}, pages = {1982-1990}, doi = {10.1016/j.humpath.2012.01.023}, pmid = {22626276}, issn = {1532-8392}, mesh = {Biomarkers, Tumor/genetics/metabolism ; Carcinoma, Renal Cell/blood supply/genetics/*pathology ; Endothelium, Vascular/metabolism/pathology ; Female ; Gene Expression ; Gene Expression Profiling ; Humans ; Kidney/*blood supply ; Kidney Neoplasms/blood supply/genetics/*pathology ; Male ; Middle Aged ; Neovascularization, Pathologic/genetics/metabolism/*pathology ; Nephrectomy ; Oligonucleotide Array Sequence Analysis ; Phenotype ; RNA, Messenger/metabolism ; Real-Time Polymerase Chain Reaction ; }, abstract = {Angiogenesis in clear cell renal cell carcinoma has received recent focus with the development of antiangiogenic therapies. Although tumor progression is known to be correlated with intratumoral and plasma levels of vascular endothelial growth factor-A, the role of tumor induced-angiogenesis remains unclear in these tumors. We analyzed the vascular network in a cohort of 73 clear cell renal cell carcinoma cases using endothelial immunostaining. 
We studied protein expression of vascular endothelial growth factor, Von Hippel Lindau, and carbonic anhydrase IX by immunohistochemistry, Von Hippel Lindau gene alteration by sequencing, deletion- and methylation-specific Multiplex Ligation-dependent Probe Amplification, and gene expression by pangenomic microarray and quantitative polymerase chain reaction in a subcohort of 39 clear cell renal cell carcinoma cases. We described 2 distinct angiogenic phenotypes in comparison with the normal kidney vasculature: low and high angiogenic phenotypes. The low angiogenic phenotype was associated with more aggressive prognostic factors such as T3 to T4 (62% versus 31%, P=.002), N+ (29% versus 3% P=.004), M+ (53% versus 21%, P=.004) stages, Fuhrman grade (grade 3-4: 91% versus 36%, P<.001), and intratumoral vascular endothelial growth factor expression (74% versus 28%, P<.001); was less associated with Von Hippel Lindau inactivation (56% versus 80%, P=.03); and was a predictor of poor prognosis in terms of progression-free, cancer-specific, and overall survival (log-rank test, P=.002, P=.011, and P=.035, respectively). The low angiogenic phenotype was also associated with a relative down-regulation of gene expression (platelet-derived growth factor D, N-acetyl transferase 8, and N-acetyl transferase 8 B). 
In conclusion, the histologic and molecular distinction between these 2 angiogenic phenotypes could help to better understand the biologic behavior of clear cell renal cell carcinoma angiogenesis and could be analyzed in a prospective study of the effects of antiangiogenic drugs.}, } @article {pmid22584712, year = {2012}, author = {Dammann, M and Weber, F}, title = {Personalized medicine: caught between hope, hype and the real world.}, journal = {Clinics (Sao Paulo, Brazil)}, volume = {67 Suppl 1}, number = {Suppl 1}, pages = {91-97}, pmid = {22584712}, issn = {1980-5322}, mesh = {Adrenal Gland Neoplasms/genetics ; Biomedical Research ; Carcinoma, Medullary/diagnosis/*genetics/therapy ; Carcinoma, Neuroendocrine ; Genetic Predisposition to Disease ; *Genomics ; Humans ; Multiple Endocrine Neoplasia Type 2a/genetics ; Pedigree ; Pheochromocytoma/genetics ; *Precision Medicine ; Thyroid Neoplasms/diagnosis/*genetics/therapy ; }, abstract = {Genomic and personalized medicine have become buzz phrases that pervade all fields of medicine. Rapid advances in "-omics" fields of research (chief of which are genomics, proteinomics, and epigenomics) over the last few years have allowed us to dissect the molecular signatures and functional pathways that underlie disease initiation and progression and to identify molecular profiles that help the classification of tumor subtypes and determine their natural course, prognosis, and responsiveness to therapies. Genomic medicine implements the use of traditional genetic information, as well as modern pangenomic information, with the aim of individualizing risk assessment, prevention, diagnosis, and treatment of cancers and other diseases. It is of note that personalizing medical treatment based on genetic information is not the revolution of the 21st century. 
Indeed, the use of genetic information, such as human leukocyte antigen-matching for solid organ transplantation or blood transfusion based on ABO blood group antigens, has been standard of care for several decades. However, in recent years rapid technical advances have allowed us to perform high-throughput, high-density molecular analyses to depict the genomic, proteinomic, and epigenomic make-up of an individual at a reasonable cost. Hence, the so-called genomic revolution is more or less the logical evolution from years of bench-based research and bench-to-bedside translational medicine.}, } @article {pmid22586130, year = {2012}, author = {Tian, CF and Zhou, YJ and Zhang, YM and Li, QQ and Zhang, YZ and Li, DF and Wang, S and Wang, J and Gilbert, LB and Li, YR and Chen, WX}, title = {Comparative genomics of rhizobia nodulating soybean suggests extensive recruitment of lineage-specific genes in adaptations.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {109}, number = {22}, pages = {8629-8634}, pmid = {22586130}, issn = {1091-6490}, mesh = {Adaptation, Physiological/*genetics ; Bacterial Proteins/genetics ; Bradyrhizobium/classification/genetics/physiology ; China ; Cluster Analysis ; Evolution, Molecular ; Genes, Bacterial/*genetics ; Genome, Bacterial/genetics ; Genomics/*methods ; Geography ; Host-Pathogen Interactions ; Phylogeny ; Plant Root Nodulation ; Rhizobium/classification/*genetics/physiology ; Root Nodules, Plant/microbiology ; Sinorhizobium/classification/genetics/physiology ; Soybeans/microbiology ; Species Specificity ; Symbiosis ; }, abstract = {The rhizobium-legume symbiosis has been widely studied as the model of mutualistic evolution and the essential component of sustainable agriculture. Extensive genetic and recent genomic studies have led to the hypothesis that many distinct strategies, regardless of rhizobial phylogeny, contributed to the varied rhizobium-legume symbiosis. 
We sequenced 26 genomes of Sinorhizobium and Bradyrhizobium nodulating soybean to test this hypothesis. The Bradyrhizobium core genome is disproportionally enriched in lipid and secondary metabolism, whereas several gene clusters known to be involved in osmoprotection and adaptation to alkaline pH are specific to the Sinorhizobium core genome. These features are consistent with biogeographic patterns of these bacteria. Surprisingly, no genes are specifically shared by these soybean microsymbionts compared with other legume microsymbionts. On the other hand, phyletic patterns of 561 known symbiosis genes of rhizobia reflected the species phylogeny of these soybean microsymbionts and other rhizobia. Similar analyses with 887 known functional genes or the whole pan genome of rhizobia revealed that only the phyletic distribution of functional genes was consistent with the species tree of rhizobia. Further evolutionary genetics revealed that recombination dominated the evolution of core genome. 
Taken together, our results suggested that faithfully vertical genes were rare compared with those with history of recombination including lateral gene transfer, although rhizobial adaptations to symbiotic interactions and other environmental conditions extensively recruited lineage-specific shell genes under direct or indirect control through the speciation process.}, } @article {pmid22577862, year = {2012}, author = {Guy, L and Nystedt, B and Sun, Y and Näslund, K and Berglund, EC and Andersson, SG}, title = {A genome-wide study of recombination rate variation in Bartonella henselae.}, journal = {BMC evolutionary biology}, volume = {12}, number = {}, pages = {65}, pmid = {22577862}, issn = {1471-2148}, mesh = {Bacterial Secretion Systems/genetics ; Bartonella henselae/*genetics ; *Comparative Genomic Hybridization ; DNA, Bacterial/genetics ; *Gene Transfer, Horizontal ; *Genome, Bacterial ; Multigene Family ; Phylogeny ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Rates of recombination vary by three orders of magnitude in bacteria but the reasons for this variation is unclear. We performed a genome-wide study of recombination rate variation among genes in the intracellular bacterium Bartonella henselae, which has among the lowest estimated ratio of recombination relative to mutation in prokaryotes.

RESULTS: The 1.9 Mb genomes of B. henselae strains IC11, UGA10 and Houston-1 genomes showed only minor gene content variation. Nucleotide sequence divergence levels were less than 1% and the relative rate of recombination to mutation was estimated to 1.1 for the genome overall. Four to eight segments per genome presented significantly enhanced divergences, the most pronounced of which were the virB and trw gene clusters for type IV secretion systems that play essential roles in the infection process. Consistently, multiple recombination events were identified inside these gene clusters. High recombination frequencies were also observed for a gene putatively involved in iron metabolism. A phylogenetic study of this gene in 80 strains of Bartonella quintana, B. henselae and B. grahamii indicated different population structures for each species and revealed horizontal gene transfers across Bartonella species with different host preferences.

CONCLUSIONS: Our analysis has shown little novel gene acquisition in B. henselae, indicative of a closed pan-genome, but higher recombination frequencies within the population than previously estimated. We propose that the dramatically increased fixation rate for recombination events at gene clusters for type IV secretion systems is driven by selection for sequence variability.}, } @article {pmid22561445, year = {2012}, author = {Rauchfuss, F and Lambeck, S and Claus, RA and Ullmann, J and Schulz, T and Weber, M and Katenkamp, K and Guthke, R and Bauer, M and Settmacher, U}, title = {Sustained liver regeneration after portal vein embolization --a human molecular pilot study.}, journal = {Digestive and liver disease : official journal of the Italian Society of Gastroenterology and the Italian Association for the Study of the Liver}, volume = {44}, number = {8}, pages = {681-688}, doi = {10.1016/j.dld.2012.04.002}, pmid = {22561445}, issn = {1878-3562}, mesh = {Activating Transcription Factor 3/genetics ; Aged ; Down-Regulation ; *Embolization, Therapeutic ; Gene Expression Profiling ; Humans ; Hyperplasia/genetics/metabolism ; Inhibitor of Differentiation Protein 1/genetics ; Inhibitor of Differentiation Proteins/genetics ; Insulin-Like Growth Factor Binding Protein 1/genetics ; Insulin-Like Growth Factor Binding Protein 2/genetics ; Ki-67 Antigen/metabolism ; Liver/*metabolism ; Liver Neoplasms/*therapy ; Liver Regeneration/*genetics ; Middle Aged ; Neoplasm Proteins/genetics ; Neovascularization, Physiologic/genetics ; Pilot Projects ; Portal Vein ; Prospective Studies ; Proto-Oncogene Proteins c-fos ; Proto-Oncogene Proteins c-jun/genetics ; RNA, Messenger/metabolism ; Signal Transduction/genetics ; Transcription Factor AP-1/genetics ; Transcription, Genetic ; Transforming Growth Factor beta/genetics ; Up-Regulation ; Vascular Endothelial Growth Factor A/genetics ; beta Catenin/genetics ; }, abstract = {BACKGROUND: Portal vein embolization is a treatment option 
to achieve a sufficient future remnant liver volume for patients with central liver tumours requiring an extended resection with an extensive parenchymal loss. However, molecular mechanisms of this intervention are up to now poorly understood. The objective of this prospective pilot study was the characterization of molecular events leading to late hypertrophy of the non-embolized liver tissue in the human liver.

METHODS: Liver tissue of ten patients was collected before and intraoperatively more than one month after embolization. Investigation of molecular features was performed by pangenomic chips, polymerase chain reaction, immunostaining of proliferation marker Ki-67 and immunofluorescence measurements.

RESULTS: Significantly elevated genes hint towards angiogenesis and signalling by insulin-like growth factor and associated binding proteins. Increased transcript levels of activator protein 1 complex members like c-jun were reflecting potential molecular events of liver growth after embolization. Immunofluorescence data confirmed a predominant upregulation of β-catenin and c-jun (p<0.1) supported by Ki-67 (p<0.05) in the non-embolized liver. In silico analysis of transcriptomic dysplasia and hepatocellular carcinoma data showed divergent signatures compared to embolization.

CONCLUSIONS: Our findings indicate a sustained regeneration after portal vein embolization reflected in hyperplasia and angiogenesis in the human liver and provide novel molecular mechanisms of interlobe crosstalk.}, } @article {pmid22536428, year = {2012}, author = {Liu, W and Fang, L and Li, M and Li, S and Guo, S and Luo, R and Feng, Z and Li, B and Zhou, Z and Shao, G and Chen, H and Xiao, S}, title = {Comparative genomics of Mycoplasma: analysis of conserved essential genes and diversity of the pan-genome.}, journal = {PloS one}, volume = {7}, number = {4}, pages = {e35698}, pmid = {22536428}, issn = {1932-6203}, mesh = {Conserved Sequence ; Evolution, Molecular ; Gene Frequency ; *Genes, Bacterial ; *Genes, Essential ; Genetic Speciation ; *Genetic Variation ; Genome, Bacterial ; Genomics ; Mycoplasma/*genetics ; Phylogeny ; RNA, Bacterial/genetics ; RNA, Ribosomal, 16S/genetics ; Selection, Genetic ; }, abstract = {Mycoplasma, the smallest self-replicating organism with a minimal metabolism and little genomic redundancy, is expected to be a close approximation to the minimal set of genes needed to sustain bacterial life. This study employs comparative evolutionary analysis of twenty Mycoplasma genomes to gain an improved understanding of essential genes. By analyzing the core genome of mycoplasmas, we finally revealed the conserved essential genes set for mycoplasma survival. Further analysis showed that the core genome set has many characteristics in common with experimentally identified essential genes. Several key genes, which are related to DNA replication and repair and can be disrupted in transposon mutagenesis studies, may be critical for bacteria survival especially over long period natural selection. Phylogenomic reconstructions based on 3,355 homologous groups allowed robust estimation of phylogenetic relatedness among mycoplasma strains. 
To obtain deeper insight into the relative roles of molecular evolution in pathogen adaptation to their hosts, we also analyzed the positive selection pressures on particular sites and lineages. There appears to be an approximate correlation between the divergence of species and the level of positive selection detected in corresponding lineages.}, } @article {pmid22509370, year = {2012}, author = {Bhullar, K and Waglechner, N and Pawlowski, A and Koteva, K and Banks, ED and Johnston, MD and Barton, HA and Wright, GD}, title = {Antibiotic resistance is prevalent in an isolated cave microbiome.}, journal = {PloS one}, volume = {7}, number = {4}, pages = {e34953}, pmid = {22509370}, issn = {1932-6203}, support = {MT-13536//Canadian Institutes of Health Research/Canada ; }, mesh = {Anti-Bacterial Agents/immunology ; Bacteria/*genetics ; Biological Evolution ; Drug Resistance, Microbial/*genetics/immunology ; Genome, Bacterial ; Humans ; Metagenome/*genetics/immunology ; New Mexico ; Phosphotransferases (Alcohol Group Acceptor)/*chemistry/*metabolism ; }, abstract = {Antibiotic resistance is a global challenge that impacts all pharmaceutically used antibiotics. The origin of the genes associated with this resistance is of significant importance to our understanding of the evolution and dissemination of antibiotic resistance in pathogens. A growing body of evidence implicates environmental organisms as reservoirs of these resistance genes; however, the role of anthropogenic use of antibiotics in the emergence of these genes is controversial. We report a screen of a sample of the culturable microbiome of Lechuguilla Cave, New Mexico, in a region of the cave that has been isolated for over 4 million years. We report that, like surface microbes, these bacteria were highly resistant to antibiotics; some strains were resistant to 14 different commercially available antibiotics. 
Resistance was detected to a wide range of structurally different antibiotics including daptomycin, an antibiotic of last resort in the treatment of drug resistant Gram-positive pathogens. Enzyme-mediated mechanisms of resistance were also discovered for natural and semi-synthetic macrolide antibiotics via glycosylation and through a kinase-mediated phosphorylation mechanism. Sequencing of the genome of one of the resistant bacteria identified a macrolide kinase encoding gene and characterization of its product revealed it to be related to a known family of kinases circulating in modern drug resistant pathogens. The implications of this study are significant to our understanding of the prevalence of resistance, even in microbiomes isolated from human use of antibiotics. This supports a growing understanding that antibiotic resistance is natural, ancient, and hard wired in the microbial pangenome.}, } @article {pmid22505676, year = {2012}, author = {Trost, E and Blom, J and Soares, Sde C and Huang, IH and Al-Dilaimi, A and Schröder, J and Jaenicke, S and Dorella, FA and Rocha, FS and Miyoshi, A and Azevedo, V and Schneider, MP and Silva, A and Camello, TC and Sabbadini, PS and Santos, CS and Santos, LS and Hirata, R and Mattos-Guaraldi, AL and Efstratiou, A and Schmitt, MP and Ton-That, H and Tauch, A}, title = {Pangenomic study of Corynebacterium diphtheriae that provides insights into the genomic diversity of pathogenic isolates from cases of classical diphtheria, endocarditis, and pneumonia.}, journal = {Journal of bacteriology}, volume = {194}, number = {12}, pages = {3199-3215}, pmid = {22505676}, issn = {1098-5530}, mesh = {Corynebacterium diphtheriae/*genetics/*isolation & purification ; DNA, Bacterial/chemistry/genetics ; Diphtheria/*microbiology ; Endocarditis, Bacterial/*microbiology ; Gene Deletion ; Gene Transfer, Horizontal ; Genes, Bacterial ; *Genetic Variation ; *Genome, Bacterial ; Genomic Islands ; Glycolipids/genetics ; Humans ; Molecular Sequence 
Data ; Mutagenesis, Insertional ; Pneumonia, Bacterial/*microbiology ; Prophages/genetics ; Regulon ; Sequence Analysis, DNA ; }, abstract = {Corynebacterium diphtheriae is one of the most prominent human pathogens and the causative agent of the communicable disease diphtheria. The genomes of 12 strains isolated from patients with classical diphtheria, endocarditis, and pneumonia were completely sequenced and annotated. Including the genome of C. diphtheriae NCTC 13129, we herewith present a comprehensive comparative analysis of 13 strains and the first characterization of the pangenome of the species C. diphtheriae. Comparative genomics showed extensive synteny and revealed a core genome consisting of 1,632 conserved genes. The pangenome currently comprises 4,786 protein-coding regions and increases at an average of 65 unique genes per newly sequenced strain. Analysis of prophages carrying the diphtheria toxin gene tox revealed that the toxoid vaccine producer C. diphtheriae Park-Williams no. 8 has been lysogenized by two copies of the ω(tox)(+) phage, whereas C. diphtheriae 31A harbors a hitherto-unknown tox(+) corynephage. DNA binding sites of the tox-controlling regulator DtxR were detected by genome-wide motif searches. Comparative content analysis showed that the DtxR regulons exhibit marked differences due to gene gain, gene loss, partial gene deletion, and DtxR binding site depletion. Most predicted pathogenicity islands of C. diphtheriae revealed characteristics of horizontal gene transfer. The majority of these islands encode subunits of adhesive pili, which can play important roles in adhesion of C. diphtheriae to different host tissues. All sequenced isolates contain at least two pilus gene clusters. It appears that variation in the distributed genome is a common strategy of C. 
diphtheriae to establish differences in host-pathogen interactions.}, } @article {pmid22485181, year = {2012}, author = {Vidya Priyadarsini, R and Kumar, N and Khan, I and Thiyagarajan, P and Kondaiah, P and Nagini, S}, title = {Gene expression signature of DMBA-induced hamster buccal pouch carcinomas: modulation by chlorophyllin and ellagic acid.}, journal = {PloS one}, volume = {7}, number = {4}, pages = {e34628}, pmid = {22485181}, issn = {1932-6203}, mesh = {9,10-Dimethyl-1,2-benzanthracene ; Animals ; Anticarcinogenic Agents/*pharmacology/therapeutic use ; Carcinoma, Squamous Cell/chemically induced/*metabolism/pathology/prevention & control ; Cell Transformation, Neoplastic/drug effects/genetics ; Chlorophyllides/*pharmacology/therapeutic use ; Cricetinae ; Dietary Supplements ; Ellagic Acid/*pharmacology/therapeutic use ; Gene Expression Profiling ; Gene Expression Regulation, Neoplastic/drug effects ; Male ; Mesocricetus ; Mouth Mucosa/drug effects/*metabolism/pathology ; Mouth Neoplasms/chemically induced/*metabolism/pathology/prevention & control ; Transcription, Genetic ; Tumor Burden ; }, abstract = {Chlorophyllin (CHL), a water-soluble, semi-synthetic derivative of chlorophyll and ellagic acid (EA), a naturally occurring polyphenolic compound in berries, grapes, and nuts have been reported to exert anticancer effects in various human cancer cell lines and in animal tumour models. The present study was undertaken to examine the mechanism underlying chemoprevention and changes in gene expression pattern induced by dietary supplementation of chlorophyllin and ellagic acid in the 7,12-dimethylbenz[a]anthracene (DMBA)-induced hamster buccal pouch (HBP) carcinogenesis model by whole genome profiling using pangenomic microarrays. In hamsters painted with DMBA, the expression of 1,700 genes was found to be altered significantly relative to control. 
Dietary supplementation of chlorophyllin and ellagic acid modulated the expression profiles of 104 and 37 genes respectively. Microarray analysis also revealed changes in the expression of TGFβ receptors, NF-κB, cyclin D1, and matrix metalloproteinases (MMPs) that may play a crucial role in the transformation of the normal buccal pouch to a malignant phenotype. This gene expression signature was altered on treatment with chlorophyllin and ellagic acid. Our study has also revealed patterns of gene expression signature specific for chlorophyllin and ellagic acid exposure. Thus dietary chlorophyllin and ellagic acid that can reverse gene expression signature associated with carcinogenesis are novel candidates for cancer prevention and therapy.}, } @article {pmid22472702, year = {2012}, author = {Castellanos, E and Aranaz, A and de Juan, L and Dominguez, L and Linedale, R and Bull, TJ}, title = {A 16 kb naturally occurring genomic deletion including mce and PPE genes in Mycobacterium avium subspecies paratuberculosis isolates from goats with Johne's disease.}, journal = {Veterinary microbiology}, volume = {159}, number = {1-2}, pages = {60-68}, doi = {10.1016/j.vetmic.2012.03.010}, pmid = {22472702}, issn = {1873-2542}, mesh = {Animals ; Bacterial Proteins/*genetics ; Cattle ; Cell Line ; Cell Line, Tumor ; Genome, Bacterial/*genetics ; Genotype ; Goat Diseases/*microbiology ; Goats ; Humans ; Microbial Viability/genetics ; Mycobacterium avium subsp. paratuberculosis/*genetics/isolation & purification ; Paratuberculosis/*microbiology ; Sequence Deletion/*genetics ; Spain ; }, abstract = {In this study we characterise the genomic and transcriptomic variability of a natural deletion strain of Mycobacterium avium subspecies paratuberculosis (MAP) prevalent in Spanish Guadarrama goats. Using a pan-genome microarray including MAP and M. 
avium subspecies hominissuis 104 genomes (MAPAC) we demonstrate the genotype to be MAP Type II with a single deletion of 19 contiguous ORFs (16 kb) including a complete mammalian cell entry (mce7_1) operon and adjacent proline-glutamic acid (PE)/proline-proline-glutamic acid (PPE) genes. A deletion specific PCR test was developed and a subsequent screening identified four goat herds infected with the variant strain. Each was located in central Spain and showed epidemiological links suggestive of transmission between herds. A majority of animals infected with the variant manifested a paucibacillary form of the disease. Comparisons between virulent complete genome complement strains isolated from multibacillary diseased goats and the MAP variant strain during entry into activated macrophages demonstrated an increased sensitivity in the variant to intracellular killing in human and ovine macrophages. As PPE and mce genes are associated with mycobacterial virulence and pathogenesis we investigated the interplay of these gene sets during cell entry using the MAPAC array. This showed significant differential transcriptome profiles compared to full genome complement MAP controls that included changes in other undeleted mce operons and PE/PPE genes, esx-like signalling operons and stress response/fatty acid metabolism pathways. This strain represents the first report of a MAP Type II genotype with significant natural genomic deletions which remains able to cause disease and is transmissible in goats.}, } @article {pmid22469622, year = {2012}, author = {Latouche, C and El Moghrabi, S and Messaoudi, S and Nguyen Dinh Cat, A and Hernandez-Diaz, I and Alvarez de la Rosa, D and Perret, C and López Andrés, N and Rossignol, P and Zannad, F and Farman, N and Jaisser, F}, title = {Neutrophil gelatinase-associated lipocalin is a novel mineralocorticoid target in the cardiovascular system.}, journal = {Hypertension (Dallas, Tex. 
: 1979)}, volume = {59}, number = {5}, pages = {966-972}, doi = {10.1161/HYPERTENSIONAHA.111.187872}, pmid = {22469622}, issn = {1524-4563}, mesh = {Acute-Phase Proteins/genetics/*metabolism ; Analysis of Variance ; Animals ; Blotting, Western ; Cardiovascular System/metabolism ; Cells, Cultured ; Disease Models, Animal ; Humans ; Lipocalin-2 ; Lipocalins/genetics/*metabolism ; Mice ; Mice, Transgenic ; Myocytes, Cardiac/drug effects/*metabolism ; Oncogene Proteins/genetics/*metabolism ; RNA, Messenger/analysis ; Random Allocation ; Receptors, Mineralocorticoid/genetics/*metabolism ; Reference Values ; Reverse Transcriptase Polymerase Chain Reaction ; Sensitivity and Specificity ; Signal Transduction/genetics/*physiology ; Up-Regulation ; }, abstract = {Mineralocorticoid receptor (MR) activation may be deleterious to the cardiovascular system, and MR antagonists improve morbidity and mortality of patients with heart failure. However, mineralocorticoid signaling in the heart remains largely unknown. Using a pan-genomic transcriptomic analysis, we identified neutrophil gelatinase-associated lipocalin (NGAL or lipocalin 2) as a strongly induced gene in the heart of mice with conditional and targeted MR overexpression in cardiomyocytes (whereas induction was low in glucocorticoid receptor-overexpressing mice). NGAL mRNA levels were enhanced after hormonal stimulation by the MR ligand aldosterone in cultured cardiac cells and in the heart of wild-type mice. Mineralocorticoid pathological challenge induced by nephrectomy/aldosterone/salt treatment upregulated NGAL expression in the heart and aorta and its plasma levels. We show evidence for MR binding to an NGAL promoter, providing a mechanism for NGAL regulation. 
We propose that NGAL may be a marker of mineralocorticoid-dependent injury in the cardiovascular system in mice.}, } @article {pmid22457724, year = {2012}, author = {Laksanalamai, P and Jackson, SA and Mammel, MK and Datta, AR}, title = {High density microarray analysis reveals new insights into genetic footprints of Listeria monocytogenes strains involved in listeriosis outbreaks.}, journal = {PloS one}, volume = {7}, number = {3}, pages = {e32896}, pmid = {22457724}, issn = {1932-6203}, mesh = {Cluster Analysis ; *Disease Outbreaks ; Humans ; Listeria monocytogenes/*genetics ; Listeriosis/*epidemiology/microbiology ; Nucleic Acid Hybridization ; *Oligonucleotide Array Sequence Analysis ; }, abstract = {Listeria monocytogenes, a foodborne bacterial pathogen, causes invasive and febrile gastroenteritis forms of listeriosis in humans. Both invasive and febrile gastroenteritis listeriosis is caused mostly by serotypes 1/2a, 1/2b and 4b strains. The outbreak strains of serotype 1/2a and 4b could be further classified into several epidemic clones but the genetic bases for the diverse pathophysiology have been unsuccessful. DNA microarray provides an important tool to scan the entire genome for genetic signatures that may distinguish the L. monocytogenes strains belonging to different outbreaks. We have designed a pan-genomic microarray chip (Listeria GeneChip) containing sequences from 24 L. monocytogenes strains. The chip was designed to identify the presence/absence of genomic sequences, analyze transcription profiles and identify SNPs. Analysis of the genomic profiles of 38 outbreak strains representing 1/2a, 1/2b and 4b serotypes, revealed that the strains formed distinct genetic clusters adhering to their serotypes and epidemic clone types. Although serologically 1/2a and 1/2b strains share common antigenic markers microarray analysis revealed that 1/2a strains are further apart from the closely related 1/2b and 4b strains. 
Within any given serotype and epidemic clone type the febrile gastroenteritis and invasive strains can be further distinguished based on several genetic markers including large numbers of phage genome, and intergenic sequences. Our results showed that the microarray-based data can be an important tool in characterization of L. monocytogenes strains involved in both invasive and gastroenteritis outbreaks. The results for the first time showed that the serotypes and epidemic clones are based on extensive pan-genomic variability and the 1/2b and 4b strains are more closely related to each other than the 1/2a strains. The data also supported the hypothesis that the strains causing these two diverse outbreaks are genotypically different and this finding might be important in understanding the pathophysiology of this organism.}, } @article {pmid22455317, year = {2012}, author = {Machugh, DE and Taraktsoglou, M and Killick, KE and Nalpas, NC and Browne, JA and DE Park, S and Hokamp, K and Gormley, E and Magee, DA}, title = {Pan-genomic analysis of bovine monocyte-derived macrophage gene expression in response to in vitro infection with Mycobacterium avium subspecies paratuberculosis.}, journal = {Veterinary research}, volume = {43}, number = {1}, pages = {25}, pmid = {22455317}, issn = {1297-9716}, mesh = {Animals ; Cattle ; Female ; Gene Expression Profiling/veterinary ; *Gene Expression Regulation ; Host-Pathogen Interactions ; Macrophages/*immunology/microbiology ; Mycobacterium avium subsp. paratuberculosis/*physiology ; Oligonucleotide Array Sequence Analysis/veterinary ; Paratuberculosis/genetics/*immunology/microbiology ; Polymerase Chain Reaction/veterinary ; Time Factors ; }, abstract = {Mycobacterium avium subspecies paratuberculosis is the causative agent of Johne's disease, an intestinal disease of ruminants with major economic consequences. 
Infectious bacilli are phagocytosed by host macrophages upon exposure where they persist, resulting in lengthy subclinical phases of infection that can lead to immunopathology and disease dissemination. Consequently, analysis of the macrophage transcriptome in response to M. avium subsp. paratuberculosis infection can provide valuable insights into the molecular mechanisms that underlie Johne's disease. Here, we investigate pan-genomic gene expression in bovine monocyte-derived macrophages (MDM) purified from seven age-matched females, in response to in vitro infection with M. avium subsp. paratuberculosis (multiplicity of infection 2:1) at intervals of 2 hours, 6 hours and 24 hours post-infection (hpi). Differentially expressed genes were identified by comparing the transcriptomes of the infected MDM to the non-infected control MDM at each time point (adjusted P-value threshold ≤ 0.10). 1050 differentially expressed unique genes were identified 2 hpi, with 974 and 78 differentially expressed unique genes detected 6 and 24 hpi, respectively. Furthermore, in the infected MDM the number of upregulated genes exceeded the number of downregulated genes at each time point, with the fold-change in expression for the upregulated genes markedly higher than that for the downregulated genes. Inspection and systems biology analysis of the differentially expressed genes revealed an enrichment of genes involved in the inflammatory response, cell signalling pathways and apoptosis. 
The transcriptional changes associated with cellular signalling and the inflammatory response may reflect different immuno-modulatory mechanisms that underlie host-pathogen interactions during infection.}, } @article {pmid22442318, year = {2012}, author = {Ho, CC and Wu, AK and Tse, CW and Yuen, KY and Lau, SK and Woo, PC}, title = {Automated pangenomic analysis in target selection for PCR detection and identification of bacteria by use of ssGeneFinder Webserver and its application to Salmonella enterica serovar Typhi.}, journal = {Journal of clinical microbiology}, volume = {50}, number = {6}, pages = {1905-1911}, pmid = {22442318}, issn = {1098-660X}, mesh = {Bacteriological Techniques/*methods ; Computational Biology/*methods ; DNA Primers/*genetics ; Databases, Nucleic Acid ; Genome, Bacterial ; Humans ; Internet ; Molecular Diagnostic Techniques/*methods ; Polymerase Chain Reaction/*methods ; Salmonella typhi/*genetics ; Sensitivity and Specificity ; Typhoid Fever/*diagnosis/microbiology ; }, abstract = {With the advent of high-throughput DNA sequencing, more than 4,000 bacterial genomes have been sequenced and are publicly available. We report a user-friendly web platform, ssGeneFinder Webserver (http://147.8.74.24/ssGeneFinder/), which is updated weekly for the automated pangenomic selection of specific targets for direct PCR detection and the identification of clinically important bacteria without the need of gene sequencing. To apply the ssGeneFinder Webserver for identifying specific targets for Salmonella enterica serovar Typhi, we analyzed 11 S. Typhi genomes, generated two specific targets, and validated them using 40 S. 
Typhi, 110 non-Typhi Salmonella serovars (serovar Paratyphi A, n = 4; Paratyphi B, n = 1; Typhimurium, n = 5; Enteritidis, n = 12; non-Paratyphi group A, n = 6; non-Paratyphi group B, n = 29; non-Paratyphi group C, n = 12; non-Typhi group D, n = 35; group E and others, n = 6), 115 Enterobacteriaceae isolates (Escherichia, n = 78; Shigella, n = 2; Klebsiella, n = 13; Enterobacter, n = 9; others, n = 13), and 66 human stool samples that were culture negative for S. Typhi. Both targets successfully detected all typical and atypical S. Typhi isolates, including an H1-j flagellin gene mutant, an aflagellated mutant which reacted with 2O Salmonella antiserum, and the Vi-negative attenuated vaccine strain Ty21a. No false positive was detected from any of the bacterial isolates and stool samples. DNA sequencing confirmed the identity of all positive amplicons. The PCR assays have detection limits as low as 100 CFU per reaction and were tested using spiked stool samples. Using a pangenomic approach, ssGeneFinder Webserver generated targets specific to S. Typhi. 
These and other validated targets should be applicable to the identification and direct PCR detection of bacterial pathogens from uncultured, mixed, and environmental samples.}, } @article {pmid22422677, year = {2012}, author = {Schofield, PN and Hoehndorf, R and Gkoutos, GV}, title = {Mouse genetic and phenotypic resources for human genetics.}, journal = {Human mutation}, volume = {33}, number = {5}, pages = {826-836}, pmid = {22422677}, issn = {1098-1004}, support = {R01 HG004838/HG/NHGRI NIH HHS/United States ; R01 HG004838-02/HG/NHGRI NIH HHS/United States ; }, mesh = {Animals ; Databases, Genetic ; Embryonic Stem Cells/cytology/metabolism ; Genetic Association Studies ; Genome ; Genome-Wide Association Study ; Humans ; International Cooperation ; Mice/*genetics ; Mice, Knockout ; *Models, Animal ; *Phenotype ; Reverse Genetics ; }, abstract = {The use of model organisms to provide information on gene function has proved to be a powerful approach to our understanding of both human disease and fundamental mammalian biology. Large-scale community projects using mice, based on forward and reverse genetics, and now the pan-genomic phenotyping efforts of the International Mouse Phenotyping Consortium, are generating resources on an unprecedented scale, which will be extremely valuable to human genetics and medicine. 
We discuss the nature and availability of data, mice and embryonic stem cells from these large-scale programmes, the use of these resources to help prioritize and validate candidate genes in human genetic association studies, and how they can improve our understanding of the underlying pathobiology of human disease.}, } @article {pmid22416012, year = {2012}, author = {Young, J and Metay, C and Bouligand, J and Tou, B and Francou, B and Maione, L and Tosca, L and Sarfati, J and Brioude, F and Esteva, B and Briand-Suleau, A and Brisset, S and Goossens, M and Tachdjian, G and Guiochon-Mantel, A}, title = {SEMA3A deletion in a family with Kallmann syndrome validates the role of semaphorin 3A in human puberty and olfactory system development.}, journal = {Human reproduction (Oxford, England)}, volume = {27}, number = {5}, pages = {1460-1465}, doi = {10.1093/humrep/des022}, pmid = {22416012}, issn = {1460-2350}, mesh = {Female ; *Gene Deletion ; Humans ; Kallmann Syndrome/*genetics ; Male ; Pedigree ; Phenotype ; Puberty/genetics/physiology ; Semaphorin-3A/*genetics/physiology ; Smell/genetics/physiology ; }, abstract = {BACKGROUND: Kallmann syndrome (KS) is a genetic disorder associating pubertal failure with congenitally absent or impaired sense of smell. KS is related to defective neuronal development affecting both the migration of olfactory nerve endings and GnRH neurons. The discovery of several genetic mutations responsible for KS led to the identification of signaling pathways involved in these processes, but the mutations so far identified account for only 30% of cases of KS. Here, we attempted to identify new genes responsible for KS by using a pan-genomic approach.

METHODS: From a cohort of 120 KS patients, we selected 48 propositi with no mutations in known KS genes. They were analyzed by comparative genomic hybridization array, using Agilent 105K oligonucleotide chips with a mean resolution of 50 kb.

RESULTS: One propositus was found to have a heterozygous deletion of 213 kb at locus 7q21.11, confirmed by real-time qPCR, deleting 11 of the 17 SEMA3A exons. This deletion cosegregated in the propositus' family with the KS phenotype, which was transmitted in autosomal dominant fashion and was not associated with other neurological or non-neurological clinical disorders. SEMA3A codes for semaphorin 3A, a protein that interacts with neuropilins. Mice lacking semaphorin 3A expression have been shown to have a Kallmann-like phenotype.

CONCLUSIONS: SEMA3A is therefore a new gene whose loss-of-function is involved in KS. These findings validate the specific role of semaphorin 3A in the development of the olfactory system and in neuronal control of puberty in humans.}, } @article {pmid22409488, year = {2012}, author = {Leekitcharoenphon, P and Lukjancenko, O and Friis, C and Aarestrup, FM and Ussery, DW}, title = {Genomic variation in Salmonella enterica core genes for epidemiological typing.}, journal = {BMC genomics}, volume = {13}, number = {}, pages = {88}, pmid = {22409488}, issn = {1471-2164}, mesh = {Bacterial Typing Techniques/*methods ; Conserved Sequence ; Genes, Bacterial/*genetics ; Genetic Variation/*genetics ; *Genomics ; Molecular Epidemiology ; Multigene Family/genetics ; Phylogeny ; Salmonella enterica/*classification/*genetics/physiology ; }, abstract = {BACKGROUND: Technological advances in high throughput genome sequencing are making whole genome sequencing (WGS) available as a routine tool for bacterial typing. Standardized procedures for identification of relevant genes and of variation are needed to enable comparison between studies and over time. The core genes--the genes that are conserved in all (or most) members of a genus or species--are potentially good candidates for investigating genomic variation in phylogeny and epidemiology.

RESULTS: We identify a set of 2,882 core gene clusters based on 73 publicly available Salmonella enterica genomes and evaluate their value as typing targets, comparing whole genome typing and traditional methods such as 16S and MLST. A consensus tree based on variation of core genes gives much better resolution than 16S and MLST; the pan-genome family tree is similar to the consensus tree, but with higher confidence. The core genes can be divided into two categories: a few highly variable genes and a larger set of conserved core genes, with low variance. For the most variable core genes, the variance in amino acid sequences is higher than for the corresponding nucleotide sequences, suggesting that there is a positive selection towards mutations leading to amino acid changes.

CONCLUSIONS: Genomic variation within the core genome is useful for investigating molecular evolution and providing candidate genes for bacterial genome typing. Identification of genes with different degrees of variation is important especially in trend analysis.}, } @article {pmid22399456, year = {2012}, author = {Bapteste, E and Bouchard, F and Burian, RM}, title = {Philosophy and evolution: minding the gap between evolutionary patterns and tree-like patterns.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {856}, number = {}, pages = {81-110}, doi = {10.1007/978-1-61779-585-5_4}, pmid = {22399456}, issn = {1940-6029}, mesh = {Adaptation, Physiological ; Anti-Bacterial Agents/pharmacology ; Archaea/genetics ; Bacteria/drug effects/genetics ; Drug Resistance, Bacterial/genetics ; *Evolution, Molecular ; *Phylogeny ; }, abstract = {Ever since Darwin, the familiar genealogical pattern known as the Tree of Life (TOL) has been prominent in evolutionary thinking and has dominated not only systematics, but also the analysis of the units of evolution. However, recent findings indicate that the evolution of DNA, especially in prokaryotes and such DNA vehicles as viruses and plasmids, does not follow a unique tree-like pattern. Because evolutionary patterns track a greater range of processes than those captured in genealogies, genealogical patterns are in fact only a subset of a broader set of evolutionary patterns. This fact suggests that evolutionists who focus exclusively on genealogical patterns are blocked from providing a significant range of genuine evolutionary explanations. Consequently, we highlight challenges to tree-based approaches, and point the way toward more appropriate methods to study evolution (although we do not present them in technical detail). 
We argue that there is significant benefit in adopting a wider range of models, evolutionary representations, and evolutionary explanations, based on an analysis of the full range of evolutionary processes. We introduce an ecosystem orientation into evolutionary thinking that highlights the importance of "type 1 coalitions" (functionally related units with genetic exchanges, aka "friends with genetic benefits"), "type 2 coalitions" (functionally related units without genetic exchanges), "communal interactions," and "emergent evolutionary properties." On this basis, we seek to promote the study of (especially prokaryotic) evolution with dynamic evolutionary networks, which are less constrained than the TOL, and to provide new ways to analyze an expanded range of evolutionary units (genetic modules, recombined genes, plasmids, phages and prokaryotic genomes, pangenomes, microbial communities) and evolutionary processes. Finally, we discuss some of the conceptual and practical questions raised by such network-based representation.}, } @article {pmid22395783, year = {2012}, author = {Dini-Andreote, F and Andreote, FD and Araújo, WL and Trevors, JT and van Elsas, JD}, title = {Bacterial genomes: habitat specificity and uncharted organisms.}, journal = {Microbial ecology}, volume = {64}, number = {1}, pages = {1-7}, pmid = {22395783}, issn = {1432-184X}, mesh = {Animals ; Bacteria/classification/*genetics/isolation & purification ; *Ecosystem ; Environmental Microbiology ; *Genome, Bacterial ; Humans ; Phylogeny ; }, abstract = {The capability and speed in generating genomic data have increased profoundly since the release of the draft human genome in 2000. Additionally, sequencing costs have continued to plummet as the next generation of highly efficient sequencing technologies (next-generation sequencing) became available and commercial facilities promote market competition. 
However, new challenges have emerged as researchers attempt to efficiently process the massive amounts of sequence data being generated. First, the described genome sequences are unequally distributed among the branches of bacterial life and, second, bacterial pan-genomes are often not considered when setting aims for sequencing projects. Here, we propose that scientists should be concerned with attaining an improved equal representation of most of the bacterial tree of life organisms, at the genomic level. Moreover, they should take into account the natural variation that is often observed within bacterial species and the role of the often changing surrounding environment and natural selection pressures, which is central to bacterial speciation and genome evolution. Not only will such efforts contribute to our overall understanding of the microbial diversity extant in ecosystems as well as the structuring of the extant genomes, but they will also facilitate the development of better methods for (meta)genome annotation.}, } @article {pmid22377449, year = {2012}, author = {Strouts, FR and Power, P and Croucher, NJ and Corton, N and van Tonder, A and Quail, MA and Langford, PR and Hudson, MJ and Parkhill, J and Kroll, JS and Bentley, SD}, title = {Lineage-specific virulence determinants of Haemophilus influenzae biogroup aegyptius.}, journal = {Emerging infectious diseases}, volume = {18}, number = {3}, pages = {449-457}, pmid = {22377449}, issn = {1080-6059}, support = {/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Adhesins, Bacterial/genetics ; Gene Order ; Genome, Bacterial ; Haemophilus Infections/*microbiology ; Haemophilus influenzae/classification/*genetics/*pathogenicity ; Host-Pathogen Interactions ; Humans ; Molecular Sequence Annotation ; Molecular Sequence Data ; Operon ; Phylogeny ; Sequence Homology ; Virulence ; Virulence Factors/genetics ; }, abstract = {An emergent clone of Haemophilus influenzae biogroup aegyptius (Hae) is responsible for 
outbreaks of Brazilian purpuric fever (BPF). First recorded in Brazil in 1984, the so-called BPF clone of Hae caused a fulminant disease that started with conjunctivitis but developed into septicemic shock; mortality rates were as high as 70%. To identify virulence determinants, we conducted a pan-genomic analysis. Sequencing of the genomes of the BPF clone strain F3031 and a noninvasive conjunctivitis strain, F3047, and comparison of these sequences with 5 other complete H. influenzae genomes showed that >77% of the F3031 genome is shared among all H. influenzae strains. Delineation of the Hae accessory genome enabled characterization of 163 predicted protein-coding genes; identified differences in established autotransporter adhesins; and revealed a suite of novel adhesins unique to Hae, including novel trimeric autotransporter adhesins and 4 new fimbrial operons. These novel adhesins might play a critical role in host-pathogen interactions.}, } @article {pmid22369888, year = {2012}, author = {Dunn, B and Richter, C and Kvitek, DJ and Pugh, T and Sherlock, G}, title = {Analysis of the Saccharomyces cerevisiae pan-genome reveals a pool of copy number variants distributed in diverse yeast strains from differing industrial environments.}, journal = {Genome research}, volume = {22}, number = {5}, pages = {908-924}, pmid = {22369888}, issn = {1549-5469}, mesh = {Chromosome Mapping ; Cluster Analysis ; Comparative Genomic Hybridization ; *DNA Copy Number Variations ; DNA Transposable Elements/genetics ; Genetic Variation ; *Genome, Fungal ; Genome, Mitochondrial ; Hybridization, Genetic ; Plasmids/genetics ; Principal Component Analysis ; Recombination, Genetic ; Saccharomyces cerevisiae/*genetics ; Sequence Analysis, DNA ; Telomere/genetics ; }, abstract = {Although the budding yeast Saccharomyces cerevisiae is arguably one of the most well-studied organisms on earth, the genome-wide variation within this species--i.e., its "pan-genome"--has been less explored. 
We created a multispecies microarray platform containing probes covering the genomes of several Saccharomyces species: S. cerevisiae, including regions not found in the standard laboratory S288c strain, as well as the mitochondrial and 2-μm circle genomes-plus S. paradoxus, S. mikatae, S. kudriavzevii, S. uvarum, S. kluyveri, and S. castellii. We performed array-Comparative Genomic Hybridization (aCGH) on 83 different S. cerevisiae strains collected across a wide range of habitats; of these, 69 were commercial wine strains, while the remaining 14 were from a diverse set of other industrial and natural environments. We observed interspecific hybridization events, introgression events, and pervasive copy number variation (CNV) in all but a few of the strains. These CNVs were distributed throughout the strains such that they did not produce any clear phylogeny, suggesting extensive mating in both industrial and wild strains. To validate our results and to determine whether apparently similar introgressions and CNVs were identical by descent or recurrent, we also performed whole-genome sequencing on nine of these strains. These data may help pinpoint genomic regions involved in adaptation to different industrial milieus, as well as shed light on the course of domestication of S. cerevisiae.}, } @article {pmid22357598, year = {2012}, author = {Baumdicker, F and Hess, WR and Pfaffelhuber, P}, title = {The infinitely many genes model for the distributed genome of bacteria.}, journal = {Genome biology and evolution}, volume = {4}, number = {4}, pages = {443-456}, pmid = {22357598}, issn = {1759-6653}, mesh = {Bacterial Proteins/genetics ; Evolution, Molecular ; *Genome, Bacterial ; *Models, Genetic ; Phylogeny ; Prochlorococcus/classification/*genetics ; Synechococcus/classification/*genetics ; }, abstract = {The distributed genome hypothesis states that the gene pool of a bacterial taxon is much more complex than that found in a single individual genome. 
However, the possible fitness advantage, why such genomic diversity is maintained, whether this variation is largely adaptive or neutral, and why these distinct individuals can coexist, remains poorly understood. Here, we present the infinitely many genes (IMG) model, which is a quantitative, evolutionary model for the distributed genome. It is based on a genealogy of individual genomes and the possibility of gene gain (from an unbounded reservoir of novel genes, e.g., by horizontal gene transfer from distant taxa) and gene loss, for example, by pseudogenization and deletion of genes, during reproduction. By implementing these mechanisms, the IMG model differs from existing concepts for the distributed genome, which cannot differentiate between neutral evolution and adaptation as drivers of the observed genomic diversity. Using the IMG model, we tested whether the distributed genome of 22 full genomes of picocyanobacteria (Prochlorococcus and Synechococcus) shows signs of adaptation or neutrality. We calculated the effective population size of Prochlorococcus at $1.01 \times 10^{11}$ and predicted 18 distinct clades for this population, only six of which have been isolated and cultured thus far. 
We predicted that the Prochlorococcus pangenome contains 57,792 genes and found that the evolution of the distributed genome of Prochlorococcus was possibly neutral, whereas that of Synechococcus and the combined sample shows a clear deviation from neutrality.}, } @article {pmid22341410, year = {2012}, author = {Lindeberg, M and Cunnac, S and Collmer, A}, title = {Pseudomonas syringae type III effector repertoires: last words in endless arguments.}, journal = {Trends in microbiology}, volume = {20}, number = {4}, pages = {199-208}, doi = {10.1016/j.tim.2012.01.003}, pmid = {22341410}, issn = {1878-4380}, mesh = {Bacterial Proteins/metabolism ; *Bacterial Secretion Systems ; *Host-Pathogen Interactions ; Plant Diseases/*microbiology ; Pseudomonas syringae/*metabolism/*pathogenicity ; Virulence Factors/*metabolism ; }, abstract = {Many plant pathogens subvert host immunity by injecting compositionally diverse but functionally similar repertoires of cytoplasmic effector proteins. The bacterial pathogen Pseudomonas syringae is a model for exploring the functional structure of such repertoires. The pangenome of P. syringae encodes 57 families of effectors injected by the type III secretion system. Distribution of effector genes among phylogenetically diverse strains reveals a small set of core effectors targeting antimicrobial vesicle trafficking and a much larger set of variable effectors targeting kinase-based recognition processes. Complete disassembly of the 28-effector repertoire of a model strain and reassembly of a minimal functional repertoire reveals the importance of simultaneously attacking both processes. 
These observations, coupled with growing knowledge of effector targets in plants, support a model for coevolving molecular dialogs between effector repertoires and plant immune systems that emphasizes mutually-driven expansion of the components governing recognition.}, } @article {pmid22294762, year = {2012}, author = {Castellví-Bel, S and Ruiz-Ponte, C and Fernández-Rozadilla, C and Abulí, A and Muñoz, J and Bessa, X and Brea-Fernández, A and Ferro, M and Giráldez, MD and Xicola, RM and Llor, X and Jover, R and Piqué, JM and Andreu, M and Castells, A and Carracedo, A}, title = {Seeking genetic susceptibility variants for colorectal cancer: the EPICOLON consortium experience.}, journal = {Mutagenesis}, volume = {27}, number = {2}, pages = {153-159}, doi = {10.1093/mutage/ger047}, pmid = {22294762}, issn = {1464-3804}, mesh = {Clinical Trials as Topic ; Colorectal Neoplasms/diagnosis/*genetics ; *Genes, Neoplasm ; *Genetic Predisposition to Disease ; *Genome, Human ; *Genome-Wide Association Study ; Humans ; Polymorphism, Genetic/*genetics ; White People ; }, abstract = {The EPICOLON consortium was initiated in 1999 by the Gastrointestinal Oncology Group of the Spanish Gastroenterology Association. It recruited consecutive, unselected, population-based colorectal cancer (CRC) cases and control subjects matched by age and gender without personal or familial history of cancer all over Spain with the main goal of gaining knowledge in Lynch syndrome and familial CRC. This epidemiological, prospective and multicentre study collected extensive clinical data and biological samples from ∼2000 CRC cases and 2000 controls in Phases 1 and 2 involving 25 and 14 participating hospitals, respectively. 
Genetic susceptibility projects in EPICOLON have included candidate-gene approaches evaluating single-nucleotide polymorphisms/genes from the historical category (linked to CRC risk by previous studies), from human syntenic CRC susceptibility regions identified in mouse, from the CRC carcinogenesis-related pathways Wnt and BMP, from regions 9q22 and 3q22 with positive linkage in CRC families, and from the mucin gene family. This consortium has also participated actively in the identification of 5 of the 16 common, low-penetrance CRC genetic variants identified so far by genome-wide association studies. Finishing their own pangenomic study and performing whole-exome sequencing in selected CRC samples are among EPICOLON future research prospects.}, } @article {pmid22292301, year = {2011}, author = {Junien, C}, title = {[Early determinants of health and disease: epigenetics and environment].}, journal = {Bulletin de l'Academie nationale de medecine}, volume = {195}, number = {3}, pages = {511-26; discussion 526-7}, pmid = {22292301}, issn = {0001-4079}, mesh = {Animals ; Environmental Exposure/*adverse effects ; *Epigenesis, Genetic ; Epigenomics ; Fetal Development/genetics ; Genetic Predisposition to Disease ; Humans ; }, abstract = {Several studies of the developmental origins of health and disease and metabolic programming have identified links between early nutrition, epigenetic processes and long-term illness. The ways in which epigenetic modifications fix the effects of early environmental events, leading to sustained responses to transient stimuli, modified gene expression patterns and altered phenotypes in later life, is a topic of considerable interest. This review focuses on recently discovered mechanisms, and challenges prevailing views on the dynamics, position and functions of epigenetic marks. 
Most epigenetic studies have addressed long-term effects of environmental stressors on a small number of epigenetic marks, at the pan-genomic or individual gene level, in humans and animal models. In parallel, studies of humans and mice, using high-throughput technologies, have revealed additional complexity in epigenetic processes, notably highlighting the importance of crosstalk between different epigenetic marks. A self-propagating epigenetic cycle has also been identified. Recent studies have shown clear sexual dimorphism both in programming trajectories and in response to a given environmental insult. Despite recent progress we are still far from understanding how, when and where environmental stressors disturb key epigenetic mechanisms. Further work is needed to identify original key marks and their changes during development, throughout an individual's lifetime or over several generations, and to determine how to revert malprogramming with a view to prevention and treatment.}, } @article {pmid22286995, year = {2012}, author = {Cellier, G and Remenant, B and Chiroleu, F and Lefeuvre, P and Prior, P}, title = {Phylogeny and population structure of brown rot- and Moko disease-causing strains of Ralstonia solanacearum phylotype II.}, journal = {Applied and environmental microbiology}, volume = {78}, number = {7}, pages = {2367-2375}, pmid = {22286995}, issn = {1098-5336}, mesh = {Comparative Genomic Hybridization ; Ecotype ; *Genetic Variation ; Solanum lycopersicum/microbiology ; Musa/microbiology ; Oligonucleotide Array Sequence Analysis ; *Phylogeny ; Plant Diseases/*microbiology ; Ralstonia solanacearum/classification/*genetics/isolation & purification/pathogenicity ; Solanum melongena/microbiology ; Solanum tuberosum/microbiology ; }, abstract = {The ancient soilborne plant vascular pathogen Ralstonia solanacearum has evolved and adapted to cause severe damage in an unusually wide range of plants. 
In order to better describe and understand these adaptations, strains with very similar lifestyles and host specializations are grouped into ecotypes. We used comparative genomic hybridization (CGH) to investigate three particular ecotypes in the American phylotype II group: (i) brown rot strains from phylotypes IIB-1 and IIB-2, historically known as race 3 biovar 2 and clonal; (ii) new pathogenic variants from phylotype IIB-4NPB that lack pathogenicity for banana but can infect many other plant species; and (iii) Moko disease-causing strains from phylotypes IIB-3, IIB-4, and IIA-6, historically known as race 2, that cause wilt on banana, plantain, and Heliconia spp. We compared the genomes of 72 R. solanacearum strains, mainly from the three major ecotypes of phylotype II, using a newly developed pangenomic microarray to decipher their population structure and gain clues about the epidemiology of these ecotypes. Strain phylogeny and population structure were reconstructed. The results revealed a phylogeographic structure within brown rot strains, allowing us to distinguish European outbreak strains of Andean and African origins. The pangenomic CGH data also demonstrated that Moko ecotype IIB-4 is phylogenetically distinct from the emerging IIB-4NPB strains. 
These findings improved our understanding of the epidemiology of important ecotypes in phylotype II and will be useful for evolutionary analyses and the development of new DNA-based diagnostic tools.}, } @article {pmid22253721, year = {2012}, author = {Poaty, H and Coullin, P and Peko, JF and Dessen, P and Diatta, AL and Valent, A and Leguern, E and Prévot, S and Gombé-Mbalawa, C and Candelier, JJ and Picard, JY and Bernheim, A}, title = {Genome-wide high-resolution aCGH analysis of gestational choriocarcinomas.}, journal = {PloS one}, volume = {7}, number = {1}, pages = {e29426}, pmid = {22253721}, issn = {1932-6203}, mesh = {Cell Line, Tumor ; Choriocarcinoma/*genetics/pathology ; Comparative Genomic Hybridization/*methods ; Female ; Genome, Human/*genetics ; Genotype ; Humans ; Immunohistochemistry ; In Situ Hybridization, Fluorescence ; Microsatellite Repeats/genetics ; Pregnancy ; Pregnancy Complications/*genetics/pathology ; Sequence Analysis, DNA ; Uterine Neoplasms/*genetics/pathology ; }, abstract = {Eleven samples of DNA from choriocarcinomas were studied by high resolution CGH-array 244 K. They were studied after histopathological confirmation of the diagnosis, of the androgenic etiology and after a microsatellite marker analysis confirming the absence of contamination of tumor DNA from maternal DNA. Three cell lines, BeWo, JAR, JEG were also studied by this high resolution pangenomic technique. According to aCGH analysis, the de novo choriocarcinomas exhibited simple chromosomal rearrangements or normal profiles. The cell lines showed various and complex chromosomal aberrations. 23 Minimal Critical Regions were defined that allowed us to list the genes that were potentially implicated. 
Among them, unusually high numbers of microRNA clusters and imprinted genes were observed.}, } @article {pmid22247066, year = {2012}, author = {Lopez, E and Callier, P and Cormier-Daire, V and Lacombe, D and Moncla, A and Bottani, A and Lambert, S and Goldenberg, A and Doray, B and Odent, S and Sanlaville, D and Gueneau, L and Duplomb, L and Huet, F and Aral, B and Thauvin-Robinet, C and Faivre, L}, title = {Search for a gene responsible for Floating-Harbor syndrome on chromosome 12q15q21.1.}, journal = {American journal of medical genetics. Part A}, volume = {158A}, number = {2}, pages = {333-339}, doi = {10.1002/ajmg.a.34401}, pmid = {22247066}, issn = {1552-4833}, mesh = {Abnormalities, Multiple/*genetics/*pathology ; Adult ; Child ; Child, Preschool ; Chromosomes, Human, Pair 12/*genetics ; Comparative Genomic Hybridization/methods ; Craniofacial Abnormalities/*genetics/*pathology ; Female ; Genetic Predisposition to Disease ; Growth Disorders/*genetics/*pathology ; Haploinsufficiency/genetics ; Heart Septal Defects, Ventricular/*genetics/*pathology ; High-Throughput Nucleotide Sequencing ; Humans ; Male ; Phenotype ; Sequence Deletion/*genetics ; }, abstract = {Floating-Harbor syndrome (FHS) is characterized by characteristic facial dysmorphism, short stature with delayed bone age, and expressive language delay. To date, the gene(s) responsible for FHS is (are) unknown and the diagnosis is only made on the basis of the clinical phenotype. The majority of cases appeared to be sporadic but rare cases following autosomal dominant inheritance have been reported. We identified a 4.7 Mb de novo 12q15-q21.1 microdeletion in a patient with FHS and intellectual deficiency. Pangenomic 244K array-CGH performed in a series of 12 patients with FHS failed to identify overlapping deletions. We hypothesized that FHS is caused by haploinsufficiency of one of the 19 genes or predictions located in the deletion found in our index patient. 
Since none of them appeared to be a good candidate gene by their function, a high-throughput sequencing approach of the region of interest was used in eight FHS patients. No pathogenic mutation was found in these patients. This approach failed to identify the gene responsible for FHS, and this can be explained by at least four reasons: (i) our index patient could be a phenocopy of FHS; (ii) the disease may be clinically heterogeneous (since the diagnosis relies exclusively on clinical features), (iii) there could be genetic heterogeneity of the disease, (iv) the patient could carry a mutation in a gene located elsewhere. Recent descriptions of patients with 12q15-q21.1 microdeletions argue in favor of the phenocopy hypothesis.}, } @article {pmid22236517, year = {2012}, author = {Tomazella, GG and Risberg, K and Mylvaganam, H and Lindemann, PC and Thiede, B and Souza, GA and Wiker, HG}, title = {Proteomic analysis of a multi-resistant clinical Escherichia coli isolate of unknown genomic background.}, journal = {Journal of proteomics}, volume = {75}, number = {6}, pages = {1830-1837}, doi = {10.1016/j.jprot.2011.12.024}, pmid = {22236517}, issn = {1876-7737}, mesh = {Databases, Genetic ; Drug Resistance, Multiple, Bacterial/*genetics ; Escherichia coli/*genetics/pathogenicity ; Escherichia coli Infections/genetics/microbiology ; Escherichia coli Proteins/*genetics ; Humans ; Proteomics/methods ; Software ; Virulence Factors/genetics ; }, abstract = {Horizontal transfer of gene clusters occurs in Escherichia coli (E. coli), which could lead to evolution of new pathovars and improve survival fitness. However, this genetic event results in genomic plasticity which is a hindrance for proteomic characterization of strains with unknown genetic backgrounds. To characterize such an isolate with many specific genetic variations we used the recently in-house designed MSMSpdbb software which merges protein databases from several sources of E. 
coli including type strains and other commensal and pathogenic isolates. We selected a multidrug resistant clinical isolate in order to check the capacity of our approach to identify selected protein markers. From the 1596 identified proteins, we found important virulence factors such as IutA, OmpA, TraT and selected enzymes conferring antibiotic resistance, such as CTX-M-15 (Extended-Spectrum Beta Lactamase--ESBL) and AAC(6')-Ib-cr (to aminoglycoside+fluoroquinolone). In addition, we compared the protein identifications with E. coli gene annotation and found that 27% of the proteins identified in the present study corresponded to the pan-genome of E. coli species and are only present in a subset of strains. This demonstrates the ability of our approach to characterize the proteome of bacterial strains with complex genomic plasticity even without its genomic information.}, } @article {pmid22232693, year = {2012}, author = {Stecher, B and Denzler, R and Maier, L and Bernet, F and Sanders, MJ and Pickard, DJ and Barthel, M and Westendorf, AM and Krogfelt, KA and Walker, AW and Ackermann, M and Dobrindt, U and Thomson, NR and Hardt, WD}, title = {Gut inflammation can boost horizontal gene transfer between pathogenic and commensal Enterobacteriaceae.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {109}, number = {4}, pages = {1269-1274}, pmid = {22232693}, issn = {1091-6490}, support = {/WT_/Wellcome Trust/United Kingdom ; 076964/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Bacteriocin Plasmids/genetics ; Base Sequence ; *Biological Evolution ; Colitis/*microbiology ; Computational Biology ; DNA Primers/genetics ; Enterobacteriaceae/*genetics/growth & development ; Escherichia coli/genetics ; Gene Transfer, Horizontal/*genetics ; Mice ; Molecular Sequence Data ; Oligonucleotide Array Sequence Analysis ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Salmonella typhimurium/genetics ; Sequence Alignment ; 
Sequence Analysis, DNA ; }, abstract = {The mammalian gut harbors a dense microbial community interacting in multiple ways, including horizontal gene transfer (HGT). Pangenome analyses established particularly high levels of genetic flux between Gram-negative Enterobacteriaceae. However, the mechanisms fostering intraenterobacterial HGT are incompletely understood. Using a mouse colitis model, we found that Salmonella-inflicted enteropathy elicits parallel blooms of the pathogen and of resident commensal Escherichia coli. These blooms boosted conjugative HGT of the colicin-plasmid p2 from Salmonella enterica serovar Typhimurium to E. coli. Transconjugation efficiencies of ~100% in vivo were attributable to high intrinsic p2-transfer rates. Plasmid-encoded fitness benefits contributed little. Under normal conditions, HGT was blocked by the commensal microbiota inhibiting contact-dependent conjugation between Enterobacteriaceae. Our data show that pathogen-driven inflammatory responses in the gut can generate transient enterobacterial blooms in which conjugative transfer occurs at unprecedented rates. 
These blooms may favor reassortment of plasmid-encoded genes between pathogens and commensals fostering the spread of fitness-, virulence-, and antibiotic-resistance determinants.}, } @article {pmid22215741, year = {2012}, author = {Sahl, JW and Rasko, DA}, title = {Analysis of global transcriptional profiles of enterotoxigenic Escherichia coli isolate E24377A.}, journal = {Infection and immunity}, volume = {80}, number = {3}, pages = {1232-1242}, pmid = {22215741}, issn = {1098-5522}, support = {R01 AI089894/AI/NIAID NIH HHS/United States ; }, mesh = {Bile Acids and Salts/metabolism ; Culture Media/chemistry ; Enterotoxigenic Escherichia coli/*genetics/growth & development ; Escherichia coli ; Gene Expression Profiling ; Gene Expression Regulation, Bacterial ; Humans ; Microarray Analysis ; Real-Time Polymerase Chain Reaction ; Reverse Transcriptase Polymerase Chain Reaction ; Sequence Analysis, RNA ; Signal Transduction ; Stress, Physiological ; *Transcriptome ; Virulence Factors/genetics/metabolism ; }, abstract = {Enterotoxigenic Escherichia coli (ETEC) is an important pathogenic variant (pathovar) of E. coli in developing countries from a human health perspective, causing significant morbidity and mortality. Previous studies have examined specific regulatory networks in ETEC, although little is known about the global effects of inter- and intrakingdom signaling on the expression of virulence and colonization factors in ETEC. In this study, an E. coli/Shigella pan-genome microarray, combined with quantitative reverse transcriptase PCR (qRT-PCR) and RNA sequencing (RNA-seq), was used to quantify the expression of ETEC virulence and colonization factors. 
Biologically relevant chemical signals were combined with ETEC isolate E24377A during growth in either Luria broth (LB) or Dulbecco's modified Eagle medium (DMEM), and transcription was examined during different phases of the growth cycle; chemical signals examined included glucose, bile salts, and preconditioned media from E. coli/Shigella isolates. The results demonstrate that the presence of bile salts, which are found in the intestine and thought to be bactericidal, upregulates the expression of many ETEC virulence factors, including heat-stable (estA) and heat-labile (eltA) enterotoxin genes. In contrast, the ETEC colonization factors CS1 and CS3 were downregulated in the presence of bile, consistent with findings in studies of other enteric pathogens. RNA-seq analysis demonstrated that one of the most differentially expressed genes in the presence of bile is a unique plasmid-encoded AraC-like transcriptional regulator (peaR); other previously unknown genetic elements were found as well. These results provide transcriptional targets and putative mechanisms that should help improve understanding of the global regulatory networks and virulence expression in this important human pathogen.}, } @article {pmid24704919, year = {2012}, author = {Psomopoulos, FE and Siarkou, VI and Papanikolaou, N and Iliopoulos, I and Tsaftaris, AS and Promponas, VJ and Ouzounis, CA}, title = {The chlamydiales pangenome revisited: structural stability and functional coherence.}, journal = {Genes}, volume = {3}, number = {2}, pages = {291-319}, pmid = {24704919}, issn = {2073-4425}, abstract = {The entire publicly available set of 37 genome sequences from the bacterial order Chlamydiales has been subjected to comparative analysis in order to reveal the salient features of this pangenome and its evolutionary history. Over 2,000 protein families are detected across multiple species, with a distribution consistent to other studied pangenomes. 
Of these, there are 180 protein families with multiple members, 312 families with exactly 37 members corresponding to core genes, 428 families with peripheral genes with varying taxonomic distribution and finally 1,125 smaller families. The fact that, even for smaller genomes of Chlamydiales, core genes represent over a quarter of the average protein complement, signifies a certain degree of structural stability, given the wide range of phylogenetic relationships within the group. In addition, the propagation of a corpus of manually curated annotations within the discovered core families reveals key functional properties, reflecting a coherent repertoire of cellular capabilities for Chlamydiales. We further investigate over 2,000 genes without homologs in the pangenome and discover two new protein sequence domains. Our results, supported by the genome-based phylogeny for this group, are fully consistent with previous analyses and current knowledge, and point to future research directions towards a better understanding of the structural and functional properties of Chlamydiales.}, } @article {pmid24555018, year = {2012}, author = {Snipen, LG and Ussery, DW}, title = {A domain sequence approach to pangenomics: applications to Escherichia coli.}, journal = {F1000Research}, volume = {1}, number = {}, pages = {19}, pmid = {24555018}, issn = {2046-1402}, abstract = {The study of microbial pangenomes relies on the computation of gene families, i.e. the clustering of coding sequences into groups of essentially similar genes. There is no standard approach to obtain such gene families. Ideally, the gene family computations should be robust against errors in the annotation of genes in various genomes. In an attempt to achieve this robustness, we propose to cluster sequences by their domain sequence, i.e. the ordered sequence of domains in their protein sequence. 
In a study of 347 genomes from Escherichia coli we find on average around 4500 proteins having hits in Pfam-A in every genome, clustering into around 2500 distinct domain sequence families in each genome. Across all genomes we find a total of 5724 such families. A binomial mixture model approach indicates this is around 95% of all domain sequences we would expect to see in E. coli in the future. A Heaps law analysis indicates the population of domain sequences is larger, but this analysis is also very sensitive to smaller changes in the computation procedure. The resolution between strains is good despite the coarse grouping obtained by domain sequence families. Clustering sequences by their ordered domain content gives us domain sequence families, which are robust to errors in the gene prediction step. The computational load of the procedure scales linearly with the number of genomes, which is needed for the future explosion in the number of re-sequenced strains. The use of domain sequence families for a functional classification of strains clearly has some potential to be explored.}, } @article {pmid24358823, year = {2012}, author = {Rodriguez-Valera, F and Ussery, DW}, title = {Is the pan-genome also a pan-selectome?}, journal = {F1000Research}, volume = {1}, number = {}, pages = {16}, pmid = {24358823}, issn = {2046-1402}, abstract = {The comparative genomics of prokaryotes has shown the presence of conserved regions containing highly similar genes (the 'core genome') and other regions that vary in gene content (the 'flexible' regions). A significant part of the latter is involved in surface structures that are phage recognition targets. Another sizeable part provides for differences in niche exploitation. Metagenomic data indicates that natural populations of prokaryotes are composed of assemblages of clonal lineages or "meta-clones" that share a core of genes but contain a high diversity by varying the flexible component. 
This meta-clonal diversity is maintained by a collection of phages that equalize the populations by preventing any individual clonal lineage from hoarding common resources. Thus, this polyclonal assemblage and the phages preying upon them constitute natural selection units.}, } @article {pmid22199376, year = {2011}, author = {Hu, B and Xie, G and Lo, CC and Starkenburg, SR and Chain, PS}, title = {Pathogen comparative genomics in the next-generation sequencing era: genome alignments, pangenomics and metagenomics.}, journal = {Briefings in functional genomics}, volume = {10}, number = {6}, pages = {322-333}, doi = {10.1093/bfgp/elr042}, pmid = {22199376}, issn = {2041-2657}, mesh = {Animals ; Bacteria/genetics ; *Genome, Bacterial ; Humans ; Metagenomics/*methods ; Sequence Alignment ; Sequence Analysis, DNA/*methods ; }, abstract = {As soon as whole-genome sequencing entered the scene in the mid-1990s and demonstrated its use in revealing the entire genetic potential of any given microbial organism, this technique immediately revolutionized the way pathogen (and many other fields of) research was carried out. The ability to perform whole-genome comparisons further transformed the field and allowed scientists to obtain information linking phenotypic dissimilarities among closely related organisms and their underlying genetic mechanisms. Such comparisons have become commonplace in examining strain-to-strain variability, as well as comparing pathogens to less, or nonpathogenic near neighbors. In recent years, a bloom in novel sequencing technologies along with continuous increases in throughput has occurred, inundating the field with various types of massively parallel sequencing data and further transforming comparative genomics research. 
Here, we review the evolution of comparative genomics, its impact in understanding pathogen evolution and physiology and the opportunities and challenges presented by next-generation sequencing as applied to pathogen genome comparisons.}, } @article {pmid22196399, year = {2011}, author = {Fang, Y and Li, Z and Liu, J and Shu, C and Wang, X and Zhang, X and Yu, X and Zhao, D and Liu, G and Hu, S and Zhang, J and Al-Mssallem, I and Yu, J}, title = {A pangenomic study of Bacillus thuringiensis.}, journal = {Journal of genetics and genomics = Yi chuan xue bao}, volume = {38}, number = {12}, pages = {567-576}, doi = {10.1016/j.jgg.2011.11.001}, pmid = {22196399}, issn = {1673-8527}, mesh = {Bacillus thuringiensis/classification/*genetics ; Bacillus thuringiensis Toxins ; Bacterial Proteins/genetics ; DNA Mutational Analysis ; Endotoxins/genetics ; Genetic Variation ; *Genome, Bacterial ; Genomics ; Hemolysin Proteins/genetics ; Phylogeny ; Plasmids ; Sequence Analysis, DNA ; }, abstract = {Bacillus thuringiensis (B. thuringiensis) is a soil-dwelling Gram-positive bacterium and its plasmid-encoded toxins (Cry) are commonly used as biological alternatives to pesticides. In a pangenomic study, we sequenced seven B. thuringiensis isolates in both high coverage and base-quality using the next-generation sequencing platform. The B. thuringiensis pangenome was extrapolated to have 4196 core genes and an asymptotic value of 558 unique genes when a new genome is added. Compared to the pangenomes of its closely related species of the same genus, B. thuringiensis pangenome shows an open characteristic, similar to B. cereus but not to B. anthracis; the latter has a closed pangenome. We also found extensive divergence among the seven B. thuringiensis genome assemblies, which harbor ample repeats and single nucleotide polymorphisms (SNPs). 
The identities among orthologous genes are greater than 84.5% and the hotspots for the genome variations were discovered in genomic regions of 2.3-2.8Mb and 5.0-5.6Mb. We concluded that high-coverage sequence assemblies from multiple strains, before all the gaps are closed, are very useful for pangenomic studies.}, } @article {pmid22174796, year = {2011}, author = {Halachev, MR and Loman, NJ and Pallen, MJ}, title = {Calculating orthologs in bacteria and Archaea: a divide and conquer approach.}, journal = {PloS one}, volume = {6}, number = {12}, pages = {e28388}, pmid = {22174796}, issn = {1932-6203}, support = {BB/E011179/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBE0111791/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Archaea/*genetics ; Bacteria/*genetics ; Genome, Archaeal/genetics ; Genome, Bacterial/genetics ; Open Reading Frames/genetics ; Sequence Alignment ; *Sequence Homology, Amino Acid ; Time Factors ; }, abstract = {Among proteins, orthologs are defined as those that are derived by vertical descent from a single progenitor in the last common ancestor of their host organisms. Our goal is to compute a complete set of protein orthologs derived from all currently available complete bacterial and archaeal genomes. Traditional approaches typically rely on all-against-all BLAST searching which is prohibitively expensive in terms of hardware requirements or computational time (requiring an estimated 18 months or more on a typical server). Here, we present xBASE-Orth, a system for ongoing ortholog annotation, which applies a "divide and conquer" approach and adopts a pragmatic scheme that trades accuracy for speed. Starting at species level, xBASE-Orth carefully constructs and uses pan-genomes as proxies for the full collections of coding sequences at each level as it progressively climbs the taxonomic tree using the previously computed data. 
This leads to a significant decrease in the number of alignments that need to be performed, which translates into faster computation, making ortholog computation possible on a global scale. Using xBASE-Orth, we analyzed an NCBI collection of 1,288 bacterial and 94 archaeal complete genomes with more than 4 million coding sequences in 5 weeks and predicted more than 700 million ortholog pairs, clustered in 175,531 orthologous groups. We have also identified sets of highly conserved bacterial and archaeal orthologs and in so doing have highlighted anomalies in genome annotation and in the proposed composition of the minimal bacterial genome. In summary, our approach allows for scalable and efficient computation of the bacterial and archaeal ortholog annotations. In addition, due to its hierarchical nature, it is suitable for incorporating novel complete genomes and alternative genome annotations. The computed ortholog data and a continuously evolving set of applications based on it are integrated in the xBASE database, available at http://www.xbase.ac.uk/.}, } @article {pmid22166956, year = {2012}, author = {Falgarone, G and Essabbani, A and Dumont, F and Cagnard, N and Mistou, S and Chiocchia, G}, title = {Implication of clusterin in TNF-α response of rheumatoid synovitis: lesson from in vitro knock-down of clusterin in human synovial fibroblast cells.}, journal = {Physiological genomics}, volume = {44}, number = {3}, pages = {229-235}, doi = {10.1152/physiolgenomics.00095.2010}, pmid = {22166956}, issn = {1531-2267}, mesh = {Analysis of Variance ; Arthritis, Rheumatoid/*complications ; Cluster Analysis ; Clusterin/genetics/*metabolism ; Fibroblasts/*metabolism ; Gene Expression Profiling ; Gene Expression Regulation/*physiology ; Gene Knockdown Techniques ; Humans ; In Vitro Techniques ; Joint Capsule/*cytology ; Microarray Analysis ; RNA, Small Interfering/genetics ; Real-Time Polymerase Chain Reaction ; Synovitis/etiology/*metabolism/physiopathology ; Tumor 
Necrosis Factor-alpha/*metabolism ; }, abstract = {Recently clusterin (CLU) was reported to be an inhibitor of NF-κB pathway and involved in rheumatoid arthritis (RA) synovitis. This study was designed to decipher the molecular network linked to CLU expression in FLS (fibroblast-like synoviocytes) and evaluate the consequences of its low expression in conditions of TNF-α stimulation. FLS were transfected with siRNA for CLU or not and cultured for 24 and 48 h with TNF-α or not. Pan-genomic gene expression was assayed by DNA microarray. The gene network around CLU and gene interactions were analyzed with the Ingenuity Pathway Analysis software. Downregulation of CLU resulted in modification of the expression of genes known to be directly linked to CLU and for almost 5% of the tested genes (857 out of 17,225); the upregulation of a small group of genes (e.g., TIAM1) emphasizes the hypothetical role of CLU in the pseudotumoral characteristic of FLS. The comparison of gene expression with or without TNF stimulation allowed the classification of samples with good concordance. Moreover, differential comparison showed that CLU downregulation in RA led to a profound modification of the TNF-α response as three sets of genes emerged: 497 genes modulated by siCLU transfection with TNF stimulation, 356 genes modified because of TNF stimulation only, and 484 genes modulated during TNF stimulation with CLU expression (e.g., IL-8 and Wnt signaling genes). Using a global two-way ANOVA we could identify a set of genes defining a molecular signature of TNF response directly influenced by CLU. 
These results (based on differential gene expression patterns) argue that CLU downregulation in FLS alters their aggressiveness in RA synovitis.}, } @article {pmid22130594, year = {2012}, author = {Zhao, Y and Wu, J and Yang, J and Sun, S and Xiao, J and Yu, J}, title = {PGAP: pan-genomes analysis pipeline.}, journal = {Bioinformatics (Oxford, England)}, volume = {28}, number = {3}, pages = {416-418}, pmid = {22130594}, issn = {1367-4811}, mesh = {Algorithms ; *Genome, Bacterial ; *Software ; Streptococcus pyogenes/classification/*genetics ; }, abstract = {SUMMARY: With the rapid development of DNA sequencing technology, increasing bacteria genome data enable the biologists to dig the evolutionary and genetic information of prokaryotic species from pan-genome sight. Therefore, the high-efficiency pipelines for pan-genome analysis are mostly needed. We have developed a new pan-genome analysis pipeline (PGAP), which can perform five analytic functions with only one command, including cluster analysis of functional genes, pan-genome profile analysis, genetic variation analysis of functional genes, species evolution analysis and function enrichment analysis of gene clusters. PGAP's performance has been evaluated on 11 Streptococcus pyogenes strains.

AVAILABILITY: PGAP is developed with Perl script on the Linux Platform and the package is freely available from http://pgap.sf.net.

CONTACT: junyu@big.ac.cn; xiaojingfa@big.ac.cn

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid22128332, year = {2011}, author = {Karberg, KA and Olsen, GJ and Davis, JJ}, title = {Similarity of genes horizontally acquired by Escherichia coli and Salmonella enterica is evidence of a supraspecies pangenome.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {108}, number = {50}, pages = {20154-20159}, pmid = {22128332}, issn = {1091-6490}, mesh = {Codon/genetics ; Escherichia coli/*genetics ; Gene Transfer, Horizontal/*genetics ; Genes, Bacterial/*genetics ; Phylogeny ; Salmonella enterica/*genetics ; *Sequence Homology, Nucleic Acid ; Species Specificity ; }, abstract = {Most bacterial and archaeal genomes contain many genes with little or no similarity to other genes, a property that impedes identification of gene origins. By comparing the codon usage of genes shared among strains (primarily vertically inherited genes) and genes unique to one strain (primarily recently horizontally acquired genes), we found that the plurality of unique genes in Escherichia coli and Salmonella enterica are much more similar to each other than are their vertically inherited genes. We conclude that E. coli and S. enterica derive these unique genes from a common source, a supraspecies phylogenetic group that includes the organisms themselves. The phylogenetic range of the sharing appears to include other (but not all) members of the Enterobacteriaceae. We found evidence of similar gene sharing in other bacterial and archaeal taxa. 
Thus, we conclude that frequent gene exchange, particularly that of genetic novelties, extends well beyond accepted species boundaries.}, } @article {pmid22126291, year = {2011}, author = {Bennani-Baiti, IM}, title = {Epigenetic and epigenomic mechanisms shape sarcoma and other mesenchymal tumor pathogenesis.}, journal = {Epigenomics}, volume = {3}, number = {6}, pages = {715-732}, doi = {10.2217/epi.11.93}, pmid = {22126291}, issn = {1750-192X}, mesh = {Animals ; Cell Transformation, Neoplastic/*metabolism ; Epigenesis, Genetic/*physiology ; Genes, Tumor Suppressor/physiology ; Histone Demethylases/metabolism ; Histone Methyltransferases ; Histone-Lysine N-Methyltransferase/metabolism ; Humans ; Mesenchymal Stem Cells/*metabolism ; Mice ; MicroRNAs/metabolism ; *Models, Biological ; Sarcoma/metabolism/*physiopathology ; }, abstract = {Sarcomas comprise a large number of rare, histogenetically heterogeneous, mesenchymal tumors. Cancers such as Ewing's sarcoma, liposarcoma, rhabdomyosarcoma and synovial sarcoma can be generated by the transduction of mesenchymal stem cell progenitors with sarcoma-pathognomonic oncogenic fusions, a neoplastic transformation process accompanied by profound locus-specific and pangenomic epigenetic alterations. The epigenetic activities of histone-modifying and chromatin-remodeling enzymes such as SUV39H1/KMT1A, EZH2/KMT6A and BMI1 are central to epigenetic-regulated transformation, a property we coin oncoepigenic. Sarcoma-specific oncoepigenic aberrations modulate critical signaling pathways that control cell growth and differentiation including several miRNAs, Wnt, PI3K/AKT, Sav-RASSF1-Hpo and regulators of the G1 and G2/M checkpoints of the cell cycle. 
Herein an overview of the current knowledge of this rapidly evolving field that will undoubtedly uncover additional oncoepigenic mechanisms and yield druggable targets in the near future is discussed.}, } @article {pmid22100608, year = {2012}, author = {Josse, R and Dumont, J and Fautrel, A and Robin, MA and Guillouzo, A}, title = {Identification of early target genes of aflatoxin B1 in human hepatocytes, inter-individual variability and comparison with other genotoxic compounds.}, journal = {Toxicology and applied pharmacology}, volume = {258}, number = {2}, pages = {176-187}, doi = {10.1016/j.taap.2011.10.019}, pmid = {22100608}, issn = {1096-0333}, mesh = {Aflatoxin B1/administration & dosage/*toxicity ; Cell Cycle/drug effects ; Cells, Cultured ; DNA Damage/*drug effects ; DNA Repair/drug effects ; Dose-Response Relationship, Drug ; Gene Expression Profiling ; Gene Expression Regulation/*drug effects ; Hepatocytes/*drug effects/pathology ; Humans ; Mutagens/administration & dosage/*toxicity ; Oligonucleotide Array Sequence Analysis ; Signal Transduction/drug effects ; Tumor Suppressor Protein p53/metabolism ; }, abstract = {Gene expression profiling has recently emerged as a promising approach to identify early target genes and discriminate genotoxic carcinogens from non-genotoxic carcinogens and non-carcinogens. However, early gene changes induced by genotoxic compounds in human liver remain largely unknown. Primary human hepatocytes and differentiated HepaRG cells were exposed to aflatoxin B1 (AFB1) that induces DNA damage following enzyme-mediated bioactivation. Gene expression profile changes induced by a 24h exposure of these hepatocyte models to 0.05 and 0.25μM AFB1 were analyzed by using oligonucleotide pangenomic microarrays. The main altered signaling pathway was the p53 pathway and related functions such as cell cycle, apoptosis and DNA repair. Direct involvement of the p53 protein in response to AFB1 was verified by using siRNA directed against p53. 
Among the 83 well-annotated genes commonly modulated in two pools of three human hepatocyte populations and HepaRG cells, several genes were identified as altered by AFB1 for the first time. In addition, a subset of 10 AFB1-altered genes, selected upon basis of their function or tumor suppressor role, was tested in four human hepatocyte populations and in response to other chemicals. Although they exhibited large variable inter-donor fold-changes, several of these genes, particularly FHIT, BCAS3 and SMYD3, were found to be altered by various direct and other indirect genotoxic compounds and unaffected by non-genotoxic compounds. Overall, this comprehensive analysis of early gene expression changes induced by AFB1 in human hepatocytes identified a gene subset that included several genes representing potential biomarkers of genotoxic compounds.}, } @article {pmid22086490, year = {2011}, author = {Lehours, P and Vale, FF and Bjursell, MK and Melefors, O and Advani, R and Glavas, S and Guegueniat, J and Gontier, E and Lacomme, S and Alves Matos, A and Menard, A and Mégraud, F and Engstrand, L and Andersson, AF}, title = {Genome sequencing reveals a phage in Helicobacter pylori.}, journal = {mBio}, volume = {2}, number = {6}, pages = {}, pmid = {22086490}, issn = {2150-7511}, mesh = {Bacteriophages/*genetics/isolation & purification/ultrastructure ; Cluster Analysis ; DNA, Bacterial/*genetics ; DNA, Viral/*genetics ; Gastric Mucosa/microbiology ; *Genome, Bacterial ; Helicobacter Infections/complications/microbiology ; Helicobacter pylori/*genetics/isolation & purification/radiation effects/*virology ; Humans ; Lymphoma/complications ; Lysogeny ; Molecular Sequence Data ; Phylogeny ; Sequence Analysis, DNA ; Sequence Homology ; Ultraviolet Rays ; }, abstract = {UNLABELLED: Helicobacter pylori chronically infects the gastric mucosa in more than half of the human population; in a subset of this population, its presence is associated with development of severe disease, 
such as gastric cancer. Genomic analysis of several strains has revealed an extensive H. pylori pan-genome, likely to grow as more genomes are sampled. Here we describe the draft genome sequence (63 contigs; 26× mean coverage) of H. pylori strain B45, isolated from a patient with gastric mucosa-associated lymphoid tissue (MALT) lymphoma. The major finding was a 24.6-kb prophage integrated in the bacterial genome. The prophage shares most of its genes (22/27) with prophage region II of Helicobacter acinonychis strain Sheeba. After UV treatment of liquid cultures, circular DNA carrying the prophage integrase gene could be detected, and intracellular tailed phage-like particles were observed in H. pylori cells by transmission electron microscopy, indicating that phage production can be induced from the prophage. PCR amplification and sequencing of the integrase gene from 341 H. pylori strains from different geographic regions revealed a high prevalence of the prophage (21.4%). Phylogenetic reconstruction showed four distinct clusters in the integrase gene, three of which tended to be specific for geographic regions. Our study implies that phages may play important roles in the ecology and evolution of H. pylori.

IMPORTANCE: Helicobacter pylori chronically infects the gastric mucosa in more than half of the human population, and while most of the infected individuals do not develop disease, H. pylori infection doubles the risk of developing gastric cancer. An abundance and diversity of viruses (phages) infect microbial populations in most environments and are important mediators of microbial diversity. Our finding of a 24.6-kb prophage integrated inside an H. pylori genome and the observation of circular integrase gene-containing DNA and phage-like particles inside cells upon UV treatment demonstrate that we have discovered a viable H. pylori phage. The additional finding of integrase genes in a large proportion of screened isolates of diverse geographic origins indicates that the prevalence of prophages may have been underestimated in H. pylori. Since phages are important drivers of microbial evolution, the discovery should be important for understanding and predicting genetic diversity in H. pylori.}, } @article {pmid22059093, year = {2011}, author = {Zhang, Y and Dai, Y and Zheng, T and Ma, S}, title = {Risk Factors of Non-Hodgkin Lymphoma.}, journal = {Expert opinion on medical diagnostics}, volume = {5}, number = {6}, pages = {539-550}, pmid = {22059093}, issn = {1753-0067}, support = {R01 CA142774/CA/NCI NIH HHS/United States ; R01 CA142774-03/CA/NCI NIH HHS/United States ; }, abstract = {INTRODUCTION: Despite decades of intensive research, Non-Hodgkin Lymphoma (NHL) remains poorly understood and is largely incurable. NHL is a heterogeneous group of malignancies with multiple subtypes, each of which has distinct morphologic, immunophenotypic, and clinical features. Identifying the risk factors for NHL may improve our understanding of the underlying biological mechanisms and have an impact on clinical practice. 
AREAS COVERED: This article provides a review of several aspects of NHL, including epidemiology and subtype classification, clinical, environmental, genetic, and genomic risk factors identified for etiology and prognosis, and available statistical and bioinformatics tools for identification of genetic and genomic risk factors from the analysis of high-throughput studies. EXPERT OPINION: Multiple clinical and environmental risk factors have been identified. However, they have failed to provide practically effective prediction. Genetic and genomic risk factors identified from high-throughput studies have suffered a lack of reproducibility. The identification of genetic/genomic risk factors demands innovative statistical and bioinformatics tools. Although multiple analysis methods have been developed, there is still room for improvement. There is a critical need for well-designed, prospective, large-scale pangenomic studies.}, } @article {pmid22044664, year = {2011}, author = {Baumler, DJ and Peplinski, RG and Reed, JL and Glasner, JD and Perna, NT}, title = {The evolution of metabolic networks of E. coli.}, journal = {BMC systems biology}, volume = {5}, number = {}, pages = {182}, pmid = {22044664}, issn = {1752-0509}, support = {T15 LM007359/LM/NLM NIH HHS/United States ; GM62994-02/GM/NIGMS NIH HHS/United States ; 5T15LM007359/LM/NLM NIH HHS/United States ; }, mesh = {Escherichia coli/genetics/*metabolism ; *Evolution, Molecular ; Genome, Bacterial ; *Metabolic Networks and Pathways ; *Models, Biological ; Phylogeny ; Salmonella/genetics/metabolism ; }, abstract = {BACKGROUND: Despite the availability of numerous complete genome sequences from E. coli strains, published genome-scale metabolic models exist only for two commensal E. coli strains. These models have proven useful for many applications, such as engineering strains for desired product formation, and we sought to explore how constructing and evaluating additional metabolic models for E. 
coli strains could enhance these efforts.

RESULTS: We used the genomic information from 16 E. coli strains to generate an E. coli pangenome metabolic network by evaluating their collective 76,990 ORFs. Each of these ORFs was assigned to one of 17,647 ortholog groups including ORFs associated with reactions in the most recent metabolic model for E. coli K-12. For orthologous groups that contain an ORF already represented in the MG1655 model, the gene to protein to reaction associations represented in this model could then be easily propagated to other E. coli strain models. All remaining orthologous groups were evaluated to see if new metabolic reactions could be added to generate a pangenome-scale metabolic model (iEco1712_pan). The pangenome model included reactions from a metabolic model update for E. coli K-12 MG1655 (iEco1339_MG1655) and enabled development of five additional strain-specific genome-scale metabolic models. These additional models include a second K-12 strain (iEco1335_W3110) and four pathogenic strains (two enterohemorrhagic E. coli O157:H7 and two uropathogens). When compared to the E. coli K-12 models, the metabolic models for the enterohemorrhagic (iEco1344_EDL933 and iEco1345_Sakai) and uropathogenic strains (iEco1288_CFT073 and iEco1301_UTI89) contained numerous lineage-specific gene and reaction differences. All six E. coli models were evaluated by comparing model predictions to carbon source utilization measurements under aerobic and anaerobic conditions, and to batch growth profiles in minimal media with 0.2% (w/v) glucose. An ancestral genome-scale metabolic model based on conserved ortholog groups in all 16 E. coli genomes was also constructed, reflecting the conserved ancestral core of E. coli metabolism (iEco1053_core). Comparative analysis of all six strain-specific E. coli models revealed that some of the pathogenic E. coli strains possess reactions in their metabolic networks enabling higher biomass yields on glucose. 
Finally, the lineage-specific metabolic traits were compared to the ancestral core model predictions to derive new insight into the evolution of metabolism within this species.

CONCLUSION: Our findings demonstrate that a pangenome-scale metabolic model can be used to rapidly construct additional E. coli strain-specific models, and that quantitative models of different strains of E. coli can accurately predict strain-specific phenotypes. Such pangenome and strain-specific models can be further used to engineer metabolic phenotypes of interest, such as designing new industrial E. coli strains.}, } @article {pmid22037019, year = {2012}, author = {Jash, S and Chowdhury, T and Adhya, S}, title = {Modulation of mitochondrial respiratory capacity by carrier-mediated transfer of RNA in vivo.}, journal = {Mitochondrion}, volume = {12}, number = {2}, pages = {262-270}, doi = {10.1016/j.mito.2011.10.001}, pmid = {22037019}, issn = {1872-8278}, mesh = {Animals ; Cell Respiration/*drug effects ; Gene Transfer Techniques ; Mitochondria/*drug effects/*metabolism ; Muscles/cytology ; RNA/*metabolism ; Rats ; Rats, Sprague-Dawley ; *Transformation, Genetic ; }, abstract = {Genetic dysfunction of mitochondria is pathological, but an effective method of nucleic acid delivery to mitochondria in vivo is lacking. Injection into rodents of tagged polycistronic RNAs (pcRNAs) encoding parts of the organelle genome and bound to a carrier complex, resulted in rapid uptake and concentration of the RNA in many tissues. The delivered RNA was localized to mitochondria. A pan-genomic cocktail of pcRNAs restored mRNA levels, stimulated mitochondrial translation and respiratory capacity of skeletal muscle of middle-aged and old rats. 
Thus, the carrier-based protocol may be suitable for delivery of functional RNAs to mitochondria in vivo.}, } @article {pmid22034231, year = {2011}, author = {Imperi, F and Antunes, LC and Blom, J and Villa, L and Iacono, M and Visca, P and Carattoli, A}, title = {The genomics of Acinetobacter baumannii: insights into genome plasticity, antimicrobial resistance and pathogenicity.}, journal = {IUBMB life}, volume = {63}, number = {12}, pages = {1068-1074}, doi = {10.1002/iub.531}, pmid = {22034231}, issn = {1521-6551}, mesh = {Acinetobacter Infections/epidemiology/genetics/microbiology ; Acinetobacter baumannii/drug effects/*genetics/*pathogenicity ; Adaptation, Biological/drug effects/*genetics ; Anti-Bacterial Agents/pharmacology/therapeutic use ; Biological Evolution ; Drug Resistance, Multiple, Bacterial/drug effects/*genetics ; Genes, MDR/drug effects/*genetics ; Genome, Bacterial/drug effects/*genetics ; Humans ; Models, Genetic ; Open Reading Frames ; Virulence/drug effects/*genetics ; }, abstract = {The genome sequences of a number of Acinetobacter baumannii strains, including representatives of the main epidemic international lineages, have now been determined, and several others are in progress. The study of A. baumannii genomics has provided an expanded view of the adaptation and virulence capacities of this bacterial species, whilst also presenting novel insights into its intraspecies diversity and genome evolution. Genomic analyses have revealed that the current A. baumannii clinical population consists of low-grade pathogens, whose pathogenicity relies mainly on an ability to persist in the hospital setting and survive antibiotic treatment. A. 
baumannii has a high capacity to acquire new genetic determinants and displays an open pan genome; this feature may have played a crucial role in the evolution of this human opportunistic pathogen towards clinical success.}, } @article {pmid22026465, year = {2011}, author = {Zhang, A and Yang, M and Hu, P and Wu, J and Chen, B and Hua, Y and Yu, J and Chen, H and Xiao, J and Jin, M}, title = {Comparative genomic analysis of Streptococcus suis reveals significant genomic diversity among different serotypes.}, journal = {BMC genomics}, volume = {12}, number = {}, pages = {523}, pmid = {22026465}, issn = {1471-2164}, mesh = {Cluster Analysis ; *Comparative Genomic Hybridization ; DNA, Bacterial/genetics ; *Genetic Variation ; Genome, Bacterial ; Genomic Islands ; Molecular Sequence Annotation ; Phylogeny ; Sequence Analysis, DNA ; Serotyping ; Streptococcus suis/classification/*genetics ; }, abstract = {BACKGROUND: Streptococcus suis (S. suis) is a major swine pathogen and an emerging zoonotic agent. Serotypes 1, 2, 3, 7, 9, 14 and 1/2 are the most prevalent serotypes of this pathogen. However, almost all studies were carried out on serotype 2 strains. Therefore, characterization of genomic features of other serotypes will be required to better understand their virulence potential and phylogenetic relationships among different serotypes.

RESULTS: Four Chinese S. suis strains belonging to serotypes 1, 7, 9 and 1/2 were sequenced using a rapid, high-throughput approach. Based on the 13 corresponding serotype strains, including 9 previously completed genomes of this bacterium, a full comparative genomic analysis was performed. The results provide evidence that (i) the pan-genome of this species is open and the size increases with addition of new sequenced genomes, (ii) strains of serotypes 1, 3, 7 and 9 are phylogenetically distinct from serotype 2 strains, but all serotype 2 strains, plus the serotype 1/2 and 14 strains, are very closely related, and (iii) all these strains, except for the serotype 1 strain, could harbor a recombinant site for a pathogenic island (89 K) mediated by conjugal transfer, and may have the ability to gain the 89 K sequence.

CONCLUSIONS: There is significant genomic diversity among different strains in S. suis, and the gain and loss of a large number of genes are involved in shaping their genomes. This is indicated by (i) pairwise gene content comparisons between every pair of these strains, (ii) the open pan-genome of this species, and (iii) the observed indels, inversions and rearrangements in the collinearity analysis. Phylogenetic relationships may be associated with serotype, as serotype 2 strains are closely related and distinct from other serotypes like 1, 3, 7 and 9, but more strains need to be sequenced to confirm this.}, } @article {pmid22001825, year = {2011}, author = {Laing, C and Villegas, A and Taboada, EN and Kropinski, A and Thomas, JE and Gannon, VP}, title = {Identification of Salmonella enterica species- and subgroup-specific genomic regions using Panseq 2.0.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {11}, number = {8}, pages = {2151-2161}, doi = {10.1016/j.meegid.2011.09.021}, pmid = {22001825}, issn = {1567-7257}, mesh = {Bacterial Typing Techniques/*methods ; DNA, Bacterial/*analysis/*genetics ; Genome, Bacterial ; Humans ; Phylogeny ; Polymorphism, Single Nucleotide ; Salmonella enterica/*classification/*genetics ; Sequence Analysis, DNA/*methods ; }, abstract = {The pan-genome of a taxonomic group consists of evolutionarily conserved core genes shared by all members and accessory genes that are present only in some members of the group. Group- and subgroup-specific core genes are thought to contribute to shared phenotypes such as virulence and niche specificity. In this study we analyzed 39 Salmonella enterica genomes (16 closed, 23 draft), a species that contains two human-specific serovars that cause typhoid fever, as well as a large number of zoonotic serovars that cause gastroenteritis in humans. 
Panseq 2.0 was used to define the pan-genome by adjusting the threshold at which group-specific "core" loci are defined. We found the pan-genome to be 9.03 Mbp in size, and that the core genome size decreased, while the number of SNPs/100 bp increased, as the number of strains used to define the core genome increased, suggesting substantial divergence among S. enterica subgroups. Subgroup-specific "core" genes, in contrast, had fewer SNPs/100 bp, likely reflecting their more recent acquisition. Phylogenetic trees were created from the concatenated and aligned pan-genome, the core genome, and multi-locus-sequence typing (MLST) loci. Branch support increased among the trees, and strains of the same serovar grouped closer together as the number of loci used to create the tree increased. Further, high levels of discrimination were achieved even amongst the most closely related strains of S. enterica Typhi, suggesting that the data generated by Panseq may also be of value in short-term epidemiological studies. Panseq provides an easy and fast way of performing pan-genomic analyses, which can include the identification of group-dominant as well as group-specific loci and is available as a web-server and a standalone version at http://lfz.corefacility.ca/panseq/.}, } @article {pmid22000739, year = {2011}, author = {Muzzi, A and Donati, C}, title = {Population genetics and evolution of the pan-genome of Streptococcus pneumoniae.}, journal = {International journal of medical microbiology : IJMM}, volume = {301}, number = {8}, pages = {619-622}, doi = {10.1016/j.ijmm.2011.09.008}, pmid = {22000739}, issn = {1618-0607}, mesh = {*Evolution, Molecular ; Gene Transfer, Horizontal ; *Genetic Variation ; *Genome, Bacterial ; Genotype ; Humans ; Phylogeny ; Pneumococcal Infections/microbiology ; Streptococcus pneumoniae/*classification/*genetics/isolation & purification ; }, abstract = {The genetic variability in bacterial species is much larger than in other kingdoms of life. 
The gene content between pairs of isolates can diverge by as much as 30% in species like Escherichia coli or Streptococcus pneumoniae. This unexpected finding led to the introduction of the concept of the pan-genome, the set of genes that can be found in a given bacterial species. The genome of any isolate is thus composed of a "core genome" shared by all strains and characteristic of the species, and a "dispensable genome" that accounts for many of the phenotypic differences between strains. The pan-genome is usually much larger than the genome of any single isolate and, given the ability of many bacteria to exchange genetic material with the environment, constitutes a reservoir that could enhance their ability to survive in a mutating environment. To understand the evolution of the pan-genome of an important pathogen and its interactions with the commensal microbial flora, we have analyzed the genomes of 44 strains of Streptococcus pneumoniae, one of the most important causes of microbial diseases in humans. Despite evidence of extensive homologous recombination, the S. pneumoniae phylogenetic tree reconstructed from polymorphisms in the core genome identified major groups of genetically related strains. With the exception of serotype 1, the tree correlated poorly with capsular serotype, geographical site of isolation and disease outcome. The distribution of dispensable genes was consistent with phylogeny, although horizontal gene transfer events attenuated this correlation in the case of ancient lineages. Homologous recombination, involving short stretches of DNA, was the dominant evolutionary process of the core genome of S. pneumoniae. Genetic exchange with related species sharing the same ecological niche was the main mechanism of evolution of S. pneumoniae, and S. mitis was the main reservoir of genetic diversity of S. pneumoniae. The pan-genome of S. 
pneumoniae increased logarithmically with the number of strains and linearly with the variability of the sample, suggesting that acquired genes accumulate proportionately to the age of clones.}, } @article {pmid21994436, year = {2012}, author = {Rogue, A and Lambert, C and Spire, C and Claude, N and Guillouzo, A}, title = {Interindividual variability in gene expression profiles in human hepatocytes and comparison with HepaRG cells.}, journal = {Drug metabolism and disposition: the biological fate of chemicals}, volume = {40}, number = {1}, pages = {151-158}, doi = {10.1124/dmd.111.042028}, pmid = {21994436}, issn = {1521-009X}, mesh = {Cell Differentiation/physiology ; Cell Line, Tumor ; Gene Expression Profiling/*methods ; Genetic Variation/*physiology ; Hepatocytes/*physiology ; Humans ; }, abstract = {Interindividual variations in functions other than drug metabolism activity, remain poorly elucidated in human liver. In the present study, the whole transcriptome of several human hepatocyte populations and the differentiated human HepaRG cell line have been analyzed and compared, using oligonucleotide pangenomic microarrays. We show that, although the variation in the percentages of expressed genes did not exceed 14% among the primary human hepatocyte populations, huge interindividual differences in the transcript levels of many genes were observed. These genes were related to various functions; in addition to drug metabolism, they mainly concerned carbohydrate, amino acid, and lipid metabolism. HepaRG cells expressed from 81 to 92% of the genes active in human hepatocytes and, in addition, a specific gene subset mainly related to their transformed status, some chromosomal abnormalities, and the presence of primitive biliary epithelial cells. 
Of interest, a relationship was evidenced between abnormal basal expression levels of some target genes and their corresponding previously reported fold changes in one of four human hepatocyte populations treated with the hepatotoxic drug troglitazone and not with other nonhepatotoxic peroxisome proliferator-activated receptor agonists (PLoS One 6:e18816, 2011). Taken together, our results support the view that HepaRG cells express most of the genes active in primary human hepatocytes and show that expression of most human hepatic genes can quantitatively greatly vary among individuals, thereby contributing to explain the huge interindividual variability in susceptibility to drugs and other environmental factors.}, } @article {pmid23761346, year = {2011}, author = {Wackett, LP}, title = {Bacterial pan-genomes: An annotated selection of World Wide Web sites relevant to the topics in environmental microbiology.}, journal = {Environmental microbiology reports}, volume = {3}, number = {5}, pages = {638-639}, doi = {10.1111/j.1758-2229.2011.00294.x}, pmid = {23761346}, issn = {1758-2229}, } @article {pmid21949075, year = {2011}, author = {Bessen, DE and Kumar, N and Hall, GS and Riley, DR and Luo, F and Lizano, S and Ford, CN and McShan, WM and Nguyen, SV and Dunning Hotopp, JC and Tettelin, H}, title = {Whole-genome association study on tissue tropism phenotypes in group A Streptococcus.}, journal = {Journal of bacteriology}, volume = {193}, number = {23}, pages = {6651-6663}, pmid = {21949075}, issn = {1098-5530}, support = {R01 AI053826/AI/NIAID NIH HHS/United States ; AI-053826/AI/NIAID NIH HHS/United States ; R01 AI065572/AI/NIAID NIH HHS/United States ; AI-065572/AI/NIAID NIH HHS/United States ; R56 AI053826/AI/NIAID NIH HHS/United States ; }, mesh = {Bacterial Proteins/genetics/metabolism ; Evolution, Molecular ; Gene Expression Profiling ; *Genome-Wide Association Study ; Humans ; Molecular Sequence Data ; Organ Specificity ; Pharynx/microbiology ; Phylogeny ; 
Skin/microbiology ; Streptococcal Infections/*microbiology ; Streptococcus pyogenes/classification/*genetics/growth & development/metabolism ; *Tropism ; }, abstract = {Group A Streptococcus (GAS) has a rich evolutionary history of horizontal transfer among its core genes. Yet, despite extensive genetic mixing, GAS strains have discrete ecological phenotypes. To further our understanding of the molecular basis for ecological phenotypes, comparative genomic hybridization of a set of 97 diverse strains to a GAS pangenome microarray was undertaken, and the association of accessory genes with emm genotypes that define tissue tropisms for infection was determined. Of the 22 nonprophage accessory gene regions (AGRs) identified, only 3 account for all statistically significant linkage disequilibrium among strains having the genotypic biomarkers for throat versus skin infection specialists. Networked evolution and population structure analyses of loci representing each of the AGRs reveal that most strains with the skin specialist and generalist biomarkers form discrete clusters, whereas strains with the throat specialist biomarker are highly diverse. To identify coinherited and coselected accessory genes, the strength of genetic associations was determined for all possible pairwise combinations of accessory genes among the 97 GAS strains. Accessory genes showing very strong associations provide the basis for an evolutionary model, which reveals that a major transition between many throat and skin specialist haplotypes correlates with the gain or loss of genes encoding fibronectin-binding proteins. 
This study employs a novel synthesis of tools to help delineate the major genetic changes associated with key adaptive shifts in an extensively recombined bacterial species.}, } @article {pmid21947821, year = {2011}, author = {Tse, CH and Hwang, HC and Goldstein, LC and Kandalaft, PL and Wiley, JC and Kussick, SJ and Gown, AM}, title = {Determining true HER2 gene status in breast cancers with polysomy by using alternative chromosome 17 reference genes: implications for anti-HER2 targeted therapy.}, journal = {Journal of clinical oncology : official journal of the American Society of Clinical Oncology}, volume = {29}, number = {31}, pages = {4168-4174}, doi = {10.1200/JCO.2011.36.0107}, pmid = {21947821}, issn = {1527-7755}, mesh = {Adult ; Aged ; Algorithms ; Antineoplastic Agents/therapeutic use ; Biomarkers, Tumor/analysis/*genetics ; Breast Neoplasms/chemistry/*drug therapy/*genetics ; Chromosomes, Human, Pair 17/*genetics ; Female ; Gene Amplification ; Gene Expression Regulation, Neoplastic/drug effects ; *Genes, erbB-2/drug effects ; Humans ; Immunohistochemistry ; In Situ Hybridization, Fluorescence ; Microtubule-Associated Proteins ; Middle Aged ; *Molecular Targeted Therapy ; Phosphoproteins/analysis/*genetics ; Receptor, ErbB-2/analysis/*genetics ; Tumor Suppressor Protein p53/analysis ; }, abstract = {PURPOSE: The ratio of human epidermal growth factor receptor 2 (HER2) to CEP17 by fluorescent in situ hybridization (FISH) with the centromeric probe CEP17 is used to determine HER2 gene status in breast cancer. Increases in CEP17 copy number have been interpreted as representing polysomy 17. However, pangenomic studies have demonstrated that polysomy 17 is rare. This study tests the hypothesis that the use of alternative chromosome 17 reference genes might more accurately assess true HER2 gene status.

PATIENTS AND METHODS: In all, 171 patients with breast cancer who had HER2 FISH that had increased mean CEP17 copy numbers (> 2.6) were selected for additional chromosome 17 studies that used probes for Smith-Magenis syndrome (SMS), retinoic acid receptor alpha (RARA), and tumor protein p53 (TP53) genes. A eusomic copy number exhibited in one or more of these loci was used to calculate a revised HER2-to-chromosome-17 ratio by using the eusomic gene locus as the reference.

RESULTS: Of 132 cases classified as nonamplified on the basis of their HER2:CEP17 ratios, 58 (43.9%) were scored as amplified by using alternative chromosome 17 reference gene probes, and 13 (92.9%) of 14 cases scored as equivocal were reclassified as amplified. Among the cases with mean HER2 copy number of 4 to 6, 41 (47.7%) of 86 had their HER2 gene status upgraded from nonamplified to amplified, and four (4.7%) of 86 were upgraded from equivocal to amplified.

CONCLUSION: Our results support the findings of recent pangenomic studies that true polysomy 17 is uncommon. Additional FISH studies that use probes to the SMS, RARA, and TP53 genes are an effective way to determine the true HER2 amplification status in patients with polysomy 17 and they have important potential implications for guiding HER2-targeted therapy in breast cancer.}, } @article {pmid21936906, year = {2011}, author = {Williams, D and Fournier, GP and Lapierre, P and Swithers, KS and Green, AG and Andam, CP and Gogarten, JP}, title = {A rooted net of life.}, journal = {Biology direct}, volume = {6}, number = {}, pages = {45}, pmid = {21936906}, issn = {1745-6150}, mesh = {Archaea/genetics ; Bacteria/genetics ; Biological Evolution ; Gene Transfer, Horizontal ; Genes, rRNA ; *Genome, Archaeal ; *Genome, Bacterial ; *Models, Genetic ; Multigene Family ; *Phylogeny ; Ribosomal Proteins/genetics ; Ribosomes/*genetics ; }, abstract = {Phylogenetic reconstruction using DNA and protein sequences has allowed the reconstruction of evolutionary histories encompassing all life. We present and discuss a means to incorporate much of this rich narrative into a single model that acknowledges the discrete evolutionary units that constitute the organism. Briefly, this Rooted Net of Life genome phylogeny is constructed around an initial, well resolved and rooted tree scaffold inferred from a supermatrix of combined ribosomal genes. Extant sampled ribosomes form the leaves of the tree scaffold. These leaves, but not necessarily the deeper parts of the scaffold, can be considered to represent a genome or pan-genome, and to be associated with members of other gene families within that sequenced (pan)genome. Unrooted phylogenies of gene families containing four or more members are reconstructed and superimposed over the scaffold. Initially, reticulations are formed where incongruities between topologies exist. 
Given sufficient evidence, edges may then be differentiated as those representing vertical lines of inheritance within lineages and those representing horizontal genetic transfers or endosymbioses between lineages.}, } @article {pmid21874002, year = {2011}, author = {Cao, J and Schneeberger, K and Ossowski, S and Günther, T and Bender, S and Fitz, J and Koenig, D and Lanz, C and Stegle, O and Lippert, C and Wang, X and Ott, F and Müller, J and Alonso-Blanco, C and Borgwardt, K and Schmid, KJ and Weigel, D}, title = {Whole-genome sequencing of multiple Arabidopsis thaliana populations.}, journal = {Nature genetics}, volume = {43}, number = {10}, pages = {956-963}, pmid = {21874002}, issn = {1546-1718}, mesh = {Alleles ; Arabidopsis/*genetics ; Chromosome Mapping ; Chromosomes, Plant ; DNA, Plant/genetics ; Genetic Loci ; *Genetics, Population ; *Genome, Plant ; Geography ; Linkage Disequilibrium ; Mutation ; Phenotype ; Polymorphism, Single Nucleotide ; Selection, Genetic ; Sequence Analysis, DNA/*methods ; }, abstract = {The plant Arabidopsis thaliana occurs naturally in many different habitats throughout Eurasia. As a foundation for identifying genetic variation contributing to adaptation to diverse environments, a 1001 Genomes Project to sequence geographically diverse A. thaliana strains has been initiated. Here we present the first phase of this project, based on population-scale sequencing of 80 strains drawn from eight regions throughout the species' native range. We describe the majority of common small-scale polymorphisms as well as many larger insertions and deletions in the A. thaliana pan-genome, their effects on gene function, and the patterns of local and global linkage among these variants. The action of processes other than spontaneous mutation is identified by comparing the spectrum of mutations that have accumulated since A. thaliana diverged from its closest relative 10 million years ago with the spectrum observed in the laboratory. 
Recent species-wide selective sweeps are rare, and potentially deleterious mutations are more common in marginal populations.}, } @article {pmid21816042, year = {2011}, author = {Håfström, T and Jansson, DS and Segerman, B}, title = {Complete genome sequence of Brachyspira intermedia reveals unique genomic features in Brachyspira species and phage-mediated horizontal gene transfer.}, journal = {BMC genomics}, volume = {12}, number = {}, pages = {395}, pmid = {21816042}, issn = {1471-2164}, mesh = {Bacteriophages/*genetics ; Brachyspira/*genetics/*virology ; Gene Transfer, Horizontal/*genetics ; Genome, Bacterial/genetics ; *Genomics ; Multigene Family/genetics ; Plasmids/genetics ; Species Specificity ; }, abstract = {BACKGROUND: Brachyspira spp. colonize the intestines of some mammalian and avian species and show different degrees of enteropathogenicity. Brachyspira intermedia can cause production losses in chickens and strain PWS/AT now becomes the fourth genome to be completed in the genus Brachyspira.

RESULTS: 15 classes of unique and shared genes were analyzed in B. intermedia, B. murdochii, B. hyodysenteriae and B. pilosicoli. The largest number of unique genes was found in B. intermedia and B. murdochii. This indicates the presence of larger pan-genomes. In general, hypothetical protein annotations are overrepresented among the unique genes. A 3.2 kb plasmid was found in B. intermedia strain PWS/AT. The plasmid was also present in the B. murdochii strain but not in nine other Brachyspira isolates. Within the Brachyspira genomes, genes had been translocated and also frequently switched between leading and lagging strands, a process that can be followed by different AT-skews in the third positions of synonymous codons. We also found evidence that bacteriophages were being remodeled and genes incorporated into them.

CONCLUSIONS: The accessory gene pool shapes species-specific traits. It is also influenced by reductive genome evolution and horizontal gene transfer. Gene-transfer events can cross both species and genus boundaries and bacteriophages appear to play an important role in this process. A mechanism for horizontal gene transfer appears to be gene translocations leading to remodeling of bacteriophages in combination with broad tropism.}, } @article {pmid21811795, year = {2012}, author = {Özen, AI and Ussery, DW}, title = {Defining the Pseudomonas genus: where do we draw the line with Azotobacter?.}, journal = {Microbial ecology}, volume = {63}, number = {2}, pages = {239-248}, pmid = {21811795}, issn = {1432-184X}, mesh = {Azotobacter vinelandii/*classification/*genetics ; Cellvibrio/classification/genetics ; Evolution, Molecular ; Gammaproteobacteria/classification/genetics ; *Genome, Bacterial ; *Phylogeny ; Pseudomonas/*classification/*genetics ; RNA, Bacterial/genetics ; RNA, Ribosomal, 16S/genetics ; Sequence Homology, Nucleic Acid ; }, abstract = {The genus Pseudomonas has gone through many taxonomic revisions over the past 100 years, going from a very large and diverse group of bacteria to a smaller, more refined and ordered list having specific properties. The relationship of the Pseudomonas genus to Azotobacter vinelandii is examined using three genomic sequence-based methods. First, using 16S rRNA trees, it is shown that A. vinelandii groups within the Pseudomonas close to Pseudomonas aeruginosa. Genomes from other related organisms (Acinetobacter, Psychrobacter, and Cellvibrio) are outside the Pseudomonas cluster. Second, pan genome family trees based on conserved gene families also show A. vinelandii to be more closely related to Pseudomonas than other related organisms. Third, exhaustive BLAST comparisons demonstrate that the fraction of shared genes between A. vinelandii and Pseudomonas genomes is similar to that of Pseudomonas species with each other. 
The results of these different methods point to a high similarity between A. vinelandii and the Pseudomonas genus, suggesting that Azotobacter might actually be a Pseudomonas.}, } @article {pmid21811606, year = {2011}, author = {Kittichotirat, W and Bumgarner, RE and Asikainen, S and Chen, C}, title = {Identification of the pangenome and its components in 14 distinct Aggregatibacter actinomycetemcomitans strains by comparative genomic analysis.}, journal = {PloS one}, volume = {6}, number = {7}, pages = {e22420}, pmid = {21811606}, issn = {1932-6203}, support = {R01 DE012212/DE/NIDCR NIH HHS/United States ; R01 DE12212/DE/NIDCR NIH HHS/United States ; }, mesh = {Actinobacillus/*genetics/pathogenicity ; Cluster Analysis ; Comparative Genomic Hybridization/*methods ; Gene Pool ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Molecular Sequence Annotation ; Phylogeny ; Sequence Analysis, DNA ; Species Specificity ; Virulence/genetics ; }, abstract = {BACKGROUND: Aggregatibacter actinomycetemcomitans is genetically heterogeneous and comprises distinct clonal lineages that may have different virulence potentials. However, limited information of the strain-to-strain genomic variations is available.

METHODOLOGY/PRINCIPAL FINDINGS: The genome sequences of 11 A. actinomycetemcomitans strains (serotypes a-f) were generated de novo, annotated and combined with three previously sequenced genomes (serotypes a-c) for comparative genomic analysis. Two major groups were identified; serotypes a, d, e, and f, and serotypes b and c. A serotype e strain was found to be distinct from both groups. The size of the pangenome was 3,301 genes, which included 2,034 core genes and 1,267 flexible genes. The number of core genes is estimated to stabilize at 2,060, while the size of the pangenome is estimated to increase by 16 genes with every additional strain sequenced in the future. Within each strain 16.7-29.4% of the genome belonged to the flexible gene pool. Between any two strains 0.4-19.5% of the genomes were different. The genomic differences were occasionally greater for strains of the same serotypes than strains of different serotypes. Furthermore, 171 genomic islands were identified. Cumulatively, 777 strain-specific genes were found on these islands and represented 61% of the flexible gene pool.

CONCLUSIONS/SIGNIFICANCE: Substantial genomic differences were detected among A. actinomycetemcomitans strains. Genomic islands account for more than half of the flexible genes. The phenotype and virulence of A. actinomycetemcomitans may not be defined by any single strain. Moreover, the genomic variation within each clonal lineage of A. actinomycetemcomitans (as defined by serotype grouping) may be greater than between clonal lineages. The large genomic data set in this study will be useful to further examine the molecular basis of variable virulence among A. actinomycetemcomitans strains.}, } @article {pmid21808635, year = {2011}, author = {Klockgether, J and Cramer, N and Wiehlmann, L and Davenport, CF and Tümmler, B}, title = {Pseudomonas aeruginosa Genomic Structure and Diversity.}, journal = {Frontiers in microbiology}, volume = {2}, number = {}, pages = {150}, pmid = {21808635}, issn = {1664-302X}, abstract = {The Pseudomonas aeruginosa genome (G + C content 65-67%, size 5.5-7 Mbp) is made up of a single circular chromosome and a variable number of plasmids. Sequencing of complete genomes or blocks of the accessory genome has revealed that the genome encodes a large repertoire of transporters, transcriptional regulators, and two-component regulatory systems which reflects its metabolic diversity to utilize a broad range of nutrients. The conserved core component of the genome is largely collinear among P. aeruginosa strains and exhibits an interclonal sequence diversity of 0.5-0.7%. Only a few loci of the core genome are subject to diversifying selection. Genome diversity is mainly caused by accessory DNA elements located in 79 regions of genome plasticity that are scattered around the genome and show an anomalous usage of mono- to tetradecanucleotides. Genomic islands of the pKLC102/PAGI-2 family that integrate into tRNA(Lys) or tRNA(Gly) genes represent hotspots of inter- and intraclonal genomic diversity. 
The individual islands differ in their repertoire of metabolic genes that make a large contribution to the pangenome. In order to unravel intraclonal diversity of P. aeruginosa, the genomes of two members of the PA14 clonal complex from diverse habitats and geographic origin were compared. The genome sequences differed by less than 0.01% from each other. One hundred ninety-eight of the 231 single nucleotide substitutions (SNPs) were non-randomly distributed in the genome. Non-synonymous SNPs were mainly found in an integrated Pf1-like phage and in genes involved in transcriptional regulation, membrane and extracellular constituents, transport, and secretion. In summary, P. aeruginosa is endowed with a highly conserved core genome of low sequence diversity and a highly variable accessory genome that communicates with other pseudomonads and genera via horizontal gene transfer.}, } @article {pmid21799664, year = {2011}, author = {Baltrus, DA and Nishimura, MT and Romanchuk, A and Chang, JH and Mukhtar, MS and Cherkis, K and Roach, J and Grant, SR and Jones, CD and Dangl, JL}, title = {Dynamic evolution of pathogenicity revealed by sequencing and comparative genomics of 19 Pseudomonas syringae isolates.}, journal = {PLoS pathogens}, volume = {7}, number = {7}, pages = {e1002132}, pmid = {21799664}, issn = {1553-7374}, support = {F32 GM082279/GM/NIGMS NIH HHS/United States ; R01 GM066025/GM/NIGMS NIH HHS/United States ; GM082279-03/GM/NIGMS NIH HHS/United States ; 1-R01-GM066025/GM/NIGMS NIH HHS/United States ; }, mesh = {Alleles ; Bacterial Proteins/genetics ; Base Sequence ; *Biological Evolution ; Genome, Bacterial ; Genomics ; Phylogeny ; Plant Diseases/*genetics ; Plasmids/genetics ; Pseudomonas syringae/*genetics/*pathogenicity ; Virulence Factors/*genetics ; }, abstract = {Closely related pathogens may differ dramatically in host range, but the molecular, genetic, and evolutionary basis for these differences remains unclear. 
In many Gram-negative bacteria, including the phytopathogen Pseudomonas syringae, type III effectors (TTEs) are essential for pathogenicity, instrumental in structuring host range, and exhibit wide diversity between strains. To capture the dynamic nature of virulence gene repertoires across P. syringae, we screened 11 diverse strains for novel TTE families and coupled this nearly saturating screen with the sequencing and assembly of 14 phylogenetically diverse isolates from a broad collection of diseased host plants. TTE repertoires vary dramatically in size and content across all P. syringae clades; surprisingly few TTEs are conserved and present in all strains. Those that are likely provide basal requirements for pathogenicity. We demonstrate that functional divergence within one conserved locus, hopM1, leads to dramatic differences in pathogenicity, and we demonstrate that phylogenetics-informed mutagenesis can be used to identify functionally critical residues of TTEs. The dynamism of the TTE repertoire is mirrored by diversity in pathways affecting the synthesis of secreted phytotoxins, highlighting the likely role of both types of virulence factors in determination of host range. We used these 14 draft genome sequences, plus five additional genome sequences previously reported, to identify the core genome for P. syringae and we compared this core to that of two closely related non-pathogenic pseudomonad species. These data revealed the recent acquisition of a 1 Mb megaplasmid by a sub-clade of cucumber pathogens. 
This megaplasmid encodes a type IV secretion system and a diverse set of unknown proteins, which dramatically increases both the genomic content of these strains and the pan-genome of the species.}, } @article {pmid21785696, year = {2011}, author = {Vannier-Santos, MA and Lenzi, HL}, title = {Parasites or cohabitants: cruel omnipresent usurpers or creative "éminences grises"?.}, journal = {Journal of parasitology research}, volume = {2011}, number = {}, pages = {214174}, pmid = {21785696}, issn = {2090-0031}, abstract = {This paper presents many types of interplays between parasites and the host, showing the history of parasites, the effects of parasites on the outcome of wars, invasions, migrations, and on the development of numerous regions of the globe, and the impact of parasitic diseases on the society and on the course of human evolution. It also emphasizes the pressing need to change the look at the parasitism phenomenon, proposing that the term "cohabitant" is more accurate than parasite, because every living being, from bacteria to mammals, is a consortium of living beings in the pangenome. Even the term parasitology should be replaced by cohabitology because there is no parasite alone and host alone: both together compose a new adaptive system: the parasitized-host or the cohabitant-cohabited being. 
It also suggests switching the old paradigm based on attrition and destruction, to a new one founded on adaptation and living together.}, } @article {pmid21764529, year = {2011}, author = {Laing, CR and Zhang, Y and Thomas, JE and Gannon, VP}, title = {Everything at once: comparative analysis of the genomes of bacterial pathogens.}, journal = {Veterinary microbiology}, volume = {153}, number = {1-2}, pages = {13-26}, doi = {10.1016/j.vetmic.2011.06.014}, pmid = {21764529}, issn = {1873-2542}, mesh = {Algorithms ; Bacteria/classification/drug effects/*genetics/pathogenicity ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing/*methods ; Phylogeny ; Software ; }, abstract = {The sum of unique genes in all genomes of a bacterial species is referred to as the pan-genome and is comprised of variably absent or present accessory genes and universally present core genes. The accessory genome is an important source of genetic variability in bacterial populations, allowing sub-populations of bacteria to better adapt to specific niches. Such subgroups may themselves have a relatively stable core genome that may influence host preference, virulence, or an association with specific disease syndromes. The core genome provides a useful means of phylogenetic reconstruction as well as contributing to phenotypic heterogeneity. Variation within the pan-genome forms the basis of comparative genotyping techniques, which have evolved alongside technology. Current high-throughput sequencing platforms have created an unprecedented opportunity for comparisons among multiple, closely related genomes. The computer algorithms and software for such comparisons continue to evolve and promise exciting advances in the world of bacterial comparative genomics. 
We review genotyping techniques based upon phenotypic traits, both core and accessory genomes, and look at some of the software programs currently available to perform whole-genome comparative analyses.}, } @article {pmid21731613, year = {2011}, author = {Smih, F and Desmoulin, F and Berry, M and Turkieh, A and Harmancey, R and Iacovoni, J and Trouillet, C and Delmas, C and Pathak, A and Lairez, O and Koukoui, F and Massabuau, P and Ferrieres, J and Galinier, M and Rouet, P}, title = {Blood signature of pre-heart failure: a microarrays study.}, journal = {PloS one}, volume = {6}, number = {6}, pages = {e20414}, pmid = {21731613}, issn = {1932-6203}, mesh = {Adult ; Aged ; Female ; *Gene Expression Profiling ; Gene Expression Regulation ; Genetic Predisposition to Disease ; Heart Failure/*blood/*genetics/physiopathology ; Humans ; Leukocytes/metabolism ; Male ; Middle Aged ; Models, Cardiovascular ; Natriuretic Peptide, Brain/blood ; Oligonucleotide Array Sequence Analysis/*methods ; Principal Component Analysis ; ROC Curve ; Reproducibility of Results ; Risk Factors ; Software ; Ventricular Dysfunction, Left/blood/genetics/physiopathology ; Young Adult ; }, abstract = {BACKGROUND: The preclinical stage of systolic heart failure (HF), known as asymptomatic left ventricular dysfunction (ALVD), is diagnosed only by echocardiography, frequent in the general population and leads to a high risk of developing severe HF. Large scale screening for ALVD is a difficult task and represents a major unmet clinical challenge that requires the determination of ALVD biomarkers.

METHODOLOGY/PRINCIPAL FINDINGS: 294 individuals were screened by echocardiography. We identified 9 ALVD cases out of 128 subjects with cardiovascular risk factors. White blood cell gene expression profiling was performed using pangenomic microarrays. Data were analyzed using principal component analysis (PCA) and Significant Analysis of Microarrays (SAM). To build an ALVD classifier model, we used the nearest centroid classification method (NCCM) with the ClaNC software package. Classification performance was determined using the leave-one-out cross-validation method. Blood transcriptome analysis provided a specific molecular signature for ALVD which defined a model based on 7 genes capable of discriminating ALVD cases. Analysis of an ALVD patients validation group demonstrated that these genes are accurate diagnostic predictors for ALVD with 87% accuracy and 100% precision. Furthermore, Receiver Operating Characteristic curves of expression levels confirmed that 6 out of 7 genes discriminate for left ventricular dysfunction classification.

CONCLUSIONS/SIGNIFICANCE: These targets could serve to enhance the ability to efficiently detect ALVD by general care practitioners to facilitate preemptive initiation of medical treatment preventing the development of HF.}, } @article {pmid21731083, year = {2011}, author = {Day, MD and Beck, D and Foster, JA}, title = {Microbial Communities as Experimental Units.}, journal = {Bioscience}, volume = {61}, number = {5}, pages = {398-406}, pmid = {21731083}, issn = {0006-3568}, support = {P20 RR016448/RR/NCRR NIH HHS/United States ; P20 RR016448-01/RR/NCRR NIH HHS/United States ; P20 RR016454/RR/NCRR NIH HHS/United States ; P20 RR016454-06/RR/NCRR NIH HHS/United States ; }, abstract = {Artificial ecosystem selection is an experimental technique that treats microbial communities as though they were discrete units by applying selection on community-level properties. Highly diverse microbial communities associated with humans and other organisms can have significant impacts on the health of the host. It is difficult to find correlations between microbial community composition and community-associated diseases, in part because it may be impossible to define a universal and robust species concept for microbes. Microbial communities are composed of potentially thousands of unique populations that evolved in intimate contact, so it is appropriate in many situations to view the community as the unit of analysis. This perspective is supported by recent discoveries using metagenomics and pangenomics. 
Artificial ecosystem selection experiments can be costly, but they bring the logical rigor of biological model systems to the emerging field of microbial community analysis.}, } @article {pmid21718539, year = {2011}, author = {Angiuoli, SV and Dunning Hotopp, JC and Salzberg, SL and Tettelin, H}, title = {Improving pan-genome annotation using whole genome multiple alignment.}, journal = {BMC bioinformatics}, volume = {12}, number = {}, pages = {272}, pmid = {21718539}, issn = {1471-2105}, support = {R01 HG006677/HG/NHGRI NIH HHS/United States ; R01 HG006677-12/HG/NHGRI NIH HHS/United States ; R01 GM083873/GM/NIGMS NIH HHS/United States ; R01 HG006102/HG/NHGRI NIH HHS/United States ; R01-GM083873/GM/NIGMS NIH HHS/United States ; R01 GM083873-09/GM/NIGMS NIH HHS/United States ; }, mesh = {Bacteria/*genetics ; Chromosome Mapping ; *Genome, Bacterial ; *Molecular Sequence Annotation ; Sequence Alignment/*methods ; }, abstract = {BACKGROUND: Rapid annotation and comparisons of genomes from multiple isolates (pan-genomes) is becoming commonplace due to advances in sequencing technology. Genome annotations can contain inconsistencies and errors that hinder comparative analysis even within a single species. Tools are needed to compare and improve annotation quality across sets of closely related genomes.

RESULTS: We introduce a new tool, Mugsy-Annotator, that identifies orthologs and evaluates annotation quality in prokaryotic genomes using whole genome multiple alignment. Mugsy-Annotator identifies anomalies in annotated gene structures, including inconsistently located translation initiation sites and disrupted genes due to draft genome sequencing or pseudogenes. An evaluation of species pan-genomes using the tool indicates that such anomalies are common, especially at translation initiation sites. Mugsy-Annotator reports alternate annotations that improve consistency and are candidates for further review.

CONCLUSIONS: Whole genome multiple alignment can be used to efficiently identify orthologs and annotation problem areas in a bacterial pan-genome. Comparisons of annotated gene structures within a species may show more variation than is actually present in the genome, indicating errors in genome annotation. Our new tool Mugsy-Annotator assists re-annotation efforts by highlighting edits that improve annotation consistency.}, } @article {pmid21699924, year = {2011}, author = {van Aartsen, JJ and Rajakumar, K}, title = {An optimized method for suicide vector-based allelic exchange in Klebsiella pneumoniae.}, journal = {Journal of microbiological methods}, volume = {86}, number = {3}, pages = {313-319}, doi = {10.1016/j.mimet.2011.06.006}, pmid = {21699924}, issn = {1872-8359}, mesh = {Base Sequence ; DNA Nucleotidyltransferases/*genetics ; DNA, Bacterial/genetics ; *Gene Transfer Techniques ; Genes, Transgenic, Suicide/*genetics ; Genetic Vectors ; Genome ; Klebsiella Infections ; Klebsiella pneumoniae/*genetics ; Plasmids ; }, abstract = {Klebsiella pneumoniae is an important and versatile bacterium that can be found in diverse environments and is also a frequent cause of human infections. Limited data exists on the mechanisms of interaction between K. pneumoniae and the human host and of adaptations to other environments. Coupled with the high genetic diversity of this species, these factors highlight the necessity for substantial further K. pneumoniae-focused molecular genetics studies. In this report we describe a simple and efficient experimental protocol for suicide vector-based allelic exchange in K. pneumoniae. The protocol has been validated by mutating multiple loci in four distinct K. pneumoniae strains, including highly capsulated and/or multi-antibiotic resistant clinical isolates. 
Three key enhancements are reported: (1) Use of pDS132-derived conjugative plasmids carrying improved cloning sites, (2) Performance of sacB counterselection at 25°C as opposed to higher temperatures, and (3) Exploitation of Flp-recombinase-mediated deletion of FRT (Flp recombinase target) flanked resistance cassettes to allow for reiterative manipulations with a single selectable marker. This study also highlights a problem that may be encountered when the aacC1 gentamicin resistance marker is used in K. pneumoniae and suggests alternative markers. The protocol developed in this study will help investigate the plethora of uncharacterized genes present in the K. pneumoniae pan-genome and shed further light upon clinically and industrially important phenotypes observed in this ubiquitous species.}, } @article {pmid21690563, year = {2011}, author = {Collingro, A and Tischler, P and Weinmaier, T and Penz, T and Heinz, E and Brunham, RC and Read, TD and Bavoil, PM and Sachse, K and Kahane, S and Friedman, MG and Rattei, T and Myers, GS and Horn, M}, title = {Unity in variety--the pan-genome of the Chlamydiae.}, journal = {Molecular biology and evolution}, volume = {28}, number = {12}, pages = {3253-3270}, pmid = {21690563}, issn = {1537-1719}, support = {R01 AI051472/AI/NIAID NIH HHS/United States ; 1R01AI051472/AI/NIAID NIH HHS/United States ; }, mesh = {Bacterial Outer Membrane Proteins/chemistry/genetics ; Bacterial Proteins/chemistry/genetics ; Bacterial Secretion Systems/genetics ; Base Sequence ; Cell Membrane ; Chlamydia/classification/*genetics/pathogenicity ; Chlamydiales/classification/*genetics/pathogenicity ; DNA, Bacterial/analysis/*genetics ; Evolution, Molecular ; Gene Transfer, Horizontal ; Genetic Variation ; *Genome, Bacterial ; Host-Pathogen Interactions ; Molecular Sequence Data ; Phylogeny ; Plasmids ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Symbiosis ; }, abstract = {Chlamydiae are evolutionarily well-separated bacteria that live 
exclusively within eukaryotic host cells. They include important human pathogens such as Chlamydia trachomatis as well as symbionts of protozoa. As these bacteria are experimentally challenging and genetically intractable, our knowledge about them is still limited. In this study, we obtained the genome sequences of Simkania negevensis Z, Waddlia chondrophila 2032/99, and Parachlamydia acanthamoebae UV-7. This enabled us to perform the first comprehensive comparative and phylogenomic analysis of representative members of four major families of the Chlamydiae, including the Chlamydiaceae. We identified a surprisingly large core gene set present in all genomes and a high number of diverse accessory genes in those Chlamydiae that do not primarily infect humans or animals, including a chemosensory system in P. acanthamoebae and a type IV secretion system. In S. negevensis, the type IV secretion system is encoded on a large conjugative plasmid (pSn, 132 kb). Phylogenetic analyses suggested that a plasmid similar to the S. negevensis plasmid was originally acquired by the last common ancestor of all four families and that it was subsequently reduced, integrated into the chromosome, or lost during diversification, ultimately giving rise to the extant virulence-associated plasmid of pathogenic chlamydiae. Other virulence factors, including a type III secretion system, are conserved among the Chlamydiae to variable degrees and together with differences in the composition of the cell wall reflect adaptation to different host cells including convergent evolution among the four chlamydial families. 
Phylogenomic analysis focusing on chlamydial proteins with homology to plant proteins provided evidence for the acquisition of 53 chlamydial genes by a plant progenitor, lending further support for the hypothesis of an early interaction between a chlamydial ancestor and the primary photosynthetic eukaryote.}, } @article {pmid21685873, year = {2011}, author = {Reynders, A and Yessaad, N and Vu Manh, TP and Dalod, M and Fenis, A and Aubry, C and Nikitas, G and Escalière, B and Renauld, JC and Dussurget, O and Cossart, P and Lecuit, M and Vivier, E and Tomasello, E}, title = {Identity, regulation and in vivo function of gut NKp46+RORγt+ and NKp46+RORγt- lymphoid cells.}, journal = {The EMBO journal}, volume = {30}, number = {14}, pages = {2934-2947}, pmid = {21685873}, issn = {1460-2075}, mesh = {Animals ; *Cell Lineage ; Female ; Flow Cytometry ; *Immunity, Innate ; Intestine, Small/immunology/metabolism/microbiology ; Listeria monocytogenes/isolation & purification ; Listeriosis/metabolism/microbiology ; Lymphocytes/immunology/*metabolism/*microbiology ; Mice ; Mice, Inbred C57BL ; Mice, Knockout ; Myeloid Differentiation Factor 88/physiology ; Natural Cytotoxicity Triggering Receptor 1/genetics/*metabolism ; Receptors, Interleukin-1/physiology ; Receptors, Retinoic Acid/genetics/*metabolism ; Tissue Distribution ; }, abstract = {The gut is a major barrier against microbes and encloses various innate lymphoid cells (ILCs), including two subsets expressing the natural cytotoxicity receptor NKp46. A subset of NKp46(+) cells expresses retinoic acid receptor-related orphan receptor γt (RORγt) and produces IL-22, like lymphoid tissue inducer (LTi) cells. Other NKp46(+) cells lack RORγt and produce IFN-γ, like conventional Natural Killer (cNK) cells. The identity, the regulation and the in vivo functions of gut NKp46(+) ILCs largely remain to be unravelled. 
Using pan-genomic profiling, we showed here that small intestine (SI) NKp46(+)RORγt(-) ILCs correspond to SI NK cells. Conversely, we identified a transcriptional programme conserved in fetal LTi cells and adult SI NKp46(+)RORγt(+) and NKp46(-)RORγt(+) ILCs. We also demonstrated that the IL-1β/IL-1R1/MyD88 pathway, but not the commensal flora, drove IL-22 production by NKp46(+)RORγt(+) ILCs. Finally, oral Listeria monocytogenes infection induced IFN-γ production in SI NK and IL-22 production in NKp46(+)RORγt(+) ILCs, but only IFN-γ contributed to control bacteria dissemination. NKp46(+) ILC heterogeneity is thus associated with subset-specific transcriptional programmes and effector functions that govern their implication in gut innate immunity.}, } @article {pmid21670281, year = {2011}, author = {Tschopp, P and Fraudeau, N and Béna, F and Duboule, D}, title = {Reshuffling genomic landscapes to study the regulatory evolution of Hox gene clusters.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {108}, number = {26}, pages = {10632-10637}, pmid = {21670281}, issn = {1091-6490}, mesh = {Animals ; Base Sequence ; DNA Primers ; *Evolution, Molecular ; Gene Expression Regulation ; *Genes, Homeobox ; *Genomics ; In Situ Hybridization, Fluorescence ; Mice ; *Multigene Family ; Transcription, Genetic ; }, abstract = {The emergence of Vertebrata was accompanied by two rounds of whole-genome duplications. This enabled paralogous genes to acquire novel functions with high evolutionary potential, a process suggested to occur mostly by changes in gene regulation, rather than in protein sequences. In the case of Hox gene clusters, such duplications favored the appearance of distinct global regulations. 
To assess the impact of such "regulatory evolution" upon neo-functionalization, we developed PANTHERE (PAN-genomic Translocation for Heterologous Enhancer RE-shuffling) to bring the entire megabase-scale HoxD regulatory landscape in front of the HoxC gene cluster via a targeted translocation in vivo. At this chimeric locus, Hoxc genes could both interpret this foreign regulation and functionally substitute for their Hoxd counterparts. Our results emphasize the importance of evolving regulatory modules rather than their target genes in the process of neo-functionalization and offer a genetic tool to study the complexity of the vertebrate regulatory genome.}, } @article {pmid21666017, year = {2011}, author = {Hug, LA and Salehi, M and Nuin, P and Tillier, ER and Edwards, EA}, title = {Design and verification of a pangenome microarray oligonucleotide probe set for Dehalococcoides spp.}, journal = {Applied and environmental microbiology}, volume = {77}, number = {15}, pages = {5361-5369}, pmid = {21666017}, issn = {1098-5336}, mesh = {Bacterial Typing Techniques/*methods ; Base Sequence ; Chloroflexi/*genetics ; DNA, Bacterial/analysis/genetics ; Multigene Family ; Nucleic Acid Hybridization/genetics ; Oligonucleotide Array Sequence Analysis/*methods ; Oligonucleotide Probes/genetics ; Proteomics/methods ; RNA, Ribosomal, 16S/analysis/genetics ; Sequence Alignment ; Sequence Analysis, DNA ; }, abstract = {Dehalococcoides spp. are an industrially relevant group of Chloroflexi bacteria capable of reductively dechlorinating contaminants in groundwater environments. Existing Dehalococcoides genomes revealed a high level of sequence identity within this group, including 98 to 100% 16S rRNA sequence identity between strains with diverse substrate specificities. Common molecular techniques for identification of microbial populations are often not applicable for distinguishing Dehalococcoides strains. 
Here we describe an oligonucleotide microarray probe set designed based on clustered Dehalococcoides genes from five different sources (strain DET195, CBDB1, BAV1, and VS genomes and the KB-1 metagenome). This "pangenome" probe set provides coverage of core Dehalococcoides genes as well as strain-specific genes while optimizing the potential for hybridization to closely related, previously unknown Dehalococcoides strains. The pangenome probe set was compared to probe sets designed independently for each of the five Dehalococcoides strains. The pangenome probe set demonstrated better predictability and higher detection of Dehalococcoides genes than strain-specific probe sets on nontarget strains with <99% average nucleotide identity. An in silico analysis of the expected probe hybridization against the recently released Dehalococcoides strain GT genome and additional KB-1 metagenome sequence data indicated that the pangenome probe set performs more robustly than the combined strain-specific probe sets in the detection of genes not included in the original design. The pangenome probe set represents a highly specific, universal tool for the detection and characterization of Dehalococcoides from contaminated sites. 
It has the potential to become a common platform for Dehalococcoides-focused research, allowing meaningful comparisons between microarray experiments regardless of the strain examined.}, } @article {pmid21643699, year = {2011}, author = {Jacobsen, A and Hendriksen, RS and Aaresturp, FM and Ussery, DW and Friis, C}, title = {The Salmonella enterica pan-genome.}, journal = {Microbial ecology}, volume = {62}, number = {3}, pages = {487-504}, pmid = {21643699}, issn = {1432-184X}, mesh = {Comparative Genomic Hybridization ; DNA Transposable Elements ; DNA, Bacterial/genetics ; Escherichia coli/genetics ; *Genome, Bacterial ; Genomic Islands ; Multigene Family ; Plasmids/genetics ; RNA, Ribosomal, 16S/genetics ; Salmonella enterica/*genetics ; Sequence Analysis, DNA ; }, abstract = {Salmonella enterica is divided into four subspecies containing a large number of different serovars, several of which are important zoonotic pathogens and some show a high degree of host specificity or host preference. We compare 45 sequenced S. enterica genomes that are publicly available (22 complete and 23 draft genome sequences). Of these, 35 were found to be of sufficiently good quality to allow a detailed analysis, along with two Escherichia coli strains (K-12 substr. DH10B and the avian pathogenic E. coli (APEC O1) strain). All genomes were subjected to standardized gene finding, and the core and pan-genome of Salmonella were estimated to be around 2,800 and 10,000 gene families, respectively. The constructed pan-genomic dendrograms suggest that gene content is often, but not uniformly correlated to serotype. Any given Salmonella strain has a large stable core, whilst there is an abundance of accessory genes, including the Salmonella pathogenicity islands (SPIs), transposable elements, phages, and plasmid DNA. 
We visualize conservation in the genomes in relation to chromosomal location and DNA structural features and find that variation in gene content is localized in a selection of variable genomic regions or islands. These include the SPIs but also encompass phage insertion sites and transposable elements. The islands were typically well conserved in several, but not all, isolates--a difference which may have implications in, e.g., host specificity.}, } @article {pmid21622745, year = {2011}, author = {Lapidus, A and Clum, A and Labutti, K and Kaluzhnaya, MG and Lim, S and Beck, DA and Glavina Del Rio, T and Nolan, M and Mavromatis, K and Huntemann, M and Lucas, S and Lidstrom, ME and Ivanova, N and Chistoserdova, L}, title = {Genomes of three methylotrophs from a single niche reveal the genetic and metabolic divergence of the methylophilaceae.}, journal = {Journal of bacteriology}, volume = {193}, number = {15}, pages = {3757-3764}, pmid = {21622745}, issn = {1098-5530}, mesh = {*Evolution, Molecular ; Fresh Water/microbiology ; *Genome, Bacterial ; Methanol/metabolism ; Methylophilaceae/classification/*genetics/isolation & purification/*metabolism ; Molecular Sequence Data ; }, abstract = {The genomes of three representatives of the family Methylophilaceae, Methylotenera mobilis JLW8, Methylotenera versatilis 301, and Methylovorus glucosetrophus SIP3-4, all isolated from a single study site, Lake Washington in Seattle, WA, were completely sequenced. These were compared to each other and to the previously published genomes of Methylobacillus flagellatus KT and an unclassified Methylophilales strain, HTCC2181. Comparative analysis revealed that the core genome of Methylophilaceae may be as small as approximately 600 genes, while the pangenome may be as large as approximately 6,000 genes. 
Significant divergence between the genomes in terms of both gene content and gene and protein conservation was uncovered, including the varied presence of certain genes involved in methylotrophy. Overall, our data demonstrate that metabolic potentials can vary significantly between different species of Methylophilaceae, including organisms inhabiting the very same environment. These data suggest that genetic divergence among the members of this family may be responsible for their specialized and nonredundant functions in C1 cycling, which in turn suggests means for their successful coexistence in their specific ecological niches.}, } @article {pmid21615910, year = {2011}, author = {Joseph, SJ and Didelot, X and Gandhi, K and Dean, D and Read, TD}, title = {Interplay of recombination and selection in the genomes of Chlamydia trachomatis.}, journal = {Biology direct}, volume = {6}, number = {}, pages = {28}, pmid = {21615910}, issn = {1745-6150}, support = {R01 AI059647/AI/NIAID NIH HHS/United States ; }, mesh = {Base Sequence ; Chlamydia trachomatis/classification/*genetics ; DNA, Bacterial/genetics ; Evolution, Molecular ; *Genome, Bacterial ; Linkage Disequilibrium ; Phylogeny ; *Recombination, Genetic ; *Selection, Genetic ; Sequence Alignment ; }, abstract = {BACKGROUND: Chlamydia trachomatis is an obligate intracellular bacterial parasite, which causes several severe and debilitating diseases in humans. This study uses comparative genomic analyses of 12 complete published C. trachomatis genomes to assess the contribution of recombination and selection in this pathogen and to understand the major evolutionary forces acting on the genome of this bacterium.

RESULTS: The conserved core genes of C. trachomatis are a large proportion of the pan-genome: we identified 836 core genes in C. trachomatis out of a range of 874-927 total genes in each genome. The ratio of recombination events compared to mutation (ρ/θ) was 0.07 based on ancestral reconstructions using the ClonalFrame tool, but recombination had a significant effect on genetic diversification (r/m=0.71). The distance-dependent decay of linkage disequilibrium also indicated that C. trachomatis populations behaved intermediately between sexual and clonal extremes. Fifty-five genes were identified as having a history of recombination and 92 were under positive selection based on statistical tests. Twenty-three genes showed evidence of being under both positive selection and recombination, which included genes with a known role in virulence and pathogenicity (e.g., ompA, pmps, tarp). Analysis of inter-clade recombination flux indicated non-uniform currents of recombination between clades, which suggests the possibility of spatial population structure in C. trachomatis infections.

CONCLUSIONS: C. trachomatis is the archetype of a bacterial species where recombination is relatively frequent yet gene gains by horizontal gene transfer (HGT) and losses (by deletion) are rare. Gene conversion occurs at sites across the whole C. trachomatis genome but may be more often fixed in genes that are under diversifying selection. Furthermore, genome sequencing will reveal patterns of serotype specific gene exchange and selection that will generate important research questions for understanding C. trachomatis pathogenesis.

REVIEWERS: This article was reviewed by Dr. Jeremy Selengut, Dr. Lee S. Katz (nominated by Dr. I. King Jordan) and Dr. Arcady Mushegian.}, } @article {pmid21569405, year = {2011}, author = {Galardini, M and Mengoni, A and Brilli, M and Pini, F and Fioravanti, A and Lucas, S and Lapidus, A and Cheng, JF and Goodwin, L and Pitluck, S and Land, M and Hauser, L and Woyke, T and Mikhailova, N and Ivanova, N and Daligault, H and Bruce, D and Detter, C and Tapia, R and Han, C and Teshima, H and Mocali, S and Bazzicalupo, M and Biondi, EG}, title = {Exploring the symbiotic pangenome of the nitrogen-fixing bacterium Sinorhizobium meliloti.}, journal = {BMC genomics}, volume = {12}, number = {}, pages = {235}, pmid = {21569405}, issn = {1471-2164}, mesh = {Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Genomics ; Molecular Sequence Annotation ; Nitrogen Fixation/*genetics ; Phenotype ; Regulon/genetics ; Sinorhizobium meliloti/*genetics/*metabolism ; Species Specificity ; Symbiosis/*genetics ; Transcription Factors/metabolism ; }, abstract = {BACKGROUND: Sinorhizobium meliloti is a model system for the studies of symbiotic nitrogen fixation. An extensive polymorphism at the genetic and phenotypic level is present in natural populations of this species, especially in relation with symbiotic promotion of plant growth. AK83 and BL225C are two nodule-isolated strains with diverse symbiotic phenotypes; BL225C is more efficient in promoting growth of the Medicago sativa plants than strain AK83. In order to investigate the genetic determinants of the phenotypic diversification of S. meliloti strains AK83 and BL225C, we sequenced the complete genomes for these two strains.

RESULTS: With sizes of 7.14 Mbp and 6.97 Mbp, respectively, the genomes of AK83 and BL225C are larger than the laboratory strain Rm1021. The core genome of Rm1021, AK83, BL225C strains included 5124 orthologous groups, while the accessory genome was composed of 2700 orthologous groups. While Rm1021 and BL225C have only three replicons (Chromosome, pSymA and pSymB), AK83 has also two plasmids, 260 and 70 Kbp long. We found 65 interesting orthologous groups of genes that were present only in the accessory genome, consequently responsible for phenotypic diversity and putatively involved in plant-bacterium interaction. Notably, the symbiosis-inefficient AK83 lacked several genes required for microaerophilic growth inside nodules, while several genes for accessory functions related to competition, plant invasion and bacteroid tropism were identified only in AK83 and BL225C strains. Presence and extent of polymorphism in regulons of transcription factors involved in symbiotic interaction were also analyzed. Our results indicate that regulons are flexible, with a large number of accessory genes, suggesting that regulon polymorphism could also be a key determinant in the variability of symbiotic performances among the analyzed strains.

CONCLUSIONS: In conclusion, the extended comparative genomics approach revealed a variable subset of genes and regulons that may contribute to the symbiotic diversity.}, } @article {pmid21562597, year = {2011}, author = {Bailly, X and Giuntini, E and Sexton, MC and Lower, RP and Harrison, PW and Kumar, N and Young, JP}, title = {Population genomics of Sinorhizobium medicae based on low-coverage sequencing of sympatric isolates.}, journal = {The ISME journal}, volume = {5}, number = {11}, pages = {1722-1734}, pmid = {21562597}, issn = {1751-7370}, support = {//Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Genome, Bacterial ; Medicago/*microbiology/physiology ; *Metagenomics ; Polymerase Chain Reaction ; Polymorphism, Genetic ; Sequence Analysis, DNA ; Sinorhizobium/*classification/*genetics/physiology ; Symbiosis ; }, abstract = {We investigated the genomic diversity of a local population of the symbiotic bacterium Sinorhizobium medicae, isolated from the roots of wild Medicago lupulina plants, in order to assess genomic diversity, to identify genomic regions influenced by duplication, deletion or strong selection, and to explore the composition of the pan-genome. Partial genome sequences of 12 isolates were obtained by Roche 454 shotgun sequencing (average 5.3 Mb per isolate) and compared with the published sequence of S. medicae WSM 419. Homologous recombination appears to have less impact on the polymorphism patterns of the chromosome than on the chromid pSMED01 and megaplasmid pSMED02. Moreover, pSMED02 is a hot spot of insertions and deletions. The whole chromosome is characterized by low sequence polymorphism, consistent with the high density of housekeeping genes. Similarly, the level of polymorphism of symbiosis genes (low) and of genes involved in polysaccharide synthesis (high) may reflect different selection. Finally, some isolates carry genes that may confer adaptations that S. 
medicae WSM 419 lacks, including homologues of genes encoding rhizobitoxine synthesis, iron uptake, response to autoinducer-2, and synthesis of distinct polysaccharides. The presence or absence of these genes was confirmed by PCR in each of these 12 isolates and a further 27 isolates from the same population. All isolates had rhizobitoxine genes, while the other genes were co-distributed, suggesting that they may be on the same mobile element. These results are discussed in relation to the ecology of Medicago symbionts and in the perspective of population genomics studies.}, } @article {pmid21559486, year = {2011}, author = {Colson, P and Gimenez, G and Boyer, M and Fournous, G and Raoult, D}, title = {The giant Cafeteria roenbergensis virus that infects a widespread marine phagocytic protist is a new member of the fourth domain of Life.}, journal = {PloS one}, volume = {6}, number = {4}, pages = {e18935}, pmid = {21559486}, issn = {1932-6203}, mesh = {Capsid ; Cluster Analysis ; Computational Biology/methods ; DNA Viruses/*classification/*genetics ; Evolution, Molecular ; Genome ; Genome, Viral ; Models, Genetic ; Open Reading Frames ; Phagocytosis ; Phylogeny ; }, abstract = {BACKGROUND: A recent work has provided strong arguments in favor of a fourth domain of Life composed of nucleo-cytoplasmic large DNA viruses (NCLDVs). This hypothesis was supported by phylogenetic and phyletic analyses based on a common set of proteins conserved in Eukarya, Archaea, Bacteria, and viruses, and implicated in the functions of information storage and processing. Recently, the genome of a new NCLDV, Cafeteria roenbergensis virus (CroV), was released. The present work aimed to determine if CroV supports the fourth domain of Life hypothesis.

METHODS: A consensus phylogenetic tree of NCLDVs including CroV was generated from a concatenated alignment of four universal proteins of NCLDVs. Some features of the gene complement of CroV and its distribution along the genome were further analyzed. Phylogenetic and phyletic analyses were performed using the previously identified common set of informational genes present in Eukarya, Archaea, Bacteria, and NCLDVs, including CroV.

FINDINGS: Phylogenetic reconstructions indicated that CroV is clearly related to the Mimiviridae family. The comparison between the gene repertoires of CroV and Mimivirus showed similarities regarding the gene contents and genome organization. In addition, the phyletic clustering based on the comparison of informational gene repertoire between Eukarya, Archaea, Bacteria, and NCLDVs unambiguously classified CroV with other NCLDVs and clearly included it in a fourth domain of Life. Taken together, these data suggest that Mimiviridae, including CroV, may have inherited a common gene content probably acquired from a common Mimiviridae ancestor.

CONCLUSIONS: This further analysis of the gene repertoire of CroV consolidated the fourth domain of Life hypothesis and contributed to outline a functional pan-genome for giant viruses infecting phagocytic protistan grazers.}, } @article {pmid21533120, year = {2011}, author = {Rogue, A and Lambert, C and Jossé, R and Antherieu, S and Spire, C and Claude, N and Guillouzo, A}, title = {Comparative gene expression profiles induced by PPARγ and PPARα/γ agonists in human hepatocytes.}, journal = {PloS one}, volume = {6}, number = {4}, pages = {e18816}, pmid = {21533120}, issn = {1932-6203}, mesh = {Adenosine Triphosphate/metabolism ; Adult ; Blotting, Western ; Cells, Cultured ; Cluster Analysis ; *Gene Expression Profiling ; Hepatocytes/*metabolism ; Humans ; PPAR alpha/*agonists ; PPAR gamma/*agonists ; Polymerase Chain Reaction ; Reactive Oxygen Species/metabolism ; }, abstract = {BACKGROUND: Several glitazones (PPARγ agonists) and glitazars (dual PPARα/γ agonists) have been developed to treat hyperglycemia and, simultaneously, hyperglycemia and dyslipidemia, respectively. However, most have caused idiosyncratic hepatic or extrahepatic toxicities through mechanisms that remain largely unknown. Since the liver plays a key role in lipid metabolism, we analyzed changes in gene expression profiles induced by these two types of PPAR agonists in human hepatocytes.

Primary human hepatocytes and the well-differentiated human hepatoma HepaRG cells were exposed to different concentrations of two PPARγ (troglitazone and rosiglitazone) and two PPARα/γ (muraglitazar and tesaglitazar) agonists for 24 h and their transcriptomes were analyzed using human pangenomic Agilent microarrays. Principal Component Analysis, hierarchical clustering and Ingenuity Pathway Analysis® revealed large inter-individual variability in the response of the human hepatocyte populations to the different compounds. Many genes involved in lipid, carbohydrate, xenobiotic and cholesterol metabolism, as well as inflammation and immunity, were regulated by both PPARγ and PPARα/γ agonists in at least a number of human hepatocyte populations and/or HepaRG cells. Only a few genes were selectively deregulated by glitazars when compared to glitazones, indicating that PPARγ and PPARα/γ agonists share most of their target genes. Moreover, some target genes thought to be regulated only in mouse or to be expressed in Kupffer cells were also found to be responsive in human hepatocytes and HepaRG cells.

CONCLUSIONS/SIGNIFICANCE: This first comprehensive analysis of gene regulation by PPARγ and PPARα/γ agonists favors the conclusion that glitazones and glitazars share most of their target genes and induce large differential changes in gene profiles in human hepatocytes depending on hepatocyte donor, the compound class and/or individual compound, thereby supporting the occurrence of idiosyncratic toxicity in some patients.}, } @article {pmid21521245, year = {2011}, author = {Andam, CP and Fournier, GP and Gogarten, JP}, title = {Multilevel populations and the evolution of antibiotic resistance through horizontal gene transfer.}, journal = {FEMS microbiology reviews}, volume = {35}, number = {5}, pages = {756-767}, doi = {10.1111/j.1574-6976.2011.00274.x}, pmid = {21521245}, issn = {1574-6976}, mesh = {Alleles ; Amino Acyl-tRNA Synthetases/genetics ; Anti-Bacterial Agents/*pharmacology ; Bacteria/*drug effects/*genetics ; *Drug Resistance, Bacterial ; Evolution, Molecular ; *Gene Transfer, Horizontal ; Phylogeny ; *Recombination, Genetic ; *Selection, Genetic ; }, abstract = {Horizontal gene transfer (HGT) can create diversity in the genetic repertoire of a lineage. Successful gene transfer likely occurs more frequently between more closely related organisms, leading to the formation of higher-level exchange groups that in some respects are comparable to single-species populations. Genes that appear fixed in a single species can be replaced through distant homologs or iso-functional analogs acquired through HGT. These genes may originate from other species or they may be acquired by an individual strain from the species pan-genome. Because of their similarity to alleles in a population, we label these gene variants that are exchanged between related species as homeoalleles. In a case study, we show that biased gene transfer plays an important role in the evolution of aminoacyl-tRNA synthetases (aaRS). 
Many microorganisms make use of these genes against naturally occurring antibiotics. We suggest that the resistance against naturally occurring antibiotics is the likely driving force behind the frequent switching between divergent aaRS types and the reason for the maintenance of these homeoalleles in higher-level exchange groups. Resistance to naturally occurring antibiotics may lead to the maintenance of different types of aminoacyl-tRNA synthetases in Bacteria through gene transfer.}, } @article {pmid21515775, year = {2011}, author = {Fauvart, M and Sánchez-Rodríguez, A and Beullens, S and Marchal, K and Michiels, J}, title = {Genome sequence of Rhizobium etli CNPAF512, a nitrogen-fixing symbiont isolated from bean root nodules in Brazil.}, journal = {Journal of bacteriology}, volume = {193}, number = {12}, pages = {3158-3159}, pmid = {21515775}, issn = {1098-5530}, mesh = {Brazil ; Fabaceae/*microbiology ; Gene Expression Regulation, Bacterial/physiology ; *Genome, Bacterial ; Molecular Sequence Data ; Nitrogen Fixation ; Rhizobium etli/*genetics ; Root Nodules, Plant/*microbiology ; }, abstract = {Rhizobium etli is a Gram-negative soil-dwelling alphaproteobacterium that carries out symbiotic biological nitrogen fixation in close association with legume hosts. R. etli strains exhibit high sequence divergence and are geographically structured, with a potentially dramatic influence on the outcome of symbiosis. Here, we present the genome sequence of R. etli CNPAF512, a Brazilian isolate from bean nodules. We anticipate that the availability of genome sequences of R. etli strains from distinctly different areas will provide valuable new insights into the geographic mosaic of the R. 
etli pangenome and the evolutionary dynamics that shape it.}, } @article {pmid21511245, year = {2011}, author = {Rousset, B and Ziercher, L and Borson-Chazot, F}, title = {Molecular analyses of thyroid tumors for diagnosis of malignancy on fine-needle aspiration biopsies and for prognosis of invasiveness on surgical specimens.}, journal = {Annales d'endocrinologie}, volume = {72}, number = {2}, pages = {125-128}, doi = {10.1016/j.ando.2011.03.017}, pmid = {21511245}, issn = {2213-3941}, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Biopsy, Fine-Needle ; Carcinoma ; Carcinoma, Papillary ; Child ; Female ; Gene Expression Profiling ; Humans ; Male ; Middle Aged ; Mutation ; Prognosis ; Proto-Oncogene Proteins B-raf/genetics ; Receptors, G-Protein-Coupled/genetics ; Thyroid Cancer, Papillary ; Thyroid Neoplasms/*genetics/*pathology/secondary ; Young Adult ; }, abstract = {High throughput genetic and genomic analyses have allowed the identification of series of genes exhibiting either distinct expression profiles or a particular mutational status in the different types or subtypes of thyroid tumors. The use of molecular data to improve the preoperative diagnosis of thyroid cancer on material from fine-needle aspiration biopsy (FNAB) is in the course of validation by numerous teams throughout the world. We have proposed a molecular test based on the expression level of a series of 19 genes, capable of discriminating malignant from benign tumors [15]. A prospective study aiming at the clinical validation of the molecular test has been performed on a cohort of 730 patients with a thyroid nodule. In patients subjected to tumor resection (≈ 220), the preoperative molecular diagnosis (generated on FNAB material from analyses of the expression level of the 19 genes) was compared to the postoperative diagnosis given by the pathologist (used as reference). 
Treatment and follow-up of the serious forms of thyroid cancer should benefit by the early identification of tumors with a metastatic potential using molecular characteristics differentiating invasive and non-invasive thyroid carcinomas. We have performed genetic and genomic analyses on a series of 200 papillary thyroid carcinomas (non-invasive or NI-PTC, 50%; invasive or I-PTC, 50%). BRAF(V600E) mutation or/and RET/PTC gene rearrangement have been detected in less than 25% of NI-PTC but in more than 75% of I-PTC. Pan-genomic analyses (Agilent microarray) revealed that 1373 genes are differentially expressed (fold change greater than 2) in NI-PTC as compared to I-PTC samples. The majority of genes (≈ 1200) are overexpressed in I-PTC. Data related to the two domains: diagnosis and prognosis of thyroid cancer will be presented at 2011 International H.P. KLOTZ conference on Clinical Endocrinology.}, } @article {pmid21496010, year = {2012}, author = {D'Amours, G and Kibar, Z and Mathonnet, G and Fetni, R and Tihy, F and Désilets, V and Nizard, S and Michaud, JL and Lemyre, E}, title = {Whole-genome array CGH identifies pathogenic copy number variations in fetuses with major malformations and a normal karyotype.}, journal = {Clinical genetics}, volume = {81}, number = {2}, pages = {128-141}, doi = {10.1111/j.1399-0004.2011.01687.x}, pmid = {21496010}, issn = {1399-0004}, mesh = {Abnormalities, Multiple/diagnosis/*genetics ; Chromosome Aberrations ; *Comparative Genomic Hybridization ; *DNA Copy Number Variations ; Fetus ; Humans ; Intellectual Disability/diagnosis/genetics ; *Karyotype ; Prenatal Diagnosis ; Reproducibility of Results ; }, abstract = {Despite a wide range of clinical tools, the etiology of mental retardation and multiple congenital malformations remains unknown for many patients. 
Array-based comparative genomic hybridization (aCGH) has proven to be a valuable tool in these cases, as its pangenomic coverage allows the identification of chromosomal aberrations that are undetectable by other genetic methods targeting specific genomic regions. Therefore, aCGH is increasingly used in clinical genetics, both in the postnatal and the prenatal settings. While the diagnostic yield in the postnatal population has been established at 10-12%, studies investigating fetuses have reported variable results. We used whole-genome aCGH to investigate fetuses presenting at least one major malformation detected on ultrasound, but for whom standard genetic analyses (including karyotype) failed to provide a diagnosis. We identified a clinically significant chromosomal aberration in 8.2% of tested fetuses (4/49), and a result of unclear clinical significance in 12.2% of tested fetuses (6/49). Our results document the value of whole-genome aCGH as a prenatal diagnostic tool and highlight the interpretation difficulties associated with copy number variations of unclear significance.}, } @article {pmid21453472, year = {2011}, author = {Fuchs, TM and Brandt, K and Starke, M and Rattei, T}, title = {Shotgun sequencing of Yersinia enterocolitica strain W22703 (biotype 2, serotype O:9): genomic evidence for oscillation between invertebrates and mammals.}, journal = {BMC genomics}, volume = {12}, number = {}, pages = {168}, pmid = {21453472}, issn = {1471-2164}, mesh = {Adaptation, Biological/genetics ; Animals ; *Comparative Genomic Hybridization ; DNA, Bacterial/genetics ; *Evolution, Molecular ; Filaggrin Proteins ; *Genome, Bacterial ; Genomic Library ; High-Throughput Nucleotide Sequencing ; Humans ; Insecta/microbiology ; Molecular Sequence Annotation ; Multigene Family ; Sequence Analysis, DNA ; Software ; Virulence/genetics ; Yersinia enterocolitica/*genetics/pathogenicity ; }, abstract = {BACKGROUND: Yersinia enterocolitica strains responsible for mild 
gastroenteritis in humans are very diverse with respect to their metabolic and virulence properties. Strain W22703 (biotype 2, serotype O:9) was recently identified to possess nematocidal and insecticidal activity. To better understand the relationship between pathogenicity towards insects and humans, we compared the W22703 genome with that of the highly pathogenic strain 8081 (biotype1B; serotype O:8), the only Y. enterocolitica strain sequenced so far.

RESULTS: We used whole-genome shotgun data to assemble, annotate and analyse the sequence of strain W22703. Numerous factors assumed to contribute to enteric survival and pathogenesis, among them osmoregulated periplasmic glucan, hydrogenases, cobalamin-dependent pathways, iron uptake systems and the Yersinia genome island 1 (YGI-1) involved in tight adherence, were identified to be common to the 8081 and W22703 genomes. However, sets of ~550 genes were revealed to be specific for each of them in comparison to the other strain. The plasticity zone (PZ) of 142 kb in the W22703 genome carries an ancient flagellar cluster Flg-2 of ~40 kb, but it lacks the pathogenicity island YAPI(Ye), the secretion system ysa and yts1, and other virulence determinants of the 8081 PZ. Its composition underlines the prominent variability of this genome region and demonstrates its contribution to the higher pathogenicity of biotype 1B strains with respect to W22703. A novel type three secretion system of mosaic structure was found in the genome of W22703 that is absent in the sequenced strains of the human pathogenic Yersinia species, but conserved in the genomes of the apathogenic species. We identified several regions of differences in W22703 that mainly code for transporters, regulators, metabolic pathways, and defence factors.

CONCLUSION: The W22703 sequence analysis revealed a genome composition distinct from other pathogenic Yersinia enterocolitica strains, thus contributing novel data to the Y. enterocolitica pan-genome. This study also sheds further light on the strategies of this pathogen to cope with its environments.}, } @article {pmid21396969, year = {2011}, author = {Schneiker-Bekel, S and Wibberg, D and Bekel, T and Blom, J and Linke, B and Neuweger, H and Stiens, M and Vorhölter, FJ and Weidner, S and Goesmann, A and Pühler, A and Schlüter, A}, title = {The complete genome sequence of the dominant Sinorhizobium meliloti field isolate SM11 extends the S. meliloti pan-genome.}, journal = {Journal of biotechnology}, volume = {155}, number = {1}, pages = {20-33}, doi = {10.1016/j.jbiotec.2010.12.018}, pmid = {21396969}, issn = {1873-4863}, mesh = {Bacteriophages/genetics ; *Chromosomes, Bacterial ; Ethylenes/metabolism ; Evolution, Molecular ; *Genome, Bacterial ; Genomics ; Medicago sativa/microbiology ; Nitrogen Fixation ; Nitrous Oxide/metabolism ; Plasmids/*genetics ; Root Nodules, Plant/microbiology ; Sequence Analysis, DNA ; Sinorhizobium meliloti/*genetics/isolation & purification/metabolism ; Symbiosis ; }, abstract = {Isolates of the symbiotic nitrogen-fixing species Sinorhizobium meliloti usually contain a chromosome and two large megaplasmids encoding functions that are absolutely required for the specific interaction of the microsymbiont with corresponding host plants leading to an effective symbiosis. The complete genome sequence, including the megaplasmids pSmeSM11c (related to pSymA) and pSmeSM11d (related to pSymB), was established for the dominant, indigenous S. meliloti strain SM11 that had been isolated during a long-term field release experiment with genetically modified S. meliloti strains. The chromosome, the largest replicon of S. meliloti SM11, is 3,908,022bp in size and codes for 3785 predicted protein coding sequences. 
The size of megaplasmid pSmeSM11c is 1,633,319bp and it contains 1760 predicted protein coding sequences whereas megaplasmid pSmeSM11d is 1,632,395bp in size and comprises 1548 predicted coding sequences. The gene content of the SM11 chromosome is quite similar to that of the reference strain S. meliloti Rm1021. Comparison of pSmeSM11c to pSymA of the reference strain revealed that many gene regions of these replicons are variable, supporting the assessment that pSymA is a major hot-spot for intra-specific differentiation. Plasmids pSymA and pSmeSM11c both encode unique genes. Large gene regions of pSmeSM11c are closely related to corresponding parts of Sinorhizobium medicae WSM419 plasmids. Moreover, pSmeSM11c encodes further novel gene regions, e.g. additional plasmid survival genes (partition, mobilisation and conjugative transfer genes), acdS encoding 1-aminocyclopropane-1-carboxylate deaminase involved in modulation of the phytohormone ethylene level and genes having predicted functions in degradative capabilities, stress response, amino acid metabolism and associated pathways. In contrast to Rm1021 pSymA and pSmeSM11c, megaplasmid pSymB of strain Rm1021 and pSmeSM11d are highly conserved showing extensive synteny with only few rearrangements. Most remarkably, pSmeSM11b contains a new gene cluster predicted to be involved in polysaccharide biosynthesis. Compilation of the S. meliloti SM11 genome sequence contributes to an extension of the S. 
meliloti pan-genome.}, } @article {pmid21390229, year = {2011}, author = {van Passel, MW and Kant, R and Zoetendal, EG and Plugge, CM and Derrien, M and Malfatti, SA and Chain, PS and Woyke, T and Palva, A and de Vos, WM and Smidt, H}, title = {The genome of Akkermansia muciniphila, a dedicated intestinal mucin degrader, and its use in exploring intestinal metagenomes.}, journal = {PloS one}, volume = {6}, number = {3}, pages = {e16876}, pmid = {21390229}, issn = {1932-6203}, mesh = {Bacteria/*genetics ; Base Sequence ; DNA, Bacterial/genetics ; Gene Library ; Genome, Bacterial/*genetics ; Humans ; Intestines/*microbiology ; Metagenome/*genetics ; Mucins/*metabolism ; Nucleotides/genetics ; RNA, Ribosomal, 16S/genetics ; }, abstract = {BACKGROUND: The human gastrointestinal tract contains a complex community of microbes, fulfilling important health-promoting functions. However, this vast complexity of species hampers the assignment of responsible organisms to these functions. Recently, Akkermansia muciniphila, a new species from the deeply branched phylum Verrucomicrobia, was isolated from the human intestinal tract based on its capacity to efficiently use mucus as a carbon and nitrogen source. This anaerobic resident is associated with the protective mucus lining of the intestines.

In order to uncover the functional potential of A. muciniphila, its genome was sequenced and annotated. It was found to contain numerous candidate mucinase-encoding genes, but to lack genes encoding canonical mucus-binding domains. Numerous phage-associated sequences found throughout the genome indicate that viruses have played an important part in the evolution of this species. Furthermore, we mined 37 GI tract metagenomes for the presence and genetic diversity of Akkermansia sequences. Out of 37, eleven contained 16S ribosomal RNA gene sequences that are >95% identical to that of A. muciniphila. In addition, these libraries were found to contain large amounts of Akkermansia DNA based on average nucleotide identity scores, which indicated in one subject co-colonization by different Akkermansia phylotypes. An additional 12 libraries also contained Akkermansia sequences, making a total of ∼16 Mbp of new Akkermansia pangenomic DNA. The relative abundance of Akkermansia DNA varied from <0.01% to nearly 4% of the assembled metagenomic reads. Finally, by testing a large collection of full-length 16S sequences, we find at least eight different representative species in the genus Akkermansia.

CONCLUSIONS/SIGNIFICANCE: These large repositories allow us to further mine for genetic heterogeneity and species diversity in the genus Akkermansia, providing novel insight towards the functionality of this abundant inhabitant of the human intestinal tract.}, } @article {pmid21379339, year = {2011}, author = {Frese, SA and Benson, AK and Tannock, GW and Loach, DM and Kim, J and Zhang, M and Oh, PL and Heng, NC and Patil, PB and Juge, N and Mackenzie, DA and Pearson, BM and Lapidus, A and Dalin, E and Tice, H and Goltsman, E and Land, M and Hauser, L and Ivanova, N and Kyrpides, NC and Walter, J}, title = {The evolution of host specialization in the vertebrate gut symbiont Lactobacillus reuteri.}, journal = {PLoS genetics}, volume = {7}, number = {2}, pages = {e1001314}, pmid = {21379339}, issn = {1553-7404}, support = {BBS/E/F/00044452/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; *Evolution, Molecular ; Gastrointestinal Tract/*microbiology ; Genetic Fitness ; Genome, Bacterial/genetics ; Genomics ; Host Specificity/*genetics ; Humans ; Limosilactobacillus reuteri/*genetics ; Polymerase Chain Reaction ; Reproducibility of Results ; Rodentia/microbiology ; Species Specificity ; Symbiosis/*genetics ; Vertebrates/*microbiology ; }, abstract = {Recent research has provided mechanistic insight into the important contributions of the gut microbiota to vertebrate biology, but questions remain about the evolutionary processes that have shaped this symbiosis. In the present study, we showed in experiments with gnotobiotic mice that the evolution of Lactobacillus reuteri with rodents resulted in the emergence of host specialization. To identify genomic events marking adaptations to the murine host, we compared the genome of the rodent isolate L. reuteri 100-23 with that of the human isolate L. reuteri F275, and we identified hundreds of genes that were specific to each strain. 
In order to differentiate true host-specific genome content from strain-level differences, comparative genome hybridizations were performed to query 57 L. reuteri strains originating from six different vertebrate hosts in combination with genome sequence comparisons of nine strains encompassing five phylogenetic lineages of the species. This approach revealed that rodent strains, although showing a high degree of genomic plasticity, possessed a specific genome inventory that was rare or absent in strains from other vertebrate hosts. The distinct genome content of L. reuteri lineages reflected the niche characteristics in the gastrointestinal tracts of their respective hosts, and inactivation of seven out of eight representative rodent-specific genes in L. reuteri 100-23 resulted in impaired ecological performance in the gut of mice. The comparative genomic analyses suggested fundamentally different trends of genome evolution in rodent and human L. reuteri populations, with the former possessing a large and adaptable pan-genome while the latter being subjected to a process of reductive evolution. 
In conclusion, this study provided experimental evidence and a molecular basis for the evolution of host specificity in a vertebrate gut symbiont, and it identified genomic events that have shaped this process.}, } @article {pmid21375712, year = {2011}, author = {Kant, R and Blom, J and Palva, A and Siezen, RJ and de Vos, WM}, title = {Comparative genomics of Lactobacillus.}, journal = {Microbial biotechnology}, volume = {4}, number = {3}, pages = {323-332}, pmid = {21375712}, issn = {1751-7915}, mesh = {Animals ; Bacterial Proteins/genetics ; Base Composition ; Cattle ; Chickens ; Dairy Products/microbiology ; Feces/microbiology ; Genome Size ; *Genome, Bacterial ; *Genomics ; Humans ; Intestines/microbiology ; Lactobacillus/classification/*genetics/isolation & purification ; Molecular Sequence Data ; Open Reading Frames ; Phylogeny ; }, abstract = {The genus Lactobacillus includes a diverse group of bacteria consisting of many species that are associated with fermentations of plants, meat or milk. In addition, various lactobacilli are natural inhabitants of the intestinal tract of humans and other animals. Finally, several Lactobacillus strains are marketed as probiotics as their consumption can confer a health benefit to host. Presently, 154 Lactobacillus species are known and a growing fraction of these are subject to draft genome sequencing. However, complete genome sequences are needed to provide a platform for detailed genomic comparisons. Therefore, we selected a total of 20 genomes of various Lactobacillus strains for which complete genomic sequences have been reported. These genomes had sizes varying from 1.8 to 3.3 Mb and other characteristic features, such as G+C content that ranged from 33% to 51%. The Lactobacillus pan genome was found to consist of approximately 14 000 protein-encoding genes while all 20 genomes shared a total of 383 sets of orthologous genes that defined the Lactobacillus core genome (LCG). 
Based on advanced phylogeny of the proteins encoded by this LCG, we grouped the 20 strains into three main groups and defined core group genes present in all genomes of a single group, signature group genes shared in all genomes of one group but absent in all other Lactobacillus genomes, and Group-specific ORFans present in core group genes of one group and absent in all other complete genomes. The latter are of specific value in defining the different groups of genomes. The study provides a platform for present individual comparisons as well as future analysis of new Lactobacillus genomes.}, } @article {pmid21372607, year = {2011}, author = {Cancello, R and Rouault, C and Guilhem, G and Bedel, JF and Poitou, C and Di Blasio, AM and Basdevant, A and Tordjman, J and Clément, K}, title = {Urokinase plasminogen activator receptor in adipose tissue macrophages of morbidly obese subjects.}, journal = {Obesity facts}, volume = {4}, number = {1}, pages = {17-25}, pmid = {21372607}, issn = {1662-4033}, mesh = {Adipose Tissue, White/*immunology/metabolism/pathology ; Adult ; Biomarkers/analysis ; Female ; Humans ; Inflammation/*pathology ; Macrophages/immunology/metabolism/*pathology ; Obesity, Morbid/immunology/metabolism/*pathology ; Receptors, Urokinase Plasminogen Activator/*metabolism ; }, abstract = {OBJECTIVE: At present, circulating markers characterizing the inflammatory infiltration of white adipose tissue (WAT) in human obesity are not well known. We previously identified, by a pangenomic approach (microarrays), the urokinase plasminogen activator receptor (PLAUR or CD87) as a potential marker of subcutaneous adipose tissue macrophage infiltration (ATM).

METHOD: We studied i) the presence of PLAUR protein in WAT; ii) the PLAUR amount in plasma of obese patients; iii) the circulating variations during surgery-induced weight loss, and iv) the correlations between PLAUR circulating levels and bioclinical parameters.

RESULTS: We observed that PLAUR is preferentially expressed by infiltrating ATMs, with a typical localization on macrophage membrane. Circulating soluble PLAUR levels were significantly elevated in obese patients compared to lean controls. However, despite a trend towards a decrease 3 months after weight loss, PLAUR plasma levels were not modulated during a 1-year weight loss follow-up, suggesting the contribution of secretion sites other than subcutaneous WAT in obese patients.

CONCLUSIONS: These findings indicate that PLAUR mRNA expression could be used for the estimation of local subcutaneous ATMs infiltration in obese patients, but it cannot be used as a systemic marker of this inflammatory infiltration in dynamic phases of weight loss.}, } @article {pmid21325297, year = {2011}, author = {Ambros, IM and Brunner, B and Aigner, G and Bedwell, C and Beiske, K and Bénard, J and Bown, N and Combaret, V and Couturier, J and Defferrari, R and Gross, N and Jeison, M and Lunec, J and Marques, B and Martinsson, T and Mazzocco, K and Noguera, R and Schleiermacher, G and Speleman, F and Stallings, R and Tonini, GP and Tweddle, DA and Valent, A and Vicha, A and Van Roy, N and Villamon, E and Ziegler, A and Preuner, S and Drobics, M and Ladenstein, R and Amann, G and Schuit, RJ and Pötschger, U and Ambros, PF}, title = {A multilocus technique for risk evaluation of patients with neuroblastoma.}, journal = {Clinical cancer research : an official journal of the American Association for Cancer Research}, volume = {17}, number = {4}, pages = {792-804}, doi = {10.1158/1078-0432.CCR-10-0830}, pmid = {21325297}, issn = {1557-3265}, mesh = {Computer Graphics ; Gene Amplification ; *Genetic Loci ; *Genetic Markers ; Humans ; Limit of Detection ; Molecular Diagnostic Techniques/*methods ; Mutation ; N-Myc Proto-Oncogene Protein ; Neuroblastoma/*genetics/pathology ; Nuclear Proteins/genetics ; Oncogene Proteins/genetics ; Risk Assessment ; }, abstract = {PURPOSE: Precise and comprehensive analysis of neuroblastoma genetics is essential for accurate risk evaluation and only pangenomic/multilocus approaches fulfill the present-day requirements. We present the establishment and validation of the PCR-based multiplex ligation-dependent probe amplification (MLPA) technique for neuroblastoma.

EXPERIMENTAL DESIGN: A neuroblastoma-specific MLPA kit was designed by the SIOP Europe Neuroblastoma Biology Committee in cooperation with MRC-Holland. The contained target sequences cover 19 chromosomal arms and reference loci. Validation was performed by single-locus and pangenomic techniques (n = 174). Dilution experiments for determination of the minimal tumor cell percentage were performed, and reproducibility was checked by interlaboratory testing (n = 15). A further 156 neuroblastomas were used for establishing the amplification cutoff level.

RESULTS: The MLPA technique was tested in 310 neuroblastomas and 8 neuroblastoma cell lines (including validation and amplification cutoff level testing). Intertechnique validation showed a high concordance rate (99.5%). Interlaboratory MLPA testing (κ = 0.95, P < 0.01) revealed 7 discrepant of 1,490 results (0.5%). Validation by pangenomic techniques showed a single discordance of 190 consensus results (0.5%). The test results led to formulation of interpretation standards and to a kit revision. The minimal tumor cell percentage was fixed at 60%.

CONCLUSIONS: The recently designed neuroblastoma-specific MLPA kit covers all chromosomal regions demanded by the International Neuroblastoma Risk Group for therapy stratification and includes all hitherto described genetic loci of prognostic interest for future studies and can be modified or extended at any time. Moreover, the technique is cost effective, reliable, and robust with a high interlaboratory and intertechnique concordance.}, } @article {pmid21317366, year = {2011}, author = {Hansen, EE and Lozupone, CA and Rey, FE and Wu, M and Guruge, JL and Narra, A and Goodfellow, J and Zaneveld, JR and McDonald, DT and Goodrich, JA and Heath, AC and Knight, R and Gordon, JI}, title = {Pan-genome of the dominant human gut-associated archaeon, Methanobrevibacter smithii, studied in twins.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {108 Suppl 1}, number = {Suppl 1}, pages = {4599-4606}, pmid = {21317366}, issn = {1091-6490}, support = {AA09022/AA/NIAAA NIH HHS/United States ; K05 AA017688/AA/NIAAA NIH HHS/United States ; P01 DK078669/DK/NIDDK NIH HHS/United States ; R01 DK030292/DK/NIDDK NIH HHS/United States ; K01 DK090285/DK/NIDDK NIH HHS/United States ; T32 GM07200-31/GM/NIGMS NIH HHS/United States ; DK70977/DK/NIDDK NIH HHS/United States ; T32 GM142607/GM/NIGMS NIH HHS/United States ; R01 AA017915/AA/NIAAA NIH HHS/United States ; R37 DK030292/DK/NIDDK NIH HHS/United States ; DK78669/DK/NIDDK NIH HHS/United States ; R01 DK070977/DK/NIDDK NIH HHS/United States ; T32 GM008759/GM/NIGMS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; R01 AA009022/AA/NIAAA NIH HHS/United States ; T32 GM007200/GM/NIGMS NIH HHS/United States ; DK30292/DK/NIDDK NIH HHS/United States ; }, mesh = {Adhesins, Bacterial/*genetics ; Adult ; Base Sequence ; Female ; Formates/analysis ; Gastrointestinal Tract/*microbiology ; *Genome, Archaeal ; Humans ; Metagenomics ; Methanobrevibacter/*genetics/metabolism 
; Molecular Sequence Data ; Polymorphism, Single Nucleotide/genetics ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Species Specificity ; *Twins ; }, abstract = {The human gut microbiota harbors three main groups of H(2)-consuming microbes: methanogens including the dominant archaeon, Methanobrevibacter smithii, a polyphyletic group of acetogens, and sulfate-reducing bacteria. Defining their roles in the gut is important for understanding how hydrogen metabolism affects the efficiency of fermentation of dietary components. We quantified methanogens in fecal samples from 40 healthy adult female monozygotic (MZ) and 28 dizygotic (DZ) twin pairs, analyzed bacterial 16S rRNA datasets generated from their fecal samples to identify taxa that co-occur with methanogens, sequenced the genomes of 20 M. smithii strains isolated from families of MZ and DZ twins, and performed RNA-Seq of a subset of strains to identify their responses to varied formate concentrations. The concordance rate for methanogen carriage was significantly higher for MZ versus DZ twin pairs. Co-occurrence analysis revealed 22 bacterial species-level taxa positively correlated with methanogens: all but two were members of the Clostridiales, with several being, or related to, known hydrogen-producing and -consuming bacteria. The M. smithii pan-genome contains 987 genes conserved in all strains, and 1,860 variably represented genes. Strains from MZ and DZ twin pairs had a similar degree of shared genes and SNPs, and were significantly more similar than strains isolated from mothers or members of other families. The 101 adhesin-like proteins (ALPs) in the pan-genome (45 ± 6 per strain) exhibit strain-specific differences in expression and responsiveness to formate. We hypothesize that M. 
smithii strains use their different repertoires of ALPs to create diversity in their metabolic niches, by allowing them to establish syntrophic relationships with bacterial partners with differing metabolic capabilities and patterns of co-occurrence.}, } @article {pmid21308851, year = {2011}, author = {Yano, A and Nicol, B and Guerin, A and Guiguen, Y}, title = {The duplicated rainbow trout (Oncorhynchus mykiss) T-box transcription factors 1, tbx1a and tbx1b, are up-regulated during testicular development.}, journal = {Molecular reproduction and development}, volume = {78}, number = {3}, pages = {172-180}, doi = {10.1002/mrd.21279}, pmid = {21308851}, issn = {1098-2795}, mesh = {Amino Acid Sequence ; Animals ; Aromatase Inhibitors/pharmacology ; Computational Biology ; DNA Primers/genetics ; Estrogens/pharmacology ; Gene Expression Regulation, Developmental/drug effects/*physiology ; Genes, Duplicate/genetics ; In Situ Hybridization ; Male ; Molecular Sequence Data ; Oncorhynchus mykiss/*growth & development ; Phylogeny ; Polymerase Chain Reaction ; Sequence Alignment ; T-Box Domain Proteins/genetics/*metabolism ; Testis/*growth & development/metabolism ; Tretinoin/pharmacology ; }, abstract = {Tbx1 is a member of the T-box transcription factor gene family involved in embryogenesis and organogenesis. Recently, within a pan-genomic screen using rainbow trout (Oncorhynchus mykiss) cDNA microarrays, we identified a tbx1 homolog with testicular over-expression during sex differentiation. Here, we characterized two very similar rainbow trout tbx1 paralogs, tbx1a and tbx1b. In adult tissues, tbx1a expression is restricted to the gonads, with high expression in the testis, while tbx1b is more widely expressed in gonads, gills, brains, muscle, and skin. During gonadal differentiation, both genes are differentially expressed in favor of testis formation shortly after hatching. 
These genes are expressed in somatic cells surrounding germ cells of the differentiating testis, while no or only weak expression was observed in the differentiating ovary. tbx1a and tbx1b were also both down-regulated in the differentiating testis during feminization with estrogens and up-regulated in the differentiating ovary during masculinization with an aromatase inhibitor. These results suggest that tbx1a and tbx1b are probably involved in the regulation of testicular differentiation in rainbow trout. Since Tbx1 is known to interact with the retinoic acid (RA) signaling pathway, we also examined the effect of RA on the rainbow trout tbx1 expression pattern. Expression of tbx1a and tbx1b was down-regulated in RA-treated male gonads, suggesting that tbx1 interacts with the RA signaling pathway and thus could be involved in the control of rainbow trout gonadal differentiation.}, } @article {pmid21304685, year = {2010}, author = {Snipen, L and Ussery, DW}, title = {Standard operating procedure for computing pangenome trees.}, journal = {Standards in genomic sciences}, volume = {2}, number = {1}, pages = {135-141}, pmid = {21304685}, issn = {1944-3277}, abstract = {We present the pan-genome tree as a tool for visualizing similarities and differences between closely related microbial genomes within a species or genus. Distance between genomes is computed as a weighted relative Manhattan distance based on gene family presence/absence. The weights can be chosen with emphasis on groups of gene families conserved to various degrees inside the pan-genome. The software is available for free as an R-package.}, } @article {pmid21264216, year = {2011}, author = {Hao, P and Zheng, H and Yu, Y and Ding, G and Gu, W and Chen, S and Yu, Z and Ren, S and Oda, M and Konno, T and Wang, S and Li, X and Ji, ZS and Zhao, G}, title = {Complete sequencing and pan-genomic analysis of Lactobacillus delbrueckii subsp. 
bulgaricus reveal its genetic basis for industrial yogurt production.}, journal = {PloS one}, volume = {6}, number = {1}, pages = {e15964}, pmid = {21264216}, issn = {1932-6203}, mesh = {Fermentation ; Food Industry ; Genome, Bacterial/*genetics ; Lactobacillus delbrueckii/*genetics ; Phylogeny ; Sequence Analysis, DNA ; Yogurt/*microbiology ; }, abstract = {Lactobacillus delbrueckii subsp. bulgaricus (Lb. bulgaricus) is an important species of Lactic Acid Bacteria (LAB) used for cheese and yogurt fermentation. The genome of Lb. bulgaricus 2038, an industrial strain mainly used for yogurt production, was completely sequenced and compared against the other two ATCC collection strains of the same subspecies. Specific physiological properties of strain 2038, such as lysine biosynthesis, formate production, aspartate-related carbon-skeleton intermediate metabolism, unique EPS synthesis and efficient DNA restriction/modification systems, are all different from those of the collection strains that might benefit the industrial production of yogurt. Other common features shared by Lb. bulgaricus strains, such as efficient protocooperation with Streptococcus thermophilus and lactate production as well as well-equipped stress tolerance mechanisms may account for it being selected originally for yogurt fermentation industry. Multiple lines of evidence suggested that Lb. 
bulgaricus 2038 was genetically closer to the common ancestor of the subspecies than the other two sequenced collection strains, probably due to a strict industrial maintenance process for strain 2038 that might have halted its genome decay and sustained a gene network suitable for large scale yogurt production.}, } @article {pmid21239590, year = {2011}, author = {Vieira, G and Sabarly, V and Bourguignon, PY and Durot, M and Le Fèvre, F and Mornico, D and Vallenet, D and Bouvet, O and Denamur, E and Schachter, V and Médigue, C}, title = {Core and panmetabolism in Escherichia coli.}, journal = {Journal of bacteriology}, volume = {193}, number = {6}, pages = {1461-1472}, pmid = {21239590}, issn = {1098-5530}, mesh = {Computational Biology ; Escherichia coli/*genetics/*metabolism ; Genetic Variation ; *Genome, Bacterial ; Metabolic Networks and Pathways/*genetics ; }, abstract = {Escherichia coli exhibits a wide range of lifestyles encompassing commensalism and various pathogenic behaviors which its highly dynamic genome contributes to develop. How environmental and host factors shape the genetic structure of E. coli strains remains, however, largely unknown. Following a previous study of E. coli genomic diversity, we investigated its diversity at the metabolic level by building and analyzing the genome-scale metabolic networks of 29 E. coli strains (8 commensal and 21 pathogenic strains, including 6 Shigella strains). Using a tailor-made reconstruction strategy, we significantly improved the completeness and accuracy of the metabolic networks over default automatic reconstruction processes. Among the 1,545 reactions forming E. coli panmetabolism, 885 reactions were common to all strains. This high proportion of core reactions (57%) was found to be in sharp contrast to the low proportion (13%) of core genes in the E. coli pangenome, suggesting less diversity of metabolic functions compared to that of all gene functions. 
Core reactions were significantly overrepresented among biosynthetic reactions compared to the more variable degradation processes. Differences between metabolic networks were found to follow E. coli phylogeny rather than pathogenic phenotypes, except for Shigella networks, which were significantly more distant from the others. This suggests that most metabolic changes in non-Shigella strains were not driven by their pathogenic phenotypes. Using a supervised method, we were yet able to identify small sets of reactions related to pathogenicity or commensalism. The quality of our reconstructed networks also makes them reliable bases for building metabolic models.}, } @article {pmid21232151, year = {2011}, author = {Kislyuk, AO and Haegeman, B and Bergman, NH and Weitz, JS}, title = {Genomic fluidity: an integrative view of gene diversity within microbial populations.}, journal = {BMC genomics}, volume = {12}, number = {}, pages = {32}, pmid = {21232151}, issn = {1471-2164}, mesh = {Computer Simulation ; Genetic Variation/*genetics ; Genome, Bacterial/*genetics ; Models, Statistical ; Models, Theoretical ; }, abstract = {BACKGROUND: The dual concepts of pan and core genomes have been widely adopted as means to assess the distribution of gene families within microbial species and genera. The core genome is the set of genes shared by a group of organisms; the pan genome is the set of all genes seen in any of these organisms. A variety of methods have provided drastically different estimates of the sizes of pan and core genomes from sequenced representatives of the same groups of bacteria.

RESULTS: We use a combination of mathematical, statistical and computational methods to show that current predictions of pan and core genome sizes may have no correspondence to true values. Pan and core genome size estimates are problematic because they depend on the estimation of the occurrence of rare genes and genomes, respectively, which are difficult to estimate precisely because they are rare. Instead, we introduce and evaluate a robust metric - genomic fluidity - to categorize the gene-level similarity among groups of sequenced isolates. Genomic fluidity is a measure of the dissimilarity of genomes evaluated at the gene level.

CONCLUSIONS: The genomic fluidity of a population can be estimated accurately given a small number of sequenced genomes. Further, the genomic fluidity of groups of organisms can be compared robustly despite variation in algorithms used to identify genes and their homologs. As such, we recommend that genomic fluidity be used in place of pan and core genome size estimates when assessing gene diversity within genomes of a species or a group of closely related organisms.}, } @article {pmid21209903, year = {2010}, author = {Cizkova, M and Cizeron-Clairac, G and Vacher, S and Susini, A and Andrieu, C and Lidereau, R and Bièche, I}, title = {Gene expression profiling reveals new aspects of PIK3CA mutation in ERalpha-positive breast cancer: major implication of the Wnt signaling pathway.}, journal = {PloS one}, volume = {5}, number = {12}, pages = {e15647}, pmid = {21209903}, issn = {1932-6203}, mesh = {Breast Neoplasms/*metabolism ; Catalysis ; Catalytic Domain ; Class I Phosphatidylinositol 3-Kinases ; DNA Mutational Analysis ; Estrogen Receptor alpha/*metabolism ; Female ; *Gene Expression Profiling ; *Gene Expression Regulation, Neoplastic ; Humans ; Mutation ; Oligonucleotide Array Sequence Analysis ; Phosphatidylinositol 3-Kinases/*genetics ; Reverse Transcriptase Polymerase Chain Reaction ; Signal Transduction ; Wnt Proteins/*metabolism ; }, abstract = {BACKGROUND: The PI3K/AKT pathway plays a pivotal role in breast cancer development and maintenance. PIK3CA, encoding the PI3K catalytic subunit, is the oncogene exhibiting a high frequency of gain-of-function mutations leading to PI3K/AKT pathway activation in breast cancer. PIK3CA mutations have been observed in 30% to 40% of ERα-positive breast tumors. However the physiopathological role of PIK3CA mutations in breast tumorigenesis remains largely unclear.

To identify relevant downstream target genes and signaling activated by the aberrant PI3K/AKT pathway in breast tumors, we first analyzed gene expression with a pangenomic oligonucleotide microarray in a series of 43 ERα-positive tumors with and without PIK3CA mutations. Genes of interest were then investigated in 249 ERα-positive breast tumors by real-time quantitative RT-PCR. A robust collection of 19 genes was found to be differentially expressed in PIK3CA-mutated tumors. PIK3CA mutations were associated with over-expression of several genes involved in the Wnt signaling pathway (WNT5A, TCF7L2, MSX2, TNFRSF11B), regulation of gene transcription (SEC14L2, MSX2, TFAP2B, NRIP3) and metal ion binding (CYP4Z1, CYP4Z2P, SLC40A1, LTF, LIMCH1).

CONCLUSION/SIGNIFICANCE: This new gene set should help to understand the behavior of PIK3CA-mutated cancers and detailed knowledge of Wnt signaling activation could lead to novel therapeutic strategies.}, } @article {pmid21199655, year = {2011}, author = {Celeghin, A and Benato, F and Pikulkaew, S and Rabbane, MG and Colombo, L and Dalla Valle, L}, title = {The knockdown of the maternal estrogen receptor 2a (esr2a) mRNA affects embryo transcript contents and larval development in zebrafish.}, journal = {General and comparative endocrinology}, volume = {172}, number = {1}, pages = {120-129}, doi = {10.1016/j.ygcen.2010.12.020}, pmid = {21199655}, issn = {1095-6840}, mesh = {Animals ; Animals, Genetically Modified ; Cartilage/embryology/growth & development/metabolism ; Embryo, Nonmammalian/*chemistry/metabolism ; Epigenesis, Genetic/physiology ; Estrogen Receptor beta ; Gene Expression Profiling ; Gene Expression Regulation, Developmental ; Gene Knockdown Techniques ; Larva/genetics/*growth & development/metabolism ; Microarray Analysis ; Phenotype ; RNA, Messenger/*analysis/genetics/metabolism ; RNA, Messenger, Stored/antagonists & inhibitors/*genetics ; Receptors, Estrogen/antagonists & inhibitors/*genetics/metabolism ; Validation Studies as Topic ; *Zebrafish/embryology/genetics/growth & development/metabolism ; Zebrafish Proteins/antagonists & inhibitors/*genetics/metabolism ; }, abstract = {In zebrafish, ovulated oocytes are loaded with maternal estrogen receptor 2a (esr2a) mRNA which is spread as granular and filamentous structures throughout the central ooplasm and is promptly relocated inside the blastodisc area at the 1-cell stage (0.2h post-fertilization, hpf), as shown by in situ hybridization. This transcript is available for translation until its sharp decline from 4 to 8 hpf, being replaced by low levels of zygotic esr2a mRNA mainly localized in the head region and around the yolk sac from 24 hpf until hatching at 48 hpf. 
To test the functional role of the maternal esr2a mRNA, 1- or 2-cell embryos were injected with 10.3 ng each of morpholino (MO) to knockdown translation (MO2-esr2a) of both maternal and zygotic esr2a transcripts, with a missplicing MO (MO3-esr2a) to effectively block post-transcriptionally the zygotic transcript alone, and with a non-specific MO-control. Treatment with MO2-esr2a increased apoptosis in embryos, especially in the brain, and caused severe malformations in 63% of 1-5 dpf larvae, as compared to 10-11% in those treated with MO3-esr2a and MO-control. Defects included body growth delay with curved shape, persistent yolk sac with reduced sub-intestinal veins and swollen yolk extension, abnormal brain and splanchnocranium development, smaller eyes and otic vesicles, pericardial oedema, uninflated swim bladder and rudimentary caudal fin with aberrant circular swimming. Affected larvae could survive for only 12-14 days. The MO2-esr2a phenotype was rescued with co-injection of 30 pg/embryo of mutated zebrafish esr2a mRNA encoding the full length of Esr2a, but containing eight silent mutations in the region recognised by MO2-esr2a. A lower dosage (15 pg) failed to recover mortality and abnormality. Raising the dosage to 60 and 90 pg increased abnormality, but not mortality, whereas with 120 pg both mortality and abnormality worsened, indicating a strict quantitative requirement of Esr2a. Co-injection of an anti-p53 MO failed to rescue the MO2-esr2a phenotype, eliminating the possibility of off-target effects. Pangenomic microarray analysis revealed that 240 and 219 significantly expressed transcripts were up- and down-regulated, respectively, by maternal Esr2a protein deficiency in 8-hpf MO2-esr2a embryos. Also at 48 hpf, 162 and 120 presumably zygotic transcripts were up- and down-regulated, respectively, but only 18 were in common with each of the 8-hpf sets. In total, the transcripts from 705 genes were affected by Esr2a knockdown. 
These findings suggest the involvement of maternal esr2a mRNA, presumably transactivated by maternal 17β-estradiol stored in the oocyte from enveloping granulosa cells, in the epigenetic programming of zebrafish development.}, } @article {pmid21182879, year = {2011}, author = {Zhang, J and van Aartsen, JJ and Jiang, X and Shao, Y and Tai, C and He, X and Tan, Z and Deng, Z and Jia, S and Rajakumar, K and Ou, HY}, title = {Expansion of the known Klebsiella pneumoniae species gene pool by characterization of novel alien DNA islands integrated into tmRNA gene sites.}, journal = {Journal of microbiological methods}, volume = {84}, number = {2}, pages = {283-289}, doi = {10.1016/j.mimet.2010.12.016}, pmid = {21182879}, issn = {1872-8359}, mesh = {Animals ; Chromosomes, Bacterial ; DNA, Bacterial/chemistry/genetics ; Environmental Microbiology ; Gene Transfer, Horizontal ; *Genome, Bacterial ; *Genomic Islands ; Humans ; Klebsiella Infections/microbiology ; Klebsiella pneumoniae/*genetics/isolation & purification ; Molecular Sequence Data ; *Mutagenesis, Insertional ; Plasmids ; Polymerase Chain Reaction ; RNA, Bacterial/*genetics ; Sequence Analysis, DNA ; }, abstract = {Klebsiella pneumoniae is an important bacterial pathogen of man that is commonly associated with opportunistic and hospital-associated infections. Increasing levels of multiple-antibiotic resistance associated with this species pose a major emerging clinical problem. This organism also occurs naturally in other diverse environments, including the soil. Consistent with its varied lifestyle and membership of the Enterobacteriaceae family, K. pneumoniae genomes exhibit highly plastic architecture comprising a core genome backbone interspersed with numerous and varied alien genomic islands. In this study the size of the presently known K. pneumoniae pan-genome gene pool was estimated through analysis of complete sequences of three chromosomes and 31 plasmids belonging to K. pneumoniae strains. 
In addition, using a PCR-based strategy the genomic content of eight tRNA/tmRNA gene sites that serve as DNA insertion hotspots were investigated in 28 diverse environmental and clinical strains of K. pneumoniae. Sequencing and characterization of five newly identified horizontally-acquired tmRNA-associated islands further expanded the archived K. pneumoniae gene pool to a total of 7648 unique gene members. Large-scale investigation of the content of tRNA/tmRNA hotspots will be useful to identify and/or survey accessory sequences dispersed amongst hundreds to thousands of members of many key bacterial species.}, } @article {pmid21179488, year = {2010}, author = {Ben Amara, A and Ghigo, E and Le Priol, Y and Lépolard, C and Salcedo, SP and Lemichez, E and Bretelle, F and Capo, C and Mege, JL}, title = {Coxiella burnetii, the agent of Q fever, replicates within trophoblasts and induces a unique transcriptional response.}, journal = {PloS one}, volume = {5}, number = {12}, pages = {e15315}, pmid = {21179488}, issn = {1932-6203}, mesh = {Animals ; Coxiella burnetii/metabolism/*physiology ; Early Growth Response Protein 1/metabolism ; Gene Expression Profiling ; Inflammation ; Interleukin-13/metabolism ; Interleukin-6/metabolism ; Mice ; Oligonucleotide Array Sequence Analysis ; Phagosomes/microbiology ; Phenotype ; Q Fever/microbiology ; STAT3 Transcription Factor/metabolism ; *Transcription, Genetic ; Trophoblasts/*microbiology ; }, abstract = {Q fever is a zoonosis caused by Coxiella burnetii, an obligate intracellular bacterium typically found in myeloid cells. The infection is a source of severe obstetrical complications in humans and cattle and can undergo chronic evolution in a minority of pregnant women. Because C. burnetii is found in the placentas of aborted fetuses, we investigated the possibility that it could infect trophoblasts. Here, we show that C. burnetii infected and replicated in BeWo trophoblasts within phagolysosomes. 
Using pangenomic microarrays, we found that C. burnetii induced a specific transcriptomic program. This program was associated with the modulation of inflammatory responses that were shared with inflammatory agonists, such as TNF, and more specific responses involving genes related to pregnancy development, including EGR-1 and NDGR1. In addition, C. burnetii stimulated gene networks organized around the IL-6 and IL-13 pathways, which both modulate STAT3. Taken together, these results revealed that trophoblasts represent a protective niche for C. burnetii. The activation program induced by C. burnetii in trophoblasts may allow bacterial replication but seems unable to interfere with the development of normal pregnancy. Such pathophysiological processes should require the activation of immune placental cells associated with trophoblasts.}, } @article {pmid21179431, year = {2010}, author = {Passerini, D and Beltramo, C and Coddeville, M and Quentin, Y and Ritzenthaler, P and Daveran-Mingot, ML and Le Bourgeois, P}, title = {Genes but not genomes reveal bacterial domestication of Lactococcus lactis.}, journal = {PloS one}, volume = {5}, number = {12}, pages = {e15306}, pmid = {21179431}, issn = {1932-6203}, mesh = {Alleles ; Cloning, Molecular ; Ecology ; Electrophoresis, Gel, Pulsed-Field ; Environment ; *Genes, Bacterial ; Genetic Variation ; *Genome, Bacterial ; Genotype ; Lactococcus lactis/*genetics ; Models, Genetic ; Multilocus Sequence Typing ; Phylogeny ; Recombination, Genetic ; Software ; }, abstract = {BACKGROUND: The population structure and diversity of Lactococcus lactis subsp. lactis, a major industrial bacterium involved in milk fermentation, was determined at both gene and genome level. Seventy-six lactococcal isolates of various origins were studied by different genotyping methods and thirty-six strains displaying unique macrorestriction fingerprints were analyzed by a new multilocus sequence typing (MLST) scheme. 
This gene-based analysis was compared to genomic characteristics determined by pulsed-field gel electrophoresis (PFGE).

The MLST analysis revealed that L. lactis subsp. lactis is essentially clonal with infrequent intra- and intergenic recombination; also, despite its taxonomical classification as a subspecies, it displays a genetic diversity as substantial as that within several other bacterial species. Genome-based analysis revealed a genome size variability of 20%, a value typical of bacteria inhabiting different ecological niches, and that suggests a large pan-genome for this subspecies. However, the genomic characteristics (macrorestriction pattern, genome or chromosome size, plasmid content) did not correlate to the MLST-based phylogeny, with strains from the same sequence type (ST) differing by up to 230 kb in genome size.

CONCLUSION/SIGNIFICANCE: The gene-based phylogeny was not fully consistent with the traditional classification into dairy and non-dairy strains but supported a new classification based on ecological separation between "environmental" strains, the main contributors to the genetic diversity within the subspecies, and "domesticated" strains, subject to recent genetic bottlenecks. Comparison between gene- and genome-based analyses revealed little relationship between core and dispensable genome phylogenies, indicating that clonal diversification and phenotypic variability of the "domesticated" strains essentially arose through substantial genomic flux within the dispensable genome.}, } @article {pmid21177905, year = {2011}, author = {Ho, CC and Lau, CC and Martelli, P and Chan, SY and Tse, CW and Wu, AK and Yuen, KY and Lau, SK and Woo, PC}, title = {Novel pan-genomic analysis approach in target selection for multiplex PCR identification and detection of Burkholderia pseudomallei, Burkholderia thailandensis, and Burkholderia cepacia complex species: a proof-of-concept study.}, journal = {Journal of clinical microbiology}, volume = {49}, number = {3}, pages = {814-821}, pmid = {21177905}, issn = {1098-660X}, mesh = {Animals ; Bacteriological Techniques/*methods ; Burkholderia/*classification/genetics/*isolation & purification ; Burkholderia Infections/diagnosis/microbiology ; DNA Primers/genetics ; DNA, Bacterial/chemistry/genetics ; Environmental Microbiology ; Humans ; Molecular Sequence Data ; Polymerase Chain Reaction/*methods ; Sensitivity and Specificity ; Sequence Analysis, DNA ; }, abstract = {Burkholderia pseudomallei, Burkholderia thailandensis, and the Burkholderia cepacia complex differ greatly in pathogenicity and epidemiology. Yet, they are occasionally misidentified by biochemical profiling, and even 16S rRNA gene sequencing may not offer adequate discrimination between certain species groups. Using the 23 B. pseudomallei, four B. thailandensis, and 16 B. 
cepacia complex genome sequences available, we identified gene targets specific to each of them (a Tat domain protein, a 70-kDa protein, and a 12-kDa protein for B. pseudomallei, B. thailandensis, and the B. cepacia complex, respectively), with an in-house developed algorithm. Using these targets, we designed a robust multiplex PCR assay useful for their identification and detection from soil and simulated sputum samples. For all 43 B. pseudomallei, seven B. thailandensis, and 20 B. cepacia complex (B. multivorans, n = 6; B. cenocepacia, n = 3; B. cepacia, n = 4; B. arboris, n = 2; B. contaminans, B. anthina, and B. pyrrocinia, n = 1 each; other unnamed members, n = 2) isolates, the assay produced specific products of predicted size without false positives or negatives. Of the 60 soil samples screened, 19 (31.6%) and 29 (48.3%) were positive for B. pseudomallei and the B. cepacia complex, respectively, and in four (6.7%) soil samples, the organisms were codetected. DNA sequencing confirmed that all PCR products originated from their targeted loci. This novel pan-genomic analysis approach in target selection is simple, computationally efficient, and potentially applicable to any species that harbors species-specific genes. A multiplex PCR assay for rapid and accurate identification and detection of B. pseudomallei, B. thailandensis, and the B. 
cepacia complex was developed and verified.}, } @article {pmid21143815, year = {2010}, author = {Chen, SH and Lo, CZ and Su, SY and Kuo, BH and Hsiung, CA and Lin, CY}, title = {UPS 2.0: unique probe selector for probe design and oligonucleotide microarrays at the pangenomic/genomic level.}, journal = {BMC genomics}, volume = {11 Suppl 4}, number = {Suppl 4}, pages = {S6}, pmid = {21143815}, issn = {1471-2164}, mesh = {*Algorithms ; Animals ; Base Composition ; Base Sequence ; Computer Simulation ; DNA Probes/*chemistry ; Databases, Factual ; Gene Expression Profiling ; *Genome ; Hot Temperature ; Humans ; *Internet ; Nucleic Acid Hybridization/methods ; Oligonucleotide Array Sequence Analysis/*methods ; Reproducibility of Results ; Sensitivity and Specificity ; Species Specificity ; Thermodynamics ; }, abstract = {BACKGROUND: Nucleic acid hybridization is an extensively adopted principle in biomedical research, in which the performance of any hybridization-based method depends on the specificity of probes to their targets. To determine the optimal probe(s) for detecting target(s) from a sample cocktail, we developed a novel algorithm, which has been implemented into a web platform for probe designing. This probe design workflow is now upgraded to satisfy experiments that require a probe designing tool to take the increasing volume of sequence datasets.

RESULTS: Algorithms and probe parameters applied in UPS 2.0 include GC content, the secondary structure, melting temperature (Tm), the stability of the probe-target duplex estimated by the thermodynamic model, sequence complexity, similarity of probes to non-target sequences, and other empirical parameters used in the laboratory. Several probe background options (Unique probe within a group, Unique probe in a specific Unigene set, Unique probe based on the pangenomic level, and Unique Probe in the user-defined genome/transcriptome) are available to match the scenarios under which the experiments will be conducted. Parameters, such as salt concentration and the lower-bound Tm of probes, are available for users to optimize their probe design query. Output files are available for download on the result page. Probes designed by the UPS algorithm are suitable for generating microarrays, and the performance of UPS-designed probes has been validated by experiments.

CONCLUSIONS: The UPS 2.0 evaluates probe-to-target hybridization under a user-defined condition to ensure high-performance hybridization with minimal chance of non-specific binding at the pangenomic and genomic levels. The UPS algorithm mimics the target/non-target mixture in an experiment and is very useful in developing diagnostic kits and microarrays. The UPS 2.0 website has had more than 1,300 visits, and the probe designing task has been performed on 360,000 sequences in the last 30 months. It is freely accessible at http://array.iis.sinica.edu.tw/ups/. Screen cast: http://array.iis.sinica.edu.tw/ups/demo/demo.htm.}, } @article {pmid21170335, year = {2010}, author = {Scaria, J and Ponnala, L and Janvilisri, T and Yan, W and Mueller, LA and Chang, YF}, title = {Analysis of ultra low genome conservation in Clostridium difficile.}, journal = {PloS one}, volume = {5}, number = {12}, pages = {e15147}, pmid = {21170335}, issn = {1932-6203}, support = {N01AI30054/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Cattle ; Clostridioides difficile/*genetics ; Comparative Genomic Hybridization ; DNA, Bacterial/genetics ; Drug Resistance, Bacterial/genetics ; Genetic Variation ; *Genome, Bacterial ; Horses ; Humans ; Models, Genetic ; Sequence Analysis, DNA ; Species Specificity ; Virulence ; }, abstract = {Microarray-based comparative genome hybridisations (CGH) and genome sequencing of Clostridium difficile isolates have shown that the genomes of this species are highly variable. To further characterize their genome variation, we employed integration of data from CGH, genome sequencing and putative cellular pathways. Transcontinental strain comparison using CGH data confirmed the emergence of a human-specific hypervirulent cluster. However, there was no correlation between total toxin production and hypervirulent phenotype, indicating the possibility of involvement of additional factors towards hypervirulence. Calculation of C. 
difficile core and pan genome size using CGH and sequence data estimated that the core genome is composed of 947 to 1,033 genes and a pan genome comprised of 9,640 genes. The reconstruction, annotation and analysis of cellular pathways revealed highly conserved pathways despite large genome variation. However, few pathways such as tetrahydrofolate biosynthesis were found to be variable and could be contributing to adaptation towards virulence such as antibiotic resistance.}, } @article {pmid21156234, year = {2010}, author = {Gruhl, AN and Gostjeva, EV and Thilly, WG and Fomina, JN and Darroudi, F}, title = {Human fetal/tumor metakaryotic stem cells: pangenomic homologous pairing and telomeric end-joining of chromatids.}, journal = {Cancer genetics and cytogenetics}, volume = {203}, number = {2}, pages = {203-208}, pmid = {21156234}, issn = {1873-4456}, support = {T32 ES007020/ES/NIEHS NIH HHS/United States ; }, mesh = {Cell Nucleus/metabolism ; Centromere/ultrastructure ; Chromatids/*ultrastructure ; Chromosome Mapping ; Coloring Agents/chemistry ; *Cytogenetics ; Fetal Stem Cells/*cytology ; Genome ; Humans ; Image Cytometry/methods ; In Situ Hybridization, Fluorescence/methods ; Interphase ; Stem Cells/*cytology ; Telomere/*ultrastructure ; Time Factors ; }, abstract = {Metakaryotic cells and syncytia with large, hollow, bell-shaped nuclei demonstrate symmetrical and asymmetrical amitotic nuclear fissions in microanatomical positions and numbers expected of stem cell lineages in tissues of all three primordial germ layers and their derived tumors. Using fluorescence in situ hybridization, mononuclear metakaryotic interphase cells have been found with only 23 centromeric and 23 telomeric staining regions. Syncytial bell-shaped nuclei found approximately during weeks 5-12 of human gestation display 23 centromeric and either 23 or 46 telomeric staining regions. 
These images suggest that (1) homologous chromatids pair at centromeres and telomeres, (2) all paired telomeres join end-to-end with other paired telomeres in all mononuclear and some syncytial metakaryotic cells, and (3) telomere junctions may open and close during the syncytial phase of development. Twenty-three telomeric joining figures could be accounted by 23 rings of one chromatid pair each, a single pangenomic ring of 23 joined chromatid pairs, or any of many possible sets of oligo-chromatid pair rings. As telomeric end-joining may affect peri-telomeric gene expression, a programmed sequence of telomeric end-joining associations in metakaryotic stem cells could guide developmental arboration and errors in, or interruptions of, this program could contribute to carcinogenesis.}, } @article {pmid21148543, year = {2011}, author = {Angiuoli, SV and Salzberg, SL}, title = {Mugsy: fast multiple alignment of closely related whole genomes.}, journal = {Bioinformatics (Oxford, England)}, volume = {27}, number = {3}, pages = {334-342}, pmid = {21148543}, issn = {1367-4811}, support = {R01 GM083873/GM/NIGMS NIH HHS/United States ; R01-LM006845/LM/NLM NIH HHS/United States ; R01 LM006845-10/LM/NLM NIH HHS/United States ; R01 LM006845-11/LM/NLM NIH HHS/United States ; R01-GM083873/GM/NIGMS NIH HHS/United States ; R01 LM006845/LM/NLM NIH HHS/United States ; R01 GM083873-09/GM/NIGMS NIH HHS/United States ; R01 GM083873-08/GM/NIGMS NIH HHS/United States ; }, mesh = {*Algorithms ; Genome, Bacterial/genetics ; Genomics/*methods ; Humans ; Sequence Alignment/*methods ; Software ; Streptococcus pneumoniae/genetics ; }, abstract = {MOTIVATION: The relative ease and low cost of current generation sequencing technologies has led to a dramatic increase in the number of sequenced genomes for species across the tree of life. 
This increasing volume of data requires tools that can quickly compare multiple whole-genome sequences, millions of base pairs in length, to aid in the study of populations, pan-genomes, and genome evolution.

RESULTS: We present a new multiple alignment tool for whole genomes named Mugsy. Mugsy is computationally efficient and can align 31 Streptococcus pneumoniae genomes in less than 2 hours producing alignments that compare favorably to other tools. Mugsy is also the fastest program evaluated for the multiple alignment of assembled human chromosome sequences from four individuals. Mugsy does not require a reference sequence, can align mixtures of assembled draft and completed genome data, and is robust in identifying a rich complement of genetic variation including duplications, rearrangements, and large-scale gain and loss of sequence.

AVAILABILITY: Mugsy is free, open-source software available from http://mugsy.sf.net.}, } @article {pmid21143895, year = {2010}, author = {Klitgaard, K and Friis, C and Angen, O and Boye, M}, title = {Comparative profiling of the transcriptional response to iron restriction in six serotypes of Actinobacillus pleuropneumoniae with different virulence potential.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {698}, pmid = {21143895}, issn = {1471-2164}, mesh = {2,2'-Dipyridyl/pharmacology ; Actinobacillus pleuropneumoniae/classification/*genetics/growth & development/*pathogenicity ; Animals ; Base Sequence ; Culture Media/pharmacology ; Down-Regulation/drug effects/genetics ; Gene Expression Profiling/*methods ; Gene Expression Regulation, Bacterial/drug effects ; Genes, Bacterial/genetics ; Heme/metabolism ; Hemoglobins/metabolism ; Iron/metabolism/*pharmacology ; Molecular Sequence Data ; Oligonucleotide Array Sequence Analysis ; Polymerase Chain Reaction ; Rats ; Reproducibility of Results ; Serotyping ; Siderophores/metabolism ; Transcription, Genetic/*drug effects ; Up-Regulation/drug effects/genetics ; Virulence/drug effects/genetics ; }, abstract = {BACKGROUND: Comparative analysis of gene expression among serotypes within a species can provide valuable information on important differences between related genomes. For the pig lung pathogen Actinobacillus pleuropneumoniae, 15 serotypes with a considerable variation in virulence potential and immunogenicity have been identified. This serotypic diversity can only partly be explained by amount of capsule and differences in the RTX toxin genes in their genomes. Iron acquisition in vivo is an important bacterial function and in pathogenic bacteria, iron-limitation is often a signal for the induction of virulence genes. We used a pan-genomic microarray to study the transcriptional response to iron restriction in vitro in six serotypes of A. 
pleuropneumoniae (1, 2, 3, 5b, 6, and 7), representing at least two levels of virulence.

RESULTS: In total, 45 genes were significantly (p < 0.0001) up-regulated and 67 genes significantly down-regulated in response to iron limitation. Not previously observed in A. pleuropneumoniae was the up-regulation of a putative cirA-like siderophore in all six serotypes. Three genes, recently described in A. pleuropneumoniae as possibly coding for haemoglobin-haptoglobin binding proteins, displayed significant serotype-related up-regulation in response to iron limitation. For all three genes, the expression appeared at its lowest in serotype 3, which is generally considered one of the least virulent serotypes of A. pleuropneumoniae. The three genes share homology with the hmbR haemoglobin receptor of Neisseria meningitidis, a possible virulence factor which contributes to bacterial survival in rats.

CONCLUSIONS: By comparative analysis of gene expression among 6 different serotypes of A. pleuropneumoniae we identified a common set of presumably essential core genes, involved in iron regulation. The results support and expand previous observations concerning the identification of new potential iron acquisition systems in A. pleuropneumoniae, showing that this bacterium has evolved several strategies for scavenging the limited iron resources of the host. The combined effect of iron-depletion and serotype proved to be modest, indicating that serotypes of both moderate and high virulence, at least in vitro, are reacting almost identically to iron restriction. One notable exception, however, is the haemoglobin-haptoglobin binding protein cluster which merits further investigation.}, } @article {pmid21126366, year = {2010}, author = {den Bakker, HC and Cummings, CA and Ferreira, V and Vatta, P and Orsi, RH and Degoricija, L and Barker, M and Petrauskene, O and Furtado, MR and Wiedmann, M}, title = {Comparative genomics of the bacterial genus Listeria: Genome evolution is characterized by limited gene acquisition and limited gene loss.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {688}, pmid = {21126366}, issn = {1471-2164}, mesh = {Bacterial Proteins/genetics/metabolism ; Base Sequence ; Bayes Theorem ; Biological Clocks/genetics ; Caco-2 Cells ; Chromosomes, Bacterial/genetics ; *Evolution, Molecular ; Genes, Bacterial/*genetics ; Genomics/*methods ; Humans ; Listeria/*genetics/pathogenicity ; Multigene Family/genetics ; Phylogeny ; Plasmids/genetics ; Polymorphism, Single Nucleotide/genetics ; Reproducibility of Results ; Species Specificity ; Virulence/genetics ; }, abstract = {BACKGROUND: The bacterial genus Listeria contains pathogenic and non-pathogenic species, including the pathogens L. monocytogenes and L. ivanovii, both of which carry homologous virulence gene clusters such as the prfA cluster and clusters of internalin genes. 
Initial evidence for multiple deletions of the prfA cluster during the evolution of Listeria indicates that this genus provides an interesting model for studying the evolution of virulence and also presents practical challenges with regard to definition of pathogenic strains.

RESULTS: To better understand genome evolution and evolution of virulence characteristics in Listeria, we used a next generation sequencing approach to generate draft genomes for seven strains representing Listeria species or clades for which genome sequences were not available. Comparative analyses of these draft genomes and six publicly available genomes, which together represent the main Listeria species, showed evidence for (i) a pangenome with 2,032 core and 2,918 accessory genes identified to date, (ii) a critical role of gene loss events in transition of Listeria species from facultative pathogen to saprotroph, even though a consistent pattern of gene loss seemed to be absent, and a number of isolates representing non-pathogenic species still carried some virulence associated genes, and (iii) divergence of modern pathogenic and non-pathogenic Listeria species and strains, most likely circa 47 million years ago, from a pathogenic common ancestor that contained key virulence genes.

CONCLUSIONS: Genome evolution in Listeria involved limited gene loss and acquisition as supported by (i) a relatively high coverage of the predicted pan-genome by the observed pan-genome, (ii) conserved genome size (between 2.8 and 3.2 Mb), and (iii) a highly syntenic genome. Limited gene loss in Listeria did include loss of virulence associated genes, likely associated with multiple transitions to a saprotrophic lifestyle. The genus Listeria thus provides an example of a group of bacteria that appears to evolve through a loss of virulence rather than acquisition of virulence characteristics. While Listeria includes a number of species-like clades, many of these putative species include clades or strains with atypical virulence associated characteristics. This information will allow for the development of genetic and genomic criteria for pathogenic strains, including development of assays that specifically detect pathogenic Listeria strains.}, } @article {pmid21067849, year = {2011}, author = {Pacheco, Y}, title = {[Pathogenesis of sarcoidosis].}, journal = {La Revue de medecine interne}, volume = {32}, number = {2}, pages = {73-79}, doi = {10.1016/j.revmed.2010.09.011}, pmid = {21067849}, issn = {1768-3122}, mesh = {Environment ; Humans ; Polymorphism, Genetic ; Sarcoidosis/*etiology ; T-Lymphocytes/immunology ; }, abstract = {Many improvements have been obtained in understanding the immune and genetic mechanisms of sarcoidosis. Main immune abnormalities in this disease involve T lymphocytes, macrophages and dendritic cells. Interactions between these various immune cells through the immune synapse are tight. Environmental factors and genetic polymorphisms interact at molecular level in these immune targets. Recent pangenomic studies highlight some regions of the genome, such as 6p21, where important immune genes are located: MHC, BTNL2 and TNF-α. Gene-environment interactions are important in this polymorphic disease. 
They need accurate clinical analysis for a better definition of patient subgroups and familial disease studies to progress in the role of genetic determinants.}, } @article {pmid21034474, year = {2010}, author = {Donati, C and Hiller, NL and Tettelin, H and Muzzi, A and Croucher, NJ and Angiuoli, SV and Oggioni, M and Dunning Hotopp, JC and Hu, FZ and Riley, DR and Covacci, A and Mitchell, TJ and Bentley, SD and Kilian, M and Ehrlich, GD and Rappuoli, R and Moxon, ER and Masignani, V}, title = {Structure and dynamics of the pan-genome of Streptococcus pneumoniae and closely related species.}, journal = {Genome biology}, volume = {11}, number = {10}, pages = {R107}, pmid = {21034474}, issn = {1474-760X}, support = {R01 DC002148/DC/NIDCD NIH HHS/United States ; N01-AI-30071/AI/NIAID NIH HHS/United States ; DC05659/DC/NIDCD NIH HHS/United States ; R01 AI080935/AI/NIAID NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; AI080935/AI/NIAID NIH HHS/United States ; DC04173/DC/NIDCD NIH HHS/United States ; DC02148/DC/NIDCD NIH HHS/United States ; }, mesh = {DNA, Bacterial/genetics ; Evolution, Molecular ; Gene Conversion ; Genes, Bacterial ; *Genetic Variation ; *Genome, Bacterial ; Linkage Disequilibrium ; Multigene Family ; Phylogeny ; Polymorphism, Single Nucleotide ; Sequence Alignment ; Sequence Analysis, DNA ; Streptococcus mitis/*genetics/pathogenicity ; Streptococcus pneumoniae/*genetics/pathogenicity ; Virulence ; }, abstract = {BACKGROUND: Streptococcus pneumoniae is one of the most important causes of microbial diseases in humans. The genomes of 44 diverse strains of S. pneumoniae were analyzed and compared with strains of non-pathogenic streptococci of the Mitis group.

RESULTS: Despite evidence of extensive recombination, the S. pneumoniae phylogenetic tree revealed six major lineages. With the exception of serotype 1, the tree correlated poorly with capsular serotype, geographical site of isolation and disease outcome. The distribution of dispensable genes--genes present in more than one strain but not in all strains--was consistent with phylogeny, although horizontal gene transfer events attenuated this correlation in the case of ancient lineages. Homologous recombination, involving short stretches of DNA, was the dominant evolutionary process of the core genome of S. pneumoniae. Genetic exchange occurred both within and across the borders of the species, and S. mitis was the main reservoir of genetic diversity of S. pneumoniae. The pan-genome size of S. pneumoniae increased logarithmically with the number of strains and linearly with the number of polymorphic sites of the sampled genomes, suggesting that acquired genes accumulate proportionately to the age of clones. Most genes associated with pathogenicity were shared by all S. pneumoniae strains, but were also present in S. mitis, S. oralis and S. infantis, indicating that these genes are not sufficient to determine virulence.

CONCLUSIONS: Genetic exchange with related species sharing the same ecological niche is the main mechanism of evolution of S. pneumoniae. The open pan-genome guarantees the species a quick and economical response to diverse environments.}, } @article {pmid20961962, year = {2011}, author = {Coscollá, M and Comas, I and González-Candelas, F}, title = {Quantifying nonvertical inheritance in the evolution of Legionella pneumophila.}, journal = {Molecular biology and evolution}, volume = {28}, number = {2}, pages = {985-1001}, doi = {10.1093/molbev/msq278}, pmid = {20961962}, issn = {1537-1719}, support = {MRC_U117588500//Medical Research Council/United Kingdom ; }, mesh = {Biological Evolution ; *Gene Transfer, Horizontal ; Legionella pneumophila/classification/*genetics ; Multilocus Sequence Typing ; Phylogeny ; }, abstract = {The exchange of genetic material among bacterial strains and species is recognized as an important factor determining their evolutionary, population genetic, and epidemiological features. We present a detailed analysis of nonvertical inheritance in Legionella pneumophila, a human pathogen and facultative intracellular parasite of amoebas. We have analyzed the exchange of L. pneumophila genetic material with other bacteria at three different levels: population genetics, population genomics, and phylogenomics. At the population genetics level, we have analyzed 89 clinical and environmental isolates after sequencing six coding loci and three intergenic regions for a total of 3,923 bp. In the population genomics analysis, we have studied the roles of recombination and mutation in the common portion of the genome sequence of four L. pneumophila strains. In the phylogenomic analysis, we have studied the phylogenetic origin of 1,700 genes in the L. pneumophila pangenome. 
For this, we have considered 12 possible phylogenetic alternatives, derived from a reference tree obtained from 104 genes from 41 species, which have been tested under a rigorous statistical framework. The results obtained agree in assigning an important role to nonvertical inheritance in shaping the composition of the L. pneumophila genome and of the genetic variation in its populations. We have found a negative correlation between phylogenetic distance and likelihood of horizontal gene transfer. Phylogenetic proximity and increased chances resulting from sharing the ecological niche provided by the amoeba host have likely had a major influence on the rate of gene exchange in Legionella.}, } @article {pmid20955399, year = {2010}, author = {Pérez, B and Kosmider, O and Cassinat, B and Renneville, A and Lachenaud, J and Kaltenbach, S and Bertrand, Y and Baruchel, A and Chomienne, C and Fontenay, M and Preudhomme, C and Cavé, H}, title = {Genetic typing of CBL, ASXL1, RUNX1, TET2 and JAK2 in juvenile myelomonocytic leukaemia reveals a genetic profile distinct from chronic myelomonocytic leukaemia.}, journal = {British journal of haematology}, volume = {151}, number = {5}, pages = {460-468}, doi = {10.1111/j.1365-2141.2010.08393.x}, pmid = {20955399}, issn = {1365-2141}, mesh = {Child ; Child, Preschool ; DNA Mutational Analysis ; DNA, Neoplasm/genetics ; Female ; Humans ; Infant ; Leukemia, Myelomonocytic, Chronic/*genetics ; Leukemia, Myelomonocytic, Juvenile/*genetics ; Male ; Mutation ; Neoplasm Proteins/*genetics ; Polymorphism, Single Nucleotide ; Repressor Proteins/genetics ; }, abstract = {JMML and CMML are rare myelodysplastic/myeloproliferative neoplasms occurring at both ends of life. To investigate relationships between JMML and CMML, genes recently involved in CMML were studied in 68 JMML patients. 
Mutations in TET2, RUNX1 and JAK2(V617F) are involved in myelodysplastic and/or myeloproliferative syndromes, and more specifically in CMML but were not found in JMML. Pangenomic analysis by SNP-array showed no abnormality at these loci. Three frameshift mutations of ASXL1 leading to a truncated protein were found in three patients (4%) with late onset JMML displaying also RAS activating mutations. Homozygous mutations of CBL with 11q loss of heterozygosity were found in five (7%) JMML. CBL substitutions were different from those reported in CMML, exclusive from other RAS activating mutations, and were germline in all patients. Overall, the pattern of genetic lesions observed in JMML differed from that of CMML. Although signalling deregulation is involved in CMML, transcriptional deregulation seems to play a pivotal role, with mutation of RUNX1, ASXL1 or TET2. Conversely, none of these genes involved in transcription or chromatin remodelling was found to be significantly altered in JMML, while CBL mutations confirm the central role of RAS and growth factor signalling deregulation in JMML.}, } @article {pmid20942950, year = {2010}, author = {Trost, B and Haakensen, M and Pittet, V and Ziola, B and Kusalik, A}, title = {Analysis and comparison of the pan-genomic properties of sixteen well-characterized bacterial genera.}, journal = {BMC microbiology}, volume = {10}, number = {}, pages = {258}, pmid = {20942950}, issn = {1471-2180}, mesh = {Bacteria/chemistry/*classification/*genetics ; Bacterial Proteins/analysis/*genetics ; Phylogeny ; Proteome/analysis ; RNA, Bacterial/analysis/genetics ; RNA, Ribosomal, 16S/analysis/genetics ; }, abstract = {BACKGROUND: The increasing availability of whole genome sequences allows the gene or protein content of different organisms to be compared, leading to burgeoning interest in the relatively new subfield of pan-genomics. 
However, while several studies have analyzed protein content relationships in specific groups of bacteria, there has yet to be a study that provides a general characterization of protein content relationships in a broad range of bacteria.

RESULTS: A variation on reciprocal BLAST hits was used to infer relationships among proteins in several groups of bacteria, and data regarding protein conservation and uniqueness in different bacterial genera are reported in terms of "core proteomes", "unique proteomes", and "singlets". We also analyzed the relationship between protein content similarity and the percent identity of the 16S rRNA gene in pairs of bacterial isolates from the same genus, and found that the strength of this relationship varied substantially depending on the genus, perhaps reflecting different rates of genome evolution and/or horizontal gene transfer. Finally, core proteomes and unique proteomes were used to study the proteomic cohesiveness of several bacterial species, revealing that some bacterial species had little cohesiveness in their protein content, with some having fewer proteins unique to that species than randomly-chosen sets of isolates from the same genus.

CONCLUSIONS: The results described in this study aid our understanding of protein content relationships in different bacterial groups, allowing us to make further inferences regarding genome-environment relationships, genome evolution, and the soundness of existing taxonomic classifications.}, } @article {pmid20890839, year = {2010}, author = {Mira, A and Martín-Cuadrado, AB and D'Auria, G and Rodríguez-Valera, F}, title = {The bacterial pan-genome: a new paradigm in microbiology.}, journal = {International microbiology : the official journal of the Spanish Society for Microbiology}, volume = {13}, number = {2}, pages = {45-57}, doi = {10.2436/20.1501.01.110}, pmid = {20890839}, issn = {1618-1905}, mesh = {Adaptation, Biological ; Biomedical Research/*methods/trends ; Computational Biology ; Evolution, Molecular ; *Gene Order ; *Genome, Bacterial ; Metagenome ; Microbiological Techniques/*methods ; Microbiology/trends ; Nucleic Acid Hybridization ; *Polymorphism, Genetic ; Sequence Analysis, DNA ; }, abstract = {Bacterial strains belonging to the same species vary considerably in gene content. Thus, the genetic repertoire of a given species (its "pan-genome") is much larger than the gene content of individual strains. These variations in DNA material, together with differences in genomic structure and nucleotide polymorphisms among strains, confer upon prokaryotic species a phenomenal adaptability. Although the approach of sequencing multiple strains from a single species remains the main and often easiest way to study the pan-genome, feasible alternatives include those related to DNA hybridization. In other cases, the use of metagenomic sequences is already applicable by data mining from the growing metagenomic databases. Eventually, the single-cell genome approach might be the ideal solution. 
The pan-genome concept has important consequences for the way we understand bacterial evolution, adaptation, and population structure, as well as for more applied issues such as vaccine design or the identification of virulence genes.}, } @article {pmid20876808, year = {2010}, author = {Whitehurst, AW and Xie, Y and Purinton, SC and Cappell, KM and Swanik, JT and Larson, B and Girard, L and Schorge, JO and White, MA}, title = {Tumor antigen acrosin binding protein normalizes mitotic spindle function to promote cancer cell proliferation.}, journal = {Cancer research}, volume = {70}, number = {19}, pages = {7652-7661}, pmid = {20876808}, issn = {1538-7445}, support = {CA071341-14/CA/NCI NIH HHS/United States ; CA128926/CA/NCI NIH HHS/United States ; R01 CA154699/CA/NCI NIH HHS/United States ; R00 CA128926/CA/NCI NIH HHS/United States ; T32 CA071341/CA/NCI NIH HHS/United States ; UL1 RR024982/RR/NCRR NIH HHS/United States ; R01 CA071443-15/CA/NCI NIH HHS/United States ; R01 CA071443/CA/NCI NIH HHS/United States ; R01 CA071443-16/CA/NCI NIH HHS/United States ; CA71443/CA/NCI NIH HHS/United States ; K99 CA128926/CA/NCI NIH HHS/United States ; }, mesh = {Antigens, Nuclear/physiology ; Carrier Proteins/biosynthesis/genetics/*physiology ; Cell Cycle Proteins ; Drug Resistance, Neoplasm ; Female ; Humans ; Mitosis/drug effects/physiology ; Nuclear Matrix-Associated Proteins/physiology ; Ovarian Neoplasms/drug therapy/pathology ; Paclitaxel/pharmacology ; Spindle Apparatus/drug effects/*physiology ; }, abstract = {Cancer cells manage to divide in the context of gross chromosomal abnormalities. These abnormalities can promote bypass of normal restraints on cell proliferation but at a cost of mitotic vulnerabilities that can be attacked by chemotherapy. Determining how cancer cells balance these issues may permit chemotherapeutic sensitivity to be leveraged more efficiently. 
From a pan-genomic small interfering RNA screen for modifiers of chemoresponsiveness, we identified the tumor antigen acrosin binding protein (ACRBP)/OY-TES-1 as a specifier of paclitaxel resistance. ACRBP expression is normally restricted to the testes but is detected in a wide variety of cancers, including most ovarian cancers. We found that ACRBP is both necessary and sufficient for paclitaxel resistance in ovarian cancer cell lines and ovarian tumor explants. Moreover, high ACRBP expression correlated with reduced survival time and faster relapse among ovarian cancer patients. We identified the mitotic spindle protein NuMA as an ACRBP-interacting protein that could account for the effects of ACRBP on paclitaxel sensitivity. In cancer cells, ACRBP restricted a NuMA-dependent abrogation of a mitotic spindle assembly that is otherwise pathologic. As a consequence, ACRBP depletion resulted in mitotic errors and reduced proliferative fitness that could be rescued by NuMA codepletion. We propose that the codependent relationship of ACRBP and NuMA in cancer cells reflects their passage through a selection bottleneck during tumor evolution, one which requires the acquisition of traits that normalize mitotic perturbations that originally drove the plasticity of a preneoplastic genome. 
The molecular definition of such traits as defined by the ACRBP-NuMA complex may represent conceptually ideal intervention targets based on the wide therapeutic windows they may offer.}, } @article {pmid20865522, year = {2010}, author = {Mahadevan, P and Seto, D}, title = {Taxonomic parsing of bacteriophages using core genes and in silico proteome-based CGUG and applications to small bacterial genomes.}, journal = {Advances in experimental medicine and biology}, volume = {680}, number = {}, pages = {379-385}, doi = {10.1007/978-1-4419-5913-3_43}, pmid = {20865522}, issn = {0065-2598}, mesh = {*Algorithms ; Bacteriophage P22/classification/genetics ; Bacteriophage T7/classification/genetics ; Bacteriophage lambda/classification/genetics ; Bacteriophages/*classification/*genetics ; Burkholderia cenocepacia/classification/genetics ; Computational Biology ; Genes, Viral ; Genome, Bacterial ; Genomics/*statistics & numerical data ; Internet ; Podoviridae/classification/genetics ; Proteome ; Proteomics/statistics & numerical data ; }, abstract = {A combined genomics and in situ proteomics approach can be used to determine and classify the relatedness of organisms. The common set of proteins shared within a group of genomes is encoded by the "core" set of genes, which is increasingly recognized as a metric for parsing viral and bacterial species. These can be described by the concept of a "pan-genome", which consists of this "core" set and a "dispensable" set, i.e., genes found in one or more but not all organisms in the grouping. "CoreGenesUniqueGenes" (CGUG) is a web-based tool that determines this core set of proteins in a set of genomes as well as parses the dispensable set of unique proteins in a pair of viral or small bacterial genomes. This proteome-based methodology is validated using bacteriophages, aiding the reevaluation of current classifications of bacteriophages. 
The utility of CGUG in the analysis of small bacterial genomes and the annotation of hypothetical proteins is also presented.}, } @article {pmid20865039, year = {2010}, author = {Friis, C and Wassenaar, TM and Javed, MA and Snipen, L and Lagesen, K and Hallin, PF and Newell, DG and Toszeghy, M and Ridley, A and Manning, G and Ussery, DW}, title = {Genomic characterization of Campylobacter jejuni strain M1.}, journal = {PloS one}, volume = {5}, number = {8}, pages = {e12253}, pmid = {20865039}, issn = {1932-6203}, mesh = {Animals ; Bacterial Proteins/genetics ; Campylobacter Infections/*microbiology/transmission/*veterinary ; Campylobacter jejuni/classification/*genetics/isolation & purification ; Chickens ; Chromosome Mapping ; *Genome, Bacterial ; Humans ; Molecular Sequence Data ; Phylogeny ; Poultry Diseases/*microbiology ; }, abstract = {Campylobacter jejuni strain M1 (laboratory designation 99/308) is a rarely documented case of direct transmission of C. jejuni from chicken to a person, resulting in enteritis. We have sequenced the genome of C. jejuni strain M1, and compared this to 12 other C. jejuni sequenced genomes currently publicly available. Compared to these, M1 is closest to strain 81116. Based on the 13 genome sequences, we have identified the C. jejuni pan-genome, as well as the core genome, the auxiliary genes, and genes unique between strains M1 and 81116. The pan-genome contains 2,427 gene families, whilst the core genome comprised 1,295 gene families, or about two-thirds of the gene content of the average of the sequenced C. jejuni genomes. Various comparison and visualization tools were applied to the 13 C. jejuni genome sequences, including a species pan- and core genome plot, a BLAST Matrix and a BLAST Atlas. Trees based on 16S rRNA sequences and on the total gene families in each genome are presented. 
The findings are discussed in the background of the proven virulence potential of M1.}, } @article {pmid20846431, year = {2010}, author = {Deng, X and Phillippy, AM and Li, Z and Salzberg, SL and Zhang, W}, title = {Probing the pan-genome of Listeria monocytogenes: new insights into intraspecific niche expansion and genomic diversification.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {500}, pmid = {20846431}, issn = {1471-2164}, support = {R01 GM083873/GM/NIGMS NIH HHS/United States ; R01 LM006845-10/LM/NLM NIH HHS/United States ; R01 LM006845-11/LM/NLM NIH HHS/United States ; R01 LM006845/LM/NLM NIH HHS/United States ; R01 GM083873-07/GM/NIGMS NIH HHS/United States ; R01 GM083873-08/GM/NIGMS NIH HHS/United States ; }, mesh = {Chromosome Mapping ; Comparative Genomic Hybridization ; Conserved Sequence/genetics ; Genes, Bacterial/genetics ; Genetic Loci/genetics ; *Genetic Variation ; Genome, Bacterial/*genetics ; Humans ; Listeria monocytogenes/*genetics ; Metabolic Networks and Pathways/genetics ; Phylogeny ; RNA, Bacterial/genetics ; ROC Curve ; Reference Standards ; Sequence Analysis, DNA ; Sequence Homology, Nucleic Acid ; Species Specificity ; }, abstract = {BACKGROUND: Bacterial pathogens often show significant intraspecific variations in ecological fitness, host preference and pathogenic potential to cause infectious disease. The species of Listeria monocytogenes, a facultative intracellular pathogen and the causative agent of human listeriosis, consists of at least three distinct genetic lineages. Two of these lineages predominantly cause human sporadic and epidemic infections, whereas the third lineage has never been implicated in human disease outbreaks despite its overall conservation of many known virulence factors.

RESULTS: Here we compare the genomes of 26 L. monocytogenes strains representing the three lineages based on both in silico comparative genomic analysis and high-density, pan-genomic DNA array hybridizations. We uncover 86 genes and 8 small regulatory RNAs that likely make L. monocytogenes lineages differ in carbohydrate utilization and stress resistance during their residence in natural habitats and passage through the host gastrointestinal tract. We also identify 2,330 to 2,456 core genes that define this species along with an open pan-genome pool that contains more than 4,052 genes. Phylogenomic reconstructions based on 3,560 homologous groups allowed robust estimation of phylogenetic relatedness among L. monocytogenes strains.

CONCLUSIONS: Our pan-genome approach enables accurate co-analysis of DNA sequence and hybridization array data for both core gene estimation and phylogenomics. Application of our method to the pan-genome of L. monocytogenes sheds new insights into the intraspecific niche expansion and evolution of this important foodborne pathogen.}, } @article {pmid20843356, year = {2010}, author = {Laing, C and Buchanan, C and Taboada, EN and Zhang, Y and Kropinski, A and Villegas, A and Thomas, JE and Gannon, VP}, title = {Pan-genome sequence analysis using Panseq: an online tool for the rapid analysis of core and accessory genomic regions.}, journal = {BMC bioinformatics}, volume = {11}, number = {}, pages = {461}, pmid = {20843356}, issn = {1471-2105}, mesh = {DNA, Bacterial/metabolism ; Escherichia coli/*genetics ; Escherichia coli O157/*genetics ; *Genome, Bacterial ; Phylogeny ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA/*methods ; *Software ; }, abstract = {BACKGROUND: The pan-genome of a bacterial species consists of a core and an accessory gene pool. The accessory genome is thought to be an important source of genetic variability in bacterial populations and is gained through lateral gene transfer, allowing subpopulations of bacteria to better adapt to specific niches. Low-cost and high-throughput sequencing platforms have created an exponential increase in genome sequence data and an opportunity to study the pan-genomes of many bacterial species. In this study, we describe a new online pan-genome sequence analysis program, Panseq.

RESULTS: Panseq was used to identify Escherichia coli O157:H7 and E. coli K-12 genomic islands. Within a population of 60 E. coli O157:H7 strains, the existence of 65 accessory genomic regions identified by Panseq analysis was confirmed by PCR. The accessory genome and binary presence/absence data, and core genome and single nucleotide polymorphisms (SNPs) of six L. monocytogenes strains were extracted with Panseq and hierarchically clustered and visualized. The nucleotide core and binary accessory data were also used to construct maximum parsimony (MP) trees, which were compared to the MP tree generated by multi-locus sequence typing (MLST). The topology of the accessory and core trees was identical but differed from the tree produced using seven MLST loci. The Loci Selector module found the most variable and discriminatory combinations of four loci within a 100 loci set among 10 strains in 1 s, compared to the 449 s required to exhaustively search for all possible combinations; it also found the most discriminatory 20 loci from a 96 loci E. coli O157:H7 SNP dataset.

CONCLUSION: Panseq determines the core and accessory regions among a collection of genomic sequences based on user-defined parameters. It readily extracts regions unique to a genome or group of genomes, identifies SNPs within shared core genomic regions, constructs files for use in phylogeny programs based on both the presence/absence of accessory regions and SNPs within core regions and produces a graphical overview of the output. Panseq also includes a loci selector that calculates the most variable and discriminatory loci among sets of accessory loci or core gene SNPs.

AVAILABILITY: Panseq is freely available online at http://76.70.11.198/panseq. Panseq is written in Perl.}, } @article {pmid20823274, year = {2010}, author = {Cappell, KM and Larson, B and Sciaky, N and Whitehurst, AW}, title = {Symplekin specifies mitotic fidelity by supporting microtubule dynamics.}, journal = {Molecular and cellular biology}, volume = {30}, number = {21}, pages = {5135-5144}, pmid = {20823274}, issn = {1098-5549}, support = {CA071341-14/CA/NCI NIH HHS/United States ; GM008719/GM/NIGMS NIH HHS/United States ; CA128926/CA/NCI NIH HHS/United States ; R01 CA154699/CA/NCI NIH HHS/United States ; R00 CA128926/CA/NCI NIH HHS/United States ; T32 CA071341/CA/NCI NIH HHS/United States ; K99 CA128926/CA/NCI NIH HHS/United States ; T32 GM008719/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; Carcinoma, Non-Small-Cell Lung/drug therapy/genetics/pathology/physiopathology ; Cell Line, Tumor ; Humans ; Lung Neoplasms/drug therapy/genetics/pathology/physiopathology ; Mice ; Microtubule-Associated Proteins/genetics/physiology ; Microtubules/genetics/*physiology ; Mitosis/genetics/*physiology ; Neoplasm Transplantation ; Nuclear Proteins/antagonists & inhibitors/genetics/*physiology ; Paclitaxel/pharmacology ; Phenotype ; RNA, Messenger/genetics/metabolism ; RNA, Neoplasm/genetics/metabolism ; RNA, Small Interfering/genetics ; Spindle Apparatus/genetics/physiology ; Transplantation, Heterologous ; }, abstract = {Using a pangenomic loss-of-function screening strategy, we have previously identified 76 potent modulators of paclitaxel responsiveness in non-small-cell lung cancer. The top hit isolated from this screen, symplekin, is a well-established component of the mRNA polyadenylation machinery. Here, we performed a high-resolution phenotypic analysis to reveal the mechanistic underpinnings by which symplekin depletion collaborates with paclitaxel. 
We find that symplekin supports faithful mitosis by contributing to the formation of a bipolar spindle apparatus. Depletion of symplekin attenuates microtubule polymerization activity as well as expression of the critical microtubule polymerization protein CKAP5 (TOGp). Depletion of additional members of the polyadenylation complex induces similar phenotypes, suggesting that polyadenylation machinery is intimately coupled to microtubule function and thus mitotic spindle formation. Importantly, tumor cells depleted of symplekin display reduced fecundity, but the mitotic defects that we observe are not evident in immortalized cells. These results demonstrate a critical connection between the polyadenylation machinery and mitosis and suggest that tumor cells have an enhanced dependency on these components for spindle assembly.}, } @article {pmid20816883, year = {2010}, author = {Dumont, J and Jossé, R and Lambert, C and Anthérieu, S and Laurent, V and Loyer, P and Robin, MA and Guillouzo, A}, title = {Preferential induction of the AhR gene battery in HepaRG cells after a single or repeated exposure to heterocyclic aromatic amines.}, journal = {Toxicology and applied pharmacology}, volume = {249}, number = {1}, pages = {91-100}, doi = {10.1016/j.taap.2010.08.027}, pmid = {20816883}, issn = {1096-0333}, mesh = {Amines/administration & dosage/toxicity ; Cell Line, Tumor ; Gene Expression Profiling/methods ; Gene Expression Regulation, Neoplastic/drug effects ; Humans ; Imidazoles/*administration & dosage/toxicity ; Quinoxalines/*administration & dosage/toxicity ; Receptors, Aryl Hydrocarbon/*biosynthesis/*genetics ; }, abstract = {2-Amino-1-methyl-6-phenylimidazo[4,5-b]pyridine (PhIP) and 2-amino-3,8-dimethylimidazo[4,5-f]quinoxaline (MeIQx) are two of the most common heterocyclic aromatic amines (HAA) produced during cooking of meat, fish and poultry. Both HAA produce different tumor profiles in rodents and are suspected to be carcinogenic in humans. 
In order to better understand the molecular basis of HAA toxicity, we have analyzed gene expression profiles in the metabolically competent human HepaRG cells using pangenomic oligonucleotide microarrays, after either a single (24-h) or a repeated (28-day) exposure to 10 μM PhIP or MeIQx. The most responsive genes to both HAA were downstream targets of the arylhydrocarbon receptor (AhR): CYP1A1 and CYP1A2 after both time points and CYP1B1 and ALDH3A1 after 28 days. Accordingly, CYP1A1/1A2 induction in HAA-treated HepaRG cells was prevented by chemical inhibition or small interference RNA-mediated down-regulation of the AhR. Consistently, HAA induced activity of the CYP1A1 promoter, which contains a consensus AhR-related xenobiotic-responsive element (XRE). In addition, several other genes exhibited both time-dependent and compound-specific expression changes with, however, a smaller magnitude than previously reported for the prototypical AhR target genes. These changes concerned genes mainly related to cell growth and proliferation, apoptosis, and cancer. In conclusion, these results identify the AhR gene battery as the preferential target of PhIP and MeIQx in HepaRG cells and further support the hypothesis that intake of HAA in diet might increase human cancer risk.}, } @article {pmid20811585, year = {2010}, author = {Ahsanul Islam, M and Edwards, EA and Mahadevan, R}, title = {Characterizing the metabolism of Dehalococcoides with a constraint-based model.}, journal = {PLoS computational biology}, volume = {6}, number = {8}, pages = {}, pmid = {20811585}, issn = {1553-7358}, mesh = {Biological Evolution ; Chloroflexi/genetics/*metabolism ; Genome, Bacterial ; Metabolic Networks and Pathways/*genetics ; Models, Biological ; Molecular Sequence Data ; }, abstract = {Dehalococcoides strains respire a wide variety of chloro-organic compounds and are important for the bioremediation of toxic, persistent, carcinogenic, and ubiquitous ground water pollutants. 
In order to better understand metabolism and optimize their application, we have developed a pan-genome-scale metabolic network and constraint-based metabolic model of Dehalococcoides. The pan-genome was constructed from publicly available complete genome sequences of Dehalococcoides sp. strain CBDB1, strain 195, strain BAV1, and strain VS. We found that Dehalococcoides pan-genome consisted of 1118 core genes (shared by all), 457 dispensable genes (shared by some), and 486 unique genes (found in only one genome). The model included 549 metabolic genes that encoded 356 proteins catalyzing 497 gene-associated model reactions. Of these 497 reactions, 477 were associated with core metabolic genes, 18 with dispensable genes, and 2 with unique genes. This study, in addition to analyzing the metabolism of an environmentally important phylogenetic group on a pan-genome scale, provides valuable insights into Dehalococcoides metabolic limitations, low growth yields, and energy conservation. The model also provides a framework to anchor and compare disparate experimental data, as well as to give insights on the physiological impact of "incomplete" pathways, such as the TCA-cycle, CO(2) fixation, and cobalamin biosynthesis pathways. 
The model, referred to as iAI549, highlights the specialized and highly conserved nature of Dehalococcoides metabolism, and suggests that evolution of Dehalococcoides species is driven by the electron acceptor availability.}, } @article {pmid20802045, year = {2010}, author = {Xu, Z and Chen, X and Li, L and Li, T and Wang, S and Chen, H and Zhou, R}, title = {Comparative genomic characterization of Actinobacillus pleuropneumoniae.}, journal = {Journal of bacteriology}, volume = {192}, number = {21}, pages = {5625-5636}, pmid = {20802045}, issn = {1098-5530}, mesh = {Actinobacillus Infections/microbiology/veterinary ; Actinobacillus pleuropneumoniae/classification/*genetics/pathogenicity ; Animals ; Bacterial Proteins/genetics/metabolism ; Bacterial Typing Techniques ; Chromosome Mapping ; Chromosomes, Bacterial ; Cluster Analysis ; Gene Expression Regulation, Bacterial/physiology ; *Genome, Bacterial ; *Genomics ; Molecular Sequence Data ; Phylogeny ; Sequence Alignment/veterinary ; Species Specificity ; Swine ; Swine Diseases/microbiology ; Virulence ; }, abstract = {The Gram-negative bacterium Actinobacillus pleuropneumoniae is the etiologic agent of porcine contagious pleuropneumoniae, a lethal respiratory infectious disease causing great economic losses in the swine industry worldwide. In order to better interpret the genetic background of serotypic diversity, nine genomes of A. pleuropneumoniae reference strains of serovars 1, 2, 4, 6, 9, 10, 11, 12, and 13 were sequenced by using rapid high-throughput approach. Based on 12 genomes of corresponding serovar reference strains including three publicly available complete genomes (serovars 3, 5b, and 7) of this bacterium, we performed a comprehensive analysis of comparative genomics and first reported a global genomic characterization for this pathogen. Clustering of 26,012 predicted protein-coding genes showed that the pan genome of A. 
pleuropneumoniae consists of 3,303 gene clusters, which contain 1,709 core genome genes, 822 distributed genes, and 772 strain-specific genes. The genome components involved in the biogenesis of capsular polysaccharide and lipopolysaccharide O antigen relative to serovar diversity were compared, and their genetic diversity was depicted. Our findings shed more light on genomic features associated with serovar diversity of A. pleuropneumoniae and provide broader insight into both pathogenesis research and clinical/epidemiological application against the severe disease caused by this swine pathogen.}, } @article {pmid20799932, year = {2010}, author = {Sim, BM and Chantratita, N and Ooi, WF and Nandi, T and Tewhey, R and Wuthiekanun, V and Thaipadungpanit, J and Tumapa, S and Ariyaratne, P and Sung, WK and Sem, XH and Chua, HH and Ramnarayanan, K and Lin, CH and Liu, Y and Feil, EJ and Glass, MB and Tan, G and Peacock, SJ and Tan, P}, title = {Genomic acquisition of a capsular polysaccharide virulence cluster by non-pathogenic Burkholderia isolates.}, journal = {Genome biology}, volume = {11}, number = {8}, pages = {R89}, pmid = {20799932}, issn = {1474-760X}, support = {//Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Burkholderia/*genetics/isolation & purification/*pathogenicity ; Burkholderia Infections/immunology ; Genetic Speciation ; Genetic Variation ; *Genome, Bacterial ; Humans ; Metabolic Networks and Pathways/genetics ; Mice ; *Multigene Family ; Polysaccharides, Bacterial/biosynthesis ; Virulence/genetics ; }, abstract = {BACKGROUND: Burkholderia thailandensis is a non-pathogenic environmental saprophyte closely related to Burkholderia pseudomallei, the causative agent of the often fatal animal and human disease melioidosis. To study B. thailandensis genomic variation, we profiled 50 isolates using a pan-genome microarray comprising genomic elements from 28 Burkholderia strains and species.

RESULTS: Of 39 genomic regions variably present across the B. thailandensis strains, 13 regions corresponded to known genomic islands, while 26 regions were novel. Variant B. thailandensis isolates exhibited isolated acquisition of a capsular polysaccharide biosynthesis gene cluster (B. pseudomallei-like capsular polysaccharide) closely resembling a similar cluster in B. pseudomallei that is essential for virulence in mammals; presence of this cluster was confirmed by whole genome sequencing of a representative variant strain (B. thailandensis E555). Both whole-genome microarray and multi-locus sequence typing analysis revealed that the variant strains formed part of a phylogenetic subgroup distinct from the ancestral B. thailandensis population and were associated with atypical isolation sources when compared to the majority of previously described B. thailandensis strains. In functional assays, B. thailandensis E555 exhibited several B. pseudomallei-like phenotypes, including colony wrinkling, resistance to human complement binding, and intracellular macrophage survival. However, in murine infection assays, B. thailandensis E555 did not exhibit enhanced virulence relative to other B. thailandensis strains, suggesting that additional factors are required to successfully colonize and infect mammals.

CONCLUSIONS: The discovery of such novel variant strains demonstrates how unbiased genomic surveys of non-pathogenic isolates can reveal insights into the development and emergence of new pathogenic species.}, } @article {pmid20707916, year = {2010}, author = {Bohlin, J and Snipen, L and Cloeckaert, A and Lagesen, K and Ussery, D and Kristoffersen, AB and Godfroid, J}, title = {Genomic comparisons of Brucella spp. and closely related bacteria using base compositional and proteome based methods.}, journal = {BMC evolutionary biology}, volume = {10}, number = {}, pages = {249}, pmid = {20707916}, issn = {1471-2148}, mesh = {Brucella/classification/*genetics ; Comparative Genomic Hybridization ; DNA, Bacterial/genetics ; *Genome, Bacterial ; Markov Chains ; Models, Genetic ; Phylogeny ; Proteome/*genetics ; Proteomics/*methods ; Sequence Analysis, DNA/*methods ; }, abstract = {BACKGROUND: Classification of bacteria within the genus Brucella has been difficult due in part to considerable genomic homogeneity between the different species and biovars, in spite of clear differences in phenotypes. Therefore, many different methods have been used to assess Brucella taxonomy. In the current work, we examine 32 sequenced genomes from genus Brucella representing the six classical species, as well as more recently described species, using bioinformatical methods. Comparisons were made at the level of genomic DNA using oligonucleotide based methods (Markov chain based genomic signatures, genomic codon and amino acid frequencies based comparisons) and proteomes (all-against-all BLAST protein comparisons and pan-genomic analyses).

RESULTS: We found that the oligonucleotide based methods gave different results compared to that of the proteome based methods. Differences were also found between the oligonucleotide based methods used. Whilst the Markov chain based genomic signatures grouped the different species in genus Brucella according to host preference, the codon and amino acid frequencies based methods reflected small differences between the Brucella species. Only minor differences could be detected between all genera included in this study using the codon and amino acid frequencies based methods. Proteome comparisons were found to be in strong accordance with current Brucella taxonomy indicating a remarkable association between gene gain or loss on one hand and mutations in marker genes on the other. The proteome based methods found greater similarity between Brucella species and Ochrobactrum species than between species within genus Agrobacterium compared to each other. In other words, proteome comparisons of species within genus Agrobacterium were found to be more diverse than proteome comparisons between species in genus Brucella and genus Ochrobactrum. Pan-genomic analyses indicated that uptake of DNA from outside genus Brucella appears to be limited.

CONCLUSIONS: While both the proteome based methods and the Markov chain based genomic signatures were able to reflect environmental diversity between the different species and strains of genus Brucella, the genomic codon and amino acid frequencies based comparisons were not found adequate for such comparisons. The proteome comparison based phylogenies of the species in genus Brucella showed a surprising consistency with current Brucella taxonomy.}, } @article {pmid20700504, year = {2010}, author = {Béziat, V and Descours, B and Parizot, C and Debré, P and Vieillard, V}, title = {NK cell terminal differentiation: correlated stepwise decrease of NKG2A and acquisition of KIRs.}, journal = {PloS one}, volume = {5}, number = {8}, pages = {e11966}, pmid = {20700504}, issn = {1932-6203}, mesh = {CD56 Antigen/metabolism ; *Cell Differentiation ; Cell Line ; Gene Expression Profiling ; HLA Antigens/genetics ; Histocompatibility Antigens Class I/genetics ; Humans ; Interferon-gamma/biosynthesis ; Interleukin-12/pharmacology ; Interleukin-18/pharmacology ; Killer Cells, Natural/*cytology/drug effects/*metabolism ; Kinetics ; Lymphocyte Subsets/cytology/drug effects/metabolism ; NK Cell Lectin-Like Receptor Subfamily C/genetics/*metabolism ; Phenotype ; Receptors, KIR/genetics/*metabolism ; }, abstract = {BACKGROUND: Terminal differentiation of NK cells is crucial in maintaining broad responsiveness to pathogens and discriminating normal cells from cells in distress. Although it is well established that KIRs, in conjunction with NKG2A, play a major role in the NK cell education that determines whether cells will end up competent or hyporesponsive, the events underlying the differentiation are still debated.

METHODOLOGY/PRINCIPAL FINDINGS: A combination of complementary approaches to assess the kinetics of the appearance of each subset during development allowed us to obtain new insights into these terminal stages of differentiation, characterising their gene expression profiles at a pan-genomic level, their distinct surface receptor patterns and their prototypic effector functions. The present study supports the hypothesis that CD56dim cells derive from the CD56bright subset and suggests that NK cell responsiveness is determined by persistent inhibitory signals received during their education. We report here the inverse correlation of NKG2A expression with KIR expression and explore whether this correlation bestows functional competence on NK cells. We show that CD56dimNKG2A-KIR+ cells display the most differentiated phenotype associated to their unique ability to respond against HLA-E+ target cells. Importantly, after IL-12+IL-18 stimulation, reacquisition of NKG2A strongly correlates with IFN-gamma production in CD56dimNKG2A- NK cells.

CONCLUSIONS/SIGNIFICANCE: Together, these findings call for the reclassification of mature human NK cells into distinct subsets and support a new model, in which the NK cell differentiation and functional fate are based on a stepwise decrease of NKG2A and acquisition of KIRs.}, } @article {pmid20688752, year = {2010}, author = {Lefébure, T and Bitar, PD and Suzuki, H and Stanhope, MJ}, title = {Evolutionary dynamics of complete Campylobacter pan-genomes and the bacterial species concept.}, journal = {Genome biology and evolution}, volume = {2}, number = {}, pages = {646-655}, pmid = {20688752}, issn = {1759-6653}, support = {N01AI30054/AI/NIAID NIH HHS/United States ; }, mesh = {Campylobacter coli/*genetics ; Campylobacter jejuni/*genetics ; Chromosome Mapping ; *Evolution, Molecular ; Gene Transfer, Horizontal ; *Genome, Bacterial ; }, abstract = {Defining bacterial species and understanding the relative cohesiveness of different components of their genomes remains a fundamental problem in microbiology. Bacterial species tend to be comprised of both a set of core and dispensable genes, with the sum of these two components forming the species pan-genome. The role of the core and dispensable genes in defining bacterial species and the question of whether pan-genomes are finite or infinite remain unclear. Here we demonstrate, through the analysis of 96 genome sequences derived from two closely related sympatric sister species of pathogenic bacteria (Campylobacter coli and C. jejuni), that their pan-genome is indeed finite and that there are unique and cohesive features to each of their genomes defining their genomic identity. The two species have a similar pan-genome size; however, C. coli has acquired a larger core genome and each species has evolved a number of species-specific core genes, possibly reflecting different adaptive strategies. 
Genome-wide assessment of the level of lateral gene transfer within and between the two sister species, as well as within the core and non-core genes, demonstrates a resistance to interspecies recombination in the core genome of the two species and therefore provides persuasive support for the core genome hypothesis for bacterial species.}, } @article {pmid20678907, year = {2012}, author = {Mauray, A and Felgines, C and Morand, C and Mazur, A and Scalbert, A and Milenkovic, D}, title = {Bilberry anthocyanin-rich extract alters expression of genes related to atherosclerosis development in aorta of apo E-deficient mice.}, journal = {Nutrition, metabolism, and cardiovascular diseases : NMCD}, volume = {22}, number = {1}, pages = {72-80}, doi = {10.1016/j.numecd.2010.04.011}, pmid = {20678907}, issn = {1590-3729}, mesh = {Aldehyde Oxidase/genetics/metabolism ; Animals ; Anthocyanins/*pharmacology ; Antioxidants/pharmacology ; Aorta/metabolism/pathology ; Apolipoproteins E/*deficiency ; Atherosclerosis/*genetics/metabolism/pathology ; Cadherins/genetics/metabolism ; Carrier Proteins/genetics/metabolism ; Cell Adhesion Molecules/genetics/metabolism ; Claudins/genetics/metabolism ; Computational Biology ; Cytochrome P-450 CYP2E1/genetics/metabolism ; *Dietary Supplements ; Down-Regulation ; Fruit/chemistry ; Gene Expression Regulation, Plant ; Lipids/blood ; Male ; Membrane Glycoproteins ; Membrane Proteins/genetics/metabolism ; Mice ; Mice, Knockout ; Oxidative Stress/genetics ; Plant Extracts/*pharmacology ; Thioredoxins/genetics/metabolism ; Up-Regulation ; Vaccinium myrtillus/*chemistry ; }, abstract = {Intake of anthocyanin-rich foods has been associated with a reduced risk of cardiovascular diseases. We recently reported that a nutritional supplementation with a bilberry anthocyanin-rich extract (BE) attenuates atherosclerotic lesion development in apolipoprotein E-deficient (apoE[-]/[-]) mice. 
However, the mechanism(s) of their preventive action are not completely understood. Anthocyanins may alter mRNA levels of genes related to atherosclerosis in cultured macrophages and endothelial cells, but in vivo studies remain scarce. The aim of the present study was to explore the in vivo mechanisms of action of the same bilberry extract, administered by supplementation at a nutritional level, in the aorta of apo E[-]/[-] mice using a global transcriptomic approach. This study focused on the early stage of atherosclerosis development for better assessment of BE action on initiation mechanisms of this pathology. After a two week period, plasma lipid and antioxidant capacity were evaluated and the global genomic analysis was carried out using pangenomic microarrays. BE supplementation significantly improved hypercholesterolemia whereas the plasmatic antioxidant status remained unchanged. Nutrigenomic analysis identified 1261 genes which expression was modulated by BE in the aorta. Bioinformatic analysis revealed that these genes are implicated in different cellular processes such as oxidative stress, inflammation, transendothelial migration and angiogenesis, processes associated with atherosclerosis development/protection. Some of the most significantly down-regulated genes included genes coding for AOX1, CYP2E1 or TXNIP implicated in the regulation of oxidative stress, JAM-A coding for adhesion molecules or VEGFR2 implicate in regulation of angiogenesis. Other genes were up-regulated, such as CRB3, CLDN14 or CDH4 potentially associated with increased cell-cell adhesion and decreased paracellular permeability. 
These results provide a global integrated view of the mechanisms involved in the preventive action of bilberry anthocyanin-rich extract against atherosclerosis.}, } @article {pmid20634238, year = {2010}, author = {Bottacini, F and Medini, D and Pavesi, A and Turroni, F and Foroni, E and Riley, D and Giubellini, V and Tettelin, H and van Sinderen, D and Ventura, M}, title = {Comparative genomics of the genus Bifidobacterium.}, journal = {Microbiology (Reading, England)}, volume = {156}, number = {Pt 11}, pages = {3243-3254}, doi = {10.1099/mic.0.039545-0}, pmid = {20634238}, issn = {1465-2080}, mesh = {Bifidobacterium/*genetics ; *Comparative Genomic Hybridization ; DNA, Bacterial/genetics ; Gastrointestinal Tract/microbiology ; *Genome, Bacterial ; *Genomics ; Humans ; Multivariate Analysis ; Phylogeny ; Proteome ; RNA, Ribosomal, 16S/genetics ; Sequence Alignment ; Sequence Analysis, DNA ; }, abstract = {Whole-genome sequencing efforts have revolutionized the study of bifidobacterial genetics and physiology. Unfortunately, the sequence of a single genome does not provide information on bifidobacterial genetic diversity and on how genetic variability supports improved adaptation of these bacteria to the environment of the human gastrointestinal tract (GIT). Analysis of nine genomes from bifidobacterial species showed that such genomes display an open pan-genome structure. Mathematical extrapolation of the data indicates that the genome reservoir available to the bifidobacterial pan-genome consists of more than 5000 genes, many of which are uncharacterized, but which are probably important to provide adaptive abilities pertinent to the human GIT. We also define a core bifidobacterial gene set which will undoubtedly provide a new baseline from which one can examine the evolution of bifidobacteria. 
Phylogenetic investigation performed on a total of 506 orthologues that are common to nine complete bifidobacterial genomes allowed the construction of a Bifidobacterium supertree which is largely concordant with the phylogenetic tree obtained using 16S rRNA genes. Moreover, this supertree provided a more robust phylogenetic resolution than the 16S rRNA gene-based analysis. This comparative study of the genus Bifidobacterium thus presents a foundation for future functional analyses of this important group of GIT bacteria.}, } @article {pmid20623278, year = {2010}, author = {Lukjancenko, O and Wassenaar, TM and Ussery, DW}, title = {Comparison of 61 sequenced Escherichia coli genomes.}, journal = {Microbial ecology}, volume = {60}, number = {4}, pages = {708-720}, pmid = {20623278}, issn = {1432-184X}, mesh = {Chromosome Mapping ; Escherichia coli/classification/*genetics ; *Genome, Bacterial ; Molecular Sequence Data ; Phylogeny ; }, abstract = {Escherichia coli is an important component of the biosphere and is an ideal model for studies of processes involved in bacterial genome evolution. Sixty-one publically available E. coli and Shigella spp. sequenced genomes are compared, using basic methods to produce phylogenetic and proteomics trees, and to identify the pan- and core genomes of this set of sequenced strains. A hierarchical clustering of variable genes allowed clear separation of the strains into clusters, including known pathotypes; clinically relevant serotypes can also be resolved in this way. In contrast, when in silico MLST was performed, many of the various strains appear jumbled and less well resolved. The predicted pan-genome comprises 15,741 gene families, and only 993 (6%) of the families are represented in every genome, comprising the core genome. The variable or 'accessory' genes thus make up more than 90% of the pan-genome and about 80% of a typical genome; some of these variable genes tend to be co-localized on genomic islands. 
The diversity within the species E. coli, and the overlap in gene content between this and related species, suggests a continuum rather than sharp species borders in this group of Enterobacteriaceae.}, } @article {pmid20593022, year = {2010}, author = {Darling, AE and Mau, B and Perna, NT}, title = {progressiveMauve: multiple genome alignment with gene gain, loss and rearrangement.}, journal = {PloS one}, volume = {5}, number = {6}, pages = {e11147}, pmid = {20593022}, issn = {1932-6203}, support = {R01 GM062994/GM/NIGMS NIH HHS/United States ; R01-GM62994/GM/NIGMS NIH HHS/United States ; HHSN266200400040C//PHS HHS/United States ; }, mesh = {Algorithms ; *Gene Rearrangement ; *Genome, Bacterial ; Models, Theoretical ; *Sequence Alignment ; }, abstract = {BACKGROUND: Multiple genome alignment remains a challenging problem. Effects of recombination including rearrangement, segmental duplication, gain, and loss can create a mosaic pattern of homology even among closely related organisms.

METHODOLOGY/PRINCIPAL FINDINGS: We describe a new method to align two or more genomes that have undergone rearrangements due to recombination and substantial amounts of segmental gain and loss (flux). We demonstrate that the new method can accurately align regions conserved in some, but not all, of the genomes, an important case not handled by our previous work. The method uses a novel alignment objective score called a sum-of-pairs breakpoint score, which facilitates accurate detection of rearrangement breakpoints when genomes have unequal gene content. We also apply a probabilistic alignment filtering method to remove erroneous alignments of unrelated sequences, which are commonly observed in other genome alignment methods. We describe new metrics for quantifying genome alignment accuracy which measure the quality of rearrangement breakpoint predictions and indel predictions. The new genome alignment algorithm demonstrates high accuracy in situations where genomes have undergone biologically feasible amounts of genome rearrangement, segmental gain and loss. We apply the new algorithm to a set of 23 genomes from the genera Escherichia, Shigella, and Salmonella. Analysis of whole-genome multiple alignments allows us to extend the previously defined concepts of core- and pan-genomes to include not only annotated genes, but also non-coding regions with potential regulatory roles. The 23 enterobacteria have an estimated core-genome of 2.46Mbp conserved among all taxa and a pan-genome of 15.2Mbp. We document substantial population-level variability among these organisms driven by segmental gain and loss. Interestingly, much variability lies in intergenic regions, suggesting that the Enterobacteriaceae may exhibit regulatory divergence.

CONCLUSIONS: The multiple genome alignments generated by our software provide a platform for comparative genomic and population genomic studies. Free, open-source software implementing the described genome alignment approach is available from http://gel.ahabs.wisc.edu/mauve.}, } @article {pmid20574927, year = {2010}, author = {Satih, S and Chalabi, N and Rabiau, N and Bignon, YJ and Bernard-Gallon, DJ}, title = {Transcriptional profiling of breast cancer cells exposed to soy phytoestrogens after BRCA1 knockdown with a whole human genome microarray approach.}, journal = {Nutrition and cancer}, volume = {62}, number = {5}, pages = {659-667}, doi = {10.1080/01635581003605540}, pmid = {20574927}, issn = {1532-7914}, mesh = {Breast Neoplasms/*etiology/genetics/prevention & control ; Cell Line, Tumor ; *Gene Expression Profiling ; Gene Expression Regulation, Neoplastic/drug effects ; Genes, BRCA1/*physiology ; Genes, bcl-2 ; Humans ; Isoflavones/*pharmacology ; NF-kappa B/physiology ; Oligonucleotide Array Sequence Analysis/*methods ; Protein Tyrosine Phosphatase, Non-Receptor Type 13/genetics ; RNA Interference ; RNA, Messenger/analysis ; Receptors, Estrogen/analysis ; Reverse Transcriptase Polymerase Chain Reaction ; bcl-2-Associated X Protein/genetics ; }, abstract = {The estrogen-like properties of the soy phytoestrogens could modulate the estrogen-dependent expression of BRCA1 oncosuppressor, which is highly involved in hereditary and sporadic breast cancer. In order to better understand the importance of BRCA1 function and the role of other genes involved around BRCA1 in the phytoestrogen pathways, we have exploited the BRCA1-specific knockdown by RNA interference using double stranded small interfering RNA (siRNA) in breast tumor cell lines (MCF-7, MDA-MB-231) and a fibrokystic breast cell line (MCF-10a) and treated with 18.5 microM genistein or 78.5 microM daidzein for 72 h. 
We used pangenomic microarrays and subsequently TLDA analysis and demonstrated that cumulated BRCA1 knockdown with soy isoflavone supplementations in breast cell lines seems to modulate apoptosis, MAPK pathway, cell communication, xenobiotic metabolism, and sterol metabolism. Also, transient BRCA1 deficiency in breast cell lines significantly diminished or reversed gene expression after phytoestrogen supplementation. We observed that the significant decrease expression of apoptosis-related genes such as BAX, and the increase expression of BCL2, under BRCA1 knockdown condition, were completely reversed after phytoestrogen treatments. These results underlined the role of BRCA1 expression in breast carcinogenesis and suggested that soy phytoestrogen supplementation could play a role in cancer.}, } @article {pmid20551685, year = {2010}, author = {Colson, P and Raoult, D}, title = {Gene repertoire of amoeba-associated giant viruses.}, journal = {Intervirology}, volume = {53}, number = {5}, pages = {330-343}, doi = {10.1159/000312918}, pmid = {20551685}, issn = {1423-0100}, mesh = {Amoeba/*virology ; Archaea/genetics ; Bacteria/genetics ; DNA Viruses/*genetics ; Eukaryota/genetics ; Evolution, Molecular ; Gene Transfer, Horizontal ; *Genes, Viral ; *Genome, Viral ; Open Reading Frames ; Recombination, Genetic ; Sequence Homology ; Viral Proteins/*genetics ; }, abstract = {Acanthamoeba polyphaga mimivirus, Marseillevirus, and Sputnik, a virophage, are intra-amoebal viruses that have been isolated from water collected in cooling towers. They have provided fascinating data and have raised exciting questions about viruses definition and evolution. Mimivirus and Marseillevirus have been classified in the nucleo-cytoplasmic large DNA viruses (NCLDVs) class. Their genomes are the largest and fifth largest viral genomes sequenced so far. 
The gene repertoire of these amoeba-associated viruses can be divided into four groups: the core genome, genes acquired by lateral gene transfer, duplicated genes, and ORFans. Open reading frames (ORFs) that have homologs in the NCLDVs core gene set represent 2.9 and 6.1% of the Mimivirus and Marseillevirus gene contents, respectively. A substantial proportion of the Mimivirus, Marseillevirus and Sputnik ORFs exhibit sequence similarities to homologs found in bacteria, archaea, eukaryotes or viruses. The large amount of chimeric genes in these viral genomes might have resulted from acquisitions by lateral gene transfers, implicating sympatric bacteria and viruses with an intra-amoebal lifestyle. In addition, lineage-specific gene expansion may have played a major role in the genome shaping. Altogether, the data so far accumulated on amoeba-associated giant viruses are a powerful incentive to isolate and study additional strains to gain better understanding of their pangenome.}, } @article {pmid20550686, year = {2010}, author = {Remenant, B and Coupat-Goutaland, B and Guidot, A and Cellier, G and Wicker, E and Allen, C and Fegan, M and Pruvost, O and Elbaz, M and Calteau, A and Salvignol, G and Mornico, D and Mangenot, S and Barbe, V and Médigue, C and Prior, P}, title = {Genomes of three tomato pathogens within the Ralstonia solanacearum species complex reveal significant evolutionary divergence.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {379}, pmid = {20550686}, issn = {1471-2164}, mesh = {Comparative Genomic Hybridization ; Conserved Sequence ; *Evolution, Molecular ; Genes, Bacterial/genetics ; *Genetic Variation ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Solanum lycopersicum/*microbiology ; Oligonucleotide Array Sequence Analysis ; Phylogeny ; Plasmids/genetics ; Ralstonia solanacearum/*genetics/metabolism ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: The Ralstonia solanacearum species complex includes thousands 
of strains pathogenic to an unusually wide range of plant species. These globally dispersed and heterogeneous strains cause bacterial wilt diseases, which have major socio-economic impacts. Pathogenicity is an ancestral trait in R. solanacearum and strains with high genetic variation can be subdivided into four phylotypes, correlating to isolates from Asia (phylotype I), the Americas (phylotype IIA and IIB), Africa (phylotype III) and Indonesia (phylotype IV). Comparison of genome sequences strains representative of this phylogenetic diversity can help determine which traits allow this bacterium to be such a pathogen of so many different plant species and how the bacteria survive in many different habitats.

RESULTS: The genomes of three tomato bacterial wilt pathogens, CFBP2957 (phy. IIA), CMR15 (phy. III) and PSI07 (phy. IV) were sequenced and manually annotated. These genomes were compared with those of three previously sequenced R. solanacearum strains: GMI1000 (tomato, phy. I), IPO1609 (potato, phy. IIB), and Molk2 (banana, phy. IIB). The major genomic features (size, G+C content, number of genes) were conserved across all of the six sequenced strains. Despite relatively high genetic distances (calculated from average nucleotide identity) and many genomic rearrangements, more than 60% of the genes of the megaplasmid and 70% of those on the chromosome are syntenic. The three new genomic sequences revealed the presence of several previously unknown traits, probably acquired by horizontal transfers, within the genomes of R. solanacearum, including a type IV secretion system, a rhi-type anti-mitotic toxin and two small plasmids. Genes involved in virulence appear to be evolving at a faster rate than the genome as a whole.

CONCLUSIONS: Comparative analysis of genome sequences and gene content confirmed the differentiation of R. solanacearum species complex strains into four phylotypes. Genetic distances between strains, in conjunction with CGH analysis of a larger set of strains, revealed differences great enough to consider reclassification of the R. solanacearum species complex into three species. The data are still too fragmentary to link genomic classification and phenotypes, but these new genome sequences identify a pan-genome more representative of the diversity in the R. solanacearum species complex.}, } @article {pmid20537180, year = {2010}, author = {Lilburn, TG and Gu, J and Cai, H and Wang, Y}, title = {Comparative genomics of the family Vibrionaceae reveals the wide distribution of genes encoding virulence-associated proteins.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {369}, pmid = {20537180}, issn = {1471-2164}, support = {SC1 GM081068/GM/NIGMS NIH HHS/United States ; SC1 AI080579-05/AI/NIAID NIH HHS/United States ; SC1AI080579/AI/NIAID NIH HHS/United States ; SC1 GM081068-01/GM/NIGMS NIH HHS/United States ; 1R21AI067543/AI/NIAID NIH HHS/United States ; SC1GM081068/GM/NIGMS NIH HHS/United States ; SC1 AI080579-04/AI/NIAID NIH HHS/United States ; SC1 AI080579-03/AI/NIAID NIH HHS/United States ; SC1 AI080579/AI/NIAID NIH HHS/United States ; SC1 AI080579-02/AI/NIAID NIH HHS/United States ; }, mesh = {Bacterial Proteins/*genetics/*metabolism ; Evolution, Molecular ; Genes, Bacterial/*genetics ; Genetic Variation ; *Genomics ; Proteome/genetics/metabolism ; Sequence Homology, Amino Acid ; Species Specificity ; Vibrionaceae/*genetics/*pathogenicity ; }, abstract = {BACKGROUND: Species of the family Vibrionaceae are ubiquitous in marine environments. Several of these species are important pathogens of humans and marine species. Evidence indicates that genetic exchange plays an important role in the emergence of new pathogenic strains within this family. 
Data from the sequenced genomes of strains in this family could show how the genes encoded by all these strains, known as the pangenome, are distributed. Information about the core, accessory and panproteome of this family can show how, for example, genes encoding virulence-associated proteins are distributed and help us understand how virulence emerges.

RESULTS: We deduced the complete set of orthologs for eleven strains from this family. The core proteome consists of 1,882 orthologous groups, which is 28% of the 6,629 orthologous groups in this family. There were 4,411 accessory orthologous groups (i.e., proteins that occurred in from 2 to 10 proteomes) and 5,584 unique proteins (encoded once on only one of the eleven genomes). Proteins that have been associated with virulence in V. cholerae were widely distributed across the eleven genomes, but the majority was found only on the genomes of the two V. cholerae strains examined.

CONCLUSIONS: The proteomes are reflective of the differing evolutionary trajectories followed by different strains to similar phenotypes. The composition of the proteomes supports the notion that genetic exchange among species of the Vibrionaceae is widespread and that this exchange aids these species in adapting to their environments.}, } @article {pmid20504335, year = {2010}, author = {Alcaraz, LD and Moreno-Hagelsieb, G and Eguiarte, LE and Souza, V and Herrera-Estrella, L and Olmedo, G}, title = {Understanding the evolutionary relationships and major traits of Bacillus through comparative genomics.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {332}, pmid = {20504335}, issn = {1471-2164}, support = {55005946//Howard Hughes Medical Institute/United States ; }, mesh = {Bacillus/*genetics/physiology ; *Evolution, Molecular ; Genes, Bacterial/genetics ; Genomics/*methods ; Phylogeny ; Spores, Bacterial/genetics ; }, abstract = {BACKGROUND: The presence of Bacillus in very diverse environments reflects the versatile metabolic capabilities of a widely distributed genus. Traditional phylogenetic analysis based on limited gene sampling is not adequate for resolving the genus evolutionary relationships. By distinguishing between core and pan-genome, we determined the evolutionary and functional relationships of known Bacillus.

RESULTS: Our analysis is based upon twenty complete and draft Bacillus genomes, including a newly sequenced Bacillus isolate from an aquatic environment that we report for the first time here. Using a core genome, we were able to determine the phylogeny of known Bacilli, including aquatic strains whose position in the phylogenetic tree could not be unambiguously determined in the past. Using the pan-genome from the sequenced Bacillus, we identified functional differences, such as carbohydrate utilization and genes involved in signal transduction, which distinguished the taxonomic groups. We also assessed the genetic architecture of the defining traits of Bacillus, such as sporulation and competence, and showed that less than one third of the B. subtilis genes are conserved across other Bacilli. Most variation was shown to occur in genes that are needed to respond to environmental cues, suggesting that Bacilli have genetically specialized to allow for the occupation of diverse habitats and niches.

CONCLUSIONS: The aquatic Bacilli are defined here for the first time as a group through the phylogenetic analysis of 814 genes that comprise the core genome. Our data distinguished between genomic components, especially core vs. pan-genome to provide insight into phylogeny and function that would otherwise be difficult to achieve. A phylogeny may mask the diversity of functions, which we tried to uncover in our approach. The diversity of sporulation and competence genes across the Bacilli was unexpected based on previous studies of the B. subtilis model alone. The challenge of uncovering the novelties and variations among genes of the non-subtilis groups still remains. This task will be best accomplished by directing efforts toward understanding phylogenetic groups with similar ecological niches.}, } @article {pmid20497596, year = {2010}, author = {Fischer, M and Berthold, F}, title = {The role of complex genomic alterations in neuroblastoma risk estimation.}, journal = {Genome medicine}, volume = {2}, number = {5}, pages = {31}, pmid = {20497596}, issn = {1756-994X}, abstract = {Specific genomic alterations, such as loss of the chromosomal region 11q or amplification of the oncogene MYCN, are well established markers of poor outcome in neuroblastoma. The advent of microarray-based comparative genomic hybridization (array-CGH) has enabled the analysis of pangenomic alteration profiles in the cancer genome, offering the possibility of identifying new prognostic markers from complex aberration patterns. Results from recent studies examining large primary neuroblastoma cohorts by array-CGH show that global genomic profiles may add significant prognostic information. 
Here, we discuss potential implications for risk estimation of neuroblastoma patients in clinical practice as well as for the understanding of neuroblastoma pathogenesis.}, } @article {pmid20489017, year = {2010}, author = {{Human Microbiome Jumpstart Reference Strains Consortium} and Nelson, KE and Weinstock, GM and Highlander, SK and Worley, KC and Creasy, HH and Wortman, JR and Rusch, DB and Mitreva, M and Sodergren, E and Chinwalla, AT and Feldgarden, M and Gevers, D and Haas, BJ and Madupu, R and Ward, DV and Birren, BW and Gibbs, RA and Methe, B and Petrosino, JF and Strausberg, RL and Sutton, GG and White, OR and Wilson, RK and Durkin, S and Giglio, MG and Gujja, S and Howarth, C and Kodira, CD and Kyrpides, N and Mehta, T and Muzny, DM and Pearson, M and Pepin, K and Pati, A and Qin, X and Yandava, C and Zeng, Q and Zhang, L and Berlin, AM and Chen, L and Hepburn, TA and Johnson, J and McCorrison, J and Miller, J and Minx, P and Nusbaum, C and Russ, C and Sykes, SM and Tomlinson, CM and Young, S and Warren, WC and Badger, J and Crabtree, J and Markowitz, VM and Orvis, J and Cree, A and Ferriera, S and Fulton, LL and Fulton, RS and Gillis, M and Hemphill, LD and Joshi, V and Kovar, C and Torralba, M and Wetterstrand, KA and Abouellleil, A and Wollam, AM and Buhay, CJ and Ding, Y and Dugan, S and FitzGerald, MG and Holder, M and Hostetler, J and Clifton, SW and Allen-Vercoe, E and Earl, AM and Farmer, CN and Liolios, K and Surette, MG and Xu, Q and Pohl, C and Wilczek-Boney, K and Zhu, D}, title = {A catalog of reference genomes from the human microbiome.}, journal = {Science (New York, N.Y.)}, volume = {328}, number = {5981}, pages = {994-999}, pmid = {20489017}, issn = {1095-9203}, support = {U54-HG004969/HG/NHGRI NIH HHS/United States ; U54-HG003079/HG/NHGRI NIH HHS/United States ; U54 HG004973-02/HG/NHGRI NIH HHS/United States ; U54 HG003273-04S1/HG/NHGRI NIH HHS/United States ; /CAPMC/CIHR/Canada ; U54 HG003273/HG/NHGRI NIH HHS/United States ; U54 HG003273-05S2/HG/NHGRI NIH HHS/United States ; U54 
HG003273-08/HG/NHGRI NIH HHS/United States ; U54 HG003273-04/HG/NHGRI NIH HHS/United States ; U54 HG003273-06S1/HG/NHGRI NIH HHS/United States ; U54-AI084844/AI/NIAID NIH HHS/United States ; U54 HG003079/HG/NHGRI NIH HHS/United States ; U54 HG003273-06S2/HG/NHGRI NIH HHS/United States ; U54-HG003273/HG/NHGRI NIH HHS/United States ; U54 HG004973/HG/NHGRI NIH HHS/United States ; U54 HG003067/HG/NHGRI NIH HHS/United States ; U54 HG003273-05S1/HG/NHGRI NIH HHS/United States ; U54 AI084844/AI/NIAID NIH HHS/United States ; U54 HG003273-05/HG/NHGRI NIH HHS/United States ; U54 HG004973-01/HG/NHGRI NIH HHS/United States ; U54-HG004968/HG/NHGRI NIH HHS/United States ; U54 HG004968/HG/NHGRI NIH HHS/United States ; U54 HG004969/HG/NHGRI NIH HHS/United States ; N01 AI30071/AI/NIAID NIH HHS/United States ; U54 HG003273-07/HG/NHGRI NIH HHS/United States ; U54-HG004973/HG/NHGRI NIH HHS/United States ; HHSN272200900017C/AI/NIAID NIH HHS/United States ; U54 HG003273-06/HG/NHGRI NIH HHS/United States ; N01 AI030071/AI/NIAID NIH HHS/United States ; }, mesh = {Bacteria/classification/genetics ; Bacterial Proteins/chemistry/genetics ; Biodiversity ; Computational Biology ; Databases, Genetic ; Gastrointestinal Tract/microbiology ; Genes, Bacterial ; Genetic Variation ; Genome, Archaeal ; *Genome, Bacterial ; Humans ; Metagenome/*genetics ; Metagenomics/methods/standards ; Mouth/microbiology ; Peptides/chemistry/genetics ; Phylogeny ; Respiratory System/microbiology ; *Sequence Analysis, DNA/standards ; Skin/microbiology ; Urogenital System/microbiology ; }, abstract = {The human microbiome refers to the community of microorganisms, including prokaryotes, viruses, and microbial eukaryotes, that populate the human body. The National Institutes of Health launched an initiative that focuses on describing the diversity of microbial species that are associated with health and disease. 
The first phase of this initiative includes the sequencing of hundreds of microbial reference genomes, coupled to metagenomic sequencing from multiple body sites. Here we present results from an initial reference genome sequencing of 178 microbial genomes. From 547,968 predicted polypeptides that correspond to the gene complement of these strains, previously unidentified ("novel") polypeptides that had both unmasked sequence length greater than 100 amino acids and no BLASTP match to any nonreference entry in the nonredundant subset were defined. This analysis resulted in a set of 30,867 polypeptides, of which 29,987 (approximately 97%) were unique. In addition, this set of microbial genomes allows for approximately 40% of random sequences from the microbiome of the gastrointestinal tract to be associated with organisms based on the match criteria used. Insights into pan-genome analysis suggest that we are still far from saturating microbial species genetic data sets. In addition, the associated metrics and standards used by our group for quality assurance are presented.}, } @article {pmid20478826, year = {2010}, author = {Fischer, W and Windhager, L and Rohrer, S and Zeiller, M and Karnholz, A and Hoffmann, R and Zimmer, R and Haas, R}, title = {Strain-specific genes of Helicobacter pylori: genome evolution driven by a novel type IV secretion system and genomic island transfer.}, journal = {Nucleic acids research}, volume = {38}, number = {18}, pages = {6089-6101}, pmid = {20478826}, issn = {1362-4962}, mesh = {Bacterial Proteins/genetics ; Base Sequence ; *Evolution, Molecular ; Gene Transfer, Horizontal ; Genes, Bacterial ; *Genome, Bacterial ; *Genomic Islands ; Helicobacter pylori/enzymology/*genetics ; Molecular Sequence Data ; Recombinases/genetics/metabolism ; Species Specificity ; }, abstract = {The availability of multiple bacterial genome sequences has revealed a surprising extent of variability among strains of the same species. 
The human gastric pathogen Helicobacter pylori is known as one of the most genetically diverse species. We have compared the genome sequence of the duodenal ulcer strain P12 and six other H. pylori genomes to elucidate the genetic repertoire and genome evolution mechanisms of this species. In agreement with previous findings, we estimate that the core genome comprises about 1200 genes and that H. pylori possesses an open pan-genome. Strain-specific genes are preferentially located at potential genome rearrangement sites or in distinct plasticity zones, suggesting two different mechanisms of genome evolution. The P12 genome contains three plasticity zones, two of which encode type IV secretion systems and have typical features of genomic islands. We demonstrate for the first time that one of these islands is capable of self-excision and horizontal transfer by a conjugative process. We also show that excision is mediated by a protein of the XerD family of tyrosine recombinases. Thus, in addition to its natural transformation competence, conjugative transfer of genomic islands has to be considered as an important source of genetic diversity in H. 
pylori.}, } @article {pmid20478438, year = {2010}, author = {Luu-The, V and Labrie, F}, title = {The intracrine sex steroid biosynthesis pathways.}, journal = {Progress in brain research}, volume = {181}, number = {}, pages = {177-192}, doi = {10.1016/S0079-6123(08)81010-2}, pmid = {20478438}, issn = {1875-7855}, mesh = {17-Hydroxysteroid Dehydrogenases/genetics/metabolism ; 3-Oxo-5-alpha-Steroid 4-Dehydrogenase/genetics/metabolism ; Adrenal Glands/chemistry/metabolism ; Animals ; Dehydroepiandrosterone/metabolism ; Dihydrotestosterone/*metabolism ; Estradiol/*biosynthesis/genetics ; Evolution, Molecular ; Female ; Gonadotropin-Releasing Hormone/metabolism ; Humans ; Male ; Ovary/chemistry/metabolism ; Steroid Hydroxylases/classification/genetics/metabolism ; Testis/chemistry/metabolism ; }, abstract = {There is an increasing number of differences reported between the steroidogenesis pathways described in the traditional literature related to gonadal steroidogenesis and the more recent observations achieved using new technologies, especially molecular cloning, pangenomic expression studies, precise quantification of mRNA expression using real-time PCR, use of steroidogenic enzymes stably transfected in cells, detailed enzymatic activity analysis in cultured cell lines and mass spectrometry analysis of steroids. The objective of this chapter is to present steroidogenesis in the light of new findings that demonstrate pathways of biosynthesis of estradiol (E(2)) and dihydrotestosterone (DHT) from adrenal dehydroepiandrosterone (DHEA) in peripheral intracrine tissues which do not involve testosterone as intermediate as classically found in the testis and ovary. Steroidogenic enzymes different from those of the ovary and testis act in a tissue-specific manner to catalyze the transformation of DHEA into active sex steroids. 
These new pathways are especially important in post-menopausal women where all estrogens and practically all androgens are made at their site of action in peripheral tissues from DHEA, the precursor of adrenal origin. In men, on the other hand, from 40 to 50% of androgens are made in peripheral tissues from adrenal DHEA, thus indicating the major importance of the intracrine pathways in both men and women. We also examine the molecular evolution of steroidogenic enzymes which explains the major differences in steroid metabolism observed between laboratory animals and humans.}, } @article {pmid20455703, year = {2010}, author = {Satih, S and Chalabi, N and Rabiau, N and Bosviel, R and Fontana, L and Bignon, YJ and Bernard-Gallon, DJ}, title = {Gene expression profiling of breast cancer cell lines in response to soy isoflavones using a pangenomic microarray approach.}, journal = {Omics : a journal of integrative biology}, volume = {14}, number = {3}, pages = {231-238}, pmid = {20455703}, issn = {1557-8100}, mesh = {Breast Neoplasms/*genetics ; Cell Line, Tumor/*drug effects ; Cells, Cultured ; Female ; Gene Expression Profiling/methods ; Gene Expression Regulation, Neoplastic/*drug effects ; Genistein/*pharmacology ; Genome, Human ; Humans ; Isoflavones/*pharmacology ; Microarray Analysis/*methods ; Molecular Sequence Data ; Multigene Family ; Phytoestrogens/*pharmacology ; }, abstract = {Although the rate of breast cancer differs between women in Asian and Western countries, molecular genetics/genomics basis of this epidemiological observation remains elusive. Moreover, the intake of phytoestrogens is associated with a lower incidence of breast cancer. Genistein and daidzein are the primary soy isoflavones with a chemical structure similar to estrogens. Conceivably, the actions of phytoestrogens on gene expression signatures might mediate their postulated effects on breast cancer pathogenesis. 
The present study evaluated the transcriptional responsiveness of breast cancer cells to soy phytoestrogens using a whole-genome microarray-based approach. Human breast cancer cell lines and a fibrocystic breast cell line were treated with genistein or daidzein. We identified 278 and 334 differentially expressed genes after genistein or daidzein treatment, respectively, in estrogen-positive (MCF-7) and estrogen-negative (MDA-MB-231, MCF-10a) cells. Hierarchical clustering of this finding revealed a significant modulation, respectively, of 246 or 169 genes after genistein or daidzein exposures. Importantly, the molecular pathways for the differentially expressed genes included those that relate to cell communication, biodegradation of xenobiotics, lipid metabolism, signal transduction, and cell growth/death. These molecular observations collectively contribute to a growing knowledgebase on the putative mechanism(s) of action of phytoestrogens in breast cancer pathogenesis and chemoprevention.}, } @article {pmid20435682, year = {2010}, author = {Shao, Y and He, X and Harrison, EM and Tai, C and Ou, HY and Rajakumar, K and Deng, Z}, title = {mGenomeSubtractor: a web-based tool for parallel in silico subtractive hybridization analysis of multiple bacterial genomes.}, journal = {Nucleic acids research}, volume = {38}, number = {Web Server issue}, pages = {W194-200}, pmid = {20435682}, issn = {1362-4962}, mesh = {Genes, Bacterial ; *Genome, Bacterial ; Genomics/methods ; Internet ; Oligonucleotide Array Sequence Analysis ; *Sequence Alignment ; *Software ; Species Specificity ; }, abstract = {mGenomeSubtractor performs an mpiBLAST-based comparison of reference bacterial genomes against multiple user-selected genomes for investigation of strain variable accessory regions. 
With parallel computing architecture, mGenomeSubtractor is able to run rapid BLAST searches of the segmented reference genome against multiple subject genomes at the DNA or amino acid level within a minute. In addition to comparison of protein coding sequences, the highly flexible sliding window-based genome fragmentation approach offered can be used to identify short unique sequences within or between genes. mGenomeSubtractor provides powerful schematic outputs for exploration of identified core and accessory regions, including searches against databases of mobile genetic elements, virulence factors or bacterial essential genes, examination of G+C content and binucleotide distribution bias, and integrated primer design tools. mGenomeSubtractor also allows for the ready definition of species-specific gene pools based on available genomes. Pan-genomic arrays can be easily developed using the efficient oligonucleotide design tool. This simple high-throughput in silico 'subtractive hybridization' analytical tool will support the rapidly escalating number of comparative bacterial genomics studies aimed at defining genomic biomarkers of evolutionary lineage, phenotype, pathotype, environmental adaptation and/or disease-association of diverse bacterial species. 
mGenomeSubtractor is freely available to all users without any login requirement at: http://bioinfo-mml.sjtu.edu.cn/mGS/.}, } @article {pmid20416515, year = {2010}, author = {Mueller, CK and Thorwarth, M and Schultze-Mosgau, S}, title = {Influence of insertion protocol and implant shoulder design on inflammatory infiltration and gene expression in peri-implant soft tissue during nonsubmerged dental implant healing.}, journal = {Oral surgery, oral medicine, oral pathology, oral radiology, and endodontics}, volume = {109}, number = {5}, pages = {e11-9}, doi = {10.1016/j.tripleo.2010.01.003}, pmid = {20416515}, issn = {1528-395X}, mesh = {Animals ; Biopsy ; Dental Implantation, Endosseous/*methods ; *Dental Implants ; *Dental Prosthesis Design ; Female ; Gene Expression Profiling ; Gingiva/*pathology ; Leukocyte Count ; Leukocytes/pathology ; Lymphocytes/pathology ; Macrophages/pathology ; Mouth Mucosa/pathology ; Osseointegration/physiology ; Osteotomy/methods ; Periodontium/*pathology ; Random Allocation ; Surface Properties ; Surgical Flaps ; Swine ; Swine, Miniature ; Time Factors ; Wound Healing/physiology ; }, abstract = {OBJECTIVE: This study aimed at elucidating the influence of insertion protocol and implant shoulder design on peri-implant soft tissue healing.

STUDY DESIGN: One month after removal of all maxillary premolars in 12 minipigs, 4 implants were installed in each quadrant. According to implant shoulder design, 3 groups were established: 1) rough, 0.4 mm; 2) smooth, 3 mm; and 3) smooth, 0.4 mm. One quadrant was randomized to flapless insertion, and the other was used for flap surgery in each animal. Mucosa biopsies were retrieved 1, 2, 4, and 12 weeks after surgery and subjected to a leukocyte count as well as pangenomic gene expression analysis.

RESULTS: Flapless surgery shortened the period of postsurgical inflammation as shown by the leukocyte count and induced early constructive remodeling as indicated by the microarray. Regarding design of the implant shoulder, leukocyte count values were lowest for group 3.

CONCLUSION: Flapless surgery in combination with group 3 implants appears to enhance peri-implant soft tissue healing.}, } @article {pmid20407445, year = {2010}, author = {Dahlman, I and Mejhert, N and Linder, K and Agustsson, T and Mutch, DM and Kulyte, A and Isaksson, B and Permert, J and Petrovic, N and Nedergaard, J and Sjölin, E and Brodin, D and Clement, K and Dahlman-Wright, K and Rydén, M and Arner, P}, title = {Adipose tissue pathways involved in weight loss of cancer cachexia.}, journal = {British journal of cancer}, volume = {102}, number = {10}, pages = {1541-1548}, pmid = {20407445}, issn = {1532-1827}, mesh = {Adipose Tissue/*metabolism ; Aged ; Cachexia/etiology/*genetics ; Female ; Gene Expression ; Gene Expression Profiling ; Gene Expression Regulation/genetics ; Humans ; Male ; Neoplasms/complications/*genetics/metabolism ; Obesity/genetics/metabolism ; Oligonucleotide Array Sequence Analysis ; Reverse Transcriptase Polymerase Chain Reaction ; Signal Transduction/*genetics ; Weight Loss/*genetics ; }, abstract = {BACKGROUND: The regulatory gene pathways that accompany loss of adipose tissue in cancer cachexia are unknown and were explored using pangenomic transcriptome profiling.

METHODS: Global gene expression profiles of abdominal subcutaneous adipose tissue were studied in gastrointestinal cancer patients with (n=13) or without (n=14) cachexia.

RESULTS: Cachexia was accompanied by preferential loss of adipose tissue and decreased fat cell volume, but not number. Adipose tissue pathways regulating energy turnover were upregulated, whereas genes in pathways related to cell and tissue structure (cellular adhesion, extracellular matrix and actin cytoskeleton) were downregulated in cachectic patients. Transcriptional response elements for hepatic nuclear factor-4 (HNF4) were overrepresented in the promoters of extracellular matrix and adhesion molecule genes, and adipose HNF4 mRNA was downregulated in cachexia.

CONCLUSIONS: Cancer cachexia is characterised by preferential loss of adipose tissue; muscle mass is less affected. Loss of adipose tissue is secondary to a decrease in adipocyte lipid content and associates with changes in the expression of genes that regulate energy turnover, cytoskeleton and extracellular matrix, which suggest high tissue remodelling. Changes in gene expression in cachexia are reciprocal to those observed in obesity, suggesting that regulation of fat mass at least partly corresponds to two sides of the same coin.}, } @article {pmid20406972, year = {2010}, author = {Dutertre, M and Gratadou, L and Dardenne, E and Germann, S and Samaan, S and Lidereau, R and Driouch, K and de la Grange, P and Auboeuf, D}, title = {Estrogen regulation and physiopathologic significance of alternative promoters in breast cancer.}, journal = {Cancer research}, volume = {70}, number = {9}, pages = {3760-3770}, doi = {10.1158/0008-5472.CAN-09-3988}, pmid = {20406972}, issn = {1538-7445}, mesh = {Breast Neoplasms/enzymology/*genetics/metabolism ; CCCTC-Binding Factor ; Cell Line, Tumor ; DEAD-box RNA Helicases/genetics ; Estradiol/pharmacology ; Estrogen Receptor alpha/genetics/metabolism ; Exons ; Female ; Gene Expression Profiling ; *Gene Expression Regulation, Neoplastic ; Humans ; Oligonucleotide Array Sequence Analysis/methods ; *Promoter Regions, Genetic ; Protein Isoforms ; Repressor Proteins/genetics/metabolism ; Reverse Transcriptase Polymerase Chain Reaction ; }, abstract = {Alternative promoters (AP) occur in >30% protein-coding genes and contribute to proteome diversity. However, large-scale analyses of AP regulation are lacking, and little is known about their potential physiopathologic significance. To better understand the transcriptomic effect of estrogens, which play a major role in breast cancer, we analyzed gene and AP regulation by estradiol in MCF7 cells using pan-genomic exon arrays. 
We thereby identified novel estrogen-regulated genes (ERG) and determined the regulation of AP-encoded transcripts in 150 regulated genes. In <30% cases, APs were regulated in a similar manner by estradiol, whereas in >70% cases, they were regulated differentially. The patterns of AP regulation correlated with the patterns of estrogen receptor alpha (ERalpha) and CCCTC-binding factor (CTCF) binding sites at regulated gene loci. Interestingly, among genes with differentially regulated (DR) APs, we identified cases where estradiol regulated APs in an opposite manner, sometimes without affecting global gene expression levels. This promoter switch was mediated by the DDX5/DDX17 family of ERalpha coregulators. Finally, genes with DR promoters were preferentially involved in specific processes (e.g., cell structure and motility, and cell cycle). We show, in particular, that isoforms encoded by the NET1 gene APs, which are inversely regulated by estradiol, play distinct roles in cell adhesion and cell cycle regulation and that their expression is differentially associated with prognosis in ER(+) breast cancer. 
Altogether, this study identifies the patterns of AP regulation in ERGs and shows the contribution of AP-encoded isoforms to the estradiol-regulated transcriptome as well as their physiopathologic significance in breast cancer.}, } @article {pmid20398277, year = {2010}, author = {van Schaik, W and Top, J and Riley, DR and Boekhorst, J and Vrijenhoek, JE and Schapendonk, CM and Hendrickx, AP and Nijman, IJ and Bonten, MJ and Tettelin, H and Willems, RJ}, title = {Pyrosequencing-based comparative genome analysis of the nosocomial pathogen Enterococcus faecium and identification of a large transferable pathogenicity island.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {239}, pmid = {20398277}, issn = {1471-2164}, mesh = {Bacteriophages/genetics/ultrastructure ; Base Sequence ; Drug Resistance, Bacterial ; Enterococcus faecium/*genetics/pathogenicity/virology ; *Genome, Bacterial ; *Genomic Islands ; Microscopy, Electron, Transmission ; Phylogeny ; Virulence ; }, abstract = {BACKGROUND: The Gram-positive bacterium Enterococcus faecium is an important cause of nosocomial infections in immunocompromized patients.

RESULTS: We present a pyrosequencing-based comparative genome analysis of seven E. faecium strains that were isolated from various sources. In the genomes of clinical isolates several antibiotic resistance genes were identified, including the vanA transposon that confers resistance to vancomycin in two strains. A functional comparison between E. faecium and the related opportunistic pathogen E. faecalis based on differences in the presence of protein families, revealed divergence in plant carbohydrate metabolic pathways and oxidative stress defense mechanisms. The E. faecium pan-genome was estimated to be essentially unlimited in size, indicating that E. faecium can efficiently acquire and incorporate exogenous DNA in its gene pool. One of the most prominent sources of genomic diversity consists of bacteriophages that have integrated in the genome. The CRISPR-Cas system, which contributes to immunity against bacteriophage infection in prokaryotes, is not present in the sequenced strains. Three sequenced isolates carry the esp gene, which is involved in urinary tract infections and biofilm formation. The esp gene is located on a large pathogenicity island (PAI), which is between 64 and 104 kb in size. Conjugation experiments showed that the entire esp PAI can be transferred horizontally and inserts in a site-specific manner.

CONCLUSIONS: Genes involved in environmental persistence, colonization and virulence can easily be acquired by E. faecium. This will make the development of successful treatment strategies targeted against this organism a challenge for years to come.}, } @article {pmid20236513, year = {2010}, author = {D'Auria, G and Jiménez-Hernández, N and Peris-Bondia, F and Moya, A and Latorre, A}, title = {Legionella pneumophila pangenome reveals strain-specific virulence factors.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {181}, pmid = {20236513}, issn = {1471-2164}, mesh = {Evolution, Molecular ; *Genome, Bacterial ; Genomic Islands ; Genomics ; Legionella pneumophila/*genetics ; Virulence Factors/*genetics ; }, abstract = {BACKGROUND: Legionella pneumophila subsp. pneumophila is a gram-negative gamma-Proteobacterium and the causative agent of Legionnaires' disease, a form of epidemic pneumonia. It has a water-related life cycle. In industrialized cities L. pneumophila is commonly encountered in refrigeration towers and water pipes. Infection is always via infected aerosols to humans. Although many efforts have been made to eradicate Legionella from buildings, it still contaminates the water systems. The town of Alcoy (Valencian Region, Spain) has had recurrent outbreaks since 1999. The strain "Alcoy 2300/99" is a particularly persistent and recurrent strain that was isolated during one of the most significant outbreaks between the years 1999-2000.

RESULTS: We have sequenced the genome of the particularly persistent L. pneumophila strain Alcoy 2300/99 and have compared it with four previously sequenced strains known as Philadelphia (USA), Lens (France), Paris (France) and Corby (England). Pangenome analysis facilitated the identification of strain-specific features, as well as some that are shared by two or more strains. We identified: (1) three islands related to anti-drug resistance systems; (2) a system for transport and secretion of heavy metals; (3) three systems related to DNA transfer; (4) two CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) systems, known to provide resistance against phage infections, one similar in the Lens and Alcoy strains, and another specific to the Paris strain; and (5) seven islands of phage-related proteins, five of which seem to be strain-specific and two shared.

CONCLUSIONS: The dispensable genome disclosed by the pangenomic analysis seems to be a reservoir of new traits that have mainly been acquired by horizontal gene transfer and could confer evolutionary advantages over strains lacking them.}, } @article {pmid20228124, year = {2010}, author = {Galland, F and Lacroix, L and Saulnier, P and Dessen, P and Meduri, G and Bernier, M and Gaillard, S and Guibourdenche, J and Fournier, T and Evain-Brion, D and Bidart, JM and Chanson, P}, title = {Differential gene expression profiles of invasive and non-invasive non-functioning pituitary adenomas based on microarray analysis.}, journal = {Endocrine-related cancer}, volume = {17}, number = {2}, pages = {361-371}, doi = {10.1677/ERC-10-0018}, pmid = {20228124}, issn = {1479-6821}, mesh = {Adenoma/*genetics/metabolism/pathology ; Adult ; Aged ; Aged, 80 and over ; Female ; Gene Expression Profiling ; Gene Expression Regulation, Neoplastic ; Humans ; Immunohistochemistry ; Insulin-Like Growth Factor Binding Protein 5/genetics/metabolism ; Male ; Microarray Analysis ; Middle Aged ; Myosin Heavy Chains/genetics/metabolism ; Myosin Type V/genetics/metabolism ; Neoplasm Proteins/genetics/metabolism ; Neoplasm Recurrence, Local/*genetics/metabolism/pathology ; Pituitary Neoplasms/*genetics/metabolism/pathology ; Reverse Transcriptase Polymerase Chain Reaction ; }, abstract = {Non-functioning pituitary adenomas (NFPAs) may be locally invasive. Markers of invasiveness are needed to guide patient management and particularly the use of adjuvant radiotherapy. To examine whether invasive NFPAs display a specific gene expression profile relative to non-invasive tumors, we selected 40 NFPAs (38 of the gonadotroph type) and classified them as invasive (n=22) or non-invasive (n=18) on the basis of magnetic resonance imaging and surgical findings. 
We then performed pangenomic analysis with the 44k Agilent human whole genome expression oligonucleotide microarray in order to identify genes with differential expression between invasive and non-invasive NFPAs. Candidate genes were then tested in qRT-PCR. Prediction class analysis showed that the expression of 346 genes differed between invasive and non-invasive NFPAs (P<0.001), of which 233 genes were up-regulated and 113 genes were down-regulated in invasive tumors. On the basis of Ingenuity networks and the degree of up- or down-regulation in invasive versus non-invasive tumors, 35 genes were selected for expression quantification by qRT-PCR. Overexpression of only four genes was confirmed, namely IGFBP5 (P=0.02), MYO5A (P=0.04), FLT3 (P=0.01), and NFE2L1 (P=0.02). At the protein level, only myosin 5A (MYO5A) immunostaining was stronger in invasive than in non-invasive NFPAs. Molecular signature allows to differentiate 'grossly' invasive from non-invasive NFPAs. The product of one of these genes, MYO5A, may be a useful marker of tumor invasiveness.}, } @article {pmid20219865, year = {2010}, author = {Bayjanov, JR and Siezen, RJ and van Hijum, SA}, title = {PanCGHweb: a web tool for genotype calling in pangenome CGH data.}, journal = {Bioinformatics (Oxford, England)}, volume = {26}, number = {9}, pages = {1256-1257}, pmid = {20219865}, issn = {1367-4811}, mesh = {Algorithms ; Computational Biology/*methods ; Genetic Variation ; Genome ; Genomics ; *Genotype ; Internet ; Lactococcus lactis/genetics ; Models, Genetic ; Nucleic Acid Hybridization ; Oligonucleotide Array Sequence Analysis ; Open Reading Frames ; Phylogeny ; Sequence Analysis, DNA ; Software ; }, abstract = {UNLABELLED: A pangenome is the total of genes present in strains of the same species. Pangenome microarrays allow determining the genomic content of bacterial strains more accurately than conventional comparative genome hybridization microarrays. 
PanCGHweb is the first tool that effectively calls genotype based on pangenome microarray data.

AVAILABILITY: PanCGHweb, the web tool is accessible from: http://bamics2.cmbi.ru.nl/websoftware/pancgh/.}, } @article {pmid20138093, year = {2010}, author = {Gerrish, RS and Gill, AL and Fowler, VG and Gill, SR}, title = {Development of pooled suppression subtractive hybridization to analyze the pangenome of Staphylococcus aureus.}, journal = {Journal of microbiological methods}, volume = {81}, number = {1}, pages = {56-60}, pmid = {20138093}, issn = {1872-8359}, support = {R01 AI059111/AI/NIAID NIH HHS/United States ; R01 AI059111-04A2/AI/NIAID NIH HHS/United States ; T32 DE007034/DE/NIDCR NIH HHS/United States ; R01-AI 591/AI/NIAID NIH HHS/United States ; }, mesh = {Bacteremia/microbiology ; Comparative Genomic Hybridization/*methods ; DNA, Bacterial/*genetics ; Endocarditis/microbiology ; *Genome, Bacterial ; Humans ; Osteomyelitis/microbiology ; Staphylococcal Infections/microbiology ; Staphylococcus aureus/*genetics ; }, abstract = {We describe the development and application of a Pooled Suppression Subtractive Hybridization (PSSH) method to describe differences between the genomic content of a pool of clinical Staphylococcus aureus isolates and a sequenced reference strain. In comparative bacterial genomics, Suppression Subtractive Hybridization (SSH) is normally utilized to compare genomic features or expression profiles of one strain versus another, which limits its ability to analyze communities of isolates. However, a PSSH approach theoretically enables the user to characterize the entirety of gene content unique to a related group of isolates in a single reaction. These unique fragments may then be linked to individual isolates through standard PCR. This method was applied to examine the genomic diversity found in pools of S. aureus isolates associated with complicated bacteremia infections leading to endocarditis and osteomyelitis. 
Across four pools of 10 isolates each, four hundred and twenty seven fragments not found in or significantly divergent from the S. aureus NCTC 8325 reference genome were detected. These fragments could be linked to individual strains within its pool by PCR. This is the first use of PSSH to examine the S. aureus pangenome. We propose that PSSH is a powerful tool for researchers interested in rapidly comparing the genomic content of multiple unstudied isolates.}, } @article {pmid20116936, year = {2010}, author = {Bonneau, D and Marlin, S and Sanlaville, D and Dupont, JM and Sobol, H and Gonzales, M and Le Merrer, M and Malzac, P and Razavi, F and Manouvrier, S and Odent, S and Stoppa-Lyonnet, D}, title = {[Genetic testing in the context of the revision of the French law on bioethics].}, journal = {Pathologie-biologie}, volume = {58}, number = {5}, pages = {396-401}, doi = {10.1016/j.patbio.2009.12.002}, pmid = {20116936}, issn = {1768-3114}, mesh = {Confidentiality/ethics/legislation & jurisprudence ; Family Health ; Female ; France ; Genetic Testing/ethics/*legislation & jurisprudence ; Humans ; Male ; Pregnancy ; Preimplantation Diagnosis/ethics ; Prenatal Diagnosis/ethics ; Self Care ; }, abstract = {This article focuses on six questions raised by genetic testing in human: (1) the use of genetic tests, (2) information given to relatives of patients affected with genetic disorders, (3) prenatal and preimplantatory diagnosis for late onset genetic diseases and the use of pangenomic tests in prenatal diagnosis, (4) direct-to-consumer genetic testing, (5) population screening in the age of genomic medicine and (6) incidental findings when genetic testing are used.}, } @article {pmid20097573, year = {2010}, author = {Assié, G and Guillaud-Bataille, M and Ragazzon, B and Bertagna, X and Bertherat, J and Clauser, E}, title = {The pathophysiology, diagnosis and prognosis of adrenocortical tumors revisited by transcriptome analyses.}, journal = {Trends in endocrinology and 
metabolism: TEM}, volume = {21}, number = {5}, pages = {325-334}, doi = {10.1016/j.tem.2009.12.009}, pmid = {20097573}, issn = {1879-3061}, mesh = {Adenoma/genetics/pathology/physiopathology ; Adrenal Cortex/metabolism ; Adrenal Cortex Neoplasms/diagnosis/*genetics/pathology/*physiopathology ; Animals ; Carcinoma/genetics/pathology/physiopathology ; *Gene Expression Profiling ; Humans ; Hyperaldosteronism/physiopathology ; Oligonucleotide Array Sequence Analysis ; Prognosis ; }, abstract = {Analyzing gene expression (transcriptome) in tissue is now reliable using industrial pangenomic microarrays. Accumulating data on adrenal cortex and adrenocortical tumor transcriptomes have already identified striking transcriptome differences not only between adenoma and carcinoma but also between two sets of carcinoma, which have very different prognoses. These findings result in the development of diagnostic and prognostic molecular predictors, which improve the outcome determination compared with standard clinical and pathological tools. These transcriptome data observing adrenocortical tumor phenotype in great but complex detail, combined with genomic and proteomic information, will function for future research investigating the pathophysiology of their tumorigenesis and hormonal secretion.}, } @article {pmid20078865, year = {2010}, author = {Liu, M and Bayjanov, JR and Renckens, B and Nauta, A and Siezen, RJ}, title = {The proteolytic system of lactic acid bacteria revisited: a genomic comparison.}, journal = {BMC genomics}, volume = {11}, number = {}, pages = {36}, pmid = {20078865}, issn = {1471-2164}, mesh = {Bacterial Proteins/genetics ; *Comparative Genomic Hybridization ; Genes, Bacterial ; *Genome, Bacterial ; Lactococcus/enzymology/*genetics ; Peptide Hydrolases/genetics ; Phylogeny ; Protein Structure, Tertiary ; }, abstract = {BACKGROUND: Lactic acid bacteria (LAB) are a group of gram-positive, lactic acid producing Firmicutes. 
They have been extensively used in food fermentations, including the production of various dairy products. The proteolytic system of LAB converts proteins to peptides and then to amino acids, which is essential for bacterial growth and also contributes significantly to flavor compounds as end-products. Recent developments in high-throughput genome sequencing and comparative genomics hybridization arrays provide us with opportunities to explore the diversity of the proteolytic system in various LAB strains.

RESULTS: We performed a genome-wide comparative genomics analysis of proteolytic system components, including cell-wall bound proteinase, peptide transporters and peptidases, in 22 sequenced LAB strains. The peptidase families PepP/PepQ/PepM, PepD and PepI/PepR/PepL are described as examples of our in silico approach to refine the distinction of subfamilies with different enzymatic activities. Comparison of protein 3D structures of proline peptidases PepI/PepR/PepL and esterase A allowed identification of a conserved core structure, which was then used to improve phylogenetic analysis and functional annotation within this protein superfamily. The diversity of proteolytic system components in 39 Lactococcus lactis strains was explored using pangenome comparative genome hybridization analysis. Variations were observed in the proteinase PrtP and its maturation protein PrtM, in one of the Opp transport systems and in several peptidases between strains from different Lactococcus subspecies or from different origin.

CONCLUSIONS: The improved functional annotation of the proteolytic system components provides an excellent framework for future experimental validations of predicted enzymatic activities. The genome sequence data can be coupled to other "omics" data e.g. transcriptomics and metabolomics for prediction of proteolytic and flavor-forming potential of LAB strains. Such an integrated approach can be used to tune the strain selection process in food fermentations.}, } @article {pmid20064835, year = {2010}, author = {Sparfel, L and Pinel-Marie, ML and Boize, M and Koscielny, S and Desmots, S and Pery, A and Fardel, O}, title = {Transcriptional signature of human macrophages exposed to the environmental contaminant benzo(a)pyrene.}, journal = {Toxicological sciences : an official journal of the Society of Toxicology}, volume = {114}, number = {2}, pages = {247-259}, doi = {10.1093/toxsci/kfq007}, pmid = {20064835}, issn = {1096-0929}, mesh = {Benzo(a)pyrene/*toxicity ; Carcinogens, Environmental/*toxicity ; Cell Death/drug effects/genetics ; Cell Survival/drug effects ; Cells, Cultured ; Gene Expression Regulation/*drug effects ; Gene Silencing ; Humans ; Immune System/drug effects ; Immunity/drug effects/genetics ; Inflammation/chemically induced/genetics ; Interleukin-8/metabolism ; Macrophages/*drug effects/metabolism ; RNA, Messenger/metabolism ; Signal Transduction/drug effects ; Tumor Necrosis Factor-alpha/metabolism ; Tumor Suppressor Protein p53/metabolism ; }, abstract = {Polycyclic aromatic hydrocarbons (PAHs) are widely distributed immunotoxic and carcinogenic environmental contaminants, known to affect macrophages. In order to identify their molecular targets in such cells, we have analyzed gene expression profile of primary human macrophages treated by the prototypical PAH benzo(a)pyrene (BaP), using pangenomic oligonucleotides microarrays. 
Exposure of macrophages to BaP for 8 and 24 h resulted in 96 and 1100 genes, differentially expressed by at least a twofold change factor, respectively. Some of these targets, including the chemokine receptor CXCR5, the G protein-coupled receptor 35 (GPR35), and the Ras regulator RASAL1, have not been previously shown to be affected by PAHs, in contrast to others, such as interleukin-1beta and the aryl hydrocarbon receptor (AhR) repressor. These BaP-mediated gene regulations were fully validated by reverse transcription-quantitative polymerase chain reaction assays for some selected genes. Their bioinformatic analysis indicated that biological functions linked to immunity, inflammation, and cell death were among the most affected by BaP in human macrophages and that the AhR and p53 signaling pathways were the most significant canonical pathways activated by the PAH. AhR and p53 implications were moreover fully confirmed by the prevention of BaP-related upregulation of some selected target genes by AhR silencing or the use of pifithrin-alpha, an inhibitor of PAH bioactivation-related DNA damage/p53 pathways. 
Overall, these data, through identifying genes and signaling pathways targeted by PAHs in human macrophages, may contribute to better understand the molecular basis of the immunotoxicity of these environmental contaminants.}, } @article {pmid20061468, year = {2010}, author = {Eppinger, M and Worsham, PL and Nikolich, MP and Riley, DR and Sebastian, Y and Mou, S and Achtman, M and Lindler, LE and Ravel, J}, title = {Genome sequence of the deep-rooted Yersinia pestis strain Angola reveals new insights into the evolution and pangenome of the plague bacterium.}, journal = {Journal of bacteriology}, volume = {192}, number = {6}, pages = {1685-1699}, pmid = {20061468}, issn = {1098-5530}, support = {N01 AI030071/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Antigens, Bacterial ; Bacterial Proteins/genetics/*metabolism ; Base Sequence ; *Biological Evolution ; Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; Genotype ; Global Health ; Guinea Pigs ; Mice ; Mice, Inbred BALB C ; Plague/epidemiology/*microbiology ; Plasmids/genetics ; Virulence ; Yersinia pestis/*classification/*genetics ; }, abstract = {To gain insights into the origin and genome evolution of the plague bacterium Yersinia pestis, we have sequenced the deep-rooted strain Angola, a virulent Pestoides isolate. Its ancient nature makes this atypical isolate of particular importance in understanding the evolution of plague pathogenicity. Its chromosome features a unique genetic make-up intermediate between modern Y. pestis isolates and its evolutionary ancestor, Y. pseudotuberculosis. Our genotypic and phenotypic analyses led us to conclude that Angola belongs to one of the most ancient Y. pestis lineages thus far sequenced. The mobilome carries the first reported chimeric plasmid combining the two species-specific virulence plasmids. Genomic findings were validated in virulence assays demonstrating that its pathogenic potential is distinct from modern Y. pestis isolates. 
Human infection with this particular isolate would not be diagnosed by the standard clinical tests, as Angola lacks the plasmid-borne capsule, and a possible emergence of this genotype raises major public health concerns. To assess the genomic plasticity in Y. pestis, we investigated the global gene reservoir and estimated the pangenome at 4,844 unique protein-coding genes. As shown by the genomic analysis of this evolutionary key isolate, we found that the genomic plasticity within Y. pestis clearly was not as limited as previously thought, which is strengthened by the detection of the largest number of isolate-specific single-nucleotide polymorphisms (SNPs) currently reported in the species. This study identified numerous novel genetic signatures, some of which seem to be intimately associated with plague virulence. These markers are valuable in the development of a robust typing system critical for forensic, diagnostic, and epidemiological studies.}, } @article {pmid20048063, year = {2010}, author = {González, V and Acosta, JL and Santamaría, RI and Bustos, P and Fernández, JL and Hernández González, IL and Díaz, R and Flores, M and Palacios, R and Mora, J and Dávila, G}, title = {Conserved symbiotic plasmid DNA sequences in the multireplicon pangenomic structure of Rhizobium etli.}, journal = {Applied and environmental microbiology}, volume = {76}, number = {5}, pages = {1604-1614}, pmid = {20048063}, issn = {1098-5336}, mesh = {Cluster Analysis ; Conserved Sequence ; DNA, Bacterial/chemistry/*genetics ; *Genome, Bacterial ; Genomic Islands ; Molecular Sequence Data ; Phylogeny ; *Plasmids ; Rhizobium etli/*genetics/physiology ; Sequence Analysis, DNA ; Sequence Homology, Nucleic Acid ; }, abstract = {Strains of the same bacterial species often show considerable genomic variation. To examine the extent of such variation in Rhizobium etli, the complete genome sequence of R. etli CIAT652 and the partial genomic sequences of six additional R. 
etli strains having different geographical origins were determined. The sequences were compared with each other and with the previously reported genome sequence of R. etli CFN42. DNA sequences common to all strains constituted the greater part of these genomes and were localized in both the chromosome and large plasmids. About 700 to 1,000 kb of DNA that did not match sequences of the complete genomes of strains CIAT652 and CFN42 was unique to each R. etli strain. These sequences were distributed throughout the chromosome as individual genes or chromosomal islands and in plasmids, and they encoded accessory functions, such as transport of sugars and amino acids, or secondary metabolism; they also included mobile elements and hypothetical genes. Sequences corresponding to symbiotic plasmids showed high levels of nucleotide identity (about 98 to 99%), whereas chromosomal sequences and the sequences with matches to other plasmids showed lower levels of identity (on average, about 90 to 95%). We concluded that R. etli has a pangenomic structure with a core genome composed of both chromosomal and plasmid sequences, including a highly conserved symbiotic plasmid, despite the overall genomic divergence.}, } @article {pmid21472203, year = {2010}, author = {Chalabi, N and Coxam, V and Satih, S and Davicco, MJ and Lebecque, P and Fontana, L and Bignon, YJ and Bernard-Gallon, DJ}, title = {Gene signature of rat mammary glands: Influence of lifelong soy isoflavones consumption.}, journal = {Molecular medicine reports}, volume = {3}, number = {1}, pages = {75-81}, doi = {10.3892/mmr_00000221}, pmid = {21472203}, issn = {1791-3004}, abstract = {Epidemiological studies have indicated that phytoestrogen has a preventive effect on breast cancer development. However, controversial results have been reported suggesting these compounds have ambivalent effects on breast tissue. 
Here, we report a transgenerational study conducted on female Wistar rats fed a diet enriched with phytoestrogen. Using a pangenomic microarray approach, a transcriptomic study was performed on mammary glands extracted from the animals. Gene expression was examined at 3 ages: 3, 18 and 24 months. The F1 generation did not express the same genes as the F0 control generation fed the same diet. This effect increased with animal age: in 3-, 18- and 24-month-old rats, 293, 441 and 2868 differentially expressed genes were respectively observed. These results suggest that long-term exposure to isoflavones may play a key role in gene regulation. Additionally, epigenetic patterns were found to be affected by DNA-methyltransferase and histone-deacetylase expression.}, } @article {pmid20019077, year = {2010}, author = {Hall, BG and Ehrlich, GD and Hu, FZ}, title = {Pan-genome analysis provides much higher strain typing resolution than multi-locus sequence typing.}, journal = {Microbiology (Reading, England)}, volume = {156}, number = {Pt 4}, pages = {1060-1068}, pmid = {20019077}, issn = {1465-2080}, support = {R01 DC004173/DC/NIDCD NIH HHS/United States ; R01 DC002148/DC/NIDCD NIH HHS/United States ; DC 02148/DC/NIDCD NIH HHS/United States ; R01 DC005659/DC/NIDCD NIH HHS/United States ; R01 AI080935/AI/NIAID NIH HHS/United States ; DC 04173/DC/NIDCD NIH HHS/United States ; DC 05659/DC/NIDCD NIH HHS/United States ; }, mesh = {Bacteria/*classification/*genetics ; Bacterial Typing Techniques/*methods ; *Genome, Bacterial ; Molecular Sequence Data ; Sequence Analysis, DNA/*methods ; }, abstract = {The most widely used DNA-based method for bacterial strain typing, multi-locus sequence typing (MLST), lacks sufficient resolution to distinguish among many bacterial strains within a species. Here, we show that strain typing based on the presence or absence of distributed genes is able to resolve all completely sequenced genomes of six bacterial species. 
This was accomplished by the development of a clustering method, neighbour grouping, which is completely consistent with the lower-resolution MLST method, but provides far greater resolving power. Because the presence/absence of distributed genes can be determined by low-cost microarray analyses, it offers a practical, high-resolution alternative to MLST that could provide valuable diagnostic and prognostic information for pathogenic bacterial species.}, } @article {pmid19997067, year = {2010}, author = {Li, R and Li, Y and Zheng, H and Luo, R and Zhu, H and Li, Q and Qian, W and Ren, Y and Tian, G and Li, J and Zhou, G and Zhu, X and Wu, H and Qin, J and Jin, X and Li, D and Cao, H and Hu, X and Blanche, H and Cann, H and Zhang, X and Li, S and Bolund, L and Kristiansen, K and Yang, H and Wang, J and Wang, J}, title = {Building the sequence map of the human pan-genome.}, journal = {Nature biotechnology}, volume = {28}, number = {1}, pages = {57-63}, pmid = {19997067}, issn = {1546-1696}, mesh = {Animals ; Base Sequence ; Genetics, Population ; Genome, Human/*genetics ; Humans ; Sequence Alignment ; Sequence Analysis, DNA/*methods ; Species Specificity ; }, abstract = {Here we integrate the de novo assembly of an Asian and an African genome with the NCBI reference human genome, as a step toward constructing the human pan-genome. We identified approximately 5 Mb of novel sequences not present in the reference genome in each of these assemblies. Most novel sequences are individual or population specific, as revealed by their comparison to all available human DNA sequence and by PCR validation using the human genome diversity cell line panel. We found novel sequences present in patterns consistent with known human migration paths. Cross-species conservation analysis of predicted genes indicated that the novel sequences contain potentially functional coding regions. 
We estimate that a complete human pan-genome would contain approximately 19-40 Mb of novel sequence not present in the extant reference genome. The extensive amount of novel sequence contributing to the genetic variation of the pan-genome indicates the importance of using complete genome sequencing and de novo assembly.}, } @article {pmid19912620, year = {2009}, author = {Carpaij, N and Fluit, AC and Lindsay, JA and Bonten, MJ and Willems, RJ}, title = {New methods to analyse microarray data that partially lack a reference signal.}, journal = {BMC genomics}, volume = {10}, number = {}, pages = {522}, pmid = {19912620}, issn = {1471-2164}, support = {//Wellcome Trust/United Kingdom ; }, mesh = {Comparative Genomic Hybridization ; Oligonucleotide Array Sequence Analysis/*methods/standards ; Polymerase Chain Reaction ; Reference Standards ; Staphylococcus aureus/genetics ; }, abstract = {BACKGROUND: Microarray-based Comparative Genomic Hybridisation (CGH) has been used to assess genetic variability between bacterial strains. Crucial for interpretation of microarray data is the availability of a reference to compare signal intensities to reliably determine presence or divergence of each DNA fragment. However, the production of a good reference becomes unfeasible when microarrays are based on pan-genomes. When only a single strain is used as a reference for a multistrain array, the accessory gene pool will be partially represented by reference DNA, although these genes represent the genomic repertoire that can explain differences in virulence, pathogenicity or transmissibility between strains. The lack of a reference makes interpretation of the data for these genes difficult and, if the test signal is low, they are often deleted from the analysis. We aimed to develop novel methods to determine the presence or divergence of genes in a Staphylococcus aureus multistrain PCR product microarray-based CGH approach for which reference DNA was not available for some probes.

RESULTS: In this study we have developed 6 new methods to predict divergence and presence of all genes spotted on a multistrain Staphylococcus aureus DNA microarray, published previously, including those gene spots that lack reference signals. When considering specificity and PPV (i.e. the false-positive rate) as the most important criteria for evaluating these methods, the method that defined gene presence based on a signal at least twice as high as the background and higher than the reference signal (method 4) had the best test characteristics. For this method specificity was 100% and 82% for MRSA252 (compared to the GACK method) and all spots (compared to sequence data), respectively, and PPV were 100% and 76% for MRSA252 (compared to the GACK method) and all spots (compared to sequence data), respectively.

CONCLUSION: A definition of gene presence based on signal at least twice as high as the background and higher than the reference signal (method 4) had the best test characteristics, allowing the analysis of 6-17% more of the genes not present in the reference strain. This method is recommended to analyse microarray data that partially lack a reference signal.}, } @article {pmid19905894, year = {2010}, author = {Delorme, B and Nivet, E and Gaillard, J and Häupl, T and Ringe, J and Devèze, A and Magnan, J and Sohier, J and Khrestchatisky, M and Roman, FS and Charbord, P and Sensebé, L and Layrolle, P and Féron, F}, title = {The human nose harbors a niche of olfactory ectomesenchymal stem cells displaying neurogenic and osteogenic properties.}, journal = {Stem cells and development}, volume = {19}, number = {6}, pages = {853-866}, doi = {10.1089/scd.2009.0267}, pmid = {19905894}, issn = {1557-8534}, mesh = {Adipocytes/cytology/metabolism ; Adult ; Animals ; Biomarkers/metabolism ; Bone Marrow Cells/cytology/metabolism ; Cell Differentiation/genetics ; Cell Membrane/metabolism ; Cell Proliferation ; Chondrocytes/cytology/metabolism ; Female ; Gene Expression Profiling ; Gene Expression Regulation ; Humans ; Male ; Mesenchymal Stem Cells/*cytology/metabolism ; Mice ; Middle Aged ; *Neurogenesis ; Neurons/cytology/metabolism ; Nose/*cytology ; Olfactory Mucosa/cytology ; Osteoblasts/cytology/metabolism ; *Osteogenesis ; Spheroids, Cellular/cytology ; Stem Cell Niche/*cytology/metabolism ; Young Adult ; }, abstract = {We previously identified multipotent stem cells within the lamina propria of the human olfactory mucosa, located in the nasal cavity. We also demonstrated that this cell type differentiates into neural cells and improves locomotor behavior after transplantation in a rat model of Parkinson's disease. Yet, next to nothing is known about their specific stemness characteristics. 
We therefore devised a study aiming to compare olfactory lamina propria stem cells from 4 individuals to bone marrow mesenchymal stem cells from 4 age- and gender-matched individuals. Using pangenomic microarrays and immunostaining with 34 cell surface marker antibodies, we show here that olfactory stem cells are closely related to bone marrow stem cells. However, olfactory stem cells also exhibit singular traits. By means of techniques such as proliferation assay, cDNA microarrays, RT-PCR, in vitro and in vivo differentiation, we report that when compared to bone marrow stem cells, olfactory stem cells display (1) a high proliferation rate; (2) a propensity to differentiate into osseous cells; and (3) a disinclination to give rise to chondrocytes and adipocytes. Since peripheral olfactory stem cells originate from a neural crest-derived tissue and, as shown here, exhibit an increased expression of neural cell-related genes, we propose to name them olfactory ectomesenchymal stem cells (OE-MSC). Further studies are now required to corroborate the therapeutic potential of OE-MSCs in animal models of bone and brain diseases.}, } @article {pmid19860885, year = {2009}, author = {Thompson, CC and Vicente, AC and Souza, RC and Vasconcelos, AT and Vesth, T and Alves, N and Ussery, DW and Iida, T and Thompson, FL}, title = {Genomic taxonomy of Vibrios.}, journal = {BMC evolutionary biology}, volume = {9}, number = {}, pages = {258}, pmid = {19860885}, issn = {1471-2148}, mesh = {Base Sequence ; *Evolution, Molecular ; *Genome, Bacterial ; Phylogeny ; Vibrio/*classification/*genetics ; }, abstract = {BACKGROUND: Vibrio taxonomy has been based on a polyphasic approach. In this study, we retrieve useful taxonomic information (i.e. data that can be used to distinguish different taxonomic levels, such as species and genera) from 32 genome sequences of different vibrio species. 
We use a variety of tools to explore the taxonomic relationship between the sequenced genomes, including Multilocus Sequence Analysis (MLSA), supertrees, Average Amino Acid Identity (AAI), genomic signatures, and Genome BLAST atlases. Our aim is to analyse the usefulness of these tools for species identification in vibrios.

RESULTS: We have generated four new genome sequences of three Vibrio species, i.e., V. alginolyticus 40B, V. harveyi-like 1DA3, and V. mimicus strains VM573 and VM603, and present a broad analysis of these genomes along with other sequenced Vibrio species. The genome atlas and pangenome plots provide a tantalizing image of the genomic differences that occur between closely related sister species, e.g. V. cholerae and V. mimicus. The vibrio pangenome contains around 26504 genes. The V. cholerae core genome and pangenome consist of 1520 and 6923 genes, respectively. Pangenomes might allow different strains of V. cholerae to occupy different niches. MLSA and supertree analyses resulted in a similar phylogenetic picture, with a clear distinction of four groups (Vibrio core group, V. cholerae-V. mimicus, Aliivibrio spp., and Photobacterium spp.). A Vibrio species is defined as a group of strains that share > 95% DNA identity in MLSA and supertree analysis, > 96% AAI, ≤ 10 genome signature dissimilarity, and > 61% proteome identity. Strains of the same species and species of the same genus will form monophyletic groups on the basis of MLSA and supertree.

CONCLUSION: The combination of different analytical and bioinformatics tools will enable the most accurate species identification through genomic computational analysis. This endeavour will culminate in the birth of the online genomic taxonomy whereby researchers and end-users of taxonomy will be able to identify their isolates through a web-based server. This novel approach to microbial systematics will result in a tremendous advance concerning biodiversity discovery, description, and understanding.}, } @article {pmid19837092, year = {2009}, author = {Pajon, R and Yero, D and Niebla, O and Climent, Y and Sardiñas, G and García, D and Perera, Y and Llanes, A and Delgado, M and Cobas, K and Caballero, E and Taylor, S and Brookes, C and Gorringe, A}, title = {Identification of new meningococcal serogroup B surface antigens through a systematic analysis of neisserial genomes.}, journal = {Vaccine}, volume = {28}, number = {2}, pages = {532-541}, doi = {10.1016/j.vaccine.2009.09.128}, pmid = {19837092}, issn = {1873-2518}, support = {//Department of Health/United Kingdom ; }, mesh = {Animals ; Animals, Newborn ; Antigens, Bacterial/*genetics/*immunology ; Computational Biology ; Enzyme-Linked Immunosorbent Assay ; Genome, Bacterial/*genetics ; Meningococcal Vaccines/genetics/*immunology ; Mice ; Mice, Inbred BALB C ; Neisseria meningitidis, Serogroup B/*genetics/*immunology ; Polymerase Chain Reaction ; Rats ; }, abstract = {The difficulty of inducing an effective immune response against the Neisseria meningitidis serogroup B capsular polysaccharide has led to the search for vaccines for this serogroup based on outer membrane proteins. The availability of the first meningococcal genome (MC58 strain) allowed the expansion of high-throughput methods to explore the protein profile displayed by N. meningitidis. 
By combining a pan-genome analysis with an extensive experimental validation to identify new potential vaccine candidates, genes coding for antigens likely to be exposed on the surface of the meningococcus were selected after a multistep comparative analysis of entire Neisseria genomes. Eleven novel putative ORF annotations were reported for serogroup B strain MC58. Furthermore, a total of 20 new predicted potential pan-neisserial vaccine candidates were produced as recombinant proteins and evaluated using immunological assays. Potential vaccine candidate coding genes were PCR-amplified from a panel of representative strains and their variability analyzed using maximum likelihood approaches for detecting positive selection. Finally, five proteins all capable of inducing a functional antibody response vs N. meningitidis strain CU385 were identified as new attractive vaccine candidates: NMB0606 a potential YajC orthologue, NMB0928 the neisserial NlpB (BamC), NMB0873 a LolB orthologue, NMB1163 a protein belonging to a curli-like assembly machinery, and NMB0938 (a neisserial specific antigen) with evidence of positive selection appreciated for NMB0928. 
The new set of vaccine candidates and the novel proposed functions will open a new wave of research in the search for the elusive neisserial vaccine.}, } @article {pmid19815445, year = {2009}, author = {Vago, P}, title = {[Half a century of human and medical cytogenetics].}, journal = {Morphologie : bulletin de l'Association des anatomistes}, volume = {93}, number = {301}, pages = {42-50}, doi = {10.1016/j.morpho.2009.06.001}, pmid = {19815445}, issn = {1286-0115}, mesh = {Chromosome Disorders/genetics ; Chromosomes, Human/genetics/ultrastructure ; Cytogenetic Analysis ; *Cytogenetics/history ; DNA/genetics ; Genes ; Genetic Diseases, Inborn/genetics ; *Genetics, Medical/history ; History, 20th Century ; History, 21st Century ; Humans ; Meiosis ; Neoplasms/genetics ; Neoplastic Syndromes, Hereditary/genetics ; }, abstract = {In 1956, the number of chromosomes in humans is set at 46; in 1959, the link between a disability (mongolism) and a chromosomal anomaly (the Down syndrome) is established: human and medical cytogenetics were born. Since then, progress has been remarkable: the techniques of chromosomal and molecular cytogenetics can reach a resolution of the size of a single gene with a pangenomic scope. Practical applications are constantly expanded. The clinical impact is significant, from the genetic counselling in constitutional to the targeted therapies. 
Fifty years later, cytogenetics can be defined as the science which aims to detect chromosomal abnormalities, whether constitutional or acquired, using chromosomal or molecular techniques aiming to study the arrangement of genes in chromosomes, to quantify the number of gene copy and to look for the presence of gene fusion.}, } @article {pmid19808979, year = {2009}, author = {Vispé, S and DeVries, L and Créancier, L and Besse, J and Bréand, S and Hobson, DJ and Svejstrup, JQ and Annereau, JP and Cussac, D and Dumontet, C and Guilbaud, N and Barret, JM and Bailly, C}, title = {Triptolide is an inhibitor of RNA polymerase I and II-dependent transcription leading predominantly to down-regulation of short-lived mRNA.}, journal = {Molecular cancer therapeutics}, volume = {8}, number = {10}, pages = {2780-2790}, doi = {10.1158/1535-7163.MCT-09-0549}, pmid = {19808979}, issn = {1538-8514}, mesh = {Cell Line, Tumor ; Cell Proliferation/drug effects ; Cyclic AMP Response Element-Binding Protein/metabolism ; Diterpenes/*chemistry/pharmacology ; Down-Regulation/*drug effects ; Epoxy Compounds/chemistry/pharmacology ; Gene Expression Regulation, Neoplastic/drug effects ; Humans ; Oligonucleotide Array Sequence Analysis ; Phenanthrenes/*chemistry/pharmacology ; Promoter Regions, Genetic/genetics ; Proto-Oncogene Proteins c-myc/metabolism ; RNA Polymerase I/*antagonists & inhibitors ; RNA Polymerase II/*antagonists & inhibitors ; RNA, Messenger/biosynthesis ; RNA, Neoplasm/biosynthesis ; Time Factors ; Transcription, Genetic/*drug effects ; Tumor Suppressor Protein p53 ; ets-Domain Protein Elk-1/metabolism ; }, abstract = {Triptolide, a natural product extracted from the Chinese plant Tripterygium wilfordii, possesses antitumor properties. Despite numerous reports showing the proapoptotic capacity and the inhibition of NF-kappaB-mediated transcription by triptolide, the identity of its cellular target is still unknown. 
To clarify its mechanism of action, we further investigated the effect of triptolide on RNA synthesis in the human non-small cell lung cancer cell line A549. Triptolide inhibited both total RNA and mRNA de novo synthesis, with the primary action being on the latter pool. We used 44K human pan-genomic DNA microarrays and identified the genes primarily affected by a short treatment with triptolide. Among the modulated genes, up to 98% are down-regulated, encompassing a large array of oncogenes including transcription factors and cell cycle regulators. We next observed that triptolide induced a rapid depletion of RPB1, the RNA polymerase II main subunit that is considered a hallmark of a transcription elongation blockage. However, we also show that triptolide does not directly interact with the RNA polymerase II complex nor does it damage DNA. We thus conclude that triptolide is an original pharmacologic inhibitor of RNA polymerase activity, affecting indirectly the transcription machinery, leading to a rapid depletion of short-lived mRNA, including transcription factors, cell cycle regulators such as CDC25A, and the oncogenes MYC and Src. 
Overall, the data shed light on the effect of triptolide on transcription, along with its novel potential applications in cancers, including acute myeloid leukemia, which is in part driven by the aforementioned oncogenic factors.}, } @article {pmid19786035, year = {2009}, author = {Jeong, H and Barbe, V and Lee, CH and Vallenet, D and Yu, DS and Choi, SH and Couloux, A and Lee, SW and Yoon, SH and Cattolico, L and Hur, CG and Park, HS and Ségurens, B and Kim, SC and Oh, TK and Lenski, RE and Studier, FW and Daegelen, P and Kim, JF}, title = {Genome sequences of Escherichia coli B strains REL606 and BL21(DE3).}, journal = {Journal of molecular biology}, volume = {394}, number = {4}, pages = {644-652}, doi = {10.1016/j.jmb.2009.09.052}, pmid = {19786035}, issn = {1089-8638}, mesh = {DNA, Bacterial/chemistry/*genetics ; Escherichia coli/*genetics ; *Genome, Bacterial ; Interspersed Repetitive Sequences ; Molecular Sequence Data ; Polymorphism, Genetic ; Prophages/genetics ; *Sequence Analysis, DNA ; }, abstract = {Escherichia coli K-12 and B have been the subjects of classical experiments from which much of our understanding of molecular genetics has emerged. We present here complete genome sequences of two E. coli B strains, REL606, used in a long-term evolution experiment, and BL21(DE3), widely used to express recombinant proteins. The two genomes differ in length by 72,304 bp and have 426 single base pair differences, a seemingly large difference for laboratory strains having a common ancestor within the last 67 years. Transpositions by IS1 and IS150 have occurred in both lineages. Integration of the DE3 prophage in BL21(DE3) apparently displaced a defective prophage in the lambda attachment site of B. As might have been anticipated from the many genetic and biochemical experiments comparing B and K-12 over the years, the B genomes are similar in size and organization to the genome of E. 
coli K-12 MG1655 and have >99% sequence identity over approximately 92% of their genomes. E. coli B and K-12 differ considerably in distribution of IS elements and in location and composition of larger mobile elements. An unexpected difference is the absence of a large cluster of flagella genes in B, due to a 41 kbp IS1-mediated deletion. Gene clusters that specify the LPS core, O antigen, and restriction enzymes differ substantially, presumably because of horizontal transfer. Comparative analysis of 32 independently isolated E. coli and Shigella genomes, both commensals and pathogenic strains, identifies a minimal set of genes in common plus many strain-specific genes that constitute a large E. coli pan-genome.}, } @article {pmid19779742, year = {2009}, author = {Wurtz, N and Desplans, J and Parzy, D}, title = {Phenotypic and transcriptomic analyses of Plasmodium falciparum protein kinase A catalytic subunit inhibition.}, journal = {Parasitology research}, volume = {105}, number = {6}, pages = {1691-1699}, pmid = {19779742}, issn = {1432-1955}, mesh = {Animals ; *Catalytic Domain ; Cell Cycle/drug effects ; Cyclic AMP-Dependent Protein Kinases/*antagonists & inhibitors/genetics ; Down-Regulation ; Gene Silencing ; Oligonucleotide Array Sequence Analysis ; Plasmodium falciparum/chemistry/cytology/*enzymology/genetics ; RNA, Double-Stranded/genetics ; RNA, Messenger/biosynthesis ; RNA, Protozoan/biosynthesis ; }, abstract = {The emergence and dissemination of drug-resistant malaria parasites represent one of the most important problems in malaria case management. Plasmodium falciparum is the causative agent of the most lethal form of human malaria. The molecular mechanisms that control the life cycle of the malaria parasite are still poorly understood. The published genome sequence (P. falciparum strain 3D7) reveals that several homologs of eukaryotic signaling proteins, such as protein kinases and phosphatases, are conserved in P. falciparum. 
Protein kinases are now widely recognized as valuable drug targets in protozoan parasites. In this study, gene silencing with double-stranded RNA (dsRNA) and microarray techniques were used to study the biological function of the cAMP-dependent protein kinase catalytic subunit (PfPKAc) in the parasite erythrocytic life cycle. Treatment of parasites with PfPKAc dsRNA resulted in a marked reduction of endogenous PfPKAc mRNA associated with a compensatory decrease of PfPKAr mRNA followed by morphological changes in schizont stages and cell cycle arrest. The global effects of gene silencing were also investigated using a P. falciparum pan-genomic microarray. Transcriptomic analysis showed that the expression of 329 genes was altered in response to downregulation of PfPKAc mRNA particularly genes in specific metabolic pathways linked with merozoite invasion processes, the calcium/calmodulin signaling, and kinases network and mitochondrial functions.}, } @article {pmid19758451, year = {2009}, author = {Phillippy, AM and Deng, X and Zhang, W and Salzberg, SL}, title = {Efficient oligonucleotide probe selection for pan-genomic tiling arrays.}, journal = {BMC bioinformatics}, volume = {10}, number = {}, pages = {293}, pmid = {19758451}, issn = {1471-2105}, support = {R01 GM083873/GM/NIGMS NIH HHS/United States ; R01 GM083873-06/GM/NIGMS NIH HHS/United States ; R01 LM006845/LM/NLM NIH HHS/United States ; R01 LM006845-10/LM/NLM NIH HHS/United States ; }, mesh = {*Algorithms ; Computational Biology/*methods ; Genome ; Listeria monocytogenes/genetics ; Oligonucleotide Array Sequence Analysis/*methods ; Oligonucleotide Probes/*chemistry ; }, abstract = {BACKGROUND: Array comparative genomic hybridization is a fast and cost-effective method for detecting, genotyping, and comparing the genomic sequence of unknown bacterial isolates. This method, as with all microarray applications, requires adequate coverage of probes targeting the regions of interest. 
An unbiased tiling of probes across the entire length of the genome is the most flexible design approach. However, such a whole-genome tiling requires that the genome sequence is known in advance. For the accurate analysis of uncharacterized bacteria, an array must query a fully representative set of sequences from the species' pan-genome. Prior microarrays have included only a single strain per array or the conserved sequences of gene families. These arrays omit potentially important genes and sequence variants from the pan-genome.

RESULTS: This paper presents a new probe selection algorithm (PanArray) that can tile multiple whole genomes using a minimal number of probes. Unlike arrays built on clustered gene families, PanArray uses an unbiased, probe-centric approach that does not rely on annotations, gene clustering, or multi-alignments. Instead, probes are evenly tiled across all sequences of the pan-genome at a consistent level of coverage. To minimize the required number of probes, probes conserved across multiple strains in the pan-genome are selected first, and additional probes are used only where necessary to span polymorphic regions of the genome. The viability of the algorithm is demonstrated by array designs for seven different bacterial pan-genomes and, in particular, the design of a 385,000 probe array that fully tiles the genomes of 20 different Listeria monocytogenes strains with overlapping probes at greater than twofold coverage.

CONCLUSION: PanArray is an oligonucleotide probe selection algorithm for tiling multiple genome sequences using a minimal number of probes. It is capable of fully tiling all genomes of a species on a single microarray chip. These unique pan-genome tiling arrays provide maximum flexibility for the analysis of both known and uncharacterized strains.}, } @article {pmid19703284, year = {2009}, author = {Quatrini, R and Appia-Ayme, C and Denis, Y and Jedlicki, E and Holmes, DS and Bonnefoy, V}, title = {Extending the models for iron and sulfur oxidation in the extreme acidophile Acidithiobacillus ferrooxidans.}, journal = {BMC genomics}, volume = {10}, number = {}, pages = {394}, pmid = {19703284}, issn = {1471-2164}, mesh = {Acidithiobacillus/*genetics/metabolism ; Computational Biology ; Gene Expression Profiling ; Genes, Bacterial ; *Genome, Bacterial ; Iron/*metabolism ; Metabolomics ; Oligonucleotide Array Sequence Analysis ; Oxidation-Reduction ; RNA, Bacterial/genetics ; Sulfur Compounds/*metabolism ; }, abstract = {BACKGROUND: Acidithiobacillus ferrooxidans gains energy from the oxidation of ferrous iron and various reduced inorganic sulfur compounds at very acidic pH. Although an initial model for the electron pathways involved in iron oxidation has been developed, much less is known about the sulfur oxidation in this microorganism. In addition, what has been reported for both iron and sulfur oxidation has been derived from different A. ferrooxidans strains, some of which have not been phylogenetically characterized and some have been shown to be mixed cultures. It is necessary to provide models of iron and sulfur oxidation pathways within one strain of A. ferrooxidans in order to comprehend the full metabolic potential of the pangenome of the genus.

RESULTS: Bioinformatic-based metabolic reconstruction supported by microarray transcript profiling and quantitative RT-PCR analysis predicts the involvement of a number of novel genes involved in iron and sulfur oxidation in A. ferrooxidans ATCC23270. These include for iron oxidation: cup (copper oxidase-like), ctaABT (heme biogenesis and insertion), nuoI and nuoK (NADH complex subunits), sdrA1 (a NADH complex accessory protein) and atpB and atpE (ATP synthetase F0 subunits). The following new genes are predicted to be involved in reduced inorganic sulfur compounds oxidation: a gene cluster (rhd, tusA, dsrE, hdrC, hdrB, hdrA, orf2, hdrC, hdrB) encoding three sulfurtransferases and a heterodisulfide reductase complex, sat potentially encoding an ATP sulfurylase and sdrA2 (an accessory NADH complex subunit). Two different regulatory components are predicted to be involved in the regulation of alternate electron transfer pathways: 1) a gene cluster (ctaRUS) that contains a predicted iron responsive regulator of the Rrf2 family that is hypothesized to regulate cytochrome aa3 oxidase biogenesis and 2) a two component sensor-regulator of the RegB-RegA family that may respond to the redox state of the quinone pool.

CONCLUSION: Bioinformatic analysis coupled with gene transcript profiling extends our understanding of the iron and reduced inorganic sulfur compounds oxidation pathways in A. ferrooxidans and suggests mechanisms for their regulation. The models provide unified and coherent descriptions of these processes within the type strain, eliminating previous ambiguity caused by models built from analyses of multiple and divergent strains of this microorganism.}, } @article {pmid19697077, year = {2009}, author = {Wilson, MK and Lane, AB and Law, BF and Miller, WG and Joens, LA and Konkel, ME and White, BA}, title = {Analysis of the pan genome of Campylobacter jejuni isolates recovered from poultry by pulsed-field gel electrophoresis, multilocus sequence typing (MLST), and repetitive sequence polymerase chain reaction (rep-PCR) reveals different discriminatory capabilities.}, journal = {Microbial ecology}, volume = {58}, number = {4}, pages = {843-855}, pmid = {19697077}, issn = {1432-184X}, mesh = {Animal Husbandry/methods ; Animals ; Bacterial Typing Techniques/methods ; Campylobacter Infections/microbiology ; Campylobacter jejuni/classification/*genetics ; Chickens/microbiology ; DNA Fingerprinting/methods ; DNA, Bacterial/genetics ; Electrophoresis, Gel, Pulsed-Field/methods ; *Genetic Variation ; *Genome, Bacterial ; Genotype ; Polymerase Chain Reaction/methods ; Sequence Analysis, DNA ; }, abstract = {Campylobacter jejuni is one of the leading bacterial causes of food-borne illness in the USA. Molecular typing methods are often used in food safety for identifying sources of infection and pathways of transmission. Moreover, the identification of genetically related isolates (i.e., clades) may facilitate the development of intervention strategies for control and prevention of food-borne diseases. We analyzed the pan genome (i.e., core and variable genes) of 63 C. 
jejuni isolates recovered from chickens raised in conventional, organic, and free-range poultry flocks to gain insight into the genetic diversity of C. jejuni isolates recovered from different environments. We assessed the discriminatory power of three genotyping methods [i.e., pulsed-field gel electrophoresis (PFGE), multilocus sequence typing (MLST), and repetitive extragenic palindromic polymerase chain reaction (rep-PCR)]. The rep-PCR fingerprint was generated by determining the presence of repetitive sequences that are interspersed throughout the genome via repetitive extragenic palindromic PCR, enterobacterial repetitive intergenic consensus sequence PCR (ERIC-PCR), and BOX element PCR (BOX-PCR) and combining the data to form a composite fingerprint. The genetic fingerprints were subjected to computer-assisted pattern analysis. Comparison of the three genotypic methods revealed that repREB-PCR showed greater discriminatory power than PFGE and MLST. ERIC-PCR and BOX-PCR yielded the highest number of PCR products and greatest reproducibility. Regardless of the genotyping method, C. jejuni isolates recovered from chickens reared in conventional, organic, and free-range environments all exhibit a high level of genotypic diversity.}, } @article {pmid19696499, year = {2009}, author = {Ussery, DW and Kiil, K and Lagesen, K and Sicheritz-Pontén, T and Bohlin, J and Wassenaar, TM}, title = {The genus Burkholderia: analysis of 56 genomic sequences.}, journal = {Genome dynamics}, volume = {6}, number = {}, pages = {140-157}, doi = {10.1159/000235768}, pmid = {19696499}, issn = {1660-9263}, mesh = {Burkholderia ; *Genome, Bacterial ; Genomic Islands ; Genomics ; Molecular Sequence Data ; *Phylogeny ; Sequence Analysis, DNA ; }, abstract = {The genus Burkholderia consists of a number of very diverse species, both in terms of lifestyle (which varies from category B pathogens to apathogenic soil bacteria and plant colonizers) and their genetic contents. 
We have used 56 publicly available genomes to explore the genomic diversity within this genus, including genome sequences that are not completely finished, but are available from the NCBI database. Defining the pan- and core genomes of species results in insights in the conserved and variable fraction of genomes, and can verify (or question) historic, taxonomic groupings. We find only several hundred genes that are conserved across all Burkholderia genomes, whilst there are more than 40,000 gene families in the Burkholderia pan-genome. A BLAST matrix visualizes the fraction of conserved genes in pairwise comparisons. A BLAST atlas shows which genes are actually conserved in a number of genomes, located and visualized with reference to a chosen genome. Genomic islands are common in many Burkholderia genomes, and most of these can be readily visualized by DNA structural properties of the chromosome. Trees that are based on relatedness of gene family content yield different results depending on what genes are analyzed. Some of the differences can be explained by errors in incomplete genome sequences, but, as our data illustrate, the outcome of phylogenetic trees depends on the type of genes that are analyzed.}, } @article {pmid19696492, year = {2009}, author = {Tettelin, H}, title = {The bacterial pan-genome and reverse vaccinology.}, journal = {Genome dynamics}, volume = {6}, number = {}, pages = {35-47}, doi = {10.1159/000235761}, pmid = {19696492}, issn = {1660-9263}, mesh = {Base Sequence ; Computational Biology ; *Genome, Bacterial ; *Genomics ; Humans ; Phylogeny ; Sequence Analysis, DNA ; Vaccines ; }, abstract = {The whole genome sequence of most human bacterial pathogens is available and the advent of next-generation sequencing technologies will result in a large number of sequenced isolates per pathogenic species. The study of multiple genome sequences of a given bacterium provides insights into its evolution, pathogenic potential and diversity. 
The pathogen's pan-genome, defined as the sum of the core genome shared by all sequenced strains and the dispensable genome present only in a subset of the isolates, can be analyzed to assess the size and diversity of the gene repertoire that the species has access to. This information is then used to better inform the reverse vaccinology approach whereby vaccine candidates are identified and prioritized in silico based on genomic data. Bioinformatics integration of genome sequence data with functional genomics results and clinical meta-data is essential to maximize the use of this large amount of information to answer biologically relevant questions.}, } @article {pmid19691844, year = {2009}, author = {Snipen, L and Almøy, T and Ussery, DW}, title = {Microbial comparative pan-genomics using binomial mixture models.}, journal = {BMC genomics}, volume = {10}, number = {}, pages = {385}, pmid = {19691844}, issn = {1471-2164}, mesh = {Algorithms ; Bacteria/*genetics ; Buchnera/genetics ; Comparative Genomic Hybridization/*methods ; Escherichia coli/genetics ; Genome, Bacterial ; Genomics/*methods ; *Models, Statistical ; Multigene Family ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: The size of the core- and pan-genome of bacterial species is a topic of increasing interest due to the growing number of sequenced prokaryote genomes, many from the same species. Attempts to estimate these quantities have been made, using regression methods or mixture models. We extend the latter approach by using statistical ideas developed for capture-recapture problems in ecology and epidemiology.

RESULTS: We estimate core- and pan-genome sizes for 16 different bacterial species. The results reveal a complex dependency structure for most species, manifested as heterogeneous detection probabilities. Estimated pan-genome sizes range from small (around 2600 gene families) in Buchnera aphidicola to large (around 43000 gene families) in Escherichia coli. Results for Escherichia coli show that as more data become available, a larger diversity is estimated, indicating an extensive pool of rarely occurring genes in the population.

CONCLUSION: Analyzing pan-genomics data with binomial mixture models is a way to handle dependencies between genomes, which we find is always present. A bottleneck in the estimation procedure is the annotation of rarely occurring genes.}, } @article {pmid19675075, year = {2009}, author = {Ameur, N and Lacroix, L and Roucan, S and Roux, V and Broutin, S and Talbot, M and Dupuy, C and Caillou, B and Schlumberger, M and Bidart, JM}, title = {Aggressive inherited and sporadic medullary thyroid carcinomas display similar oncogenic pathways.}, journal = {Endocrine-related cancer}, volume = {16}, number = {4}, pages = {1261-1272}, doi = {10.1677/ERC-08-0289}, pmid = {19675075}, issn = {1479-6821}, mesh = {Adolescent ; Adult ; Aged ; Biomarkers, Tumor/*genetics ; Carcinoma, Medullary/*genetics/metabolism/secondary ; Child ; Child, Preschool ; Female ; Gene Expression Profiling ; Gene Expression Regulation, Neoplastic ; Humans ; Immunoenzyme Techniques ; Male ; Middle Aged ; Mutation/*genetics ; Oligonucleotide Array Sequence Analysis ; Oncogenes/*genetics ; Prognosis ; Proto-Oncogene Proteins c-ret/antagonists & inhibitors/*genetics/metabolism ; RNA, Small Interfering/pharmacology ; Thyroid Neoplasms/*genetics/metabolism/pathology ; Young Adult ; }, abstract = {RET oncogene mutations are found in familial medullary thyroid carcinomas (MTC) and in one-third of sporadic cases. Oncogenic mechanisms involved in non-RET mutated sporadic MTC remain unclear. To study alterations associated with the development of both inherited and sporadic MTC, pangenomic DNA microarrays were used to analyze the transcriptome of 13 MTCs (four familial and nine sporadic). By using an ANOVA test, a list of 173 gene sequences with at least a twofold change expression was obtained. A subset of differentially expressed genes was controlled by real-time quantitative PCR and immunohistochemistry on a larger collection of MTCs. 
The expression pattern of those genes allowed us to distinguish two groups of sporadic tumors. The first group displays an expression profile similar to that expressed by inherited RET634 tumors. The second presents an expression profile close to that displayed by inherited RET918 tumors and includes tumors from patients with distant metastases. It is characterized by the overexpression of genes involved in proliferation and invasion (PTN, ESM1, and CEACAM6) or matrix remodeling (COL1A1, COL1A2, and FAP). Interestingly, RET918 tumors showed overexpression of the PTN gene, encoding pleiotrophin, a protein associated with metastasis. Using a MTC cell line, silencing of RET induced the inhibition of PTN gene expression. Overall, our results suggest that familial MTC and sporadic MTC could activate similar oncogenic pathways.}, } @article {pmid19602626, year = {2009}, author = {Cheung, F and Trick, M and Drou, N and Lim, YP and Park, JY and Kwon, SJ and Kim, JA and Scott, R and Pires, JC and Paterson, AH and Town, C and Bancroft, I}, title = {Comparative analysis between homoeologous genome segments of Brassica napus and its progenitor species reveals extensive sequence-level divergence.}, journal = {The Plant cell}, volume = {21}, number = {7}, pages = {1912-1928}, pmid = {19602626}, issn = {1040-4651}, support = {BB/E017363/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/E017363/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/B/07330/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Brassica/*genetics ; Brassica napus/genetics ; *Evolution, Molecular ; Genome, Plant/*genetics ; Molecular Sequence Data ; Sequence Analysis, DNA ; }, abstract = {Homoeologous regions of Brassica genomes were analyzed at the sequence level. 
These represent segments of the Brassica A genome as found in Brassica rapa and Brassica napus and the corresponding segments of the Brassica C genome as found in Brassica oleracea and B. napus. Analysis of synonymous base substitution rates within modeled genes revealed a relatively broad range of times (0.12 to 1.37 million years ago) since the divergence of orthologous genome segments as represented in B. napus and the diploid species. Similar, and consistent, ranges were also identified for single nucleotide polymorphism and insertion-deletion variation. Genes conserved across the Brassica genomes and the homoeologous segments of the genome of Arabidopsis thaliana showed almost perfect collinearity. Numerous examples of apparent transduplication of gene fragments, as previously reported in B. oleracea, were observed in B. rapa and B. napus, indicating that this phenomenon is widespread in Brassica species. In the majority of the regions studied, the C genome segments were expanded in size relative to their A genome counterparts. 
The considerable variation that we observed, even between the different versions of the same Brassica genome, for gene fragments and annotated putative genes suggest that the concept of the pan-genome might be particularly appropriate when considering Brassica genomes.}, } @article {pmid19590497, year = {2009}, author = {Marchetti, S and Gamas, P and Belhacène, N and Grosso, S and Pradelli, LA and Colosetti, P and Johansen, C and Iversen, L and Deckert, M and Luciano, F and Hofman, P and Ortonne, N and Khemis, A and Mari, B and Ortonne, JP and Ricci, JE and Auberger, P}, title = {The caspase-cleaved form of LYN mediates a psoriasis-like inflammatory syndrome in mice.}, journal = {The EMBO journal}, volume = {28}, number = {16}, pages = {2449-2460}, pmid = {19590497}, issn = {1460-2075}, mesh = {Animals ; Biopsy ; Caspases/metabolism ; Cells, Cultured ; Gene Deletion ; Gene Expression ; Humans ; Keratinocytes/metabolism ; Mice ; Mice, Inbred C57BL ; Mice, Transgenic ; NF-kappa B/metabolism ; Phenotype ; Psoriasis/genetics/*metabolism ; Skin/anatomy & histology/*pathology ; Thymus Gland/cytology ; Tumor Necrosis Factor-alpha/genetics/immunology ; src-Family Kinases/*genetics/*metabolism ; }, abstract = {We showed previously that Lyn is a substrate for caspases, a family of cysteine proteases, involved in the regulation of apoptosis and inflammation. Here, we report that expression of the caspase-cleaved form of Lyn (LynDeltaN), in mice, mediates a chronic inflammatory syndrome resembling human psoriasis. Genetic ablation of TNF receptor 1 in a LynDeltaN background rescues a normal phenotype, indicating that LynDeltaN mice phenotype is TNF-alpha-dependent. The predominant role of T cells in the disease occurring in LynDeltaN mice was highlighted by the distinct improvement of LynDeltaN mice phenotype in a Rag1-deficient background. 
Using pan-genomic profiling, we also established that LynDeltaN mice show an increased expression of STAT-3 and inhibitory members of the NFkappaB pathway. Accordingly, LynDeltaN alters NFkappaB activity underlying a link between inhibition of NFkappaB and LynDeltaN mice phenotype. Finally, analysis of Lyn expression in human skin biopsies of psoriatic patients led to the detection of Lyn cleavage product whose expression correlates with the activation of caspase 1. Our data identify a new role for Lyn as a regulator of psoriasis through its cleavage by caspases.}, } @article {pmid19567819, year = {2009}, author = {Grosso, S and Puissant, A and Dufies, M and Colosetti, P and Jacquel, A and Lebrigand, K and Barbry, P and Deckert, M and Cassuto, JP and Mari, B and Auberger, P}, title = {Gene expression profiling of imatinib and PD166326-resistant CML cell lines identifies Fyn as a gene associated with resistance to BCR-ABL inhibitors.}, journal = {Molecular cancer therapeutics}, volume = {8}, number = {7}, pages = {1924-1933}, doi = {10.1158/1535-7163.MCT-09-0168}, pmid = {19567819}, issn = {1538-8514}, mesh = {Antineoplastic Agents/pharmacology ; Apoptosis/drug effects ; Benzamides ; Caspases/metabolism ; Cell Cycle/drug effects ; Cell Proliferation/drug effects ; *Drug Resistance, Neoplasm ; Flow Cytometry ; Fusion Proteins, bcr-abl/*antagonists & inhibitors ; Gene Expression Profiling ; Humans ; Imatinib Mesylate ; Leukemia, Myelogenous, Chronic, BCR-ABL Positive/*drug therapy/*genetics/pathology ; Microarray Analysis ; Piperazines/*pharmacology ; Protein-Tyrosine Kinases/antagonists & inhibitors ; Proto-Oncogene Proteins c-fyn/antagonists & inhibitors/*genetics/metabolism ; Pyridines/*pharmacology ; Pyrimidines/*pharmacology ; RNA, Small Interfering/pharmacology ; Tumor Cells, Cultured ; }, abstract = {Imatinib is used to treat chronic myelogenous leukemia (CML), but resistance develops in all phases of this disease. 
The purpose of the present study was to identify the mode of resistance of newly derived imatinib-resistant (IM-R) and PD166326-resistant (PD-R) CML cells. IM-R and PD-R clones exhibited an increase in viability and a decrease in caspase activation in response to various doses of imatinib and PD166326, respectively, as compared with parental K562 cells. Resistance involved neither mutations in BCR-ABL nor increased BCR-ABL, MDR1 or Lyn expression, all known modes of resistance. To gain insight into the resistance mechanisms, we used pangenomic microarrays and identified 281 genes modulated in parental versus IM-R and PD-R cells. The gene signature was similar for IM-R and PD-R cells, in accordance with the cross-sensitivity observed for both inhibitors. These genes were functionally associated with pathways linked to development, cell adhesion, cell growth, and the JAK-STAT cascade. Especially relevant was the increased expression of the tyrosine kinases AXL and Fyn as well as CD44 and HMGA2. Small interfering RNA experiments and pharmacologic approaches identified FYN as a candidate for resistance to imatinib. 
Our findings provide a comprehensive picture of the transcriptional events associated with imatinib and PD166326 resistance and identify Fyn as a new potential target for therapeutic intervention in CML.}, } @article {pmid19493340, year = {2009}, author = {Makarova, KS and Wolf, YI and Koonin, EV}, title = {Comprehensive comparative-genomic analysis of type 2 toxin-antitoxin systems and related mobile stress response systems in prokaryotes.}, journal = {Biology direct}, volume = {4}, number = {}, pages = {19}, pmid = {19493340}, issn = {1745-6150}, support = {//Intramural NIH HHS/United States ; }, mesh = {Amino Acid Sequence ; Antitoxins/chemistry/*genetics ; Chromosomes, Archaeal/genetics ; Chromosomes, Bacterial/genetics ; Genetic Variation ; Genome/genetics ; *Genomics ; *Interspersed Repetitive Sequences ; Molecular Sequence Data ; Operon/genetics ; Prokaryotic Cells/*metabolism ; Sequence Alignment ; Stress, Physiological/*genetics ; Toxins, Biological/chemistry/*genetics ; }, abstract = {BACKGROUND: The prokaryotic toxin-antitoxin systems (TAS, also referred to as TA loci) are widespread, mobile two-gene modules that can be viewed as selfish genetic elements because they evolved mechanisms to become addictive for replicons and cells in which they reside, but also possess "normal" cellular functions in various forms of stress response and management of prokaryotic population. Several distinct TAS of type 1, where the toxin is a protein and the antitoxin is an antisense RNA, and numerous, unrelated TAS of type 2, in which both the toxin and the antitoxin are proteins, have been experimentally characterized, and it is suspected that many more remain to be identified.

RESULTS: We report a comprehensive comparative-genomic analysis of Type 2 toxin-antitoxin systems in prokaryotes. Using sensitive methods for distant sequence similarity search, genome context analysis and a new approach for the identification of mobile two-component systems, we identified numerous, previously unnoticed protein families that are homologous to toxins and antitoxins of known type 2 TAS. In addition, we predict 12 new families of toxins and 13 families of antitoxins, and also predict a TAS or TAS-like activity for several gene modules that were not previously suspected to function in that capacity. In particular, we present indications that the two-gene module that encodes a minimal nucleotidyl transferase and the accompanying HEPN protein, and is extremely abundant in many archaea and bacteria, especially thermophiles, might comprise a novel TAS. We present a survey of previously known and newly predicted TAS in 750 complete genomes of archaea and bacteria, quantitatively demonstrate the exceptional mobility of the TAS, and explore the network of toxin-antitoxin pairings that combines plasticity with selectivity.

CONCLUSION: The defining properties of the TAS, namely, the typically small size of the toxin and antitoxin genes, fast evolution, and extensive horizontal mobility, make the task of comprehensive identification of these systems particularly challenging. However, these same properties can be exploited to develop context-based computational approaches which, combined with exhaustive analysis of subtle sequence similarities, were employed in this work to substantially expand the current collection of TAS by predicting both previously unnoticed, derived versions of known toxins and antitoxins, and putative novel TAS-like systems. In a broader context, the TAS belong to the resistome domain of the prokaryotic mobilome which includes partially selfish, addictive gene cassettes involved in various aspects of stress response and organized under the same general principles as the TAS. The "selfish altruism", or "responsible selfishness", of TAS-like systems appears to be a defining feature of the resistome and an important characteristic of the entire prokaryotic pan-genome given that in the prokaryotic world the mobilome and the "stable" chromosomes form a dynamic continuum.

REVIEWERS: This paper was reviewed by Kenn Gerdes (nominated by Arcady Mushegian), Daniel Haft, Arcady Mushegian, and Andrei Osterman. For full reviews, go to the Reviewers' Reports section.}, } @article {pmid19486308, year = {2009}, author = {MacHugh, DE and Gormley, E and Park, SD and Browne, JA and Taraktsoglou, M and O'Farrelly, C and Meade, KG}, title = {Gene expression profiling of the host response to Mycobacterium bovis infection in cattle.}, journal = {Transboundary and emerging diseases}, volume = {56}, number = {6-7}, pages = {204-214}, doi = {10.1111/j.1865-1682.2009.01082.x}, pmid = {19486308}, issn = {1865-1674}, mesh = {Animals ; Antigens, Bacterial ; Cattle ; Gene Expression/immunology ; *Gene Expression Profiling ; Genetic Markers ; Host-Pathogen Interactions/genetics/immunology ; Humans ; Leukocytes, Mononuclear/immunology ; Mycobacterium bovis/*genetics/*immunology ; Tuberculosis, Bovine/diagnosis/*genetics/*immunology ; Zoonoses/microbiology ; }, abstract = {Bovine tuberculosis (BTB), caused by Mycobacterium bovis, continues to pose a threat to livestock worldwide and, as a zoonotic infection, also has serious implications for human health. The implementation of comprehensive surveillance programmes to detect BTB has been successful in reducing the incidence of infection in many countries, yet BTB has remained recalcitrant to eradication in several EU states, particularly in Ireland and the UK. There are well-recognized limitations in the use of the current diagnostics to detect all infected animals and this has led to renewed efforts to uncover novel diagnostic biomarkers that may serve to enhance the performance of the tests. Studies of single immunological parameters have so far been unable to unlock the complexities of the immune response to mycobacterial infection. 
However, the development of high-throughput methods including pan-genomic gene expression technologies such as DNA microarrays has facilitated the simultaneous identification and analysis of thousands of genes and their interactions during the immune response. In addition, the application of these new genomic technologies to BTB has identified pathogen-associated immune response signatures of host infection. The objective of these investigations is to understand the changing profile of immune responses throughout the course of infection and to identify biomarkers for sensitive diagnosis, particularly during the early stages of infection. Transcriptional profiling via microarray and more recently via next-generation sequencing technologies may lead to the development of specific and sensitive diagnostics for M. bovis infection and will enhance the prospect of eradication of tuberculosis from cattle populations.}, } @article {pmid19445531, year = {2009}, author = {Narjoz, C and Marisa, L and Imbeaud, S and Paris, A and Delacroix, H and Beaune, P and De Waziers, I}, title = {Genomic consequences of cytochrome P450 2C9 overexpression in human hepatoma cells.}, journal = {Chemical research in toxicology}, volume = {22}, number = {5}, pages = {779-787}, doi = {10.1021/tx800417u}, pmid = {19445531}, issn = {1520-5010}, mesh = {Aryl Hydrocarbon Hydroxylases/genetics/*metabolism ; Carcinoma, Hepatocellular ; Cell Survival ; Cytochrome P-450 CYP2C9 ; Endoplasmic Reticulum/genetics/metabolism ; Endoplasmic Reticulum Chaperone BiP ; *Gene Expression Profiling ; HSP27 Heat-Shock Proteins/genetics/metabolism ; Heat-Shock Proteins/genetics/metabolism ; Humans ; Membrane Proteins/genetics/metabolism ; Molecular Chaperones ; Proteasome Endopeptidase Complex/genetics/metabolism ; SEC Translocation Channels ; Transcription, Genetic ; Tumor Cells, Cultured ; Ubiquitin/genetics/metabolism ; }, abstract = {Cytochrome P450 2C9 (P450 2C9) is one of the most important P450 isoforms in the 
human liver, as it metabolizes numerous exogenous and endogenous substrates. Moreover, it is inducible by several compounds, such as rifampicin, phenobarbital, and NSAIDs (nonsteroidal anti-inflammatories). The aim of this study was to investigate the global cellular consequences of P450 2C9 overexpression at the transcriptional level using an untargeted approach: pangenomic microarrays. Recombinant adenovirus was used to express P450 2C9 instead of an inducer to prevent a per se effect of inducer or its metabolites. P450 2C9 overexpression induced endoplasmic reticulum (ER) stress and regulated genes implicated in the unfolded protein response (UPR) as heat shock protein (HSP) (we studied particularly HSPA5 and HSPB1) and in the endoplasmic reticulum associated degradation (ERAD) system as Sec61 and ubiquitin and proteasome pathways. UPR and ERAD are two mechanisms of adaptative response to ER stress. Moreover, activation of Akt was observed in HepG2 cells that overexpress P450 2C9 and might participate in the cellular adaptive response to stress, thus leading to the activation of cell survival pathways. UPR and ERAD should be caused by accumulation of native and misfolded P450 2C9 protein. Our results indicated that P450 2C9 overexpression did not lead to toxicity but induced an ER stress due to protein overexpression rather than mono-oxygenase activity. 
The ER stress triggered activation of the adaptative response and of pathways leading to cell survival.}, } @article {pmid19435847, year = {2009}, author = {Reno, ML and Held, NL and Fields, CJ and Burke, PV and Whitaker, RJ}, title = {Biogeography of the Sulfolobus islandicus pan-genome.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {106}, number = {21}, pages = {8605-8610}, pmid = {19435847}, issn = {1091-6490}, mesh = {Archaeal Proteins/genetics ; *Evolution, Molecular ; *Genetic Speciation ; Genetic Variation/*genetics ; Genome, Archaeal/*genetics ; *Geography ; Molecular Sequence Data ; Sulfolobus/classification/*genetics ; }, abstract = {Variation in gene content has been hypothesized to be the primary mode of adaptive evolution in microorganisms; however, very little is known about the spatial and temporal distribution of variable genes. Through population-scale comparative genomics of 7 Sulfolobus islandicus genomes from 3 locations, we demonstrate the biogeographical structure of the pan-genome of this species, with no evidence of gene flow between geographically isolated populations. The evolutionary independence of each population allowed us to assess genome dynamics over very recent evolutionary time, beginning approximately 910,000 years ago. On this time scale, genome variation largely consists of recent strain-specific integration of mobile elements. Localized sectors of parallel gene loss are identified; however, the balance between the gain and loss of genetic material suggests that S. islandicus genomes acquire material slowly over time, primarily from closely related Sulfolobus species. Examination of the genome dynamics through population genomics in S. 
islandicus exposes the process of allopatric speciation in thermophilic Archaea and brings us closer to a generalized framework for understanding microbial genome evolution in a spatial context.}, } @article {pmid19432983, year = {2009}, author = {Silby, MW and Cerdeño-Tárraga, AM and Vernikos, GS and Giddens, SR and Jackson, RW and Preston, GM and Zhang, XX and Moon, CD and Gehrig, SM and Godfrey, SA and Knight, CG and Malone, JG and Robinson, Z and Spiers, AJ and Harris, S and Challis, GL and Yaxley, AM and Harris, D and Seeger, K and Murphy, L and Rutter, S and Squares, R and Quail, MA and Saunders, E and Mavromatis, K and Brettin, TS and Bentley, SD and Hothersall, J and Stephens, E and Thomas, CM and Parkhill, J and Levy, SB and Rainey, PB and Thomson, NR}, title = {Genomic and genetic analyses of diversity and plant interactions of Pseudomonas fluorescens.}, journal = {Genome biology}, volume = {10}, number = {5}, pages = {R51}, pmid = {19432983}, issn = {1474-760X}, support = {104/P16729//Biotechnology and Biological Sciences Research Council/United Kingdom ; P15257//Biotechnology and Biological Sciences Research Council/United Kingdom ; //Wellcome Trust/United Kingdom ; }, mesh = {*Ecosystem ; *Genome, Bacterial ; Plants/metabolism/*microbiology ; Pseudomonas fluorescens/classification/*genetics/metabolism ; }, abstract = {BACKGROUND: Pseudomonas fluorescens are common soil bacteria that can improve plant health through nutrient cycling, pathogen antagonism and induction of plant defenses. The genome sequences of strains SBW25 and Pf0-1 were determined and compared to each other and with P. fluorescens Pf-5. A functional genomic in vivo expression technology (IVET) screen provided insight into genes used by P. fluorescens in its natural environment and an improved understanding of the ecological significance of diversity within this species.

RESULTS: Comparisons of three P. fluorescens genomes (SBW25, Pf0-1, Pf-5) revealed considerable divergence: 61% of genes are shared, the majority located near the replication origin. Phylogenetic and average amino acid identity analyses showed a low overall relationship. A functional screen of SBW25 defined 125 plant-induced genes including a range of functions specific to the plant environment. Orthologues of 83 of these exist in Pf0-1 and Pf-5, with 73 shared by both strains. The P. fluorescens genomes carry numerous complex repetitive DNA sequences, some resembling Miniature Inverted-repeat Transposable Elements (MITEs). In SBW25, repeat density and distribution revealed 'repeat deserts' lacking repeats, covering approximately 40% of the genome.

CONCLUSIONS: P. fluorescens genomes are highly diverse. Strain-specific regions around the replication terminus suggest genome compartmentalization. The genomic heterogeneity among the three strains is reminiscent of a species complex rather than a single species. That 42% of plant-inducible genes were not shared by all strains reinforces this conclusion and shows that ecological success requires specialized and core functions. The diversity also indicates the significant size of genetic information within the Pseudomonas pan genome.}, } @article {pmid19407241, year = {2009}, author = {Remme, CA and Scicluna, BP and Verkerk, AO and Amin, AS and van Brunschot, S and Beekman, L and Deneer, VH and Chevalier, C and Oyama, F and Miyazaki, H and Nukina, N and Wilders, R and Escande, D and Houlgatte, R and Wilde, AA and Tan, HL and Veldkamp, MW and de Bakker, JM and Bezzina, CR}, title = {Genetically determined differences in sodium current characteristics modulate conduction disease severity in mice with cardiac sodium channelopathy.}, journal = {Circulation research}, volume = {104}, number = {11}, pages = {1283-1292}, doi = {10.1161/CIRCRESAHA.109.194423}, pmid = {19407241}, issn = {1524-4571}, mesh = {Animals ; Arrhythmias, Cardiac/physiopathology ; Channelopathies/*genetics/physiopathology ; DNA Transposable Elements ; Heart Conduction System/*physiopathology ; Heart Ventricles/cytology/metabolism ; Mice ; Mice, Inbred Strains ; Muscle Cells/cytology/physiology ; Mutation ; NAV1.5 Voltage-Gated Sodium Channel ; RNA, Messenger/genetics ; Sodium Channels/deficiency/genetics/physiology ; Voltage-Gated Sodium Channel beta-4 Subunit ; }, abstract = {Conduction slowing of the electric impulse that drives the heartbeat may evoke lethal cardiac arrhythmias. Mutations in SCN5A, which encodes the pore-forming cardiac sodium channel alpha subunit, are associated with familial arrhythmia syndromes based on conduction slowing. 
However, disease severity among mutation carriers is highly variable. We hypothesized that genetic modifiers underlie the variability in conduction slowing and disease severity. With the aim of identifying such modifiers, we studied the Scn5a(1798insD/+) mutation in 2 distinct mouse strains, FVB/N and 129P2. In 129P2 mice, the mutation resulted in more severe conduction slowing particularly in the right ventricle (RV) compared to FVB/N. Pan-genomic mRNA expression profiling in the 2 mouse strains uncovered a drastic reduction in mRNA encoding the sodium channel auxiliary subunit beta4 (Scn4b) in 129P2 mice compared to FVB/N. This corresponded to low to undetectable beta4 protein levels in 129P2 ventricular tissue, whereas abundant beta4 protein was detected in FVB/N. Sodium current measurements in isolated myocytes from the 2 mouse strains indicated that sodium channel activation in myocytes from 129P2 mice occurred at more positive potentials compared to FVB/N. Using computer simulations, this difference in activation kinetics was predicted to explain the observed differences in conduction disease severity between the 2 strains. In conclusion, genetically determined differences in sodium current characteristics on the myocyte level modulate disease severity in cardiac sodium channelopathies. In particular, the sodium channel subunit beta4 (SCN4B) may constitute a potential genetic modifier of conduction and cardiac sodium channel disease.}, } @article {pmid19376856, year = {2009}, author = {Barrangou, R and Briczinski, EP and Traeger, LL and Loquasto, JR and Richards, M and Horvath, P and Coûté-Monvoisin, AC and Leyer, G and Rendulic, S and Steele, JL and Broadbent, JR and Oberg, T and Dudley, EG and Schuster, S and Romero, DA and Roberts, RF}, title = {Comparison of the complete genome sequences of Bifidobacterium animalis subsp. 
lactis DSM 10140 and Bl-04.}, journal = {Journal of bacteriology}, volume = {191}, number = {13}, pages = {4144-4151}, pmid = {19376856}, issn = {1098-5530}, support = {T32 GM007133/GM/NIGMS NIH HHS/United States ; }, mesh = {Bifidobacterium/*genetics ; Genome, Bacterial/*genetics ; Molecular Sequence Data ; Polymorphism, Single Nucleotide/genetics ; Sequence Analysis, DNA/*methods ; }, abstract = {Bifidobacteria are important members of the human gut flora, especially in infants. Comparative genomic analysis of two Bifidobacterium animalis subsp. lactis strains revealed evolution by internal deletion of consecutive spacer-repeat units within a novel clustered regularly interspaced short palindromic repeat locus, which represented the largest differential content between the two genomes. Additionally, 47 single nucleotide polymorphisms were identified, consisting primarily of nonsynonymous mutations, indicating positive selection and/or recent divergence. A particular nonsynonymous mutation in a putative glucose transporter was linked to a negative phenotypic effect on the ability of the variant to catabolize glucose, consistent with a modification in the predicted protein transmembrane topology. Comparative genome sequence analysis of three Bifidobacterium species provided a core genome set of 1,117 orthologs complemented by a pan-genome of 2,445 genes. The genome sequences of the intestinal bacterium B. animalis subsp. lactis provide insights into rapid genome evolution and the genetic basis for adaptation to the human gut environment, notably with regard to catabolism of dietary carbohydrates, resistance to bile and acid, and interaction with the intestinal epithelium. 
The high degree of genome conservation observed between the two strains in terms of size, organization, and sequence is indicative of a genomically monomorphic subspecies and explains the inability to differentiate the strains by standard techniques such as pulsed-field gel electrophoresis.}, } @article {pmid19287447, year = {2009}, author = {Bentley, S}, title = {Sequencing the species pan-genome.}, journal = {Nature reviews. Microbiology}, volume = {7}, number = {4}, pages = {258-259}, pmid = {19287447}, issn = {1740-1534}, mesh = {Bacteria/*genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; }, } @article {pmid19271178, year = {2009}, author = {Lawrence, JG and Retchless, AC}, title = {The interplay of homologous recombination and horizontal gene transfer in bacterial speciation.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {532}, number = {}, pages = {29-53}, doi = {10.1007/978-1-60327-853-9_3}, pmid = {19271178}, issn = {1064-3745}, support = {GM078092/GM/NIGMS NIH HHS/United States ; }, mesh = {Bacteria/classification/*genetics ; Biological Evolution ; Ecosystem ; *Gene Transfer, Horizontal ; *Genetic Speciation ; Genome, Bacterial ; Models, Genetic ; Mutation ; *Recombination, Genetic ; Selection, Genetic ; Time Factors ; }, abstract = {Bacteria experience recombination in two ways. In the context of the Biological Species concept, allelic exchange purges genic variability within bacterial populations as gene exchange mediates selective sweeps. In contrast, horizontal gene transfer (HGT) increases the size of the population's pan-genome by providing an influx of novel genetic material. Here we discuss the interplay of these two processes, with an emphasis on how they allow for the maintenance of genotypically cohesive bacterial populations, yet allow for the separation of these populations upon bacterial speciation. 
In populations that maintain genotypic similarity by frequent allelic exchange, horizontally transferred genes may initiate ecological barriers to genetic exchange. The resulting recombination interference allows for the accumulation of neutral mutations and, consequently, the imposition of a pre-mating barrier to gene transfer.}, } @article {pmid19200820, year = {2009}, author = {Serruto, D and Serino, L and Masignani, V and Pizza, M}, title = {Genome-based approaches to develop vaccines against bacterial pathogens.}, journal = {Vaccine}, volume = {27}, number = {25-26}, pages = {3245-3250}, doi = {10.1016/j.vaccine.2009.01.072}, pmid = {19200820}, issn = {0264-410X}, mesh = {Bacterial Vaccines/genetics/*immunology ; Drug Design ; Fimbriae, Bacterial/immunology ; *Genome, Bacterial ; Meningococcal Vaccines/immunology ; Neisseria meningitidis, Serogroup B/immunology ; Pneumococcal Vaccines/immunology ; Streptococcus pyogenes/immunology ; }, abstract = {Bacterial infectious diseases remain the single most important threat to health worldwide. Although conventional vaccinology approaches were successful in conferring protection against several diseases, they failed to provide efficacious solutions against many others. The advent of whole-genome sequencing changed the way to think about vaccine development, enabling the targeting of possible vaccine candidates starting from the genomic information of a single bacterial isolate, with a process named reverse vaccinology. As the genomic era progressed, reverse vaccinology has evolved with a pan-genome approach and multi-strain genome analysis became fundamental for the design of universal vaccines. 
This review describes the applications of genome-based approaches in the development of new vaccines against bacterial pathogens.}, } @article {pmid19197388, year = {2009}, author = {Belin, S and Kaya, F and Duisit, G and Giacometti, S and Ciccolini, J and Fontés, M}, title = {Antiproliferative effect of ascorbic acid is associated with the inhibition of genes necessary to cell cycle progression.}, journal = {PloS one}, volume = {4}, number = {2}, pages = {e4409}, pmid = {19197388}, issn = {1932-6203}, mesh = {Animals ; Ascorbic Acid/*pharmacology ; Cell Cycle/*drug effects/*genetics ; Cell Division/drug effects ; Cell Line ; Cell Proliferation/drug effects ; Cell Survival/drug effects ; Disease Progression ; Female ; Gene Expression Regulation/*drug effects ; HT29 Cells ; Humans ; Mice ; Mice, Nude ; Neoplasms/pathology ; Oligonucleotide Array Sequence Analysis ; Polymerase Chain Reaction ; }, abstract = {BACKGROUND: Ascorbic acid (AA), or Vitamin C, is most well known as a nutritional supplement with antioxidant properties. Recently, we demonstrated that high concentrations of AA act on PMP22 gene expression and partially correct the Charcot-Marie-Tooth disease phenotype in a mouse model. This is due to the capacity of AA, but not other antioxidants, to down-modulate cAMP intracellular concentration by a competitive inhibition of the adenylate cyclase enzymatic activity. Because of the critical role of cAMP in intracellular signalling, we decided to explore the possibility that ascorbic acid could modulate the expression of other genes.

METHODS AND FINDINGS: Using human pangenomic microarrays, we found that AA inhibited the expression of two categories of genes necessary for cell cycle progression, tRNA synthetases and translation initiation factor subunits. In in vitro assays, we demonstrated that AA induced the S-phase arrest of proliferative normal and tumor cells. Highest concentrations of AA led to necrotic cell death. However, quiescent cells were not susceptible to AA toxicity, suggesting the blockage of protein synthesis was mainly detrimental in metabolically-active cells. Using animal models, we found that high concentrations of AA inhibited tumor progression in nude mice grafted with HT29 cells (derived from human colon carcinoma). Consistently, expression of tRNA synthetases and ieF2 appeared to be specifically decreased in tumors upon AA treatment.

CONCLUSIONS: AA has an antiproliferative activity, at elevated concentration that could be obtained using IV injection. This activity has been observed in vitro as well as in vivo and likely results from the inhibition of expression of genes involved in protein synthesis. Implications for a clinical use in anticancer therapies will be discussed.}, } @article {pmid19168257, year = {2009}, author = {Lapierre, P and Gogarten, JP}, title = {Estimating the size of the bacterial pan-genome.}, journal = {Trends in genetics : TIG}, volume = {25}, number = {3}, pages = {107-110}, doi = {10.1016/j.tig.2008.12.004}, pmid = {19168257}, issn = {0168-9525}, mesh = {Amino Acid Sequence ; *Evolution, Molecular ; Gene Frequency ; Genes, Bacterial ; *Genome, Bacterial ; Molecular Sequence Data ; Sequence Homology, Amino Acid ; }, abstract = {The 'pan-genome' denotes the set of all genes present in the genomes of a group of organisms. Here, we extend the pan-genome concept to higher taxonomic units. Using 573 sequenced genomes, we estimate the size of the bacterial pan-genome based on the frequency of occurrences of genes among sampled genomes. Using gene- and genome-centered approaches, we characterize three distinct pools of gene families that comprise the bacterial pan-genome, each evolving under different evolutionary constraints. 
Our findings indicate that the pan-genome of the bacterial domain is of infinite size (the Bacteria as a whole have an open pan-genome) and that approximately 250 genes per genome belong to the extended bacterial core genome.}, } @article {pmid19159669, year = {2009}, author = {Lambert, CB and Spire, C and Renaud, MP and Claude, N and Guillouzo, A}, title = {Reproducible chemical-induced changes in gene expression profiles in human hepatoma HepaRG cells under various experimental conditions.}, journal = {Toxicology in vitro : an international journal published in association with BIBRA}, volume = {23}, number = {3}, pages = {466-475}, doi = {10.1016/j.tiv.2008.12.018}, pmid = {19159669}, issn = {0887-2333}, mesh = {Carcinoma, Hepatocellular/*drug therapy/genetics/metabolism ; Cell Line, Tumor ; Cell Survival/drug effects ; Gene Expression Profiling/*methods ; Gene Expression Regulation, Neoplastic/*drug effects ; Hepatocytes/*drug effects/metabolism ; Humans ; Hypnotics and Sedatives/classification/*toxicity ; Liver/drug effects ; Microarray Analysis ; Phenobarbital/classification/*toxicity ; Principal Component Analysis ; RNA, Messenger/metabolism ; Reproducibility of Results ; Toxicity Tests ; }, abstract = {The use of in vitro human liver cell models is an attractive approach in toxicogenomic studies designed to analyze gene expression changes induced by a toxic chemical. However, in such studies, reliability, reproducibility and interlaboratory concordance of microarrays, as well as the choice of the most suitable cell model, remain a matter of debate. This work was aimed at evaluating the robustness of microarray technologies and the suitability of the highly differentiated human HepaRG cell line in the investigation of gene expression changes induced by a toxic compound in human liver. 
The influence of various experimental conditions including cell cultures grown at different test sites, different generations of microarrays, RNA analysis platforms and softwares, was tested on gene expression profiles induced by a 20h treatment with an 8mM concentration of phenobarbital as the toxic compound. As many as 1099 genes (p-value<0.01 and 1.5-fold-change), representing 74% and 30% of the signature genes detected with Agilent 22 and 44K pangenomic microarrays, respectively, were shown to be modulated in common in six independently performed experiments. The most modulated genes included both those known to be regulated by phenobarbital, such as cytochromes P450 and membrane transporters, and those involved in oxidative stress, inflammation and apoptosis, typifying a toxic insult. These data provide strong support for the use of a toxicogenomic approach for the in vitro prediction of chemical toxicity, and for the choice of human HepaRG cells as a promising model system for human hepatotoxicity testing.}, } @article {pmid19152914, year = {2009}, author = {Auclair, S and Milenkovic, D and Besson, C and Chauvet, S and Gueux, E and Morand, C and Mazur, A and Scalbert, A}, title = {Catechin reduces atherosclerotic lesion development in apo E-deficient mice: a transcriptomic study.}, journal = {Atherosclerosis}, volume = {204}, number = {2}, pages = {e21-7}, doi = {10.1016/j.atherosclerosis.2008.12.007}, pmid = {19152914}, issn = {1879-1484}, mesh = {Animals ; Antioxidants/metabolism ; Aortic Diseases/genetics/metabolism/pathology/*prevention & control ; Apolipoproteins E/*deficiency/genetics ; Atherosclerosis/genetics/metabolism/pathology/*prevention & control ; Catechin/*pharmacology ; *Dietary Supplements ; Disease Models, Animal ; Disease Progression ; *Gene Expression Profiling/methods ; Gene Expression Regulation/drug effects ; Inflammation/genetics/metabolism/prevention & control ; Inflammation Mediators/blood ; Lipids/blood ; Liver/drug 
effects/metabolism ; Male ; Mice ; Mice, Knockout ; Oligonucleotide Array Sequence Analysis ; Polymerase Chain Reaction ; Serum Amyloid A Protein/metabolism ; }, abstract = {Much experimental evidence supports a protective role of dietary flavonoids against cardiovascular diseases. The aim of the present study was to investigate the anti-atherosclerotic effects of catechin supplemented in the diet of apoE deficient mice at a low nutritional level and to explore the mechanisms of action by a transcriptomic approach. After 6 weeks of supplementation, atherosclerotic lesions were assessed by histomorphometry and several markers of lipid, inflammation and oxidative stress status were evaluated. Analysis of the global gene expression in the aorta was carried out using pangenomic arrays. Catechin supplementation reduced the mean atherosclerotic lesion area by 32% but had no effect on total cholesterol and triacylglycerol levels in the plasma and the liver. The plasma antioxidant capacity (FRAP) and inflammatory status (serum amyloid A) were unchanged. The expression of 450 genes was significantly modified by catechin supplementation. Some of the most significantly down-regulated genes included genes coding for adhesion molecules such as CD34 and PSGL-1 known to play a key role in leukocyte adhesion to the endothelium. Other genes involved in energy metabolism, lipid metabolism and lipids trafficking such as FABP4, LPL and SCARA5 were down-regulated and may contribute to the atheroprotective effect of catechin. 
This work shows that transcriptomics allows characterizing the biological effects of low doses of flavonoids where common markers were not significantly affected.}, } @article {pmid19150507, year = {2009}, author = {Bambini, S and Rappuoli, R}, title = {The use of genomics in microbial vaccine development.}, journal = {Drug discovery today}, volume = {14}, number = {5-6}, pages = {252-260}, pmid = {19150507}, issn = {1878-5832}, mesh = {Animals ; Bacterial Vaccines/*immunology ; Communicable Diseases/immunology ; *Drug Design ; Genome, Bacterial ; Genome, Protozoan ; Genome, Viral ; Genomics/*methods ; Humans ; Molecular Sequence Data ; Protozoan Vaccines/immunology ; Viral Vaccines/immunology ; }, abstract = {Vaccination is one of the most effective tools for the prevention of infectious diseases. The availability of complete genome sequences, together with the progression of high-throughput technologies such as functional and structural genomics, has led to a new paradigm in vaccine development. Pan-genomic reverse vaccinology, with the comparison of sequence data from multiple isolates of the same species of a pathogen, increases the opportunity of the identification of novel vaccine candidates. Overall, the conventional empiric approach to vaccine development is being replaced by vaccine design. 
The recent development of synthetic genomics may provide a further opportunity to design vaccines.}, } @article {pmid19129208, year = {2009}, author = {Bayjanov, JR and Wels, M and Starrenburg, M and van Hylckama Vlieg, JE and Siezen, RJ and Molenaar, D}, title = {PanCGH: a genotype-calling algorithm for pangenome CGH data.}, journal = {Bioinformatics (Oxford, England)}, volume = {25}, number = {3}, pages = {309-314}, pmid = {19129208}, issn = {1367-4811}, mesh = {*Algorithms ; Cluster Analysis ; Comparative Genomic Hybridization/*methods ; Computational Biology ; Databases, Protein ; Evolution, Molecular ; Genetic Variation ; Genome, Bacterial/*genetics ; *Genotype ; }, abstract = {MOTIVATION: Pangenome arrays contain DNA oligomers targeting several sequenced reference genomes from the same species. In microbiology, these can be employed to investigate the often high genetic variability within a species by comparative genome hybridization (CGH). The biological interpretation of pangenome CGH data depends on the ability to compare strains at a functional level, particularly by comparing the presence or absence of orthologous genes. Due to the high genetic variability, available genotype-calling algorithms can not be applied to pangenome CGH data.

RESULTS: We have developed the algorithm PanCGH that incorporates orthology information about genes to predict the presence or absence of orthologous genes in a query organism using CGH arrays that target the genomes of sequenced representatives of a group of microorganisms. PanCGH was tested and applied in the analysis of genetic diversity among 39 Lactococcus lactis strains from three different subspecies (lactis, cremoris, hordniae) and isolated from two different niches (dairy and plant). Clustering of these strains using the presence/absence data of gene orthologs revealed a clear separation between different subspecies and reflected the niche of the strains.}, } @article {pmid19128835, year = {2009}, author = {Bourdonnay, E and Morzadec, C and Sparfel, L and Galibert, MD and Jouneau, S and Martin-Chouly, C and Fardel, O and Vernhet, L}, title = {Global effects of inorganic arsenic on gene expression profile in human macrophages.}, journal = {Molecular immunology}, volume = {46}, number = {4}, pages = {649-656}, doi = {10.1016/j.molimm.2008.08.268}, pmid = {19128835}, issn = {0161-5890}, mesh = {Arsenic Trioxide ; Arsenicals ; Cell Differentiation ; Down-Regulation ; *Gene Expression Profiling ; Humans ; Macrophages/*drug effects/immunology ; Oxides/*toxicity ; Up-Regulation ; }, abstract = {Inorganic arsenic, a major environmental contaminant, exerts immunosuppressive effects towards human cells. We previously demonstrated that relevant environmental concentrations of inorganic arsenic altered morphology and functions of human primary macrophages, suggesting interference with macrophage differentiation program. The goal of this study was to determine global effect of low concentrations of arsenic trioxide (As(2)O(3)) on gene expression profile in human primary macrophages, in order to identify molecular targets of inorganic arsenic, especially those relevant of macrophage differentiation process. 
Using a pan-genomic microarray, we demonstrate that exposure of human blood monocyte-derived macrophages to 1microM As(2)O(3) for 72h, a non-cytotoxic concentration, results in up-regulation of 32 genes and repression of 91 genes. Among these genes, 26 are specifically related to differentiation program of human macrophages. Particularly, we validated that As(2)O(3) strongly alters expression of MMP9, MMP12, CCL22, SPON2 and CXCL2 genes, which contribute to major macrophagic functions. Most of these metalloid effects were reversed when As(2)O(3)-treated macrophages were next cultured in arsenic-free medium. We also show that As(2)O(3) similarly regulates expression of this macrophagic gene subset in human alveolar macrophages, the phenotype of which closely resembles that of blood monocyte-derived macrophage. In conclusion, our study demonstrates that environmentally relevant concentrations of As(2)O(3) impair expression of macrophage-specific genes, which fully supports interference of metalloid with differentiation program of human macrophages.}, } @article {pmid19095084, year = {2009}, author = {Power, PM and Sweetman, WA and Gallacher, NJ and Woodhall, MR and Kumar, GA and Moxon, ER and Hood, DW}, title = {Simple sequence repeats in Haemophilus influenzae.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {9}, number = {2}, pages = {216-228}, pmid = {19095084}, issn = {1567-7257}, support = {G0400426/MRC_/Medical Research Council/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {DNA, Bacterial/*genetics/metabolism ; Gene Expression Regulation, Bacterial ; Genetic Variation ; Genome, Bacterial/*genetics ; Haemophilus influenzae/*genetics ; Repetitive Sequences, Nucleic Acid/*genetics ; }, abstract = {Simple sequence repeats (SSRs) of DNA are subject to high rates of mutation and are important mediators of adaptation in Haemophilus influenzae. 
Previous studies of the Rd KW20 genome identified the primacy of tetranucleotide SSRs in mediating phase variation (the rapid reversible switching of gene expression) of surface exposed structures such as lipopolysaccharide. The recent sequencing of the genomes of multiple strains of H. influenzae allowed the comparison of the SSRs (repeat units of one to nine nucleotides in length) in detail across four complete H. influenzae genomes and then comparison with a further 12 genomes when they became available. The SSR loci were broadly classified into three groups: (1) those that did not vary; (2) those for which some variation between strains was observed but this could not be linked to variation of gene expression; and (3) those that both varied and were located in regions consistent with mediating phase variable gene expression. Comparative analysis of 988 SSR associated loci confirmed that tetranucleotide repeats were the major mediators of phase variation and extended the repertoire of known tetranucleotide SSR loci by identifying ten previously uncharacterised tetranucleotide SSR loci with the potential to mediate phase variation which were unequally distributed across the H. influenzae pan-genome. Further, analysis of non-tetranucleotide SSR in the 16 strains revealed a number of mononucleotide, dinucleotide, pentanucleotide, heptanucleotide, and octanucleotide SSRs which were consistent with these tracts mediating phase variation. This study substantiates previous findings as to the important role that tetranucleotide SSRs play in H. influenzae biology. 
Two Brazilian isolates showed the most variation in their complement of SSRs suggesting the possibility of geographic and phenotypic influences on SSR distribution.}, } @article {pmid19086349, year = {2008}, author = {Tettelin, H and Riley, D and Cattuto, C and Medini, D}, title = {Comparative genomics: the bacterial pan-genome.}, journal = {Current opinion in microbiology}, volume = {11}, number = {5}, pages = {472-477}, doi = {10.1016/j.mib.2008.09.006}, pmid = {19086349}, issn = {1369-5274}, mesh = {Bacteria/*genetics ; Genetic Variation ; *Genome, Bacterial ; Genomics/*methods ; Synteny ; }, abstract = {Bacterial genome sequencing has become so easy and accessible that the genomes of multiple strains of more and more individual species have been and will be generated. These data sets provide for in depth analysis of intra-species diversity from various aspects. The pan-genome analysis, whereby the size of the gene repertoire accessible to any given species is characterized together with an estimate of the number of whole genome sequences required for proper analysis, is being increasingly applied. Different models exist for the analysis and their accuracy and applicability depend on the case at hand. 
Here we discuss current models and suggest a new model of broad applicability, including examples of its implementation.}, } @article {pmid19084549, year = {2009}, author = {Lambert, CB and Spire, C and Claude, N and Guillouzo, A}, title = {Dose- and time-dependent effects of phenobarbital on gene expression profiling in human hepatoma HepaRG cells.}, journal = {Toxicology and applied pharmacology}, volume = {234}, number = {3}, pages = {345-360}, doi = {10.1016/j.taap.2008.11.008}, pmid = {19084549}, issn = {1096-0333}, mesh = {Adenosine Triphosphate/metabolism ; Carcinoma, Hepatocellular/enzymology/*genetics ; Cell Line, Tumor ; Cluster Analysis ; Cytochrome P-450 Enzyme System/drug effects/genetics ; Dose-Response Relationship, Drug ; *Gene Expression Profiling/methods ; Gene Expression Regulation, Enzymologic/*drug effects ; Gene Expression Regulation, Neoplastic/*drug effects ; Hepatocytes/*drug effects/enzymology ; Humans ; Liver Neoplasms/enzymology/*genetics ; Multidrug Resistance-Associated Protein 2 ; Oligonucleotide Array Sequence Analysis ; Phenobarbital/*pharmacology/toxicity ; Principal Component Analysis ; Reverse Transcriptase Polymerase Chain Reaction ; Time Factors ; }, abstract = {Phenobarbital (PB) induces or represses a wide spectrum of genes in rodent liver. Much less is known about its effects in human liver. We used pangenomic cDNA microarrays to analyze concentration- and time-dependent gene expression profile changes induced by PB in the well-differentiated human HepaRG cell line. Changes in gene expression profiles clustered at specific concentration ranges and treatment times. The number of correctly annotated genes significantly modulated by at least three different PB concentration ranges (spanning 0.5 to 3.2 mM) at 20 h exposure amounted to 77 and 128 genes (p< or =0.01) at 2- and 1.8-fold filter changes, respectively. 
At low concentrations (0.5 and 1 mM), PB-responsive genes included the well-recognized CAR- and PXR-dependent responsive cytochromes P450 (CYP2B6, CYP3A4), sulfotransferase 2A1 and plasma transporters (ABCB1, ABCC2), as well as a number of genes critically involved in various metabolic pathways, including lipid (CYP4A11, CYP4F3), vitamin D (CYP24A1) and bile (CYP7A1 and CYP8B1) metabolism. At concentrations of 3.2 mM or higher after 20 h, and especially 48 h, increased cytotoxic effects were associated with dysregulation of numerous genes related to oxidative stress, DNA repair and apoptosis. Primary human hepatocyte cultures were also exposed to 1 and 3.2 mM PB for 20 h and the changes were comparable to those found in HepaRG cells treated under the same conditions. Taken together, our data provide further evidence that HepaRG cells closely resemble primary human hepatocytes and provide new information on the effects of PB in human liver. These data also emphasize the importance of investigating dose- and time-dependent effects of chemicals when using toxicogenomic approaches.}, } @article {pmid19047395, year = {2009}, author = {Castellanos, E and Aranaz, A and Gould, KA and Linedale, R and Stevenson, K and Alvarez, J and Dominguez, L and de Juan, L and Hinds, J and Bull, TJ}, title = {Discovery of stable and variable differences in the Mycobacterium avium subsp. paratuberculosis type I, II, and III genomes by pan-genome microarray analysis.}, journal = {Applied and environmental microbiology}, volume = {75}, number = {3}, pages = {676-686}, pmid = {19047395}, issn = {1098-5336}, support = {86547/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Base Composition ; DNA, Bacterial/*genetics ; Gene Duplication ; Gene Order ; *Genome, Bacterial ; Genomic Islands ; Genotype ; Humans ; INDEL Mutation ; *Microarray Analysis ; Mycobacterium avium subsp. 
paratuberculosis/*classification/*genetics ; Oligonucleotide Array Sequence Analysis ; Polymerase Chain Reaction/methods ; *Polymorphism, Genetic ; Synteny ; }, abstract = {Mycobacterium avium subsp. paratuberculosis is an important animal pathogen widely disseminated in the environment that has also been associated with Crohn's disease in humans. Three M. avium subsp. paratuberculosis genomotypes are recognized, but genomic differences have not been fully described. To further investigate these potential differences, a 60-mer oligonucleotide microarray (designated the MAPAC array), based on the combined genomes of M. avium subsp. paratuberculosis (strain K-10) and Mycobacterium avium subsp. hominissuis (strain 104), was designed and validated. By use of a test panel of defined M. avium subsp. paratuberculosis strains, the MAPAC array was able to identify a set of large sequence polymorphisms (LSPs) diagnostic for each of the three major M. avium subsp. paratuberculosis types. M. avium subsp. paratuberculosis type II strains contained a smaller genomic complement than M. avium subsp. paratuberculosis type I and M. avium subsp. paratuberculosis type III genomotypes, which included a set of genomic regions also found in M. avium subsp. hominissuis 104. Specific PCRs for genes within LSPs that differentiated M. avium subsp. paratuberculosis types were devised and shown to accurately screen a panel (n = 78) of M. avium subsp. paratuberculosis strains. Analysis of insertion/deletion region INDEL12 showed deletion events causing a reduction in the complement of mycobacterial cell entry genes in M. avium subsp. paratuberculosis type II strains and significantly altering the coding of a major immunologic protein (MPT64) associated with persistence and granuloma formation. Analysis of MAPAC data also identified signal variations in several genomic regions, termed variable genomic islands (vGIs), suggestive of transient duplication/deletion events. 
vGIs contained significantly low GC% and were immediately flanked by insertion sequences, integrases, or short inverted repeat sequences. Quantitative PCR demonstrated that variation in vGI signals could be associated with colony growth rate and morphology.}, } @article {pmid19016759, year = {2008}, author = {Ghayad, SE and Bieche, I and Vendrell, JA and Keime, C and Lidereau, R and Dumontet, C and Cohen, PA}, title = {mTOR inhibition reverses acquired endocrine therapy resistance of breast cancer cells at the cell proliferation and gene-expression levels.}, journal = {Cancer science}, volume = {99}, number = {10}, pages = {1992-2003}, doi = {10.1111/j.1349-7006.2008.00955.x}, pmid = {19016759}, issn = {1349-7006}, mesh = {Breast Neoplasms/*drug therapy/genetics/pathology ; Cell Line, Tumor ; Cell Proliferation/*drug effects ; Drug Resistance, Neoplasm/*drug effects ; Female ; Gene Expression Regulation, Neoplastic/*drug effects ; Humans ; Models, Genetic ; Receptors, Estrogen/genetics ; Sirolimus/*pharmacology ; }, abstract = {Activation of the Akt/mammalian target of rapamycin (mTOR) pathway has been shown to be associated with resistance to endocrine therapy in estrogen receptor alpha (ERalpha)-positive breast cancer patients. Utmost importance is attached to strategies aimed at overcoming treatment resistance. In this context, this work aimed to investigate whether, in breast cancer cells, the use of an mTOR inhibitor would be sufficient to reverse the resistance acquired after exposure to endocrine therapy. The ERalpha-positive human breast adenocarcinoma derived-MCF-7 cells used in this study have acquired both cross-resistance to hydroxy-tamoxifen (OH-Tam) and to fulvestrant and strong activation of the Akt/mTOR pathway. Cell proliferation tests in control cells demonstrated that the mTOR inhibitor rapamycin enhanced cell sensitivity to endocrine therapy when combined to OH-Tam or to fulvestrant. 
In resistant cells, rapamycin used alone greatly inhibited cell proliferation and reversed resistance to endocrine therapy by blocking the agonist-like activity of OH-Tam on cell proliferation and bypassing fulvestrant resistance. Reversion of resistance by rapamycin was associated with increased ERalpha protein expression levels and modification of the balance of phospho-ser167 ERalpha/total ERalpha ratio. Pangenomic DNA array experiments demonstrated that the cotreatment of resistant cells with fulvestrant and rapamycin allowed the restoration of 40% of the fulvestrant gene-expression signature. Taken together, data presented herein strongly support the idea that mTOR inhibitor might be one of the promising therapeutic approaches for patients with ERalpha-positive endocrine therapy-resistant breast cancers.}, } @article {pmid19015323, year = {2009}, author = {Reinhardt, JA and Baltrus, DA and Nishimura, MT and Jeck, WR and Jones, CD and Dangl, JL}, title = {De novo assembly using low-coverage short read sequence data from the rice pathogen Pseudomonas syringae pv. oryzae.}, journal = {Genome research}, volume = {19}, number = {2}, pages = {294-305}, pmid = {19015323}, issn = {1088-9051}, support = {F32 GM082279/GM/NIGMS NIH HHS/United States ; R01 GM066025/GM/NIGMS NIH HHS/United States ; GM082279-01/GM/NIGMS NIH HHS/United States ; GM066025/GM/NIGMS NIH HHS/United States ; }, mesh = {Algorithms ; Base Sequence ; Genetic Variation/physiology ; Genome, Bacterial ; Genomic Library ; Oryza/microbiology ; Plasmids ; Pseudomonas syringae/*genetics/pathogenicity ; Quality Control ; Sequence Alignment/methods ; Sequence Analysis, DNA/*methods ; }, abstract = {We developed a novel approach for de novo genome assembly using only sequence data from high-throughput short read sequencing technologies. 
By combining data generated from 454 Life Sciences (Roche) and Illumina (formerly known as Solexa sequencing) sequencing platforms, we reliably assembled genomes into large scaffolds at a fraction of the traditional cost and without use of a reference sequence. We applied this method to two isolates of the phytopathogenic bacteria Pseudomonas syringae. Sequencing and reassembly of the well-studied tomato and Arabidopsis pathogen, Pto(DC3000), facilitated development and testing of our method. Sequencing of a distantly related rice pathogen, Por(1_6), demonstrated our method's efficacy for de novo assembly of novel genomes. Our assembly of Por(1_6) yielded an N50 scaffold size of 531,821 bp with >75% of the predicted genome covered by scaffolds over 100,000 bp. One of the critical phenotypic differences between strains of P. syringae is the range of plant hosts they infect. This is largely determined by their complement of type III effector proteins. The genome of Por(1_6) is the first sequenced for a P. syringae isolate that is a pathogen of monocots, and, as might be predicted, its complement of type III effectors differs substantially from the previously sequenced isolates of this species. The genome of Por(1_6) helps to define an expansion of the P. 
syringae pan-genome, a corresponding contraction of the core genome, and a further diversification of the type III effector complement for this important plant pathogen species.}, } @article {pmid19013239, year = {2008}, author = {Calvo, E and Luu-The, V and Morissette, J and Martel, C and Labrie, C and Bernard, B and Bernerd, F and Deloche, C and Chaussade, V and Leclaire, J and Labrie, F}, title = {Pangenomic changes induced by DHEA in the skin of postmenopausal women.}, journal = {The Journal of steroid biochemistry and molecular biology}, volume = {112}, number = {4-5}, pages = {186-193}, doi = {10.1016/j.jsbmb.2008.10.008}, pmid = {19013239}, issn = {0960-0760}, mesh = {Administration, Topical ; Aged ; Cell Differentiation/drug effects ; Cell Proliferation/drug effects ; Dehydroepiandrosterone/*pharmacology ; Female ; *Gene Expression Profiling ; Humans ; Keratinocytes/cytology ; Middle Aged ; Postmenopause/drug effects/*physiology ; Reverse Transcriptase Polymerase Chain Reaction ; Skin/drug effects/*metabolism ; }, abstract = {The objective of this study was to explore, for the first time, the changes in the pangenomic profile induced in human skin in women treated with dehydroepiandrosterone (DHEA) applied locally. Sixty postmenopausal women participated in this phase II prospective, randomized, double-blind and placebo-controlled study. Women were randomized to the twice daily local application of 0% (placebo), 0.3%, 1% or 2% DHEA cream. Changes in the pangenomic expression profile were studied using Affymetrix Genechips. Significant changes (p<0.05) in sixty-six DHEA-responsive probe sets corresponding to 52 well-characterized genes and 9 unknown gene sequences were identified. A dose-dependent increase in the expression of several members of the collagen family was observed, namely COL1, COL3 and COL5 as well as the concomitant modulation of SPARC, a gene required for the normal deposition and maturation of collagen fibrils in the dermis. 
Several genes involved in the proliferation and differentiation of keratinocytes were also modulated. In addition, topical DHEA reduced the expression of genes associated with the terminal differentiation and cornification of keratinocytes. Our results strongly suggest the possibility that DHEA could exert an anti-aging effect in the skin through stimulation of collagen biosynthesis, improved structural organization of the dermis while modulating keratinocyte metabolism.}, } @article {pmid18986251, year = {2008}, author = {Glasner, JD and Marquez-Villavicencio, M and Kim, HS and Jahn, CE and Ma, B and Biehl, BS and Rissman, AI and Mole, B and Yi, X and Yang, CH and Dangl, JL and Grant, SR and Perna, NT and Charkowski, AO}, title = {Niche-specificity and the variable fraction of the Pectobacterium pan-genome.}, journal = {Molecular plant-microbe interactions : MPMI}, volume = {21}, number = {12}, pages = {1549-1560}, doi = {10.1094/MPMI-21-12-1549}, pmid = {18986251}, issn = {0894-0282}, mesh = {Chromosomes, Bacterial/*genetics ; Contig Mapping ; DNA, Bacterial/genetics ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics ; INDEL Mutation ; Molecular Sequence Data ; Pectobacterium/*genetics ; Sequence Alignment ; Sequence Analysis, DNA ; Species Specificity ; }, abstract = {We compare genome sequences of three closely related soft-rot pathogens that vary in host range and geographical distribution to identify genetic differences that could account for lifestyle differences. The isolates compared, Pectobacterium atrosepticum SCRI1043, P. carotovorum WPP14, and P. brasiliensis 1692, represent diverse lineages of the genus. P. carotovorum and P. brasiliensis genome contigs, generated by 454 pyrosequencing ordered by reference to the previously published complete circular chromosome of P. atrosepticum genome and each other, account for 96% of the predicted genome size. Orthologous proteins encoded by P. carotovorum and P. 
brasiliensis are approximately 95% identical to each other and 92% identical to P. atrosepticum. Multiple alignment using Mauve identified a core genome of 3.9 Mb conserved among these Pectobacterium spp. Each core genome is interrupted at many points by species-specific insertions or deletions (indels) that account for approximately 0.9 to 1.1 Mb. We demonstrate that the presence of a hrpK-like type III secretion system-dependent effector protein in P. carotovorum and P. brasiliensis and its absence from P. atrosepticum is insufficient to explain variability in their response to infection in a plant. Additional genes that vary among these species include those encoding peptide toxin production, enzyme production, secretion proteins, and antibiotic production, as well as differences in more general aspects of gene regulation and metabolism that may be relevant to pathogenicity.}, } @article {pmid18948290, year = {2009}, author = {Rouillard, JM and Gulari, E}, title = {OligoArrayDb: pangenomic oligonucleotide microarray probe sets database.}, journal = {Nucleic acids research}, volume = {37}, number = {Database issue}, pages = {D938-41}, pmid = {18948290}, issn = {1362-4962}, support = {1 R01 GM06854-01A1/GM/NIGMS NIH HHS/United States ; }, mesh = {*Databases, Nucleic Acid ; Gene Expression Profiling ; Genome, Archaeal ; Genome, Bacterial ; Genomics ; *Oligonucleotide Array Sequence Analysis ; Oligonucleotide Probes/*chemistry ; }, abstract = {OligoArrayDb is a comprehensive database containing pangenomic oligonucleotide microarray probe sets designed for most of the sequenced genomes that are not covered by commercial catalog arrays. The availability of probe sequences, associated with custom microarray fabrication services offered by many companies and cores presents the unequalled possibility to perform microarray experiments on most of the sequenced organisms. 
OligoArrayDb contains more than 2.8 probes per gene on average for more than 600 organisms, mostly archaea and bacteria strains available from public database. On average, 98% of the annotated genes have at least one probe which is predicted to be specific to its intended target in >94% of the cases. OligoArrayDb is updated weekly as new sequenced genomes become available. Probe sequences, in addition to a comprehensive set of annotations can be downloaded from this database. OligoArrayDb is publicly accessible online at http://berry.engin.umich.edu/oligoarraydb.}, } @article {pmid18854354, year = {2009}, author = {Klein, J and Münch, R and Biegler, I and Haddad, I and Retter, I and Jahn, D}, title = {Strepto-DB, a database for comparative genomics of group A (GAS) and B (GBS) streptococci, implemented with the novel database platform 'Open Genome Resource' (OGeR).}, journal = {Nucleic acids research}, volume = {37}, number = {Database issue}, pages = {D494-8}, pmid = {18854354}, issn = {1362-4962}, mesh = {Bacterial Proteins/genetics ; DNA, Intergenic/chemistry ; *Databases, Genetic ; *Genome, Bacterial ; Genomics ; Streptococcus/classification/*genetics ; }, abstract = {Streptococci are the causative agent of many human infectious diseases including bacterial pneumonia and meningitis. Here, we present Strepto-DB, a database for the comparative genome analysis of group A (GAS) and group B (GBS) streptococci. The known genomes of various GAS and GBS contain a large fraction of distributed genes that were found absent in other strains or serotypes of the same species. Strepto-DB identifies the homologous proteins deduced from the genomes of interest. It allows for the elucidation of the GAS and GBS core- and pan-genomes via genome-wide comparisons. Moreover, an intergenic region analysis tool provides alignments and predictions for transcription factor binding sites in the non-coding sequences. An interactive genome browser visualizes functional annotations. 
Strepto-DB (http://oger.tu-bs.de/strepto_db) was created by the use of OGeR, the Open Genome Resource for comparative analysis of prokaryotic genomes. OGeR is a newly developed open source database and tool platform for the web-based storage, distribution, visualization and comparison of prokaryotic genome data. The system automatically creates the dedicated relational database and web interface and imports an arbitrary number of genomes derived from standardized genome files. OGeR can be downloaded at http://oger.tu-bs.de.}, } @article {pmid18761693, year = {2008}, author = {Churchward, G}, title = {Back to the future: the new ICE age.}, journal = {Molecular microbiology}, volume = {70}, number = {3}, pages = {554-556}, doi = {10.1111/j.1365-2958.2008.06415.x}, pmid = {18761693}, issn = {1365-2958}, mesh = {Bacillus subtilis/*genetics/metabolism ; Bacterial Proteins/*genetics/metabolism ; Chromosomes, Bacterial/genetics ; Conjugation, Genetic ; Gene Expression Regulation, Bacterial ; Gene Transfer, Horizontal ; *Interspersed Repetitive Sequences ; Repressor Proteins/*genetics/metabolism ; }, abstract = {The analysis of bacterial genomes has revealed an extraordinary array of conjugal elements (integrative and conjugative element or ICE) that reside in bacterial chromosomes. These elements contribute to the pan-genomes of individual species and confer a wide variety of properties on their bacterial hosts. ICEBs1 is a conjugal element found in Bacillus subtilis that has a remarkable regulatory mechanism that apparently favours conjugation when there are suitable recipient bacteria at high density or when the bacterial host is facing DNA-damaging stresses. In the current issue, Bose et al. 
dissect the mechanism of induction of transfer of this element, and reveal a new, apparently widespread repressor anti-repressor system and a new mechanism of repressor destruction by proteolysis.}, } @article {pmid18688281, year = {2008}, author = {Siau, A and Silvie, O and Franetich, JF and Yalaoui, S and Marinach, C and Hannoun, L and van Gemert, GJ and Luty, AJ and Bischoff, E and David, PH and Snounou, G and Vaquero, C and Froissard, P and Mazier, D}, title = {Temperature shift and host cell contact up-regulate sporozoite expression of Plasmodium falciparum genes involved in hepatocyte infection.}, journal = {PLoS pathogens}, volume = {4}, number = {8}, pages = {e1000121}, pmid = {18688281}, issn = {1553-7374}, mesh = {Animals ; Cells, Cultured ; Gene Expression Profiling/methods ; Hepatocytes/*metabolism/parasitology ; Hot Temperature ; Humans ; Malaria, Falciparum/genetics/*metabolism ; Oligonucleotide Array Sequence Analysis/methods ; Plasmodium falciparum/genetics/*metabolism ; Protozoan Proteins/*biosynthesis/genetics ; Reverse Transcriptase Polymerase Chain Reaction ; *Up-Regulation/genetics ; }, abstract = {Plasmodium sporozoites are deposited in the skin by Anopheles mosquitoes. They then find their way to the liver, where they specifically invade hepatocytes in which they develop to yield merozoites infective to red blood cells. Relatively little is known of the molecular interactions during these initial obligatory phases of the infection. Recent data suggested that many of the inoculated sporozoites invade hepatocytes an hour or more after the infective bite. We hypothesised that this pre-invasive period in the mammalian host prepares sporozoites for successful hepatocyte infection. Therefore, the genes whose expression becomes modified prior to hepatocyte invasion would be those likely to code for proteins implicated in the subsequent events of invasion and development. We have used P. 
falciparum sporozoites and their natural host cells, primary human hepatocytes, in in vitro co-culture system as a model for the pre-invasive period. We first established that under co-culture conditions, sporozoites maintain infectivity for an hour or more, in contrast to a drastic loss in infectivity when hepatocytes were not included. Thus, a differential transcriptome of salivary gland sporozoites versus sporozoites co-cultured with hepatocytes was established using a pan-genomic P. falciparum microarray. The expression of 532 genes was found to have been up-regulated following co-culture. A fifth of these genes had no orthologues in the genomes of Plasmodium species used in rodent models of malaria. Quantitative RT-PCR analysis of a selection of 21 genes confirmed the reliability of the microarray data. Time-course analysis further indicated two patterns of up-regulation following sporozoite co-culture, one transient and the other sustained, suggesting roles in hepatocyte invasion and liver stage development, respectively. This was supported by functional studies of four hitherto uncharacterized proteins of which two were shown to be sporozoite surface proteins involved in hepatocyte invasion, while the other two were predominantly expressed during hepatic parasite development. The genome-wide up-regulation of expression observed supports the hypothesis that the shift from the mosquito to the mammalian host contributes to activate quiescent salivary gland sporozoites into a state of readiness for the hepatic stages. Functional studies on four of the up-regulated genes validated our approach as one means to determine the repertoire of proteins implicated during the early events of the Plasmodium infection, and in this case that of P. 
falciparum, the species responsible for the severest forms of malaria.}, } @article {pmid18676672, year = {2008}, author = {Rasko, DA and Rosovitz, MJ and Myers, GS and Mongodin, EF and Fricke, WF and Gajer, P and Crabtree, J and Sebaihia, M and Thomson, NR and Chaudhuri, R and Henderson, IR and Sperandio, V and Ravel, J}, title = {The pangenome structure of Escherichia coli: comparative genomic analysis of E. coli commensal and pathogenic isolates.}, journal = {Journal of bacteriology}, volume = {190}, number = {20}, pages = {6881-6893}, pmid = {18676672}, issn = {1098-5530}, support = {BB/C510075/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; G0700151/MRC_/Medical Research Council/United Kingdom ; N01AI30071/AI/NIAID NIH HHS/United States ; }, mesh = {Adult ; Animals ; Computational Biology ; Conserved Sequence ; DNA, Bacterial/*genetics ; Escherichia coli/*genetics/isolation & purification ; Genes, Bacterial ; *Genome, Bacterial ; *Genomics ; Humans ; Molecular Sequence Data ; Rabbits ; Virulence Factors/*genetics ; }, abstract = {Whole-genome sequencing has been skewed toward bacterial pathogens as a consequence of the prioritization of medical and veterinary diseases. However, it is becoming clear that in order to accurately measure genetic variation within and between pathogenic groups, multiple isolates, as well as commensal species, must be sequenced. This study examined the pangenomic content of Escherichia coli. Six distinct E. coli pathovars can be distinguished using molecular or phenotypic markers, but only two of the six pathovars have been subjected to any genome sequencing previously. Thus, this report provides a seminal description of the genomic contents and unique features of three unsequenced pathovars, enterotoxigenic E. coli, enteropathogenic E. coli, and enteroaggregative E. coli. We also determined the first genome sequence of a human commensal E. coli isolate, E. 
coli HS, which will undoubtedly provide a new baseline from which workers can examine the evolution of pathogenic E. coli. Comparison of 17 E. coli genomes, 8 of which are new, resulted in identification of approximately 2,200 genes conserved in all isolates. We were also able to identify genes that were isolate and pathovar specific. Fewer pathovar-specific genes were identified than anticipated, suggesting that each isolate may have independently developed virulence capabilities. Pangenome calculations indicate that E. coli genomic diversity represents an open pangenome model containing a reservoir of more than 13,000 genes, many of which may be uncharacterized but important virulence factors. This comparative study of the species E. coli, while descriptive, should provide the basis for future functional work on this important group of pathogens.}, } @article {pmid18667081, year = {2008}, author = {Biro, JC}, title = {Does codon bias have an evolutionary origin?}, journal = {Theoretical biology & medical modelling}, volume = {5}, number = {}, pages = {16}, pmid = {18667081}, issn = {1742-4682}, mesh = {Amino Acids/*genetics ; Animals ; *Codon ; *Evolution, Molecular ; Genetic Code ; Genome ; *Models, Genetic ; }, abstract = {BACKGROUND: There is a 3-fold redundancy in the Genetic Code; most amino acids are encoded by more than one codon. These synonymous codons are not used equally; there is a Codon Usage Bias (CUB). This article will provide novel information about the origin and evolution of this bias.

RESULTS: Codon Usage Bias (CUB, defined here as deviation from equal usage of synonymous codons) was studied in 113 species. The average CUB was 29.3 +/- 1.1% (S.E.M, n = 113) of the theoretical maximum and declined progressively with evolution and increasing genome complexity. A Pan-Genomic Codon Usage Frequency (CUF) Table was constructed to describe genome-wide relationships among codons. Significant correlations were found between the number of synonymous codons and (i) the frequency of the respective amino acids (ii) the size of CUB. Numerous, statistically highly significant, internal correlations were found among codons and the nucleic acids they comprise. These strong correlations made it possible to predict missing synonymous codons (wobble bases) reliably from the remaining codons or codon residues.

CONCLUSION: The results put the concept of "codon bias" into a novel perspective. The internal connectivity of codons indicates that all synonymous codons might be integrated parts of the Genetic Code with equal importance in maintaining its functional integrity.}, } @article {pmid18514435, year = {2008}, author = {Andrieux, J}, title = {[Array-CGH for routine diagnosis of cryptic chromosomal imbalances].}, journal = {Pathologie-biologie}, volume = {56}, number = {6}, pages = {368-374}, doi = {10.1016/j.patbio.2008.04.011}, pmid = {18514435}, issn = {0369-8114}, mesh = {Chromosome Disorders/*diagnosis/genetics ; Chromosomes, Artificial, Bacterial/genetics ; Gene Dosage ; Genetic Testing/methods ; Humans ; Karyotyping/methods ; Molecular Diagnostic Techniques/*methods ; Molecular Weight ; Nucleic Acid Hybridization/*methods ; *Oligonucleotide Array Sequence Analysis ; Oligonucleotide Probes ; }, abstract = {Cytogenetics allows detection of genomic anomalies between 10 and 15 Mb (classical cytogenetics) and between 3 and 5 Mb (high-resolution cytogenetics). These pangenomic techniques are associated with more accurate analyses, single probe interstitial FISH and subtelomeric studies. Array-CGH (aCGH) allows high resolution pangenomic analyses. BAC/PAC and oligonucleotides array-CGH have transformed the field of genetics and are useful for constitutional, hematological and solid tumors cytogenetics. Array-based comparative pangenomic hybridization resolutions vary in size (range, several kilobases to 1 Mb). With the more recent improvements, aCGH is becoming the "missing link" between cytogenetics and molecular diagnostics. 
Despite copy number variations (CNV) and without replacing karyotype, aCGH detects cryptic quantitative anomalies anywhere in the genome and becomes day after day more useful.}, } @article {pmid18501443, year = {2008}, author = {Mazzucotelli, A and Ribet, C and Castan-Laurell, I and Daviaud, D and Guigné, C and Langin, D and Valet, P}, title = {The transcriptional co-activator PGC-1alpha up regulates apelin in human and mouse adipocytes.}, journal = {Regulatory peptides}, volume = {150}, number = {1-3}, pages = {33-37}, doi = {10.1016/j.regpep.2008.04.003}, pmid = {18501443}, issn = {0167-0115}, mesh = {Adipocytes/metabolism/*physiology ; Adipokines ; Adipose Tissue, White/cytology ; Animals ; Apelin ; Carrier Proteins/genetics/metabolism/*physiology ; Cell Line ; Cells, Cultured ; Female ; Gene Expression Regulation ; Heat-Shock Proteins/genetics/metabolism/*physiology ; Humans ; Intercellular Signaling Peptides and Proteins/genetics/metabolism/*physiology ; Mice ; Mice, Inbred C57BL ; Peroxisome Proliferator-Activated Receptor Gamma Coactivator 1-alpha ; Trans-Activators/genetics/metabolism/*physiology ; Transcription Factors/genetics/metabolism/*physiology ; }, abstract = {By using pangenomic microarray, we identified apelin as a unique adipokine up regulated by the transcriptional co-activator peroxisome proliferator-activated receptor gamma (PPARgamma) co-activator 1alpha (PGC-1alpha) in human white adipocytes. We investigated its regulation in vitro and in vivo. Overexpression of PGC-1alpha by adenovirus in human adipocytes induces apelin expression and secretion. Pharmacological induction of cAMP, an upstream regulator of endogenous PGC-1alpha expression, up regulates apelin gene expression and also apelin secretion in human and mice adipocytes. Moreover, during cold exposure in mice, a physiological situation known to induce both cAMP and PGC-1alpha, apelin expression in adipocytes and plasma levels were increased. 
This is the first demonstration that PGC-1alpha is involved in the regulation of an adipokine gene expression and release.}, } @article {pmid18492273, year = {2008}, author = {Caserta, D and Benkhalifa, M and Baldi, M and Fiorentino, F and Qumsiyeh, M and Moscarini, M}, title = {Genome profiling of ovarian adenocarcinomas using pangenomic BACs microarray comparative genomic hybridization.}, journal = {Molecular cytogenetics}, volume = {1}, number = {}, pages = {10}, pmid = {18492273}, issn = {1755-8166}, abstract = {BACKGROUND: Routine cytogenetic investigations for ovarian cancers are limited by culture failure and poor growth of cancer cells compared to normal cells. Fluorescence in situ Hybridization (FISH) application or classical comparative genome hybridization techniques also have their own limitations in detecting genome imbalance especially for small changes that are not known ahead of time and for which FISH probes could not be thus designed.

METHODS: We applied microarray comparative genomic hybridization (A-CGH) using one mega base BAC arrays to investigate chromosomal disorders in ovarian adenocarcinoma in patients with familial history.

RESULTS: Our data on 10 cases of ovarian cancer revealed losses of 6q (4 cases mainly mosaic loss), 9p (4 cases), 10q (3 cases), 21q (3 cases), 22q (4 cases) with association to a monosomy X and gains of 8q and 9q (occurring together in 8 cases) and gain of 12p. There were other abnormalities such as loss of 17p that were noted in two profiles of the studied cases. Total or mosaic segmental gain of 2p, 3q, 4q, 7q and 13q were also observed. Seven of 10 patients were investigated by FISH to control array CGH results. The FISH data showed a concordance between the 2 methods.

CONCLUSION: The data suggest that A-CGH detects unique and common abnormalities with certain exceptions such as tetraploidy and balanced translocation, which may lead to understanding progression of genetic changes as well as aid in early diagnosis and have an impact on therapy and prognosis.}, } @article {pmid18477666, year = {2008}, author = {Rampon, C and Bouillot, S and Climescu-Haulica, A and Prandini, MH and Cand, F and Vandenbrouck, Y and Huber, P}, title = {Protocadherin 12 deficiency alters morphogenesis and transcriptional profile of the placenta.}, journal = {Physiological genomics}, volume = {34}, number = {2}, pages = {193-204}, pmid = {18477666}, issn = {1531-2267}, mesh = {Animals ; Animals, Newborn ; Cadherins/*deficiency/metabolism ; Cell Adhesion ; Cell Movement ; Decidua/cytology/metabolism ; Female ; *Gene Expression Profiling ; Glycogen/metabolism ; Mice ; *Morphogenesis ; Organ Size ; Placenta/cytology/*embryology/*metabolism ; Pregnancy ; Protocadherins ; Reverse Transcriptase Polymerase Chain Reaction ; }, abstract = {Protocadherins are transmembrane proteins exhibiting homophilic adhesive activities through their extracellular domain. Protocadherin 12 (Pcdh12) is expressed in angiogenic endothelial cells, mesangial cells of kidney glomeruli, and glycogen cells of the mouse placenta. To get insight into the role of this protein in vivo, we analyzed PCDH12-deficient mice and investigated their placental phenotype. The mice were alive and fertile; however, placental and embryonic sizes were reduced compared with wild-type mice. We observed defects in placental layer segregation and a decreased vascularization of the labyrinth associated with a reduction in cell density in this layer. 
To understand the molecular events responsible for the phenotypic alterations observed in Pcdh12(-/-) placentas, we analyzed the expression profile of embryonic day 12.5 mutant placentas compared with wild-type placentas, using pangenomic chips: 2,289 genes exhibited statistically significant changes in expressed levels due to loss of PCDH12. Functional grouping of modified genes was obtained by GoMiner software. Gene clusters that contained most of the differentially expressed genes were those involved in tissue morphogenesis and development, angiogenesis, cell-matrix adhesion and migration, immune response, and chromatin remodeling. Our data show that loss of PCDH12 leads to morphological alterations of the placenta and to notable changes in its gene expression profile. Specific genes emerging from the microarray screen support the biological modifications observed in PCDH12-deficient placentas.}, } @article {pmid18414733, year = {2008}, author = {Hallin, PF and Binnewies, TT and Ussery, DW}, title = {The genome BLASTatlas-a GeneWiz extension for visualization of whole-genome homology.}, journal = {Molecular bioSystems}, volume = {4}, number = {5}, pages = {363-371}, doi = {10.1039/b717118h}, pmid = {18414733}, issn = {1742-206X}, mesh = {Amino Acid Sequence ; Bacterial Toxins/genetics ; Computational Biology ; *Databases, Genetic ; Genome, Bacterial/*genetics ; Molecular Sequence Data ; Sequence Alignment ; Sequence Homology ; *Software ; }, abstract = {The development of fast and inexpensive methods for sequencing bacterial genomes has led to a wealth of data, often with many genomes being sequenced of the same species or closely related organisms. Thus, there is a need for visualization methods that will allow easy comparison of many sequenced genomes to a defined reference strain. 
The BLASTatlas is one such tool that is useful for mapping and visualizing whole genome homology of genes and proteins within a reference strain compared to other strains or species of one or more prokaryotic organisms. We provide examples of BLASTatlases, including the Clostridium tetani plasmid p88, where homologues for toxin genes can be easily visualized in other sequenced Clostridium genomes, and for a Clostridium botulinum genome, compared to 14 other Clostridium genomes. DNA structural information is also included in the atlas to visualize the DNA chromosomal context of regions. Additional information can be added to these plots, and as an example we have added circles showing the probability of the DNA helix opening up under superhelical tension. The tool is SOAP compliant and WSDL (web services description language) files are located on our website: (http://www.cbs.dtu.dk/ws/BLASTatlas), where programming examples are available in Perl. By providing an interoperable method to carry out whole genome visualization of homology, this service offers bioinformaticians as well as biologists an easy-to-adopt workflow that can be directly called from the programming language of the user, hence enabling automation of repeated tasks. 
This tool can be relevant in many pangenomic as well as in metagenomic studies, by giving a quick overview of clusters of insertion sites, genomic islands and overall homology between a reference sequence and a data set.}, } @article {pmid18370283, year = {2008}, author = {Gruden, K and Pompe-Novak, M and Baebler, S and Krecic-Stres, H and Toplak, N and Hren, M and Kogovsek, P and Gow, L and Foster, GD and Boonham, N and Ravnikar, M}, title = {Expression microarrays in plant-virus interaction.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {451}, number = {}, pages = {583-613}, doi = {10.1007/978-1-59745-102-4_40}, pmid = {18370283}, issn = {1064-3745}, mesh = {DNA, Complementary/genetics ; DNA, Plant/genetics ; DNA, Viral/genetics ; Nucleic Acid Hybridization/methods ; Oligonucleotide Array Sequence Analysis/*methods ; Plant Viruses/*genetics ; Plants/*genetics/*virology ; RNA, Plant/genetics ; RNA, Viral/genetics ; }, abstract = {Since their conception in the late 1990s, microarray techniques have become a tool of choice for monitoring pangenomic gene expression. Although there are a large number of variations on the basic methodology the general approach remains standard and involves the comparison of a "test" RNA with a "control" RNA; in this case "healthy" and "virus-infected" plants. The protocol itself can be broken down into five main parts: RNA extraction, cDNA synthesis, hybridization, array scanning, and data analysis. The method presented is optimized for use with arrays based on glass slides spotted with cDNA, in this case 15,264 cDNAs from Solanum tuberosum. The labeling technique presented involves two steps: hybridization of cDNA produced using oligo-dT linker primers to the array and hybridization with a DNA dendrimer reagent comprising sequence complementary to the linker sequence bound to a fluorescent dye. 
We also present the use of the R environment for data analysis, generating statistical support for differential gene expression observed.}, } @article {pmid18366303, year = {2008}, author = {Lacroix, L and Commo, F and Soria, JC}, title = {Gene expression profiling of non-small-cell lung cancer.}, journal = {Expert review of molecular diagnostics}, volume = {8}, number = {2}, pages = {167-178}, doi = {10.1586/14737159.8.2.167}, pmid = {18366303}, issn = {1744-8352}, mesh = {Biomarkers, Tumor/*biosynthesis/classification ; Carcinoma, Non-Small-Cell Lung/classification/diagnosis/*metabolism/therapy ; *Gene Expression Profiling/methods ; *Gene Expression Regulation, Neoplastic ; Humans ; Lung Neoplasms/*metabolism/therapy ; *Oligonucleotide Array Sequence Analysis/methods ; Prognosis ; }, abstract = {Lung cancer is the leading cause of cancer worldwide. Despite recent advances in the management of resected lung cancer tumors (i.e., the use of adjuvant therapy) and more effective treatments in the metastatic setting (i.e., molecular targeted agents), the cure rate of lung cancer remains low. Successful molecular testing of lung cancer requires the identification and understanding of events that take place during the multistep tumorigenic process of lung cancer. As with other solid tumors, lung cancer is the result of the accumulation of genetic and epigenetic alterations over a long course of exposure to a carcinogen, such as tobacco smoke. Discovering new prognostic or predictive biomarkers or developing new detection tools for lung cancer is one of the major areas of translational cancer research. However, given our current understanding of the multifactorial process of lung carcinogenesis and the heterogeneous nature of the disease, monitoring of one or a few genes is limited. A pangenomic analysis seems more efficient for deciphering the complexity of lung cancer. 
The prospect of identifying specific events in lung carcinogenesis is significantly brightened by the recent development of high-throughput gene expression analysis. Since 2000, several studies have reported on the molecular classification of human lung carcinomas on the basis of gene expression and have described numerous putative biological markers of cancer. At this time, improving the biological significance of microarray data appears to be an important challenge. The most recent studies propose refining molecular classification of non-small-cell lung cancer on the basis of mRNA expression profiles. Other studies described new prognostic biomarkers that will be useful for the therapeutic management of patients bearing lung cancer (non-small-cell lung cancer). The present review summarizes the main recent advances associated with gene expression analysis in the field of lung cancer and, notably, non-small-cell lung tumors.}, } @article {pmid18301759, year = {2008}, author = {James, KE and Schneider, H and Ansell, SW and Evers, M and Robba, L and Uszynski, G and Pedersen, N and Newton, AE and Russell, SJ and Vogel, JC and Kilian, A}, title = {Diversity arrays technology (DArT) for pan-genomic evolutionary studies of non-model organisms.}, journal = {PloS one}, volume = {3}, number = {2}, pages = {e1682}, pmid = {18301759}, issn = {1932-6203}, mesh = {*Biological Evolution ; Genetic Markers ; *Genome, Plant ; Genomics/*methods ; *Oligonucleotide Array Sequence Analysis ; Phylogeny ; Research ; }, abstract = {BACKGROUND: High-throughput tools for pan-genomic study, especially the DNA microarray platform, have sparked a remarkable increase in data production and enabled a shift in the scale at which biological investigation is possible. The use of microarrays to examine evolutionary relationships and processes, however, is predominantly restricted to model or near-model organisms.

METHODOLOGY/PRINCIPAL FINDINGS: This study explores the utility of Diversity Arrays Technology (DArT) in evolutionary studies of non-model organisms. DArT is a hybridization-based genotyping method that uses microarray technology to identify and type DNA polymorphism. Theoretically applicable to any organism (even one for which no prior genetic data are available), DArT has not yet been explored in exclusively wild sample sets, nor extensively examined in a phylogenetic framework. DArT recovered 1349 markers of largely low copy-number loci in two lineages of seed-free land plants: the diploid fern Asplenium viride and the haploid moss Garovaglia elegans. Direct sequencing of 148 of these DArT markers identified 30 putative loci including four routinely sequenced for evolutionary studies in plants. Phylogenetic analyses of DArT genotypes reveal phylogeographic and substrate specificity patterns in A. viride, a lack of phylogeographic pattern in Australian G. elegans, and additive variation in hybrid or mixed samples.

CONCLUSIONS/SIGNIFICANCE: These results enable methodological recommendations including procedures for detecting and analysing DArT markers tailored specifically to evolutionary investigations and practical factors informing the decision to use DArT, and raise evolutionary hypotheses concerning substrate specificity and biogeographic patterns. Thus DArT is a demonstrably valuable addition to the set of existing molecular approaches used to infer biological phenomena such as adaptive radiations, population dynamics, hybridization, introgression, ecological differentiation and phylogeography.}, } @article {pmid18226237, year = {2008}, author = {Delaye, L and Deluna, A and Lazcano, A and Becerra, A}, title = {The origin of a novel gene through overprinting in Escherichia coli.}, journal = {BMC evolutionary biology}, volume = {8}, number = {}, pages = {31}, pmid = {18226237}, issn = {1471-2148}, mesh = {Base Sequence ; Escherichia coli/classification/*genetics ; Escherichia coli Proteins/genetics ; *Evolution, Molecular ; *Genes, Bacterial ; Heat-Shock Proteins/genetics ; Molecular Sequence Data ; Open Reading Frames/genetics ; Phylogeny ; }, abstract = {BACKGROUND: Overlapped genes originate by a) loss of a stop codon among contiguous genes coded in different frames; b) shift to an upstream initiation codon of one of the contiguous genes; or c) by overprinting, whereby a novel open reading frame originates through point mutation inside an existing gene. Although overlapped genes are common in viruses, it is not clear whether overprinting has led to new genes in prokaryotes.

RESULTS: Here we report the origin of a new gene through overprinting in Escherichia coli K12. The htgA gene coding for a positive regulator of the sigma 32 heat shock promoter arose by point mutation in a 123/213 phase within an open reading frame (yaaW) of unknown function, most likely in the lineage leading to E. coli and Shigella sp. Further, we show that yaaW sequences coding for htgA genes have a slower evolutionary rate than those lacking an overlapped htgA gene.

CONCLUSION: While overprinting has been shown to be rather frequent in the evolution of new genes in viruses, our results suggest that this mechanism has also contributed to the origin of a novel gene in a prokaryote. We propose the term janolog (from Jano, the two-faced Roman god) to describe the homology relationship that holds between two genes when one originated through overprinting of the other. One cannot dismiss the possibility that at least a small fraction of the large number of novel ORPhan genes detected in pan-genome and metagenomic studies arose by overprinting.}, } @article {pmid18159071, year = {2008}, author = {Bollet, MA and Servant, N and Neuvial, P and Decraene, C and Lebigot, I and Meyniel, JP and De Rycke, Y and Savignoni, A and Rigaill, G and Hupé, P and Fourquet, A and Sigal-Zafrani, B and Barillot, E and Thiery, JP}, title = {High-resolution mapping of DNA breakpoints to define true recurrences among ipsilateral breast cancers.}, journal = {Journal of the National Cancer Institute}, volume = {100}, number = {1}, pages = {48-58}, doi = {10.1093/jnci/djm266}, pmid = {18159071}, issn = {1460-2105}, mesh = {Adult ; Breast Neoplasms/diagnosis/*genetics/pathology ; Carcinoma, Ductal, Breast/genetics ; Carcinoma, Lobular/genetics ; *DNA Breaks ; *DNA, Neoplasm ; Diagnosis, Differential ; Disease-Free Survival ; Female ; Humans ; Middle Aged ; Neoplasm Recurrence, Local/diagnosis/*genetics/pathology ; Neoplasms, Second Primary/diagnosis/*genetics ; Predictive Value of Tests ; Prognosis ; Research Design ; }, abstract = {BACKGROUND: To distinguish new primary breast cancers from true recurrences, pangenomic analyses of DNA copy number alterations (CNAs) using single-nucleotide polymorphism arrays have proven useful.

METHODS: The pangenomic profiles of 22 pairs of primary breast carcinoma (ductal or lobular) and ipsilateral breast cancers from the same patients were analyzed. Hierarchical clustering was performed using CNAs and DNA breakpoint information. A partial identity score developed using DNA breakpoint information was used to quantify partial identities between two tumors. The nature of ipsilateral breast cancers (true recurrence vs new primary tumor) as defined using the clustering methods and the partial identity score was compared with that based on clinical characteristics. Metastasis-free survival was compared among patients with primary tumors and true recurrences as defined using the partial identity score and by clinical characteristics. All statistical tests were two-sided.

RESULTS: All methods agreed on the nature of ipsilateral breast cancers for 14 pairs of samples. For five pairs, the clinical definition disagreed with both clustering methods. For three pairs, the two clustering methods were discordant and the one using DNA breakpoints agreed with the clinical definition. The partial identity score confirmed the nature of ipsilateral breast cancers as defined by clustering of DNA breakpoints in 21 of 22 pairs. The difference in metastasis-free survival of patients with new primary tumors and those with true recurrences was not statistically significant when tumors were defined based on clinical and histologic characteristics (5-year metastasis-free survival: 76%, 95% confidence interval [CI] = 52% to 100% for new primary tumors and 38%, 95% CI = 17% to 83% for true recurrences; P = .18; new primary tumor vs true recurrence, hazard ratio = 2.8, 95% CI = 0.6 to 13.7), but the difference was statistically significant when tumors were defined using the partial identity score (5-year metastasis-free survival: 100% for new primary tumors and 29%, 95% CI = 11% to 78% for true recurrences; P = .01).

CONCLUSIONS: DNA breakpoint information more often agreed with the clinical determination than CNAs in this population. The partial identity score, which was calculated based on DNA breakpoints, allows statistical discrimination between new primary tumors and true recurrences that could outperform the clinical determination in terms of prognosis.}, } @article {pmid18088402, year = {2007}, author = {Willenbrock, H and Hallin, PF and Wassenaar, TM and Ussery, DW}, title = {Characterization of probiotic Escherichia coli isolates with a novel pan-genome microarray.}, journal = {Genome biology}, volume = {8}, number = {12}, pages = {R267}, pmid = {18088402}, issn = {1474-760X}, mesh = {Escherichia coli/classification/*genetics ; Genome, Bacterial ; Oligonucleotide Array Sequence Analysis/*methods ; Phylogeny ; *Probiotics ; Shigella/genetics ; }, abstract = {BACKGROUND: Microarrays have recently emerged as a novel procedure to evaluate the genetic content of bacterial species. So far, microarrays have mostly covered single or few strains from the same species. However, with cheaper high-throughput sequencing techniques emerging, multiple strains of the same species are rapidly becoming available, allowing for the definition and characterization of a whole species as a population of genomes--the 'pan-genome'.

RESULTS: Using 32 Escherichia coli and Shigella genome sequences we estimate the pan- and core genome of the species. We designed a high-density microarray in order to provide a tool for characterization of the E. coli pan-genome. Technical performance of this pan-genome microarray based on control strain samples (E. coli K-12 and O157:H7) demonstrated a high sensitivity and relatively low false positive rate. A single-channel analysis approach is robust while allowing the possibility for deriving presence/absence predictions for any gene included on our pan-genome microarray. Moreover, the array was highly sufficient to investigate the gene content of non-pathogenic isolates, despite the strong bias towards pathogenic E. coli strains that have been sequenced so far.

CONCLUSION: This high-density microarray provides an excellent tool for characterizing the genetic makeup of unknown E. coli strains and can also deliver insights into phylogenetic relationships. Its design poses a considerably larger challenge and involves different considerations than the design of single strain microarrays. Here, lessons learned and future directions will be discussed in order to optimize design of microarrays targeting entire pan-genomes.}, } @article {pmid18043634, year = {2007}, author = {Cuadros-Orellana, S and Martin-Cuadrado, AB and Legault, B and D'Auria, G and Zhaxybayeva, O and Papke, RT and Rodriguez-Valera, F}, title = {Genomic plasticity in prokaryotes: the case of the square haloarchaeon.}, journal = {The ISME journal}, volume = {1}, number = {3}, pages = {235-245}, doi = {10.1038/ismej.2007.35}, pmid = {18043634}, issn = {1751-7362}, mesh = {Bacteriophages/genetics ; Biological Transport/genetics ; DNA, Archaeal/chemistry/genetics ; Genome, Archaeal/*genetics ; Genomic Islands/genetics ; Halobacteriaceae/*genetics/isolation & purification ; Interspersed Repetitive Sequences ; Molecular Sequence Data ; *Recombination, Genetic ; Seawater/*microbiology ; Sequence Analysis, DNA ; Sequence Homology ; }, abstract = {The variability in genome content among closely related strains of prokaryotes has been one of the most remarkable discoveries of genomics. One way to approach the description of this so-called pan-genome is to compare one reference strain genome with metagenomic sequences from the environment. We have applied this approach to one extreme aquatic habitat, saturated brines in a solar saltern. The genome of Haloquadratum walsbyi strain DSM 16790 was compared to an environmental metagenome obtained from the exact site of its isolation. This approach revealed that some regions of the strain genome were scarcely represented in the metagenome. 
Here we have analyzed these genomic islands (GI) in the genome of DSM 16790 and compared them with the complete sequence of some fosmids from the environmental library. Two of the islands, GI 2 and GI 4, overlapped with two large guanine and cytosine (GC)-rich regions that showed evidence of high variability through mobile elements. GI 3 seemed to be a phage or phage-remnant acquired by the reference genome, but not present in most environmental lineages. Most differential gene content was related to small molecule transport and detection, probably reflecting adaptation to different pools of organic nutrients. GI 1 did not possess traces of mobile elements and had normal GC content. This island contained the main cluster of cell envelope glycoproteins and the variability found was different from the other GIs. Rather than containing different genes it consisted of homologs with low similarity. This variation might reflect a phage evasion strategy.}, } @article {pmid18008243, year = {2007}, author = {Siau, A and Toure, FS and Ouwe-Missi-Oukem-Boyer, O and Ciceron, L and Mahmoudi, N and Vaquero, C and Froissard, P and Bisvigou, U and Bisser, S and Coppee, JY and Bischoff, E and David, PH and Mazier, D}, title = {Whole-transcriptome analysis of Plasmodium falciparum field isolates: identification of new pathogenicity factors.}, journal = {The Journal of infectious diseases}, volume = {196}, number = {11}, pages = {1603-1612}, doi = {10.1086/522012}, pmid = {18008243}, issn = {0022-1899}, mesh = {Animals ; Apoptosis ; Blood-Brain Barrier/parasitology ; Brain/*parasitology ; Cell Adhesion ; Child ; DNA, Protozoan/*analysis ; Endothelial Cells/parasitology ; Erythrocytes/parasitology ; Gabon ; *Gene Expression Profiling ; *Genes, Protozoan ; Humans ; Malaria, Cerebral/*parasitology ; Malaria, Falciparum/*diagnosis ; Oligonucleotide Array Sequence Analysis/methods ; Plasmodium falciparum/*genetics/*pathogenicity ; Reverse Transcriptase Polymerase Chain Reaction ; 
*Virulence Factors ; }, abstract = {BACKGROUND: Severe malaria and one of its most important pathogenic processes, cerebral malaria, involves the sequestration of parasitized red blood cells (pRBCs) in brain postcapillary venules. Although the pathogenic mechanisms underlying malaria remain poorly characterized, it has been established that adhesion of pRBCs to endothelial cells (ECs) can result in cell apoptosis, which in turn may lead to disruption of the blood-brain barrier. The nature of the parasite molecules involved in the pathogenesis of severe malaria remains elusive.

METHODS: Whole-transcriptome profiling of nonapoptogenic versus apoptogenic parasite field isolates obtained from Gabonese children was performed with pan-genomic Plasmodium falciparum DNA microarrays; radiolabeled instead of fluorescent cDNAs were used to improve the sensitivity of signal detection.

RESULTS: Our methods allowed the identification of 59 genes putatively associated with the induction of EC apoptosis. Silencing of Plasmodium gene expression with specific double-stranded RNA was performed on 8 selected genes; 5 of these, named "Plasmodium apoptosis-linked pathogenicity factors" (PALPFs), were found to be linked to parasite apoptogenicity. Of these genes, 2 might act via parasite cytoadherence.

CONCLUSION: This is the first attempt to identify genes involved in parasite pathogenic mechanisms against human ECs. The finding of PALPFs illuminates perspectives for novel therapeutic strategies against cerebral complications of malaria.}, } @article {pmid17949484, year = {2007}, author = {Dalevi, D and Desantis, TZ and Fredslund, J and Andersen, GL and Markowitz, VM and Hugenholtz, P}, title = {Automated group assignment in large phylogenetic trees using GRUNT: GRouping, Ungrouping, Naming Tool.}, journal = {BMC bioinformatics}, volume = {8}, number = {}, pages = {402}, pmid = {17949484}, issn = {1471-2105}, mesh = {Algorithms ; Classification ; Database Management Systems ; *Databases, Nucleic Acid ; *Phylogeny ; RNA, Ribosomal, 16S/analysis ; *Software ; }, abstract = {BACKGROUND: Accurate taxonomy is best maintained if species are arranged as hierarchical groups in phylogenetic trees. This is especially important as trees grow larger as a consequence of a rapidly expanding sequence database. Hierarchical group names are typically manually assigned in trees, an approach that becomes unfeasible for very large topologies.

RESULTS: We have developed an automated iterative procedure for delineating stable (monophyletic) hierarchical groups to large (or small) trees and naming those groups according to a set of sequentially applied rules. In addition, we have created an associated ungrouping tool for removing existing groups that do not meet user-defined criteria (such as monophyly). The procedure is implemented in a program called GRUNT (GRouping, Ungrouping, Naming Tool) and has been applied to the current release of the Greengenes (Hugenholtz) 16S rRNA gene taxonomy comprising more than 130,000 taxa.

CONCLUSION: GRUNT will facilitate researchers requiring comprehensive hierarchical grouping of large tree topologies in, for example, database curation, microarray design and pangenome assignments. The application is available at the greengenes website 1.}, } @article {pmid17925668, year = {2007}, author = {Greillier, L and Roll, P and Barlesi, F and Robaglia-Schlupp, A and Fraticelli, A and Cau, P and Astoul, P}, title = {[Role of DNA microarrays in the diagnosis of pleural exudates: a feasibility study].}, journal = {Revue des maladies respiratoires}, volume = {24}, number = {7}, pages = {859-867}, doi = {10.1016/s0761-8425(07)91388-1}, pmid = {17925668}, issn = {0761-8425}, mesh = {Aged ; Asbestosis/complications ; DNA, Neoplasm/genetics ; Feasibility Studies ; Female ; Gene Expression Profiling ; Humans ; Male ; Mesothelioma/diagnosis ; Middle Aged ; *Oligonucleotide Array Sequence Analysis ; Pleural Effusion/*diagnosis/genetics ; Pleural Effusion, Malignant/diagnosis/genetics ; Pleural Neoplasms/diagnosis/secondary ; Pleurisy/diagnosis ; Prospective Studies ; RNA, Neoplasm/genetics ; Smoking ; }, abstract = {INTRODUCTION: Establishing the cause of exudative pleural effusions is sometimes difficult, especially in the context of possible malignant pleural mesothelioma (MPM). Therefore, the development of new biological tools is necessary. The aim of this study was to determine the feasibility and the diagnostic contribution of genomic analysis of cells contained in pleural fluid, using DNA microarray techniques.

METHODS: Patients with pleural effusion requiring diagnostic thoracocentesis were eligible to participate in the study. Five hundred milliliters of pleural fluid were then collected. RNA was extracted from pleural fluid cells and its integrity was assessed. Gene expression was studied using pangenomic DNA microarrays.

RESULTS: Seventeen patients were included (4 MPM, 8 secondary malignant pleurisies, 5 benign pleurisies). Three patients offered fully exploitable samples. Taking into account the results of control experiments, gene expression study from pleural fluid was reproducible. The comparison of samples showed significant differences in gene expression. Samples from 14 patients were not exploitable because of RNA degradation.

CONCLUSIONS: Gene expression study of cells from pleural fluid is feasible but remains difficult, mainly because of RNA fragility.}, } @article {pmid17909269, year = {2007}, author = {Tozlu-Kara, S and Roux, V and Andrieu, C and Vendrell, J and Vacher, S and Lazar, V and Spyratos, F and Tubiana-Hulin, M and Cohen, P and Dessen, P and Lidereau, R and Bièche, I}, title = {Oligonucleotide microarray analysis of estrogen receptor alpha-positive postmenopausal breast carcinomas: identification of HRPAP20 and TIMELESS as outstanding candidate markers to predict the response to tamoxifen.}, journal = {Journal of molecular endocrinology}, volume = {39}, number = {4}, pages = {305-318}, doi = {10.1677/JME-07-0001}, pmid = {17909269}, issn = {1479-6813}, mesh = {Aged ; Antineoplastic Agents, Hormonal/therapeutic use ; Biomarkers, Tumor/genetics/physiology ; Breast Neoplasms/diagnosis/*drug therapy/*genetics ; Calmodulin-Binding Proteins/*genetics/physiology ; Carcinoma/diagnosis/*drug therapy/*genetics ; Cell Cycle Proteins/*genetics/physiology ; Disease-Free Survival ; Estrogen Receptor alpha/*genetics ; Female ; Gene Expression Profiling ; Gene Expression Regulation, Neoplastic/drug effects ; Humans ; Intracellular Signaling Peptides and Proteins/*genetics/physiology ; Middle Aged ; *Oligonucleotide Array Sequence Analysis ; *Postmenopause/genetics ; Prognosis ; Tamoxifen/*therapeutic use ; Treatment Outcome ; Tumor Cells, Cultured ; }, abstract = {The estrogen receptor alpha (ER alpha) status of breast tumors is used to identify patients who may respond to endocrine agents such as tamoxifen. However, ER alpha status alone is not perfectly predictive, and there is a pressing need for more reliable markers of endocrine responsiveness. In this aim, we used a two-step strategy. 
We first screened genes of interest by a pangenomic 44 K oligonucleotide microarray in a series of ten ER alpha-positive tumors from five tamoxifen-treated postmenopausal patients who relapsed (distant metastasis) and five tamoxifen-treated postmenopausal patients who did not relapse, matched with respect to age, Scarff-Bloom-Richardson grade, lymph node status, and macroscopic tumor size. Genes of interest (n=24) were then investigated in an independent well-characterized series of ER alpha-positive unilateral invasive primary breast tumors from postmenopausal women who received tamoxifen alone as adjuvant hormone therapy after primary surgery. We identified four genes (HRPAP20, TIMELESS, PTPLB, and MGC29814) for which high mRNA levels were significantly associated with shorter relapse-free survival (log-rank test). We also showed that hormone-regulated proliferation-associated 20 kDa protein (HRPAP20) and TIMELESS are 17beta-estradiol-regulated in vitro and are ectopically expressed in OH-Tam-resistant cell lines. In conclusion, these findings point to HRPAP20 and TIMELESS as promising markers of tamoxifen resistance in women with ER alpha-positive breast tumors.}, } @article {pmid17899356, year = {2007}, author = {Menezo, Y and Russo, G and Tosti, E and El Mouatassim, S and Benkhalifa, M}, title = {Expression profile of genes coding for DNA repair in human oocytes using pangenomic microarrays, with a special focus on ROS linked decays.}, journal = {Journal of assisted reproduction and genetics}, volume = {24}, number = {11}, pages = {513-520}, pmid = {17899356}, issn = {1058-0468}, mesh = {DNA Damage/genetics ; DNA Repair/*genetics ; Gene Expression Profiling ; Humans ; Oligonucleotide Array Sequence Analysis ; Oocytes/*metabolism/physiology ; RNA, Messenger/metabolism ; *Reactive Oxygen Species ; }, abstract = {PURPOSE: To determine the level of expression for mRNAs that regulate DNA repair activity in oocytes at the germinal vesicle (GV) stage. 
Reactive oxygen species (ROS) have been shown to play a major role in the appearance of deleterious DNA decays, and this study focuses on the repair of damage linked to decay caused by the action of ROS. The oocyte needs a mechanism for repairing DNA decays in the early preimplantation embryo before the onset of genomic activation, since in the absence of repair, residual DNA damage would lead to either apoptosis or tolerance. Tolerance of DNA damage is a source of potential mutations.

METHOD: GV oocytes were selected for this study, both for the ethical reason that they are unsuitable for patient treatment, and because no transcription takes place during the period from GV to MII and then prior to genomic activation. The GV oocyte is therefore a good model for looking at DNA during the first cleavages of early preimplantation development. Six cohorts of GV oocytes were pooled for extraction of mRNA; the DNA was analysed using Affymetrix HG-U133 Plus 2, containing 54,675 probe sets; spike and housekeeping genes were also added as internal controls.

RESULTS: In GV oocytes, DNA repair pathways for oxidized bases are redundant. One step repair procedure (OSR), BER (base excision repair), MMR (mismatch repair) and NER (Nucleotide excision repair) are present. All the recognition proteins are also present. The chromatin assembly factors necessary for the maintenance of genomic stability are highly expressed.

CONCLUSION: Gene expression analysis shows that the oocyte does not allow a high level of tolerance for DNA decays. This regulatory mechanism should avoid transmitting mutations into the next generation.}, } @article {pmid17878247, year = {2007}, author = {Thouënnon, E and Elkahloun, AG and Guillemot, J and Gimenez-Roqueplo, AP and Bertherat, J and Pierre, A and Ghzili, H and Grumolato, L and Muresan, M and Klein, M and Lefebvre, H and Ouafik, L and Vaudry, H and Plouin, PF and Yon, L and Anouar, Y}, title = {Identification of potential gene markers and insights into the pathophysiology of pheochromocytoma malignancy.}, journal = {The Journal of clinical endocrinology and metabolism}, volume = {92}, number = {12}, pages = {4865-4872}, doi = {10.1210/jc.2007-1253}, pmid = {17878247}, issn = {0021-972X}, mesh = {Adolescent ; Adrenal Gland Neoplasms/*genetics/*pathology ; Adult ; Aged ; Aged, 80 and over ; Child ; Child, Preschool ; Female ; Genetic Markers ; Humans ; Infant ; Male ; Middle Aged ; Oligonucleotide Array Sequence Analysis ; Pheochromocytoma/*genetics/*pathology ; RNA, Neoplasm/biosynthesis/genetics ; Reproducibility of Results ; Reverse Transcriptase Polymerase Chain Reaction ; }, abstract = {CONTEXT: Pheochromocytomas are catecholamine-producing tumors that are generally benign but that can also present as or develop into malignancy. Occurrence of malignant pheochromocytomas can only be asserted by imaging of metastatic lesions.

OBJECTIVES: We conducted a gene expression profiling of benign and malignant tumors to identify a gene signature that would allow us to discriminate benign from malignant pheochromocytomas and to gain a better understanding of tumorigenic pathways associated with malignancy.

DESIGN: A total of 36 patients with pheochromocytoma was studied retrospectively. There were 18 (nine benign and nine malignant) tumors used for gene expression profiling on pangenomic oligonucleotide microarrays.

RESULTS: We identified and validated a set of predictor genes that could accurately distinguish the two tumor subtypes through unsupervised clustering. Most of the differentially expressed genes were down-regulated in malignant tumors, and several of these genes encoded neuroendocrine factors involved in prominent characteristics of chromaffin cell biology. In particular, the expression of two key processing enzymes of trophic peptides, peptidylglycine alpha-amidating monooxygenase and glutaminyl-peptide cyclotransferase, was reduced in malignant pheochromocytomas.

CONCLUSION: The gene expression profiling of benign and malignant pheochromocytomas clearly identified a set of genes that could be used as a prognostic multi-marker and revealed that the expression of several genes encoding neuroendocrine proteins was reduced in malignant compared with benign tumors.}, } @article {pmid17825913, year = {2008}, author = {Deschamps, M and Robinet, E and Certoux, JM and Mercier, P and Sauce, D and De Vos, J and Montcuquet, N and Bonyhadi, M and Rème, T and Tiberghien, P and Ferrand, C}, title = {Transcriptome of retrovirally transduced CD8+ lymphocytes: influence of cell activation, transgene integration, and selection process.}, journal = {Molecular immunology}, volume = {45}, number = {4}, pages = {1112-1125}, doi = {10.1016/j.molimm.2007.07.025}, pmid = {17825913}, issn = {0161-5890}, mesh = {Adult ; CD8-Positive T-Lymphocytes/*metabolism ; Female ; *Gene Expression Profiling ; *Gene Transfer Techniques ; Humans ; Lymphocyte Activation ; Male ; Middle Aged ; Oligonucleotide Array Sequence Analysis/methods ; Polymorphism, Genetic ; Retroviridae/*genetics ; Transgenes ; }, abstract = {A suicide gene introduced by retroviral means can allow in vivo control of alloreactivity mediated by donor gene-modified T cells (GMTC) after allogeneic hematopoietic stem cell transplantation. The present study establishes the transcriptomic profile of GMTC prepared according to the GMTC production process used in our clinical trial (activation/selection methods, CD3/NeoR), which was previously demonstrated to induce phenotypical and functional alterations. This transcriptomic profile was compared with that of GMTC prepared by a novel process (CD3-CD28/DeltaNGFR-MACS) that limits alterations. Using a human pan-genomic microarray and GeneSpring software, we determined the gene expression profiles of CD8+ T cells from four healthy donors before and after the different steps required for gene modification. 
This analysis revealed that the gene expression pattern of GMTC is affected mainly by the activation step. Specific analysis of GMTC production processes showed that DeltaNGFR-MACS selection combined with CD3-CD28 activation limits the aberrant expression of genes involved in immunological functions and apoptotic pathways. Furthermore, our results indicate a limited risk of oncogenesis associated with retroviral-mediated gene transfer in CD8+ cells, a lower perturbation of the cell cycle regulation pathway after CD3-CD28 activation than after CD3 activation, and no significant involvement of the DeltaNGFR transduction signaling pathway when DeltaNGFR is used for selection. Moreover, genes that might be targeted to limit T cell functional alterations after ex vivo manipulation and culture were identified. These findings should be relevant to further adoptive T cell immunotherapy trials using ex vivo-expanded, gene-modified or unmodified T cells.}, } @article {pmid17697348, year = {2007}, author = {Ekstrøm, PO and Bjørheim, J and Thilly, WG}, title = {Technology to accelerate pangenomic scanning for unknown point mutations in exonic sequences: cycling temperature capillary electrophoresis (CTCE).}, journal = {BMC genetics}, volume = {8}, number = {}, pages = {54}, pmid = {17697348}, issn = {1471-2156}, mesh = {DNA Mutational Analysis ; Electrophoresis, Capillary/*methods ; *Exons ; *Genome, Human ; Humans ; Nucleic Acid Denaturation ; *Point Mutation ; Polymerase Chain Reaction ; Temperature ; }, abstract = {BACKGROUND: Rapid means to discover and enumerate unknown mutations in the exons of human genes on a pangenomic scale are needed to discover the genes carrying inherited risk for common diseases or the genes in which somatic mutations are required for clonal diseases such as atherosclerosis and cancers. 
The method of constant denaturing capillary electrophoresis (CDCE) permitted sensitive detection and enumeration of unknown point mutations but labor-intensive optimization procedures for each exonic sequence made it impractical for application at a pangenomic scale.

RESULTS: A variant denaturing capillary electrophoresis protocol, cycling temperature capillary electrophoresis (CTCE), has eliminated the need for the laboratory optimization of separation conditions for each target sequence. Here we report the separation of wild type and mutant homoduplexes from wild type/mutant heteroduplexes for 27 randomly chosen target sequences without any laboratory optimization steps. Calculation of the equilibrium melting map of each target sequence attached to a high melting domain (clamp) was sufficient to design the analyte sequence and predict the expected degree of resolution.

CONCLUSION: CTCE provides practical means for economical pangenomic detection and enumeration of point mutations in large-scale human case/control cohort studies. We estimate that the combined reagent, instrumentation and labor costs for scanning the approximately 250,000 exons and splice sites of the approximately 25,000 human protein-coding genes using automated CTCE instruments in 100 case cohorts of 10,000 individuals each are now less than U.S. $500 million, less than U.S. $500 per person.}, } @article {pmid17680689, year = {2008}, author = {Stevens, TJ and Paoli, M}, title = {RCC1-like repeat proteins: a pangenomic, structurally diverse new superfamily of beta-propeller domains.}, journal = {Proteins}, volume = {70}, number = {2}, pages = {378-387}, doi = {10.1002/prot.21521}, pmid = {17680689}, issn = {1097-0134}, mesh = {Amino Acid Sequence ; Cell Cycle Proteins/*chemistry/genetics ; *Genome ; Guanine Nucleotide Exchange Factors/*chemistry/genetics ; Molecular Sequence Data ; Nuclear Proteins/*chemistry/genetics ; Protein Conformation ; Sequence Homology, Amino Acid ; }, abstract = {The beta-propeller fold is a phylogenetically widespread, common protein architecture able to support a range of different functions such as catalysis, ligand binding and transport, regulation and protein binding. Interestingly, it appears that the beta-propeller topology is also compatible with strikingly diverse sequences. Amongst this diversity, there are three large groups of proteins with related sequences and very important cellular and intercellular regulatory functions: WD, kelch, and YWTD proteins. A common characteristic between these protein families is that their sequences, while distinct, all contain internal repeats 40-45 residues long. 
Through a pangenomic analysis using internal repeat profiles derived from the structurally known propeller modules of the eukaryotic protein RCC1 and the related prokaryotic protein BLIP-II, we have defined a new superfamily of propeller repeats, the RCC1-like repeats (RLRs). These sequences turn out to be more phylogenetically widespread than other large groups of propeller proteins, occurring in both prokaryotic and eukaryotic genomes. Interestingly, our research showed that RLR domains with different numbers of repeats exist, ranging from 3 to 7, and possibly more. A novel, intriguing finding is the discovery of sequences with 3 repeats, as well as proteins with 10 modular units, though in the latter case it is not clear whether these are made of two 5-bladed domains or a single, novel 10-bladed propeller. In addition, the results indicate that circular permutation events may have taken place in the evolution of these proteins. It is now established that the group of RLR proteins is extremely numerous and is characterized by unique, remarkable features which place it in a position of special interest as an important superfamily of proteins in nature.}, } @article {pmid17646210, year = {2007}, author = {Mazzucotelli, A and Viguerie, N and Tiraby, C and Annicotte, JS and Mairal, A and Klimcakova, E and Lepin, E and Delmar, P and Dejean, S and Tavernier, G and Lefort, C and Hidalgo, J and Pineau, T and Fajas, L and Clément, K and Langin, D}, title = {The transcriptional coactivator peroxisome proliferator activated receptor (PPAR)gamma coactivator-1 alpha and the nuclear receptor PPAR alpha control the expression of glycerol kinase and metabolism genes independently of PPAR gamma activation in human white adipocytes.}, journal = {Diabetes}, volume = {56}, number = {10}, pages = {2467-2475}, doi = {10.2337/db06-1465}, pmid = {17646210}, issn = {1939-327X}, mesh = {Adipocytes/*physiology ; Gene Expression Regulation ; *Gene Expression Regulation, Enzymologic ; 
Glycerol Kinase/*genetics/metabolism ; Humans ; Intracellular Signaling Peptides and Proteins/*metabolism ; Nuclear Receptor Coactivators ; PPAR alpha/*genetics/physiology ; PPAR gamma/*genetics/*physiology ; }, abstract = {OBJECTIVE: The purpose of this work was to determine the pattern of genes regulated by peroxisome proliferator-activated receptor (PPAR) gamma coactivator 1 alpha (PGC-1 alpha) in human adipocytes and the involvement of PPARalpha and PPARgamma in PGC-1 alpha transcriptional action.

RESEARCH DESIGN AND METHODS: Primary cultures of human adipocytes were transduced with a PGC-1 alpha adenovirus and treated with PPARgamma and PPARalpha agonists. Variation in gene expression was assessed using pangenomic microarrays and quantitative RT-PCR. To investigate glycerol kinase (GyK), a target of PGC-1 alpha, we measured enzymatic activity and glycerol incorporation into triglycerides. In vivo studies were performed on wild-type and PPARalpha(-/-) mice. The GyK promoter was studied using chromatin immunoprecipitation and promoter reporter gene assays.

RESULTS: Among the large number of genes regulated by PGC-1 alpha independently of PPARgamma, new targets involved in metabolism included the gene encoding GyK. The induction of GyK by PGC-1 alpha was observed at the levels of mRNA, enzymatic activity, and glycerol incorporation into triglycerides. PPARalpha was also upregulated by PGC-1 alpha. Its activation led to an increase in GyK expression and activity. PPARalpha was shown to bind and activate the GyK promoter. Experiments in mice confirmed the role of PGC-1 alpha and PPARalpha in the regulation of GyK in vivo.

CONCLUSIONS: This work uncovers novel pathways regulated by PGC-1 alpha and reveals that PPARalpha controls gene expression in human white adipocytes. The induction of GyK by PGC-1 alpha and PPARalpha may promote a futile cycle of triglyceride hydrolysis and fatty acid reesterification.}, } @article {pmid17532526, year = {2007}, author = {Muzzi, A and Masignani, V and Rappuoli, R}, title = {The pan-genome: towards a knowledge-based discovery of novel targets for vaccines and antibacterials.}, journal = {Drug discovery today}, volume = {12}, number = {11-12}, pages = {429-439}, doi = {10.1016/j.drudis.2007.04.008}, pmid = {17532526}, issn = {1359-6446}, mesh = {Anti-Bacterial Agents/*pharmacology ; Bacteria/genetics/immunology ; Bacterial Vaccines/*genetics ; Genome, Bacterial/*genetics ; Genome, Viral/*genetics ; Humans ; Virulence Factors/immunology/physiology ; }, abstract = {During the past decade, sequencing of the entire genome of pathogenic bacteria has become a widely used practice in microbiology research. More recently, sequence data from multiple isolates of a single pathogen have provided new insights into the microevolution of a species as well as helping researchers to decipher its virulence mechanisms. The comparison of multiple strains of a single species has resulted in the definition of the species pan-genome, as a measure of the total gene repertoire that can pertain to a given microorganism. 
This concept can be exploited not only to study the diversity of a species, but also, as we discuss here, to provide the opportunity to use a knowledge-based approach for the development of novel vaccine candidates and new-generation targets for antimicrobials.}, } @article {pmid17475002, year = {2007}, author = {Lefébure, T and Stanhope, MJ}, title = {Evolution of the core and pan-genome of Streptococcus: positive selection, recombination, and genome composition.}, journal = {Genome biology}, volume = {8}, number = {5}, pages = {R71}, pmid = {17475002}, issn = {1474-760X}, mesh = {*Biological Evolution ; *Genome, Bacterial ; Recombination, Genetic ; Selection, Genetic ; Streptococcus/*genetics ; }, abstract = {BACKGROUND: The genus Streptococcus is one of the most diverse and important human and agricultural pathogens. This study employs comparative evolutionary analyses of 26 Streptococcus genomes to yield an improved understanding of the relative roles of recombination and positive selection in pathogen adaptation to their hosts.

RESULTS: Streptococcus genomes exhibit extreme levels of evolutionary plasticity, with high levels of gene gain and loss during species and strain evolution. S. agalactiae has a large pan-genome, with little recombination in its core-genome, while S. pyogenes has a smaller pan-genome and much more recombination of its core-genome, perhaps reflecting the greater habitat, and gene pool, diversity for S. agalactiae compared to S. pyogenes. Core-genome recombination was evident in all lineages (18% to 37% of the core-genome judged to be recombinant), while positive selection was mainly observed during species differentiation (from 11% to 34% of the core-genome). Positive selection pressure was unevenly distributed across lineages and biochemical main role categories. S. suis was the lineage with the greatest level of positive selection pressure, the largest number of unique loci selected, and the largest amount of gene gain and loss.

CONCLUSION: Recombination is an important evolutionary force in shaping Streptococcus genomes, not only in the acquisition of significant portions of the genome as lineage specific loci, but also in facilitating rapid evolution of the core-genome. Positive selection, although undoubtedly a slower process, has nonetheless played an important role in adaptation of the core-genome of different Streptococcus species to different hosts.}, } @article {pmid17450528, year = {2007}, author = {Dupont, VN and Gentien, D and Oberkampf, M and De Rycke, Y and Blin, N}, title = {A gene expression signature associated with metastatic cells in effusions of breast carcinoma patients.}, journal = {International journal of cancer}, volume = {121}, number = {5}, pages = {1036-1046}, doi = {10.1002/ijc.22775}, pmid = {17450528}, issn = {0020-7136}, mesh = {Breast Neoplasms/*genetics/pathology ; DNA, Complementary ; Female ; Fluorescent Antibody Technique ; Gene Expression Profiling ; Humans ; Immunomagnetic Separation ; Middle Aged ; *Neoplasm Metastasis ; Oligonucleotide Array Sequence Analysis ; }, abstract = {Malignant effusion in invasive breast carcinoma is associated with poor prognosis. To decipher molecular events leading to metastasis and to identify reliable markers for targeted therapies are of crucial need. Therefore, we have used cDNA microarrays to delineate molecular signatures associated with metastasis and relapse in breast carcinoma effusions. Taking advantage of an immunomagnetic method, we have purified to homogeneity EpCAM-positive cells from 34 malignant effusions. Immunopurified cells represented as much as 10% of the whole cell fraction and their epithelial and carcinoma features were confirmed by immunofluorescence labeling. 
Gene expression profiles of 19 immunopurified effusion samples were analyzed using human pan-genomic microarrays, and compared with those of 4 corresponding primary tumors, 8 breast carcinoma effusion-derived cell lines, and 4 healthy mammary tissues. Principal component and multiple clustering analyses of microarray data clearly identified distinctive molecular portraits corresponding to the 4 categories of specimens. Of uppermost interest, effusion samples were arranged in 2 subsets on the basis of their gene expression patterns. The first subset partly shares a gene expression signature with the different cell lines, and overexpresses CD24, CD44 and epithelial cytokeratins 8, 18, 19. The second subset overexpresses markers related to aggressive invasive carcinoma (uPA receptor, S100A4, vimentin, CXCR4). These findings demonstrate the importance of using pure cell fractions to accurately decipher in silico gene expression of clinical specimens. Further studies will lead to the identification of genes of outstanding importance to diagnose malignant effusion, predict survival and tailor appropriate therapies to the metastatic effusion disease in breast carcinoma patients.}, } @article {pmid17449699, year = {2007}, author = {Miller, MC and Keymer, DP and Avelar, A and Boehm, AB and Schoolnik, GK}, title = {Detection and transformation of genome segments that differ within a coastal population of Vibrio cholerae strains.}, journal = {Applied and environmental microbiology}, volume = {73}, number = {11}, pages = {3695-3704}, pmid = {17449699}, issn = {0099-2240}, mesh = {California ; DNA, Bacterial/*genetics/isolation & purification ; Gene Transfer, Horizontal ; *Genetic Variation ; Genome, Bacterial/*genetics ; Interspersed Repetitive Sequences ; Multigene Family/genetics ; Seawater/*microbiology ; *Transformation, Bacterial ; Vibrio cholerae/*genetics/*isolation & purification ; Vibrio cholerae O1/genetics ; }, abstract = {Vibrio cholerae is an autochthonous member 
of diverse aquatic ecosystems around the globe. Collectively, the genomes of environmental V. cholerae strains comprise a large repository of encoded functions which can be acquired by individual V. cholerae lineages through uptake and recombination. To characterize the genomic diversity of environmental V. cholerae, we used comparative genome hybridization to study 41 environmental strains isolated from diverse habitats along the central California coast, a region free of endemic cholera. These data were used to classify genes of the epidemic V. cholerae O1 sequenced strain N16961 as conserved, variably present, or absent from the isolates. For the most part, absent genes were restricted to large mobile elements and have known functions in pathogenesis. Conversely, genes present in some, but not all, California isolates were in smaller contiguous clusters and were less likely to be near genes with functions in DNA mobility. Two such clusters of variable genes encoding different selectable metabolic phenotypes (mannose and diglucosamine utilization) were transformed into the genomes of environmental isolates by chitin-dependent competence, indicating that this mechanism of general genetic exchange is conserved among V. cholerae. The transformed DNA had an average size of 22.7 kbp, demonstrating that natural competence can mediate the movement of large chromosome fragments. Thus, whether variable genes arise through the acquisition of new sequences by horizontal gene transfer or by the loss of preexisting DNA through deletion, natural transformation provides a mechanism by which V. cholerae clones can gain access to the V. 
cholerae pan-genome.}, } @article {pmid17434157, year = {2008}, author = {Lapidus, A and Goltsman, E and Auger, S and Galleron, N and Ségurens, B and Dossat, C and Land, ML and Broussolle, V and Brillard, J and Guinebretiere, MH and Sanchis, V and Nguen-The, C and Lereclus, D and Richardson, P and Wincker, P and Weissenbach, J and Ehrlich, SD and Sorokin, A}, title = {Extending the Bacillus cereus group genomics to putative food-borne pathogens of different toxicity.}, journal = {Chemico-biological interactions}, volume = {171}, number = {2}, pages = {236-249}, doi = {10.1016/j.cbi.2007.03.003}, pmid = {17434157}, issn = {0009-2797}, mesh = {Bacillus cereus/*genetics ; *Food Microbiology ; Foodborne Diseases/*microbiology ; *Genome, Bacterial ; Humans ; }, abstract = {The Bacillus cereus group represents sporulating soil bacteria containing pathogenic strains which may cause diarrheic or emetic food poisoning outbreaks. Multiple locus sequence typing revealed a presence in natural samples of these bacteria of about 30 clonal complexes. Application of genomic methods to this group was however biased due to the major interest for representatives closely related to Bacillus anthracis. Albeit the most important food-borne pathogens were not yet defined, existing data indicate that they are scattered all over the phylogenetic tree. The preliminary analysis of the sequences of three genomes discussed in this paper narrows down the gaps in our knowledge of the B. cereus group. The strain NVH391-98 is a rare but particularly severe food-borne pathogen. Sequencing revealed that the strain should be a representative of a novel bacterial species, for which the name Bacillus cytotoxis or Bacillus cytotoxicus is proposed. This strain has a reduced genome size compared to other B. cereus group strains. Genome analysis revealed absence of sigma B factor and the presence of genes encoding diarrheic Nhe toxin, not detected earlier. The strain B. 
cereus F837/76 represents a clonal complex close to that of B. anthracis. Including F837/76, three such B. cereus strains had been sequenced. Alignment of genomes suggests that B. anthracis is their common ancestor. Since such strains often emerge from clinical cases, they merit special attention. The third strain, KBAB4, is a typical facultative psychrophile generally found in soil. Phylogenetic studies show that in nature it is the most active group in terms of gene exchange. Genomic sequence revealed high presence of extra-chromosomal genetic material (about 530 kb) that may account for this phenomenon. Genes coding Nhe-like toxin were found on a big plasmid in this strain. This may indicate a potential mechanism of toxicity spread from the psychrophile strain community. The results of this genomic work and ecological compartments of different strains prompt us to consider the necessity of creating prophylactic vaccines against bacteria closely related to NVH391-98 and F837/76. Presumably, the development of such vaccines can be based on the properties of non-pathogenic strains such as KBAB4 or ATCC14579 reported here or earlier. By comparing the protein coding genes of strains being sequenced in this project to others we estimate the shared proteome, or core genome, in the B. 
cereus group to be 3000+/-200 genes and the total proteome, or pan-genome, to be 20-25,000 genes.}, } @article {pmid17379713, year = {2007}, author = {Yukawa, H and Omumasaba, CA and Nonaka, H and Kós, P and Okai, N and Suzuki, N and Suda, M and Tsuge, Y and Watanabe, J and Ikeda, Y and Vertès, AA and Inui, M}, title = {Comparative analysis of the Corynebacterium glutamicum group and complete genome sequence of strain R.}, journal = {Microbiology (Reading, England)}, volume = {153}, number = {Pt 4}, pages = {1042-1058}, doi = {10.1099/mic.0.2006/003657-0}, pmid = {17379713}, issn = {1350-0872}, mesh = {Bacterial Proteins/genetics/metabolism ; Carbohydrate Metabolism ; Corynebacterium glutamicum/*genetics/metabolism ; *Genome, Bacterial ; Genomics ; Molecular Sequence Data ; Sigma Factor/genetics ; Species Specificity ; }, abstract = {The complete genome sequence of Corynebacterium glutamicum strain R was determined to allow its comparative analysis with other corynebacteria. The biology of corynebacteria was explored by refining the definition of the subset of genes that constitutes the corynebacterial core as well as those characteristic of saprophytic and pathogenic ecological niches. In addition, the relative scarcity of corynebacterial sigma factors and the plasticity of their two-component system machinery reflect their relatively exacting nutritional requirements and reduced membrane-associated and secreted proteins. The conservation of key genes and pathways between corynebacteria, mycobacteria and Nocardia validates the use of C. glutamicum to study fundamental processes that are conserved in slow-growing mycobacteria, including pathogenesis-associated mechanisms. The discovery of 39 novel genes in C. 
glutamicum R that have not been previously reported in other corynebacteria supports the rationale for sequencing additional corynebacterial genomes to better define the corynebacterial pan-genome and identify previously undetected metabolic pathways in these organisms.}, } @article {pmid17324490, year = {2007}, author = {Barocchi, MA and Censini, S and Rappuoli, R}, title = {Vaccines in the era of genomics: the pneumococcal challenge.}, journal = {Vaccine}, volume = {25}, number = {16}, pages = {2963-2973}, doi = {10.1016/j.vaccine.2007.01.065}, pmid = {17324490}, issn = {0264-410X}, mesh = {Bacterial Proteins/*immunology ; Computational Biology/methods ; *Genome, Bacterial ; *Genomics ; Humans ; Peptidoglycan/chemistry/metabolism ; Pneumococcal Infections/*prevention & control ; Pneumococcal Vaccines/*administration & dosage/immunology/therapeutic use ; }, abstract = {In this review we aim to provide the reader with an understanding of the capsular-based complexity of Streptococcus pneumoniae, one of the main limitations to current vaccine development. We then discuss the need for a new vaccine strategy based on proteic antigen candidates discovered in silico. Describing specifically how reverse vaccinology coupled to conventional vaccinology has led to a new paradigm of vaccine development. Finally, we conclude with the importance of defining the pan-genome of the pneumococcus, that is, the sequencing and analysis of multiple genomes from the same species. A critical factor in determining conserved proteins in a group of epidemiologically relevant circulating S. pneumoniae strains, in order to achieve the greatest coverage. 
Ultimately, the identification of immunogenic surface antigens and assessment of their efficacy will be imperative in the development of a vaccine with the ability to protect against invasive disease independent of serotype.}, } @article {pmid17321611, year = {2007}, author = {Chalabi, N and Satih, S and Delort, L and Bignon, YJ and Bernard-Gallon, DJ}, title = {Expression profiling by whole-genome microarray hybridization reveals differential gene expression in breast cancer cell lines after lycopene exposure.}, journal = {Biochimica et biophysica acta}, volume = {1769}, number = {2}, pages = {124-130}, doi = {10.1016/j.bbaexp.2007.01.007}, pmid = {17321611}, issn = {0006-3002}, mesh = {Anticarcinogenic Agents/*pharmacology ; Breast Neoplasms/*genetics ; Carotenoids/*pharmacology ; Cell Line, Tumor ; Female ; Gene Expression Profiling ; *Gene Expression Regulation, Neoplastic ; Humans ; Lycopene ; Oligonucleotide Array Sequence Analysis ; RNA, Complementary/metabolism ; }, abstract = {The correlation between diet and variation in gene-expression is an important field which could be considered to approach cancer pathways comprehension. We examined the effects of lycopene on breast cancer cell lines using pangenomic arrays. Lycopene is derived predominantly from tomatoes and tomato products and there is some epidemiologic evidence for a preventive role in breast cancer. Previously, we investigated lycopene in breast cancer using a dedicated breast cancer microarray. To confirm these results and explore pathways other than those implicated in breast cancer, for this study we used pangenomic arrays containing 25,000 oligonucleotides. This in vitro study assayed two human mammary cancer cell lines (MCF-7 and MDA-MB-231), and a fibrocystic breast cell line (MCF-10a) treated or not with 10 microM lycopene for 48 h. A competitive hybridization was performed between Cy3-labeled lycopene treated RNA and Cy5-labeled untreated RNA to define differentially expressed genes. 
Using t-test analysis, a subset of 391 genes was found to be differentially modulated by lycopene between estrogen-positive cells (MCF-7) and estrogen-negative cells (MDA-MB-231, MCF-10a). Hierarchical clustering revealed 726 discriminatory genes between breast cancer cell lines (MCF-7, MDA-MB-231) and the fibrocystic breast cell line (MCF-10a). Modified gene expression was observed in various molecular pathways, such as apoptosis, cell communication, MAPK and cell cycle as well as xenobiotic metabolism, fatty acid biosynthesis and gap junctional intercellular communication.}, } @article {pmid17312182, year = {2007}, author = {Condomines, M and Hose, D and Raynaud, P and Hundemer, M and De Vos, J and Baudard, M and Moehler, T and Pantesco, V and Moos, M and Schved, JF and Rossi, JF and Rème, T and Goldschmidt, H and Klein, B}, title = {Cancer/testis genes in multiple myeloma: expression patterns and prognosis value determined by microarray analysis.}, journal = {Journal of immunology (Baltimore, Md. : 1950)}, volume = {178}, number = {5}, pages = {3307-3315}, doi = {10.4049/jimmunol.178.5.3307}, pmid = {17312182}, issn = {0022-1767}, mesh = {Antigens, Neoplasm/*biosynthesis/genetics ; Cancer Vaccines/genetics/therapeutic use ; Female ; Gene Expression Profiling ; *Gene Expression Regulation, Neoplastic ; Humans ; Male ; Middle Aged ; Multiple Myeloma/diagnosis/genetics/*metabolism/therapy ; Oligonucleotide Array Sequence Analysis ; Predictive Value of Tests ; Prognosis ; }, abstract = {Cancer-testis (CT) Ags are expressed in testis and malignant tumors but rarely in nongametogenic tissues. Due to this pattern, they represent attractive targets for cancer vaccination approaches. The aims of the present study are: 1) to assess the expression of CT genes on a pangenomic base in multiple myeloma (MM); 2) to assess the prognosis value of CT gene expression; and 3) to provide selection strategies for CT Ags in clinical vaccination trials. 
We report the expression pattern of CT genes in purified MM cells (MMC) of 64 patients with newly diagnosed MM and 12 patients with monoclonal gammopathy of unknown significance, in normal plasma cell and B cell samples, and in 20 MMC lines. Of the 46 CT genes interrogated by the Affymetrix HG-U133 set arrays, 35 are expressed in the MMC of at least one patient. Of these, 25 are located on chromosome X. The expression of six CT genes is associated with a shorter event-free survival. The MMC of 98% of the patients express at least one CT gene, 86% at least two, and 70% at least three CT genes. By using a set of 10 CT genes including KM-HN-1, MAGE-C1, MAGE-A3/6/12, MAGE-A5, MORC, DDX43, SPACA3, SSX-4, GAGE-1-8, and MAGE-C2, a combination of at least three CT genes—desirable for circumventing tumor escape mechanisms—is obtained in the MMC of 67% of the patients. Provided that the immunogenicity of the products of these 10 CT genes is confirmed, gene expression profiling could be useful in identifying which CT Ags could be used to vaccinate a given patient.}, } @article {pmid17300983, year = {2007}, author = {Morgante, M and De Paoli, E and Radovic, S}, title = {Transposable elements and the plant pan-genomes.}, journal = {Current opinion in plant biology}, volume = {10}, number = {2}, pages = {149-155}, doi = {10.1016/j.pbi.2007.02.001}, pmid = {17300983}, issn = {1369-5266}, mesh = {Base Composition/genetics ; DNA Transposable Elements/*genetics ; Genetic Variation ; Genome, Plant/*genetics ; Models, Genetic ; }, abstract = {The comparative sequencing of several grass genomes has revealed that transposable elements are largely responsible for extensive variation in both intergenic and local genic content, not only between closely related species but also among individuals within a species. 
These observations indicate that a single genome sequence might not reflect the entire genomic complement of a species, and prompted us to introduce the concept of the plant pan-genome, which includes core genomic features that are common to all individuals and a dispensable genome composed of partially shared and/or non-shared DNA sequence elements. Uncovering the intriguing nature of the dispensable genome, namely its composition, origin and function, represents a step forward towards an understanding of the processes that generate genetic diversity and phenotypic variation. The developing view of transcriptional regulation as a complex and modular system, in which long-range interactions and the involvement of transposable elements are frequently observed, lends support to the possibility of an important functional role for the dispensable genome and could make it less dispensable than previously thought.}, } @article {pmid17254642, year = {2007}, author = {Li, Y and Ropp, SL and Zhao, H and Damon, IK and Esposito, JJ}, title = {Orthopoxvirus pan-genomic DNA assay.}, journal = {Journal of virological methods}, volume = {141}, number = {2}, pages = {154-165}, doi = {10.1016/j.jviromet.2006.12.005}, pmid = {17254642}, issn = {0166-0934}, mesh = {DNA Primers ; *DNA, Viral/genetics ; Electrophoresis, Polyacrylamide Gel ; Humans ; Orthopoxvirus/classification/*genetics/isolation & purification ; Polymerase Chain Reaction/*standards ; *Polymorphism, Restriction Fragment Length ; Poxviridae Infections/diagnosis/virology ; Sensitivity and Specificity ; Species Specificity ; }, abstract = {A genome-spanning assay is described that enables laboratory confirmation of infections with orthopoxviruses (OPVs), particularly Vaccinia, Monkeypox, and Variola viruses, which can cause vesiculo-pustular rash illnesses in humans. The assay uses a series of polymerase chain reaction (PCR) amplicons that overlap to span the approximately 200 kilobase pair linear DNA genome of OPVs. 
Corresponding amplicons of different viral isolates can then be compared by matching their restriction fragment length polymorphism (RFLP) gel electrophoresis patterns. The PCR step does not necessarily require viral growth to produce sufficient DNA for the RFLP comparisons. The assay would be useful as a prelude to sequencing entire or partial DNA genome regions of various OPVs, including natural or recombinant OPVs and potentially dangerous bioengineered OPVs designed to express foreign DNA or other viruses.}, } @article {pmid17222424, year = {2007}, author = {Roncalli, J and Smih, F and Desmoulin, F and Dumonteil, N and Harmancey, R and Hennig, S and Perez, L and Pathak, A and Galinier, M and Massabuau, P and Malet-Martino, M and Senard, JM and Rouet, P}, title = {NMR and cDNA array analysis prior to heart failure reveals an increase of unsaturated lipids, a glutamine/glutamate ratio decrease and a specific transcriptome adaptation in obese rat heart.}, journal = {Journal of molecular and cellular cardiology}, volume = {42}, number = {3}, pages = {526-539}, doi = {10.1016/j.yjmcc.2006.11.007}, pmid = {17222424}, issn = {0022-2828}, mesh = {Adaptation, Biological ; Aging/physiology ; Animals ; Gene Expression Profiling ; Glutamates/*metabolism ; Glutamine/*metabolism ; Heart Failure/genetics/*metabolism ; Intracellular Membranes/*metabolism ; *Lipid Metabolism ; Magnetic Resonance Spectroscopy ; Male ; Multigene Family ; Obesity/genetics/*metabolism ; Oligonucleotide Array Sequence Analysis ; Phenotype ; Rats ; Transcription, Genetic/*genetics ; }, abstract = {Obesity is a risk factor for heart failure through a set of hemodynamic and hormonal adaptations, but its contribution at the molecular level is not clearly known. Therefore, we investigated the kinetic cardiac transcriptome and metabolome in the Spontaneous Hypertensive Heart Failure (SHHF) rat. The SHHF rat is devoid of leptin signaling when homozygous for a mutation of the leptin receptor (ObR) gene. 
The ObR-/- SHHF rat is obese at 4 months of age and prone to heart failure after 14 months whereas its lean counterpart ObR-/+ is prone to heart failure after 16 months. We used a set of rat pangenomic high-density macroarrays to monitor left ventricle cardiac transcriptome regulation in 4- and 10-month-old, lean and obese animals. Comparative analysis of left ventricle of 4- and 10-month-old lean rat revealed 222 differentially expressed genes while 4- and 10-month-old obese rats showed 293 differentially expressed genes. (1)H NMR analysis of the metabolome of left ventricular extracts displayed a global decrease of metabolites, except for taurine, and lipid concentration. This may be attributed to gene expression regulation and likely increased extracellular mass. The glutamine to glutamate ratio was significantly lower in the obese group. The relative unsaturation of lipids increased in the obese heart; in particular, omega-3 lipid concentration was higher in the 10-month-old obese heart. 
Overall, several specific kinetic molecular patterns act as a prelude to heart failure in the leptin signaling deficient SHHF obese rat.}, } @article {pmid17171607, year = {2006}, author = {Surowiak, P}, title = {Prediction of the response to chemotherapy in ovarian cancers.}, journal = {Folia morphologica}, volume = {65}, number = {4}, pages = {285-294}, pmid = {17171607}, issn = {0015-5659}, mesh = {ATP-Binding Cassette Transporters/genetics/physiology ; Antineoplastic Combined Chemotherapy Protocols/pharmacology/*therapeutic use ; Apoptosis/physiology ; DNA Topoisomerases, Type I/genetics/physiology ; DNA, Neoplasm/genetics ; Drug Resistance, Multiple/*genetics ; Female ; Gene Expression Regulation, Neoplastic ; Humans ; Oligonucleotide Array Sequence Analysis ; Ovarian Neoplasms/*drug therapy/*physiopathology ; Predictive Value of Tests ; Receptor, ErbB-2/genetics/physiology ; Treatment Failure ; Treatment Outcome ; }, abstract = {Ovarian cancer represents the fifth most frequent cause of death as a result of malignant processes after cancers of the breast, large intestine, lung and stomach. Owing to the localisation of ovarian cancer, approximately 75% of cases are diagnosed at the III and IV stages of advancement according to FIGO. Because of the advanced stage of the disease surgery has to be followed by chemotherapy in most cases of ovarian cancer and therefore resistance to cytostatic drugs represents a major clinical problem. The potential to predict the response to therapy with the use of cytostatic drugs would enable the most effective drugs to be applied in individual cases, thus improving the efficiency of the treatment and restricting the development of resistance to cytostatic drugs. In the present paper the progress made so far in the prediction of the clinical course of ovarian cancer is reviewed. 
The significance of the expression of the ATP-binding cassette (ABC) transporters is described, including P-glycoprotein and MRP2, the principal representatives of the protein group. The importance of disturbed control of apoptosis and the overexpression of HER-2 and topoisomerase 1A are also discussed. Two sections are devoted to the most recent studies in the biology of ovarian cancer, pangenomic studies on gene expression using DNA microarrays and aberrations of DNA methylation.}, } @article {pmid17170127, year = {2007}, author = {Raslova, H and Kauffmann, A and Sekkaï, D and Ripoche, H and Larbret, F and Robert, T and Tronik Le Roux, D and Kroemer, G and Debili, N and Dessen, P and Lazar, V and Vainchenker, W}, title = {Interrelation between polyploidization and megakaryocyte differentiation: a gene profiling approach.}, journal = {Blood}, volume = {109}, number = {8}, pages = {3225-3234}, doi = {10.1182/blood-2006-07-037838}, pmid = {17170127}, issn = {0006-4971}, mesh = {Antigens, CD34 ; Blood Platelets ; Cell Differentiation/drug effects/*physiology ; Cells, Cultured ; Female ; Gene Expression Profiling ; Gene Expression Regulation/drug effects/*physiology ; Humans ; Male ; Megakaryocytes/cytology/*physiology ; Oligonucleotide Array Sequence Analysis ; Ploidies ; Thrombopoietin/pharmacology ; }, abstract = {Polyploidization is a part of the normal developmental process leading to platelet production during megakaryocyte (MK) differentiation. Ploidization is mainly involved in cell enlargement, but it is not clear whether gene expression is modified during MK ploidization. In this study, human MKs were grown from CD34(+) cells in the presence of thrombopoietin and sorted according to their ploidy level. A pangenomic microarray technique was applied to compare gene expression in 2N-, 4N-, 8N-, and 16N-sorted MKs. Using hierarchical clustering, we demonstrated that 2N and 4N MKs or 8N and 16N MKs are 2 different close populations with 105 discriminating genes. 
In the second approach, we determined the profile of genes that were continuously down- and up-regulated during polyploidization. Among the 100 down-regulated genes, 24 corresponded to genes involved in DNA replication and repair. The great majority of up-regulated genes corresponded to genes directly involved in platelet functions, such as genes encoding specific platelet glycoproteins and alpha-granule proteins, actin and microtubule cytoskeleton, factors involved in signaling, and transport proteins. Together, these results suggest that MK polyploidization per se does not regulate gene expression but is intrinsically included in the differentiation process.}, } @article {pmid17113583, year = {2006}, author = {Kim, YC and Jung, YC and Xuan, Z and Dong, H and Zhang, MQ and Wang, SM}, title = {Pan-genome isolation of low abundance transcripts using SAGE tag.}, journal = {FEBS letters}, volume = {580}, number = {28-29}, pages = {6721-6729}, pmid = {17113583}, issn = {0014-5793}, support = {R01 HG002600/HG/NHGRI NIH HHS/United States ; HG002600/HG/NHGRI NIH HHS/United States ; }, mesh = {Base Pair Mismatch/genetics ; Base Sequence ; Chromosome Mapping ; Chromosomes, Human, X/genetics ; Expressed Sequence Tags ; Gene Expression Regulation/*genetics ; Genome, Human/*genetics ; Humans ; Nucleic Acid Amplification Techniques ; RNA, Messenger/analysis/*genetics/*isolation & purification ; Transcription, Genetic/*genetics ; }, abstract = {The SAGE (serial analysis of gene expression) method is sensitive at detecting the lower abundance transcripts. More than a third of human SAGE tags identified are novel representing the low abundance unknown transcripts. Using the GLGI method (generation of longer 3' EST from SAGE tag for gene identification), we converted 1009 low-copy, human X chromosome-specific SAGE tags into 10210 3' ESTs. We identified 3418 unique 3' ESTs, 46% of which are novel and originated from the lower abundance transcripts. 
However, nearly all 3' ESTs were mapped to various regions across the genome but not X chromosome. Detailed analysis indicates that those 3' ESTs were isolated by SAGE tag mis-priming to the non-parent transcripts. Replacing SAGE tags with non-transcribed genomic DNA tags resulted in poor amplification, indicating that the sequence similarity between different transcripts contributed to the amplification. Our study shows the prevalence of novel low abundance transcripts that can be isolated efficiently through SAGE tags mis-priming.}, } @article {pmid17085551, year = {2007}, author = {Guidot, A and Prior, P and Schoenfeld, J and Carrère, S and Genin, S and Boucher, C}, title = {Genomic structure and phylogeny of the plant pathogen Ralstonia solanacearum inferred from gene distribution analysis.}, journal = {Journal of bacteriology}, volume = {189}, number = {2}, pages = {377-387}, pmid = {17085551}, issn = {0021-9193}, mesh = {Chromosome Mapping ; Chromosomes, Bacterial/genetics ; Cluster Analysis ; Evolution, Molecular ; Genes, Bacterial/genetics ; Genetic Variation ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Multigene Family/genetics ; Nucleic Acid Hybridization/methods ; Oligonucleotide Array Sequence Analysis ; *Phylogeny ; Plants/*microbiology ; Ralstonia solanacearum/classification/*genetics/pathogenicity ; }, abstract = {In the present study, we investigated the gene distribution among strains of the highly polymorphic plant pathogenic beta-proteobacterium Ralstonia solanacearum, paying particular attention to the status of known or candidate pathogenicity genes. Based on the use of comparative genomic hybridization on a pangenomic microarray for the GMI1000 reference strain, we have defined the conditions that allowed comparison of the repertoires of genes among a collection of 18 strains that are representative of the biodiversity of the R. solanacearum species. This identified a list of 2,690 core genes present in all tested strains. 
As a corollary, a list of 2,338 variable genes within the R. solanacearum species has been defined. The hierarchical clustering based on the distribution of variable genes is fully consistent with the phylotype classification that was previously defined from the nucleotide sequence analysis of four genes. The presence of numerous pathogenicity-related genes in the core genome indicates that R. solanacearum is an ancestral pathogen. The results establish the long coevolution of the two replicons that constitute the bacterial genome. We also demonstrate the clustering of variable genes in genomic islands. Most genomic islands are included in regions with an alternative codon usage, suggesting that they originate from acquisition of foreign genes through lateral gene transfers. Other genomic islands correspond to genes that have the same base composition as core genes, suggesting that they either might be ancestral genes lost by deletion in certain strains or might originate from horizontal gene transfers.}, } @article {pmid16989800, year = {2007}, author = {Yoshida, N and Brahmajosyula, M and Shoji, S and Amanai, M and Perry, AC}, title = {Epigenetic discrimination by mouse metaphase II oocytes mediates asymmetric chromatin remodeling independently of meiotic exit.}, journal = {Developmental biology}, volume = {301}, number = {2}, pages = {464-477}, doi = {10.1016/j.ydbio.2006.08.006}, pmid = {16989800}, issn = {0012-1606}, mesh = {Acetylation ; Animals ; Cell Nucleus/genetics ; Chromatin Assembly and Disassembly/*genetics ; Cytoskeleton/metabolism ; DNA Methylation ; Epigenesis, Genetic/*genetics ; Fertilization ; Genome/genetics ; Histones/metabolism ; Kinetics ; Male ; *Meiosis ; *Metaphase ; Mice ; Mothers ; Oocytes/*cytology/*metabolism ; Spermatozoa/cytology/metabolism ; }, abstract = {In mammalian fertilization, paternal chromatin is exhaustively remodeled, yet the maternal contribution to this process is unknown. 
To address this, we prevented the induction of meiotic exit by spermatozoa and examined sperm chromatin remodeling in metaphase II (mII) oocytes. Methylation of paternal H3-K4 and H3-K9 remained low, unlike maternal H3, although paternal H3-K4 methylation increased in zygotes. Thus, mII cytoplasm can sustain epigenetic asymmetry in a cell-cycle dependent manner. Paternal genomic DNA underwent oocyte-mediated cytosine demethylation and acquired maternally-derived K12-acetylated H4 (AcH4-K12) independently of microtubule assembly and maternal chromatin. AcH4-K12 persisted without typical maturation-associated deacetylation, irrespective of paternal pan-genomic cytosine methylation. Contrastingly, somatic cell nuclei underwent rapid H4 deacetylation; sperm and somatic chromatin exhibited asymmetric AcH4-K12 dynamics simultaneously within the same mII oocyte. Inhibition of somatic histone deacetylation revealed endogenous histone acetyl transferase activity. Oocytes thus specify the histone acetylation status of given nuclei by differentially targeting histone deacetylase and acetyl transferase activities. Asymmetric H4 acetylation during and immediately after fertilization was dispensable for development when both parental chromatin sets were hyperacetylated. 
These studies delineate non-zygotic chromatin remodeling and suggest a powerful model with which to study de novo genomic reprogramming.}, } @article {pmid16942900, year = {2006}, author = {Field, D and Wilson, G and van der Gast, C}, title = {How do we compare hundreds of bacterial genomes?}, journal = {Current opinion in microbiology}, volume = {9}, number = {5}, pages = {499-504}, doi = {10.1016/j.mib.2006.08.008}, pmid = {16942900}, issn = {1369-5274}, mesh = {Bacteria/classification/*genetics/isolation & purification ; Bacterial Physiological Phenomena ; Biological Evolution ; Databases, Genetic ; Ecology ; Genes, Bacterial ; *Genome, Bacterial ; Genomics/trends ; Species Specificity ; }, abstract = {The genomic revolution is fully upon us in 2006 and the pace of discovery is set to accelerate with the emergence of ultra-high-throughput sequencing technologies. Our complete genome collection of bacteria and archaea continues to grow in number and diversity, as genome sequencing is applied to an array of new problems, from the characterization of the pan-genome to the detection of mutation after experimentation and the exploration of microbial communities in unprecedented detail. 
The benefits of large-scale comparative genomic analyses are driving the community to think about how to manage our public collections of genomes in novel ways.}, } @article {pmid16855282, year = {2006}, author = {Le Brigand, K and Russell, R and Moreilhon, C and Rouillard, JM and Jost, B and Amiot, F and Magnone, V and Bole-Feysot, C and Rostagno, P and Virolle, V and Defamie, V and Dessen, P and Williams, G and Lyons, P and Rios, G and Mari, B and Gulari, E and Kastner, P and Gidrol, X and Freeman, TC and Barbry, P}, title = {An open-access long oligonucleotide microarray resource for analysis of the human and mouse transcriptomes.}, journal = {Nucleic acids research}, volume = {34}, number = {12}, pages = {e87}, pmid = {16855282}, issn = {1362-4962}, support = {R01 GM068564/GM/NIGMS NIH HHS/United States ; 1R01GM068564/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; *Databases, Nucleic Acid ; Expressed Sequence Tags ; *Gene Expression Profiling ; Humans ; Internet ; Mice/*genetics/metabolism ; *Oligonucleotide Array Sequence Analysis ; Oligonucleotide Probes/*chemistry ; Transcription, Genetic ; }, abstract = {Two collections of oligonucleotides have been designed for preparing pangenomic human and mouse microarrays. A total of 148,993 and 121,703 oligonucleotides were designed against human and mouse transcripts. Quality scores were created in order to select 25,342 human and 24,109 mouse oligonucleotides. They correspond to: (i) a BLAST-specificity score; (ii) the number of expressed sequence tags matching each probe; (iii) the distance to the 3' end of the target mRNA. Scores were also used to compare in silico the two microarrays with commercial microarrays. The sets described here, called RNG/MRC collections, appear at least as specific and sensitive as those from the commercial platforms. The RNG/MRC collections have now been used by an Anglo-French consortium to distribute more than 3500 microarrays to the academic community. 
Ad hoc identification of tissue-specific transcripts and an approximately 80% correlation with hybridizations performed on Affymetrix GeneChip™ suggest that the RNG/MRC microarrays perform well. This work provides a comprehensive open resource for investigators working on human and mouse transcriptomes, as well as a generic method to generate new microarray collections in other organisms. All information related to these probes, as well as additional information about commercial microarrays have been stored in a freely-accessible database called MEDIANTE.}, } @article {pmid16820057, year = {2006}, author = {Legault, BA and Lopez-Lopez, A and Alba-Casado, JC and Doolittle, WF and Bolhuis, H and Rodriguez-Valera, F and Papke, RT}, title = {Environmental genomics of "Haloquadratum walsbyi" in a saltern crystallizer indicates a large pool of accessory genes in an otherwise coherent species.}, journal = {BMC genomics}, volume = {7}, number = {}, pages = {171}, pmid = {16820057}, issn = {1471-2164}, mesh = {Base Composition/genetics ; Chromosomes, Archaeal/genetics ; DNA, Archaeal/chemistry/genetics ; Genome, Archaeal/*genetics ; Genome, Bacterial/genetics ; *Genomic Library ; Genomics/*methods ; Halobacteriaceae/classification/*genetics ; Molecular Sequence Data ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA/methods ; Sodium Chloride ; Spain ; Water Microbiology ; }, abstract = {BACKGROUND: Mature saturated brine (crystallizers) communities are largely dominated (> 80% of cells) by the square halophilic archaeon "Haloquadratum walsbyi". The recent cultivation of the strain HBSQ001 and the sequencing of its genome allows comparison with the metagenome of this taxonomically simplified environment. Similar studies carried out in other extreme environments have revealed very little diversity in gene content among the cell lineages present.

RESULTS: The metagenome of the microbial community of a crystallizer pond has been analyzed by end sequencing a 2000 clone fosmid library and comparing the sequences obtained with the genome sequence of "Haloquadratum walsbyi". The genome of the sequenced strain was retrieved nearly complete within this environmental DNA library. However, many ORFs that could be ascribed to the "Haloquadratum" metapopulation by common genome characteristics or scaffolding to the strain genome were not present in the specific sequenced isolate. Particularly, three regions of the sequenced genome were associated with multiple rearrangements and the presence of different genes from the metapopulation. Many transposition and phage related genes were found within this pool which, together with the associated atypical GC content in these areas, supports lateral gene transfer mediated by these elements as the most probable genetic cause of this variability. Additionally, these sequences were highly enriched in putative regulatory and signal transduction functions.

CONCLUSION: These results point to a large pan-genome (total gene repertoire of the genus/species) even in this highly specialized extremophile and at a single geographic location. The extensive gene repertoire is what might be expected of a population that exploits a diverse nutrient pool, resulting from the degradation of biomass produced at lower salinities.}, } @article {pmid16638840, year = {2006}, author = {Chicault, C and Toutain, B and Monnier, A and Aubry, M and Fergelot, P and Le Treut, A and Galibert, MD and Mosser, J}, title = {Iron-related transcriptomic variations in CaCo-2 cells, an in vitro model of intestinal absorptive cells.}, journal = {Physiological genomics}, volume = {26}, number = {1}, pages = {55-67}, doi = {10.1152/physiolgenomics.00297.2005}, pmid = {16638840}, issn = {1531-2267}, mesh = {Caco-2 Cells ; Cluster Analysis ; Databases, Genetic ; Ferritins/metabolism ; Gene Expression Profiling/methods ; *Gene Expression Regulation ; Glutathione Transferase/genetics/metabolism ; Hemin/pharmacology ; Humans ; Intestinal Mucosa/drug effects/*metabolism ; Iron/*metabolism ; Metallothionein/genetics/metabolism ; Oligonucleotide Array Sequence Analysis ; RNA, Messenger/metabolism ; Reproducibility of Results ; }, abstract = {Regulation of iron absorption by duodenal enterocytes is essential for the maintenance of homeostasis by preventing iron deficiency or overload. Despite the identification of a number of genes implicated in iron absorption and its regulation, it is likely that further factors remain to be identified. For that purpose, we used a global transcriptomic approach, using the CaCo-2 cell line as an in vitro model of intestinal absorptive cells. Pangenomic screening for variations in gene expression correlating with intracellular iron content allowed us to identify 171 genes. One hundred nine of these genes are clustered into five types of expression profile. 
This is the first time that most of these genes have been associated with iron metabolism. Functional annotation of these five clusters indicates potential links between the immune response, proteolysis processes, and iron depletion. In contrast, iron overload is associated with cellular metabolism, especially that of lipids and glutathione involving redox function and electron transfer.}, } @article {pmid16595783, year = {2006}, author = {Winnepenninckx, V and Lazar, V and Michiels, S and Dessen, P and Stas, M and Alonso, SR and Avril, MF and Ortiz Romero, PL and Robert, T and Balacescu, O and Eggermont, AM and Lenoir, G and Sarasin, A and Tursz, T and van den Oord, JJ and Spatz, A}, title = {Gene expression profiling of primary cutaneous melanoma and clinical outcome.}, journal = {Journal of the National Cancer Institute}, volume = {98}, number = {7}, pages = {472-482}, doi = {10.1093/jnci/djj103}, pmid = {16595783}, issn = {1460-2105}, mesh = {Adolescent ; Adult ; Aged ; Aged, 80 and over ; Biomarkers, Tumor/*analysis/genetics ; Cell Cycle Proteins/*analysis/genetics ; Cell Division ; Child ; Child, Preschool ; Cohort Studies ; DNA-Binding Proteins/analysis ; Disease-Free Survival ; Female ; Follow-Up Studies ; Geminin ; *Gene Expression Profiling ; Gene Expression Regulation, Neoplastic ; Humans ; Immunohistochemistry ; Infant ; Male ; Melanoma/*chemistry/genetics/mortality/*pathology ; Middle Aged ; Minichromosome Maintenance Complex Component 3 ; Minichromosome Maintenance Complex Component 4 ; Minichromosome Maintenance Complex Component 6 ; Multivariate Analysis ; Nuclear Proteins/analysis ; Oligonucleotide Array Sequence Analysis ; Predictive Value of Tests ; Prognosis ; Proportional Hazards Models ; RNA, Complementary/analysis ; Reverse Transcriptase Polymerase Chain Reaction ; Skin Neoplasms/*chemistry/genetics/mortality/*pathology ; Survival Analysis ; Up-Regulation ; alpha Karyopherins/analysis ; }, abstract = {BACKGROUND: Gene expression 
profiling data for human primary cutaneous melanomas are scarce because of the lack of retrospective collections of frozen tumors. To identify differentially expressed genes that may be involved in melanoma progression and prognosis, we investigated the relationship between gene expression profiles and clinical outcome in a cohort of patients with primary melanoma.

METHODS: Labeled complementary RNA (cRNA) from each tissue sample was hybridized to a pangenomic 44K 60-mer oligonucleotide microarray. Class comparison and class prediction analyses were performed to identify genes whose expression in primary melanomas was associated with 4-year distant metastasis-free survival among 58 patients with at least 4 years of follow-up, distant metastasis, or death. Results were validated immunohistochemically at the protein level in 176 independent primary melanomas from patients with a median clinical follow-up of 8.5 years. Survival was analyzed with a Cox multivariable model and stratified log-rank test. All statistical tests were two-sided.

RESULTS: We identified 254 genes that were associated with distant metastasis-free survival of patients with primary melanoma. These 254 genes include genes involved in activating DNA replication origins, such as minichromosome maintenance genes and geminin. Twenty-three of these genes were studied at the protein level; expression of five (MCM4, P = .002; MCM3, P = .030; MCM6, P = .004; KPNA2, P = .021; and geminin, P = .004) was statistically significantly associated with overall survival in the validation set. In a multivariable Cox model adjusted for tumor thickness, ulceration, age, and sex, expression of MCM4 (hazard ratio [HR] of death = 4.04, 95% confidence interval [CI] = 1.39 to 11.76; P = .010) and MCM6 (HR of death = 7.42, 95% CI = 1.99 to 27.64; P = .003) proteins was still statistically significantly associated with overall survival.

CONCLUSION: We identified 254 genes whose expression was associated with metastatic dissemination of cutaneous melanomas. These genes may shed light on the molecular mechanisms underlying poor prognosis in melanoma patients.}, } @article {pmid16575188, year = {2006}, author = {Vigé, A and Gallou-Kabani, C and Gross, MS and Fabre, A and Junien, C and Jais, JP}, title = {An oligonucleotide microarray for mouse imprinted genes profiling.}, journal = {Cytogenetic and genome research}, volume = {113}, number = {1-4}, pages = {253-261}, doi = {10.1159/000090840}, pmid = {16575188}, issn = {1424-859X}, mesh = {Animals ; DNA, Complementary/genetics ; Energy Intake ; *Gene Expression Profiling ; *Genomic Imprinting ; Mice ; Mice, Inbred C57BL ; Models, Animal ; Models, Genetic ; Nucleic Acid Hybridization ; *Oligonucleotide Array Sequence Analysis ; Polymerase Chain Reaction/methods ; }, abstract = {Genomic imprinting is an epigenetic phenomenon unique to mammals that causes some genes to be expressed according to their parental origin. It results in developmental asymmetry in the function of the parental genomes. We describe here a method for the profiling of imprinted genes based on the development of a mouse imprinting microchip containing oligonucleotides corresponding to 493 genes, including most of the known imprinted genes (IG = 63), genes involved in epigenetic processes (EPI = 15), in metabolism (= 147), in obesity (= 10) and in neurotransmission (= 256) and housekeeping reference genes (= 2). This custom oligonucleotide microarray has been constructed to make data analysis and handling more manageable than pangenomic microarrays. As a proof of concept we present the differential expression of these 493 genes in different tissues (liver, placenta, embryo) of C57BL6/J mice fed different diets. Appropriate experimental strategies and statistical tools were defined at each step of the data analysis process with regard to the different sources of constraints. 
Data were confirmed by expression analyses based on quantitative real-time PCR. These oligochips should make it possible to increase our understanding of the involvement of imprinted genes in the timing of expression programs, tissue by tissue, stage by stage, in response to nutrients, lifestyles and other as yet unknown critical environmental factors in a variety of physiopathological situations, and in animals of different strains, ages and sexes. The use of oligonucleotides makes it possible to expand this microchip to include the increasing number of imprinted genes discovered.}, } @article {pmid16506959, year = {2006}, author = {Taleb, S and Van Haaften, R and Henegar, C and Hukshorn, C and Cancello, R and Pelloux, V and Hanczar, B and Viguerie, N and Langin, D and Evelo, C and Zucker, J and Clément, K and Saris, WH}, title = {Microarray profiling of human white adipose tissue after exogenous leptin injection.}, journal = {European journal of clinical investigation}, volume = {36}, number = {3}, pages = {153-163}, doi = {10.1111/j.1365-2362.2006.01614.x}, pmid = {16506959}, issn = {0014-2972}, mesh = {Adipocytes/immunology ; Adipose Tissue/*drug effects/immunology ; Adult ; Cytokines/genetics ; DNA, Circular/analysis ; Gene Expression Regulation/genetics ; Humans ; Inflammation/genetics/immunology ; Injections ; Leptin/administration & dosage/*analogs & derivatives/blood/genetics ; Male ; Oligonucleotide Array Sequence Analysis/methods ; Polyethylene Glycols/*administration & dosage ; RNA, Messenger/analysis ; Recombinant Proteins/*administration & dosage ; Reverse Transcriptase Polymerase Chain Reaction/methods ; }, abstract = {BACKGROUND: Leptin is a secreted adipocyte hormone that plays a key role in the regulation of body weight homeostasis. The leptin effect on human white adipose tissue (WAT) is still debated.

OBJECTIVE: The aim of this study was to assess whether the administration of polyethylene glycol-leptin (PEG-OB) in a single supraphysiological dose has transcriptional effects on genes of WAT and to identify its target genes and functional pathways in WAT.

MATERIALS AND METHODS: Blood samples and WAT biopsies were obtained from 10 healthy nonobese men before treatment and 72 h after the PEG-OB injection, leading to an approximate 809-fold increase in circulating leptin. The WAT gene expression profile before and after the PEG-OB injection was compared using pangenomic microarrays. Functional gene annotations based on the gene ontology of the PEG-OB regulated genes were performed using both an 'in house' automated procedure and GenMAPP (Gene Microarray Pathway Profiler), designed for viewing and analyzing gene expression data in the context of biological pathways.

RESULTS: Statistical analysis of microarray data revealed that PEG-OB had a major down-regulated effect on WAT gene expression, as we obtained 1,822 and 100 down- and up-regulated genes, respectively. Microarray data were validated using reverse transcription quantitative PCR. Functional gene annotations of PEG-OB regulated genes revealed that the functional class related to immunity and inflammation was among the most mobilized PEG-OB pathway in WAT. These genes are mainly expressed in the cell of the stroma vascular fraction in comparison with adipocytes.

CONCLUSION: Our observations support the hypothesis that leptin could act on WAT, particularly on genes related to inflammation and immunity, which may suggest a novel leptin target pathway in human WAT.}, } @article {pmid16255634, year = {2005}, author = {Sheils, O}, title = {Molecular classification and biomarker discovery in papillary thyroid carcinoma.}, journal = {Expert review of molecular diagnostics}, volume = {5}, number = {6}, pages = {927--946}, doi = {10.1586/14737159.5.6.927}, pmid = {16255634}, issn = {1744-8352}, mesh = {Adenocarcinoma, Papillary/*classification/etiology/*metabolism/pathology ; Animals ; Biomarkers, Tumor/*classification/*metabolism ; Cell Cycle ; Diabetes Mellitus, Type 1/immunology ; Humans ; *Molecular Diagnostic Techniques ; Thyroid Neoplasms/*classification/etiology/*metabolism/pathology ; }, abstract = {Papillary thyroid carcinoma (PTC) is the most common thyroid malignancy, with an incidence of approximately 22,000 cases in 2004 in the USA. Incidence is increasing, with a global estimate of half a million new cases this year. PTC is found in a variety of morphologic variants, usually grows slowly and is clinically indolent, although rare, aggressive forms with local invasion or distant metastases can occur. In recent years, thyroid cancer has been at the forefront of molecular pathology as a result of the consequences of the Chernobyl disaster and the recognition of the role of Ret/PTC rearrangements in PTC. Nonetheless, the molecular pathogenesis of this disease remains poorly characterized. In the clinical setting, benign thyroid nodules are far more frequent, and distinguishing between them and malignant nodules is a common diagnostic problem. It is estimated that 5-10\% of people will develop a clinically significant thyroid nodule during their lifetime. 
Although the introduction of fine-needle aspiration has made PTC identification more reliable, clinicians often have to make decisions regarding patient care on the basis of equivocal information. Thus, the existing diagnostic tools available to distinguish benign from malignant neoplasms are not always reliable. This article will critically evaluate recently described putative biomarkers and their potential future role for diagnostic purposes in fine-needle aspiration cytology samples. It will highlight the evolution of our understanding of the molecular biology of PTC, from a narrow focus on specific molecular lesions such as Ret/PTC rearrangements to a pan-genomic approach.}, } @article {pmid16185861, year = {2005}, author = {Medini, D and Donati, C and Tettelin, H and Masignani, V and Rappuoli, R}, title = {The microbial pan-genome.}, journal = {Current opinion in genetics \& development}, volume = {15}, number = {6}, pages = {589--594}, doi = {10.1016/j.gde.2005.09.006}, pmid = {16185861}, issn = {0959-437X}, mesh = {Animals ; Bacteria/genetics ; Bacterial Physiological Phenomena ; *Evolution, Molecular ; *Genome, Bacterial/physiology ; Genomics ; Humans ; }, abstract = {A decade after the beginning of the genomic era, the question of how genomics can describe a bacterial species has not been fully addressed. Experimental data have shown that in some species new genes are discovered even after sequencing the genomes of several strains. Mathematical modeling predicts that new genes will be discovered even after sequencing hundreds of genomes per species. Therefore, a bacterial species can be described by its pan-genome, which is composed of a "core genome" containing genes present in all strains, and a "dispensable genome" containing genes present in two or more strains and genes unique to single strains. 
Given that the number of unique genes is vast, the pan-genome of a bacterial species might be orders of magnitude larger than any single genome.}, } @article {pmid16172379, year = {2005}, author = {Tettelin, H and Masignani, V and Cieslewicz, MJ and Donati, C and Medini, D and Ward, NL and Angiuoli, SV and Crabtree, J and Jones, AL and Durkin, AS and Deboy, RT and Davidsen, TM and Mora, M and Scarselli, M and Margarit y Ros, I and Peterson, JD and Hauser, CR and Sundaram, JP and Nelson, WC and Madupu, R and Brinkac, LM and Dodson, RJ and Rosovitz, MJ and Sullivan, SA and Daugherty, SC and Haft, DH and Selengut, J and Gwinn, ML and Zhou, L and Zafar, N and Khouri, H and Radune, D and Dimitrov, G and Watkins, K and O'Connor, KJ and Smith, S and Utterback, TR and White, O and Rubens, CE and Grandi, G and Madoff, LC and Kasper, DL and Telford, JL and Wessels, MR and Rappuoli, R and Fraser, CM}, title = {Genome analysis of multiple pathogenic isolates of {Streptococcus} agalactiae: implications for the microbial ``pan-genome''.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {102}, number = {39}, pages = {13950--13955}, pmid = {16172379}, issn = {0027-8424}, support = {R56 AI038424/AI/NIAID NIH HHS/United States ; R21 AI042940/AI/NIAID NIH HHS/United States ; U01-AI50909/AI/NIAID NIH HHS/United States ; R01 AI042940/AI/NIAID NIH HHS/United States ; AI38424/AI/NIAID NIH HHS/United States ; AI42940/AI/NIAID NIH HHS/United States ; R01 AI038424/AI/NIAID NIH HHS/United States ; R01 AI022498-18/AI/NIAID NIH HHS/United States ; }, mesh = {Amino Acid Sequence ; Bacterial Capsules/genetics ; Base Sequence ; Gene Expression ; Genes, Bacterial ; Genetic Variation ; *Genome, Bacterial ; Molecular Sequence Data ; Phylogeny ; Sequence Alignment ; Sequence Analysis, DNA ; Streptococcus agalactiae/*classification/*genetics/pathogenicity ; Virulence/genetics ; }, abstract = {The development of efficient and inexpensive genome 
sequencing methods has revolutionized the study of human bacterial pathogens and improved vaccine design. Unfortunately, the sequence of a single genome does not reflect how genetic variability drives pathogenesis within a bacterial species and also limits genome-wide screens for vaccine candidates or for antimicrobial targets. We have generated the genomic sequence of six strains representing the five major disease-causing serotypes of Streptococcus agalactiae, the main cause of neonatal infection in humans. Analysis of these genomes and those available in databases showed that the S. agalactiae species can be described by a pan-genome consisting of a core genome shared by all isolates, accounting for approximately 80\% of any single genome, plus a dispensable genome consisting of partially shared and strain-specific genes. Mathematical extrapolation of the data suggests that the gene reservoir available for inclusion in the S. agalactiae pan-genome is vast and that unique genes will continue to be identified even after sequencing hundreds of genomes.}, } @article {pmid15990697, year = {2005}, author = {Tetz, VV}, title = {The pangenome concept: a unifying view of genetic information.}, journal = {Medical science monitor : international medical journal of experimental and clinical research}, volume = {11}, number = {7}, pages = {HY24--HY29}, pmid = {15990697}, issn = {1234-1010}, mesh = {Animals ; *Biological Evolution ; *Gene Transfer, Horizontal ; *Genome ; Humans ; *Recombination, Genetic ; }, abstract = {A way of viewing the genetic information in all organisms on Earth as constituents of the Pangenome is proposed. According to this concept, the Pangenome is the common (collective) genetic system of all living organisms, the organic molecules and their complexes (DNA- and RNA-containing viruses, plasmids, transposons, insertion sequences) involved in the storage and transmission processes of genetic information. Pangenomic stability and variability are discussed. 
This concept alerts to the inherent fluidity and transmissibility of DNA among organisms of all types, including horizontal gene transfer between closely related and formally unrelated macro- and microorganisms. The roles of death and of all known food chains as universal ways of gene distribution among different organisms are discussed. The contribution of bacteria and viruses in maintaining the circulation of genes within the Pangenome is presented. This concept implies that newly emerging genes are not bound to disappear together with the death of an organism or the extinction of a species and microorganisms are the main pool of genes. Some negative aspects of the intervention of molecular genetics, biotechnology, and ecology, including the spread of transgenic plants and animals, are summarized. It is shown that this concept may be used in medicine for the prognosis of an epidemic situation, particularly newly spreading pathogens, and for the development of new methods for the prophylaxis and early diagnosis of oncologic diseases. 
This concept can also help to find promising approaches to the discovery of drugs with novel principles of action.}, } @article {pmid15948210, year = {2005}, author = {Li-Sucholeiki, XC and Hu, G and Perls, T and Tomita-Mitchell, A and Thilly, WG}, title = {Scanning the beta-globin gene for mutations in large populations by denaturing capillary and gel electrophoresis.}, journal = {Electrophoresis}, volume = {26}, number = {13}, pages = {2531--2538}, doi = {10.1002/elps.200410431}, pmid = {15948210}, issn = {0173-0835}, mesh = {Adult ; Black or African American/genetics ; Asian People/genetics ; Child ; China ; DNA Mutational Analysis/*methods ; Electrophoresis, Capillary/*methods ; Electrophoresis, Polyacrylamide Gel/*methods ; Gene Frequency ; Genetics, Population/methods ; Globins/*genetics ; Humans ; *Nucleic Acid Denaturation ; Open Reading Frames/genetics ; Polymerase Chain Reaction ; RNA Splice Sites/genetics ; }, abstract = {Separation of mutant from nonmutant DNA sequences of 100 bp may be accomplished by using defined denaturing conditions of chemical denaturant and/or elevated temperature during electrophoresis on either polyacrylamide slab gels (denaturing gradient gel electrophoresis, DGGE) or capillary gels (constant denaturant capillary electrophoresis, CDCE). In analysis of mutant directly from a polymerase chain reaction (PCR) product mixture, both have detection sensitivities of approximately 1\%. CDCE that facilitates an intermediate mutant enrichment step permits detection of mutants at fractions as low as 2 x 10(-6). Here we report the successful application of both approaches to scan for mutations of the human beta-globin gene (HBB) in two human population samples of approximately 5000 persons in the HBB. Using DGGE, the coding region and flanking intronic splice sites of HBB were scanned in a population of 4949 Han Chinese individuals in pool sizes of 48 individual DNA samples. 
Four point mutations ranging in mutant frequency from 0.5 to 0.0002 were identified. Using CDCE with a mutant enrichment step, these same sequences were scanned in a population of 5028, predominantly African-American juveniles (<9 years) as a single pooled DNA sample. Three point mutations were identified ranging in mutant frequency from 0.13 to 0.0005. This study shows that both the DGGE/small pool and the CDCE/large pool approaches offer the means to define the fine structure map of genetic variation in large population samples, and with appropriately engineered facilities to provide high throughput, should be useful in pangenomic scans to discover genes carrying casual mutations for common diseases.}, } @article {pmid15883039, year = {2005}, author = {Park, KC and Park, EJ and Kim, ER and Kim, Y and Chung, SH and Cho, BW and Kim, S and Jin, M}, title = {Therapeutic effects of {PG201}, an ethanol extract from herbs, through cartilage protection on collagenase-induced arthritis in rabbits.}, journal = {Biochemical and biophysical research communications}, volume = {331}, number = {4}, pages = {1469--1477}, doi = {10.1016/j.bbrc.2005.04.030}, pmid = {15883039}, issn = {0006-291X}, mesh = {Animals ; Arthritis, Experimental/*drug therapy ; Base Sequence ; Cartilage, Articular/*drug effects ; Collagenases/*administration & dosage ; DNA Primers ; Herbal Medicine ; Inflammation Mediators/metabolism ; Plant Extracts/pharmacology/*therapeutic use ; Rabbits ; Reverse Transcriptase Polymerase Chain Reaction ; }, abstract = {In order to assess the therapeutic effects of PG201 (an ethanol extract from herbs) on osteoarthritis, we investigated whether PG201 could suppress the disease progression of collagenase-induced arthritis (CNIA) in rabbits. The right knees of rabbits were injected intra-articularly with collagenase, and the rabbits were orally treated with distilled water (DW), PG201 (200 mg/kg) or diclofenac (DCF, 10 mg/kg) once a day for 8 weeks. 
Oral administration of PG201 significantly suppressed the stiffness and bone space narrowing. Cartilage erosion and GAG release (p<0.01) were considerably reduced in the knee joints. As well, the mRNA expression of matrix degradation enzymes including MMP-1, -3, and -13 was decreased. On the contrary, the concentrations of TIMP-2 in the synovial fluids were considerably amplified in the PG201 treated group (p<0.01), but not in the DCF treated group. The pathologic inflammatory molecules involved in cartilage destruction such as IL-1beta, PGE2, and NO were also diminished by PG201. Taken together, these results indicate that PG201 has therapeutic effects on CNIA through the prominent protection of cartilage. PG201 indeed has great potential as a form of treatment for osteoarthritis.}, } @article {pmid15833833, year = {2005}, author = {Irving, JA and Bloodworth, L and Bown, NP and Case, MC and Hogarth, LA and Hall, AG}, title = {Loss of heterozygosity in childhood acute lymphoblastic leukemia detected by genome-wide microarray single nucleotide polymorphism analysis.}, journal = {Cancer research}, volume = {65}, number = {8}, pages = {3053--3058}, doi = {10.1158/0008-5472.CAN-04-2604}, pmid = {15833833}, issn = {0008-5472}, mesh = {Child ; Child, Preschool ; Genome, Human ; Humans ; Infant ; *Loss of Heterozygosity ; Male ; Microsatellite Repeats/genetics ; Oligonucleotide Array Sequence Analysis ; Polymorphism, Single Nucleotide ; Precursor Cell Lymphoblastic Leukemia-Lymphoma/drug therapy/*genetics/pathology ; Recurrence ; }, abstract = {Loss of heterozygosity (LOH) is detectable in many forms of malignancy, including leukemia, using techniques such as microsatellite analysis and comparative genomic hybridization. However, these techniques are laborious and require the use of relatively large amounts of DNA if the whole genome is to be examined. 
Here we describe the use of oligonucleotide microarrays to characterize single nucleotide polymorphisms (SNPs) in lymphoblasts isolated from children with acute lymphoblastic leukemia for the pan-genomic mapping of LOH with a resolution of 100 to 200 kb. Results were compared with DNA obtained during remission and on relapse. Abnormalities were seen in 8 of 10 cases. The two cases with no abnormalities and one case that showed identical changes at relapse and presentation remain in remission 1 to 9 years following retreatment. The remaining seven patients died following relapse. In four cases, LOH was only detectable at relapse suggesting that progressive LOH may be a cause of disease progression and/or drug resistance. This was supported by detailed analysis of one case in which LOH involving the glucocorticoid receptor was associated with mutation of the remaining allele. The most frequent abnormality detected involved chromosome 9p. In each of the four cases where this was observed LOH included the INK4 locus. In three of the four cases, INK4 loss was only observed at relapse, suggesting that this abnormality may be commonly associated with treatment failure. 
These observations show that SNP array analysis is a powerful new tool for the analysis of allelic imbalance in leukemic blasts.}, } @article {pmid15809371, year = {2005}, author = {Royer, A and van Veen, TA and Le Bouter, S and Marionneau, C and Griol-Charhbili, V and Léoni, AL and Steenman, M and van Rijen, HV and Demolombe, S and Goddard, CA and Richer, C and Escoubet, B and Jarry-Guichard, T and Colledge, WH and Gros, D and de Bakker, JM and Grace, AA and Escande, D and Charpentier, F}, title = {Mouse model of {SCN5A}-linked hereditary {Lenègre's} disease: age-related conduction slowing and myocardial fibrosis.}, journal = {Circulation}, volume = {111}, number = {14}, pages = {1738--1746}, doi = {10.1161/01.CIR.0000160853.19867.61}, pmid = {15809371}, issn = {1524-4539}, mesh = {Age Factors ; Animals ; Cardiomegaly ; Connexins/analysis ; Disease Models, Animal ; Electrocardiography ; Fibrosis/genetics ; Gene Expression Regulation ; Genetic Diseases, Inborn ; Heart Block/etiology/*genetics ; Heart Conduction System/*physiopathology ; Heart Ventricles/pathology ; Heterozygote ; Mice ; Mice, Knockout ; NAV1.5 Voltage-Gated Sodium Channel ; Sodium Channels/*genetics ; Transcription Factors/analysis ; }, abstract = {BACKGROUND: We have previously linked hereditary progressive cardiac conduction defect (hereditary Lenègre's disease) to a loss-of-function mutation in the gene encoding the main cardiac Na+ channel, SCN5A. In the present study, we investigated heterozygous Scn5a-knockout mice (Scn5a+/- mice) as a model for hereditary Lenègre's disease.

METHODS AND RESULTS: In Scn5a+/- mice, surface ECG recordings showed age-related lengthening of the P-wave and PR- and QRS-interval duration, coinciding with previous observations in patients with Lenègre's disease. Old but not young Scn5a+/- mice showed extensive fibrosis of their ventricular myocardium, a feature not seen in wild-type animals. In old Scn5a+/- mice, fibrosis was accompanied by heterogeneous expression of connexin 43 and upregulation of hypertrophic markers, including beta-MHC and skeletal alpha-actin. Global connexin 43 expression as assessed with Western blots was similar to wild-type mice. Decreased connexin 40 expression was seen in the atria. Using pangenomic microarrays and real-time PCR, we identified in Scn5a+/- mice an age-related upregulation of genes encoding Atf3 and Egr1 transcription factors. Echocardiography and hemodynamic investigations demonstrated conserved cardiac function with aging and lack of ventricular hypertrophy.

CONCLUSIONS: We conclude that Scn5a+/- mice convincingly recapitulate the Lenègre's disease phenotype, including progressive impairment with aging of atrial and ventricular conduction associated with myocardial rearrangements and fibrosis. Our work provides the first demonstration that a monogenic ion channel defect can progressively lead to myocardial structural anomalies.}, } @article {pmid15743511, year = {2005}, author = {Lerebours, F and Bieche, I and Lidereau, R}, title = {Update on inflammatory breast cancer.}, journal = {Breast cancer research : BCR}, volume = {7}, number = {2}, pages = {52--58}, pmid = {15743511}, issn = {1465-542X}, mesh = {Animals ; Biomarkers, Tumor ; Breast Neoplasms/drug therapy/*genetics/*pathology/radiotherapy/surgery ; Cell Proliferation ; Disease Models, Animal ; Female ; Humans ; *Inflammation ; Neovascularization, Pathologic ; Prognosis ; Receptors, Estrogen/analysis ; }, abstract = {Inflammatory breast cancer (IBC) is both the least frequent and the most severe form of epithelial breast cancer. The diagnosis is based on clinical inflammatory signs and is reinforced by pathological findings. Significant progress has been made in the management of IBC in the past 20 years. Yet survival among IBC patients is still only one-half that among patients with non-IBC. Identification of the molecular determinants of IBC would probably lead to more specific treatments and to improved survival. In the present article we review recent advances in the molecular pathogenesis of IBC. A more comprehensive view will probably be obtained by pan-genomic analysis of human IBC samples, and by functional in vitro and in vivo assays. 
These approaches may offer better patient outcome in the near future.}, } @article {pmid15733746, year = {2005}, author = {Viguerie, N and Poitou, C and Cancello, R and Stich, V and Clément, K and Langin, D}, title = {Transcriptomics applied to obesity and caloric restriction.}, journal = {Biochimie}, volume = {87}, number = {1}, pages = {117--123}, doi = {10.1016/j.biochi.2004.12.011}, pmid = {15733746}, issn = {0300-9084}, mesh = {Adipose Tissue/metabolism/*physiology ; Animals ; *Caloric Restriction ; Energy Metabolism ; Gene Expression Profiling ; Humans ; Inflammation/physiopathology ; Mice ; Obesity/*diet therapy ; Proteomics ; *Weight Loss ; }, abstract = {Caloric restriction still remains the most efficient way to promote weight loss. Deciphering the molecular basis of adaptation to energy restriction is critical for the tailoring of new therapeutic strategies. This review focuses on the recent input of gene profiling on adipose tissue in obesity pathogenesis and on the new insights on adaptations occurring during very low caloric diet (VLCD) in humans. Hypocaloric diets improve a wide range of metabolic parameters including lipolytic efficiency, insulin sensitivity, and inflammatory profile. In the subcutaneous white adipose tissue (scWAT) the VLCD induced a decrease in the mRNA levels for the antilipolytic alpha2-adrenergic receptor associated with changes in catecholamine-induced adipocyte lipolytic capacity. The improvement in insulin sensitivity was not associated with a change in subcutaneous adipose tissue adiponectin gene expression or in its plasma level, suggesting that adiponectin is not involved in the regulation of VLCD-induced improvement of insulin sensitivity and that there is a small contribution of subcutaneous adipose tissue to plasma adiponectin levels. Pangenomic microarray studies in human scWAT revealed that a panel of inflammatory markers and acute phase reactants were over expressed in obese compared to lean subjects. 
Caloric restriction improved the inflammatory profile of obese subjects through a decrease of pro-inflammatory factors and an increase of anti-inflammatory molecules. These genes were mostly expressed in the stroma vascular fraction of the adipose tissue. Specific cell-type isolation and immunohistochemistry demonstrated that monocyte/macrophage lineage cells were responsible for the expression of both mRNA and protein inflammatory markers. The acute phase proteins serum amyloid A was highly expressed in mature adipocytes from obese subjects. Caloric restriction decreased both serum amyloid mRNA and circulating levels. Obesity now clearly appears as chronic low-grade inflammation state. Modulation of the inflammatory pathways may represent new therapeutic targets for the treatment of obesity-related complications.}, } @article {pmid15607966, year = {2004}, author = {Valsesia-Wittmann, S and Magdeleine, M and Dupasquier, S and Garin, E and Jallas, AC and Combaret, V and Krause, A and Leissner, P and Puisieux, A}, title = {Oncogenic cooperation between {H-Twist} and {N-Myc} overrides failsafe programs in cancer cells.}, journal = {Cancer cell}, volume = {6}, number = {6}, pages = {625--630}, doi = {10.1016/j.ccr.2004.09.033}, pmid = {15607966}, issn = {1535-6108}, mesh = {Animals ; Apoptosis/physiology ; Blotting, Northern ; Blotting, Western ; Caspase 3 ; Caspase 8 ; Caspases/metabolism ; Cell Cycle Proteins/metabolism ; Cell Line, Tumor ; Cell Transformation, Neoplastic/*genetics ; Cyclin-Dependent Kinase Inhibitor p16/genetics ; Cyclin-Dependent Kinase Inhibitor p21 ; Fibroblasts/pathology ; Flow Cytometry ; Gene Amplification ; Gene Expression Regulation, Neoplastic/genetics ; Humans ; In Situ Nick-End Labeling ; Mice ; Neuroblastoma/genetics/metabolism/*pathology ; Nuclear Proteins/genetics/*metabolism ; Oligonucleotide Array Sequence Analysis ; Proto-Oncogene Proteins c-myc/genetics/*metabolism ; Proto-Oncogenes/genetics ; RNA, Small Interfering/genetics ; Reverse 
Transcriptase Polymerase Chain Reaction ; Transcription Factors/genetics/*metabolism ; Transfection ; Tumor Stem Cell Assay ; Tumor Suppressor Protein p14ARF/genetics ; Tumor Suppressor Protein p53/metabolism ; Twist-Related Protein 1 ; }, abstract = {N-Myc oncogene amplification is a frequent event in neuroblastoma and is strongly correlated with advanced disease stage and treatment failure. Similarly to c-Myc oncogenic activation, N-Myc deregulation promotes both cell proliferation and p53-dependent apoptosis by sensitizing cells to a variety of insults. Intriguingly, p53 mutations are uncommon in neuroblastomas, strongly suggesting that an alternative cooperating event circumvents this safeguard against oncogene-driven neoplasia. By performing a pangenomic cDNA microarray analysis, we demonstrate that human Twist is constantly overexpressed in N-Myc-amplified neuroblastomas. H-Twist overexpression is responsible for the inhibition of the ARF/p53 pathway involved in the Myc-dependent apoptotic response. 
This oncogenic cooperation of two key regulators of embryogenesis causes cell transformation and malignant outgrowth.}, } @article {pmid15598879, year = {2005}, author = {Moreilhon, C and Gras, D and Hologne, C and Bajolet, O and Cottrez, F and Magnone, V and Merten, M and Groux, H and Puchelle, E and Barbry, P}, title = {Live {Staphylococcus} aureus and bacterial soluble factors induce different transcriptional responses in human airway cells.}, journal = {Physiological genomics}, volume = {20}, number = {3}, pages = {244--255}, doi = {10.1152/physiolgenomics.00135.2004}, pmid = {15598879}, issn = {1531-2267}, mesh = {Cell Culture Techniques ; Cell Extracts/pharmacology ; Computational Biology ; DNA, Complementary/genetics ; Humans ; *Oligonucleotide Array Sequence Analysis ; RNA/genetics ; Respiratory Mucosa/drug effects/*microbiology/*physiology ; Staphylococcus aureus/*physiology ; *Transcription, Genetic/drug effects ; }, abstract = {To characterize the response of respiratory epithelium to infection by Staphylococcus aureus (S. aureus), human airway cells were incubated for 1 to 24 h with a supernatant of a S. aureus culture (bacterial supernatant), then profiled with a pangenomic DNA microarray. Because an upregulation of many genes was noticed around 3 h, three independent approaches were then used to characterize the host response to a 3-h contact either with bacterial supernatant or with live bacteria: 1) a DNA microarray containing 4,200 sequence-verified probes, 2) a semiquantitative RT-PCR with a set of 537 pairs of validated primers, or 3) ELISA assay of IL-8, IL-6, TNFalpha, and PGE(2). Among others, Fos, Jun, and EGR-1 were upregulated by the bacterial supernatant and by live bacteria. Increased expression of bhlhb2 and Mig-6, promoter regions which harbor HIF responding elements, was explained by an increased expression of the HIF-1alpha protein. 
Activation of the inducible form of cyclooxygenase, COX-2, and of the interleukins IL-1, IL-6, and IL-8, as well as of the NF-kappaB pathway, was observed preferentially in cells in contact with bacterial supernatant. Early infection was characterized by an upregulation of anti-apoptotic genes and a downregulation of pro-apoptotic genes. This correlated with a necrotic, rather than apoptotic cell death. Overall, this first global description of an airway epithelial infection by S. aureus demonstrates a larger global response to bacterial supernatant (in term of altered genes and variation factors) than to exponentially growing live bacteria.}, } @article {pmid14578175, year = {2003}, author = {Perner, S and Brüderlein, S and Hasel, C and Waibel, I and Holdenried, A and Ciloglu, N and Chopurian, H and Nielsen, KV and Plesch, A and Högel, J and Möller, P}, title = {Quantifying telomere lengths of human individual chromosome arms by centromere-calibrated fluorescence in situ hybridization and digital imaging.}, journal = {The American journal of pathology}, volume = {163}, number = {5}, pages = {1751--1756}, pmid = {14578175}, issn = {0002-9440}, mesh = {Adolescent ; Adult ; Age Factors ; Aged ; Blotting, Southern ; Calibration ; Centromere ; Child ; Child, Preschool ; Female ; Humans ; Image Processing, Computer-Assisted/*methods ; In Situ Hybridization, Fluorescence/*methods ; Infant ; Infant, Newborn ; Male ; Middle Aged ; Sex Factors ; Telomere/*ultrastructure ; }, abstract = {Telomere length analysis has aroused considerable interest in biology and oncology. However, most published data are pan-genomic Southern-blot-based estimates. We developed T/C-FISH (telomere/centromere-FISH), allowing precise measurement of individual telomeres at every single chromosome arm. Metaphase preparations are co-hybridized with peptide nucleic acid probes for telomeric sequences and the chromosome 2 centromere serving as internal reference. 
Metaphase images are captured and karyotyped using dedicated software. A software module determines the absolute integrated fluorescence intensities of the p- and q-telomeres of each chromosome and the reference signal. Normalized data are derived by calculating the ratio of absolute telomere and reference signal intensities, and descriptive statistics are calculated. T/C-FISH detects even small differences in telomere length. Using T/C-FISH we have discovered an epigenetic process occurring in the human male postzygote or early embryo: in umbilical cord blood lymphocytes, telomeres on male Xqs are around 1100 bp shorter than female Xqs.}, } @article {pmid14521062, year = {2003}, author = {Tets, VV}, title = {[Pangenome].}, journal = {Tsitologiia}, volume = {45}, number = {5}, pages = {526--531}, pmid = {14521062}, issn = {0041-3771}, mesh = {Animals ; *Biological Evolution ; *Genetic Variation ; *Genome ; }, abstract = {A conception of general interaction between genetic information of different organisms is formulated. A new concept "Pangenom" is proposed for description of the total sum of genes of living organisms and viruses. Features of the Pangenom structure and processes controlling its saving and variability are discussed. 
It is demonstrated that this conception may be used in medicine for the aims of prognosis of epidemic situation, in particular, of newly spreading pathogen development of new methods of prophylaxis and diagnostics of oncological diseases, and also of the evaluation of possible consequences of molecular genetics and gene engineering interventions.}, } @article {pmid12773709, year = {2003}, author = {Jin, M and Jeon, H and Jung, HJ and Kim, B and Shin, SS and Choi, JJ and Lee, JK and Kang, CY and Kim, S}, title = {Enhancement of repopulation and hematopoiesis of bone marrow cells in irradiated mice by oral administration of {PG101}, a water-soluble extract from {Lentinus} lepideus.}, journal = {Experimental biology and medicine (Maywood, N.J.)}, volume = {228}, number = {6}, pages = {759--766}, doi = {10.1177/153537020322800616}, pmid = {12773709}, issn = {1535-3702}, mesh = {Administration, Oral ; Animals ; Bone Marrow Cells/*cytology/*drug effects/radiation effects ; Cell Differentiation/drug effects ; Cell Division/drug effects ; Cells, Cultured ; Colony-Forming Units Assay ; Cytokines/drug effects/metabolism ; Female ; Flow Cytometry/methods ; Hematopoiesis/*drug effects/radiation effects ; Lentinula/*chemistry ; Mice ; Mice, Inbred BALB C ; Plant Extracts/*pharmacology ; Solubility ; }, abstract = {PG101 is a water-soluble extract from Lentinus lepideus. It is a potential biological response modifier that activates selective cytokines in vitro, mainly by controlling cellular transcription factor NF-kappaB. Effects of PG101 were tested on bone marrow cells in irradiated mice. Mice were irradiated with a dose of 6 Gy and were given PG101 by gavages daily for 24 days. In PG101-treated mice, the number of colony-forming cells, including colony-forming units (CFU)-granulocytes/macrophages (GM) and erythroid burst-forming units (BFU-E), were increased to almost the levels seen in nonirradiated control as early as 8 days after irradiation. 
Two-color flow cytometric analysis using antibodies to ER-MP12 and ER-MP20 suggested that in the bone marrow cell population, PG101 increased the number of granulocytes (ER-MP12(-)20(med)) and myeloid progenitors (ER-MP12(+)20(+)). Analysis of surface c-Kit and Gr-1 proteins in bone marrow cells indicated that PG101 might induce differentiation of progenitor cells to granulocytes and/or proliferation of the committed cells. Lastly, oral administration of PG101 highly increased serum levels of GM-CSF, IL-6, and IL-1beta. Interestingly, the level of TNF-alpha was elevated by irradiation in control mice, but was maintained at the background level in PG101-treated mice, suggesting that PG101 might effectively suppress TNF-alpha-related pathologic conditions. Our results strongly suggest the great potential of PG101 as an immune enhancer during radiotherapy and/or chemotherapy.}, }

@article{pmid12773708,
  year     = {2003},
  author   = {Jin, M. and Jung, H. J. and Choi, J. J. and Jeon, H. and Oh, J. H. and Kim, B. and Shin, S. S. and Lee, J. K. and Yoon, K. and Kim, S.},
  title    = {Activation of selective transcription factors and cytokines by water-soluble extract from {Lentinus} lepideus.},
  journal  = {Experimental biology and medicine (Maywood, N.J.)},
  volume   = {228},
  number   = {6},
  pages    = {749--758},
  doi      = {10.1177/153537020322800615},
  pmid     = {12773708},
  issn     = {1535-3702},
  mesh     = {Animals ; Cytokines/analysis/genetics/*metabolism ; Dose-Response Relationship, Drug ; Electrophoretic Mobility Shift Assay ; Genes, Reporter/genetics ; Humans ; Lentinula/*chemistry ; Leukocytes, Mononuclear/cytology/*drug effects/metabolism ; NF-kappa B/antagonists & inhibitors/*metabolism ; Plant Extracts/*pharmacology/toxicity ; Plasmids/genetics ; Proline/*analogs & derivatives/pharmacology ; Rats ; Solubility ; Thiocarbamates/pharmacology ; Time Factors ; Transfection ; Tumor Cells, Cultured ; },
  abstract = {We isolated a water-soluble extract, PG101, from cultured mycelia of Lentinus lepideus.
Treatment of human peripheral blood mononuclear cells (PBMCs) with PG101 increased levels of TNF-alpha, IL-1beta, IL-10, and IL-12 by 100- to 1000-fold, whereas GM-CSF and IL-18 were activated by an order of magnitude. On the contrary, IFN-gamma and IL-4 were not affected. The response to PG101 occurred in a dose- and time-dependent manner. From the human PBMCs treated with PG101, TNF-alpha was a first cytokine to be activated, detectable at 2 hr post-treatment followed by IL-1beta at 6 hr post-treatment. IL-12 and IL-10 were the next to follow. GM-CSF and IL-18 both showed significant increases 24 hr after treatment. When PBMCs were sorted into various cell types, monocyte/macrophages, but not T and B cells, were the major target cell type responsive to PG101. Consistent with this result, the profile of cytokine expression upon PG101 treatment was comparable between PBMCs and a human promonocytic cell line (U937), whereas cell lines of T cell and myeloid origins did not respond to PG101. Data from a transient transfection assay involving specific reporter plasmids indicated that cellular transcription factor such as NF-kappaB, but not AP-1, was highly activated by PG101. Results from a gel retardation assay and the experiment involving a specific NF-kappaB inhibitor confirmed the involvement of NF-kappaB. Despite its significant biological effect on various cytokines, PG101 remained nontoxic in both rats and PBMCs even at a biological concentration approximately 20 times greater. 
PG101 demonstrates great potential as a therapeutic immune modulator.}, }

@article{pmid12709543,
  year     = {2003},
  author   = {Shin, S. S. and Jin, M. and Jung, H. J. and Kim, B. and Jeon, H. and Choi, J. J. and Kim, J. M. and Cho, B. W. and Chung, S. H. and Lee, Y. W. and Song, Y. W. and Kim, S.},
  title    = {Suppressive effects of {PG201}, an ethanol extract from herbs, on collagen-induced arthritis in mice.},
  journal  = {Rheumatology (Oxford, England)},
  volume   = {42},
  number   = {5},
  pages    = {665--672},
  doi      = {10.1093/rheumatology/keg209},
  pmid     = {12709543},
  issn     = {1462-0324},
  mesh     = {Animals ; Antirheumatic Agents/*therapeutic use/toxicity ; Arthritis, Experimental/metabolism/pathology/*prevention & control ; Collagen ; Disease Progression ; Dose-Response Relationship, Drug ; Drugs, Chinese Herbal/*therapeutic use/toxicity ; Ethanol ; Female ; Interleukin-1/analysis ; Interleukins/blood ; Male ; Matrix Metalloproteinase 2/blood ; Mice ; Mice, Inbred DBA ; *Phytotherapy/adverse effects ; Plant Extracts/*therapeutic use ; Rats ; Rats, Sprague-Dawley ; Tissue Inhibitor of Metalloproteinases/blood ; Tumor Necrosis Factor-alpha/analysis ; },
  abstract = {OBJECTIVE: PG201 has been formulated using 12 herbs known to have anti-inflammatory and protective effects on damaged tissue and bone among other functions. The present study was done in order to assess the therapeutic effects of PG201 in collagen-induced arthritis (CIA) in mice.

METHODS: DBA/1 mice were immunized with bovine type II collagen. After a second collagen immunization, mice were treated with PG201 orally at 10 mg/kg once a day for 18 days. Paws were evaluated macroscopically for redness, swelling and deformities. The levels of TNF-alpha and IL-1beta in the ankle were examined. The severity of arthritis within the knee joints was evaluated by histological assessment of cartilage destruction and pannus formation. Molecular indicators related to CIA pathology were analysed by measuring the serum levels of matrix metalloproteinase 2 (MMP-2), tissue inhibitor of matrix metalloproteinase 2 (TIMP-2) and the anti-inflammatory cytokines interleukin (IL)-4 and IL-10.

RESULTS: Administration of PG201 significantly suppressed the progression of CIA and inhibited the production of TNF-alpha and IL-1beta in the paws. The erosion of cartilage was dramatically reduced in mouse knees after treatment with PG201. In the serum of PG201-treated mice, the level of TIMP-2 and the ratio of TIMP-2 to MMP-2 were significantly elevated, and the level of IL-4, but not of IL-10, was increased.

CONCLUSION: Administration of PG201 has therapeutic effects on CIA. Protection of cartilage was particularly prominent. PG201 is a potential therapy for rheumatoid arthritis.}, }

@article{pmid12645300,
  year     = {2002},
  author   = {Marti, J. and Piquemal, D. and Manchon, L. and Commes, T.},
  title    = {[{Transcriptomes} for serial analysis of gene expression].},
  journal  = {Journal de la Societe de biologie},
  volume   = {196},
  number   = {4},
  pages    = {303--307},
  pmid     = {12645300},
  issn     = {1295-0661},
  mesh     = {Animals ; DNA, Complementary/genetics ; Databases, Nucleic Acid ; Gene Expression Profiling/*methods ; *Gene Expression Regulation ; Humans ; Oligonucleotide Array Sequence Analysis ; RNA, Messenger/*genetics ; *Transcription, Genetic ; },
  abstract = {The availability of the sequences for whole genomes is changing our understanding of cell biology. Functional genomics refers to the comprehensive analysis, at the protein level (proteome) and at the mRNA level (transcriptome) of all events associated with the expression of whole sets of genes. New methods have been developed for transcriptome analysis. Serial Analysis of Gene Expression (SAGE) is based on the massive sequential analysis of short cDNA sequence tags. Each tag is derived from a defined position within a transcript. Its size (14 bp) is sufficient to identify the corresponding gene and the number of times each tag is observed provides an accurate measurement of its expression level. Since tag populations can be widely amplified without altering their relative proportions, SAGE may be performed with minute amounts of biological extract. Dealing with the mass of data generated by SAGE necessitates computer analysis. A software is required to automatically detect and count tags from sequence files. Criterias allowing to assess the quality of experimental data can be included at this stage.
To identify the corresponding genes, a database is created registering all virtual tags susceptible to be observed, based on the present status of the genome knowledge. By using currently available database functions, it is easy to match experimental and virtual tags, thus generating a new database registering identified tags, together with their expression levels. As an open system, SAGE is able to reveal new, yet unknown, transcripts. Their identification will become increasingly easier with the progress of genome annotation. However, their direct characterization can be attempted, since tag information may be sufficient to design primers allowing to extend unknown sequences. A major advantage of SAGE is that, by measuring expression levels without reference to an arbitrary standard, data are definitively acquired and cumulative. All publicly available data can thus be stored in a unique database, facilitating whole-genome analysis of differential expression between cell types, normal and diseased samples, or samples with and without drug treatment. SAGE data are readily amenable to statistical comparisons, allowing to determine the level of confidence of the observed variations. A major limitation of SAGE is that, because each analysis is obligatory performed on the whole set of expressed genes, it can hardly be performed on multiple samples, for example in kinetics studies or to compare the effects of large numbers of drugs. To overcome this limitation, high-throughput detection of a subset of mRNAs is more rapidly performed by parallel hybridization of mRNAs on arrays of nucleic acids immobilized on solid supports. From this point of view, a SAGE platform is a powerful instrument for selecting the most informative subset of genes, assembling them to design microarrays dedicated to a specific problem and calibrating measurement by comparison with a standard cell model for which SAGE data are available. 
This approach is an attractive alternative to strategies based exclusively on pangenomic arrays. A very large amount of SAGE data are already available and the problem is now to extract their biological meaning. Knowledge on metabolic pathways is already organized so that its successful integration in a SAGE platform can be undertaken. For other cell components and pathways, the problem lies on the lack of controlled vocabulary to describe gene activities, starting form a clear definition of the concept of biological function itself. Progress in gene and cell ontology is expected to facilitate computer-based extraction of biological knowledge from existing and forthcoming SAGE data.}, }

@comment{pmid12528359 is an unsigned news item: it has no author, so `key` supplies the sort/label fallback.}
@article{pmid12528359,
  year     = {2003},
  key      = {DNAPrint},
  title    = {{DNAPrint} launches pan-genome screening platform {ADMIXMAP}.},
  journal  = {Expert review of molecular diagnostics},
  volume   = {3},
  number   = {1},
  pages    = {9--10},
  pmid     = {12528359},
  issn     = {1473-7159},
  mesh     = {Algorithms ; Gene Frequency ; Genetic Markers ; Genetic Testing/*methods ; *Genome, Human ; Humans ; },
}

@article{pmid12525643,
  year     = {2003},
  author   = {Addo, M. M. and Yu, X. G. and Rathod, A. and Cohen, D. and Eldridge, R. L. and Strick, D. and Johnston, M. N. and Corcoran, C. and Wurcel, A. G. and Fitzpatrick, C. A. and Feeney, M. E. and Rodriguez, W. R. and Basgoz, N. and Draenert, R. and Stone, D. R. and Brander, C. and Goulder, P. J. and Rosenberg, E. S. and Altfeld, M. and Walker, B. D.},
  title    = {Comprehensive epitope analysis of human immunodeficiency virus type 1 ({HIV-1})-specific {T-cell} responses directed against the entire expressed {HIV-1} genome demonstrate broadly directed responses, but no correlation to viral load.},
  journal  = {Journal of virology},
  volume   = {77},
  number   = {3},
  pages    = {2081--2092},
  pmid     = {12525643},
  issn     = {0022-538X},
  support  = {R37 AI128568/AI/NIAID NIH HHS/United States ; P30 AI042851/AI/NIAID NIH HHS/United States ; R01 AI44656/AI/NIAID NIH HHS/United States ; R01 AI30914/AI/NIAID NIH HHS/United States ; R01 AI50429/AI/NIAID NIH
HHS/United States ; R01 AI050429/AI/NIAID NIH HHS/United States ; P30AI42815/AI/NIAID NIH HHS/United States ; R01 AI044656/AI/NIAID NIH HHS/United States ; R01 AI030914/AI/NIAID NIH HHS/United States ; }, mesh = {Acquired Immunodeficiency Syndrome/*immunology/virology ; Amino Acid Sequence ; Epitopes, T-Lymphocyte ; Female ; Gene Products, nef/immunology ; *Genome, Viral ; HIV Core Protein p24/immunology ; HIV-1/*immunology ; Humans ; Interferon-gamma/biosynthesis ; Male ; Molecular Sequence Data ; Peptide Fragments/immunology ; T-Lymphocytes/*immunology ; Viral Load ; nef Gene Products, Human Immunodeficiency Virus ; }, abstract = {Cellular immune responses play a critical role in the control of human immunodeficiency virus type 1 (HIV-1); however, the breadth of these responses at the single-epitope level has not been comprehensively assessed. We therefore screened peripheral blood mononuclear cells (PBMC) from 57 individuals at different stages of HIV-1 infection for virus-specific T-cell responses using a matrix of 504 overlapping peptides spanning all expressed HIV-1 proteins in a gamma interferon-enzyme-linked immunospot (Elispot) assay. HIV-1-specific T-cell responses were detectable in all study subjects, with a median of 14 individual epitopic regions targeted per person (range, 2 to 42), and all 14 HIV-1 protein subunits were recognized. HIV-1 p24-Gag and Nef contained the highest epitope density and were also the most frequently recognized HIV-1 proteins. The total magnitude of the HIV-1-specific response ranged from 280 to 25,860 spot-forming cells (SFC)/10(6) PBMC (median, 4,245) among all study participants. However, the number of epitopic regions targeted, the protein subunits recognized, and the total magnitude of HIV-1-specific responses varied significantly among the tested individuals, with the strongest and broadest responses detectable in individuals with untreated chronic HIV-1 infection. 
Neither the breadth nor the magnitude of the total HIV-1-specific CD8+-T-cell responses correlated with plasma viral load. We conclude that a peptide matrix-based Elispot assay allows for rapid, sensitive, specific, and efficient assessment of cellular immune responses directed against the entire expressed HIV-1 genome. These data also suggest that the impact of T-cell responses on control of viral replication cannot be explained by the mere quantification of the magnitude and breadth of the CD8+-T-cell response, even if a comprehensive pan-genome screening approach is applied.}, }

@article{pmid12354853,
  year     = {2002},
  author   = {Birtles, R. J. and Fry, N. K. and Ventosilla, P. and Cáceres, A. G. and Sánchez, E. and Vizcarra, H. and Raoult, D.},
  title    = {Identification of {Bartonella} bacilliformis genotypes and their relevance to epidemiological investigations of human bartonellosis.},
  journal  = {Journal of clinical microbiology},
  volume   = {40},
  number   = {10},
  pages    = {3606--3612},
  pmid     = {12354853},
  issn     = {0095-1137},
  support  = {/WT_/Wellcome Trust/United Kingdom ; },
  mesh     = {Bacterial Proteins/analysis/genetics ; Bartonella/genetics/*isolation & purification ; Bartonella Infections/*epidemiology/microbiology ; DNA, Bacterial/analysis ; DNA, Intergenic/analysis ; Genotype ; Humans ; Molecular Sequence Data ; Polymerase Chain Reaction ; Sampling Studies ; },
  abstract = {Genotypic diversity among 26 isolates of Bartonella bacilliformis obtained from different areas of Peru, and at different times, was assessed by comparison of DNA sequences derived from 16S-23S ribosomal DNA intergenic spacer regions (ISR) and a citrate synthase gene (gltA) fragment and by amplified fragment length polymorphism (AFLP) analysis. gltA comparison divided the isolates into two groups, whereas ISR comparison revealed six sequences. AFLP analysis using a selective primer delineated five profiles that correlated well with those obtained by sequence comparison.
Combination of all three data sets divided the isolates into six genotypes. One of these genotypes was common to isolates collected from a large area in western Peru that corresponded to the region of endemicity for bartonellosis; however, isolates belonging to two other genotypes were also found within this region. Two of these genotypes were found in isolates isolated more than 35 years apart. The remaining three genotypes were each specifically associated with three outbreaks of bartonellosis that have recently occurred in areas where the disease had not previously been recognized. Demonstration of the unique nature of these isolates indicates that the outbreaks with which they were associated did not result from the introduction of disease by individuals who acquired their infection in the recognized region of endemicity. The sources of these outbreaks remain unknown. A consensus approach to bacterial typing using comparative sequence analysis of multiple genetic loci and the pan-genomic sampling of AFLP appears to offer a well-supported assessment of B. bacilliformis diversity, and the genotypic differences identified appear to have epidemiological significance.}, }

@article{pmid12213587,
  year     = {2002},
  author   = {Urnov, F. D. and Rebar, E. J.},
  title    = {Designed transcription factors as tools for therapeutics and functional genomics.},
  journal  = {Biochemical pharmacology},
  volume   = {64},
  number   = {5--6},
  pages    = {919--923},
  doi      = {10.1016/s0006-2952(02)01150-4},
  pmid     = {12213587},
  issn     = {0006-2952},
  mesh     = {Drug Design ; Genome, Human ; *Genomics ; Humans ; Protein Engineering ; Transcription Factors/genetics/*therapeutic use ; Zinc Fingers/genetics/*physiology ; },
  abstract = {The paucity of tools that control expression of specific genes in vivo represents a major limitation of functional genomics in mammals; most available small-molecule regulators of transcription-e.g. histone deacetylase inhibitors-exert pan-genomic effects.
Recent developments in understanding the role of chromatin in regulating the genome, and of protein-DNA interactions have allowed the development of designed transcription factors that regulate specific genes in vivo (Reik et al., Curr Opin Genet Dev 2002;12:233). These proteins contain two modules: (i) a zinc finger protein (ZFP)-based DNA-binding domain (DBD) designed to recognize a specific sequence (for example, a motif in the promoter of a certain gene); (ii) a functional module (for example, a transcriptional activation or repression domain). Recent data describe the use of such designed transcription factors to regulate a variety of clinically relevant gene targets in human cells: these include MDR1, erythropoietin, erbB-2 and erbB-3, VEGF, and PPARgamma. In the case of VEGF (Liu et al., J Biol Chem 2001;276:11323), proportional upregulation by the designed transcription factor of all three distinct splice isoforms generated by this locus was observed, illuminating the utility of endogenous gene control in therapeutic settings (proper isoform ratio is essential for the proangiogenic function of VEGF). In the case of PPARgamma, use of a transcriptional repressor designed to downregulate the expression of two PPARgamma isoforms allowed "mutation-free reverse genetics" analysis that illuminated a unique role for the PPARgamma2 isoform in adipogenesis (Ren et al., Genes Dev 2002;16:27). 
The ability to selectively activate or repress specific mammalian genes in vivo using designed transcription factors thus has considerable promise in clinical and in basic science settings.}, }

@article{pmid11821865,
  year     = {2002},
  author   = {Storek, M. J. and Ernst, A. and Verdine, G. L.},
  title    = {High-resolution footprinting of sequence-specific protein-{DNA} contacts.},
  journal  = {Nature biotechnology},
  volume   = {20},
  number   = {2},
  pages    = {183--186},
  doi      = {10.1038/nbt0202-183},
  pmid     = {11821865},
  issn     = {1087-0156},
  mesh     = {Base Sequence ; Binding Sites ; Crystallography, X-Ray ; DNA/*chemistry/genetics/metabolism ; DNA Footprinting/*methods ; Humans ; Hydrogen Bonding ; Magnetic Resonance Spectroscopy ; Models, Genetic ; Molecular Sequence Data ; Promoter Regions, Genetic ; Protein Binding ; Sequence Analysis, DNA ; Time Factors ; },
  abstract = {Gene transcription is regulated by proteins that bind specific DNA sequences and control the initiation of RNA synthesis. A major challenge is to map all of the regulatory sites in the genome and to identify the proteins that bind them. Because members of transcription factor families often exhibit similar sequence preferences, methods for determining intermolecular contacts in protein-DNA interfaces must be sensitive to even subtle structural differences. The most detailed structural views of protein-DNA interfaces have been obtained through X-ray crystallography and NMR spectroscopy, and these methods have revolutionized the understanding of the structural determinants of sequence-specific recognition. Neither crystallography nor NMR, however, is particularly well-suited to high-throughput applications such as pan-genomic elucidation of regulatory sequences; in addition, these methods yield no information on the energetic contribution of particular contacts.
Here we report a straightforward, high-resolution biochemical method for mapping, at single-nucleotide resolution, DNA bases that are subject to sequence-specific contacts by regulatory proteins.}, }

@article{pmid11261250,
  year     = {2000},
  author   = {Sigaux, F.},
  title    = {[{Cancer} genome or the development of molecular portraits of tumors].},
  journal  = {Bulletin de l'Academie nationale de medecine},
  volume   = {184},
  number   = {7},
  pages    = {1441--1447; discussion 1448--1449},
  pmid     = {11261250},
  issn     = {0001-4079},
  mesh     = {France ; *Genome, Human ; Humans ; Neoplasms/*genetics ; },
  abstract = {The rapid development of cancer genomics is due to important progresses in oncogenesis, human genome sequencing and emergence of new technologies in genome and transcriptome analysis. In this context, the aim of the French program 'Cartes d'Identites des Tumeurs--Molecular Portraits of Tumors' is to build a public data base containing a pan genome assessment of genome and transcriptome alterations in the major types of tumors as well as in relevant normal cells and experimental models. Data mining is done in the context of genome annotations and clinical and biological informations attached to the enrolled samples.
The goal of the program is to define new tests useful for diagnostic procedures in clinical laboratories and new targets for biological treatments of tumors.}, }

@article{pmid10962277,
  year     = {2000},
  author   = {Fitzgerald, K. A. and O'Neill, L. A.},
  title    = {The role of the interleukin-1/{Toll-like} receptor superfamily in inflammation and host defence.},
  journal  = {Microbes and infection},
  volume   = {2},
  number   = {8},
  pages    = {933--943},
  doi      = {10.1016/s1286-4579(00)00396-8},
  pmid     = {10962277},
  issn     = {1286-4579},
  mesh     = {Adaptor Proteins, Signal Transducing ; Animals ; Antigens, Differentiation/physiology ; *Drosophila Proteins ; Forecasting ; Gram-Negative Bacteria/immunology ; Gram-Positive Bacteria/immunology ; Humans ; Immunity, Innate/immunology ; Inflammation ; Membrane Glycoproteins/*physiology ; Myeloid Differentiation Factor 88 ; Plant Diseases ; Plant Proteins ; Receptors, Cell Surface/*physiology ; *Receptors, Immunologic ; Receptors, Interleukin-1/*physiology ; Toll-Like Receptors ; },
  abstract = {The IL-1 receptor/Toll-like receptor superfamily comprises a diverse family of cell surface receptors defined by a characteristic conserved sequence in their cytosolic regions, termed the Toll/IL-1 receptor domain, which function in inflammation and host defence against microbial pathogens. Members include receptors for the proinflammatory cytokines IL-1 and IL-18 and Toll-like receptors 2 and 4, which are involved in host responses to Gram-positive and Gram-negative bacteria, respectively.
Signalling pathways activated by these receptors are conserved and the superfamily represents a pan-genomic system involved in the host response to infection and injury.}, }

@article{pmid6284982,
  year     = {1982},
  author   = {Heller, M. and Gerber, P. and Kieff, E.},
  title    = {{DNA} of herpesvirus pan, a third member of the {Epstein-Barr} virus-{Herpesvirus} papio group.},
  journal  = {Journal of virology},
  volume   = {41},
  number   = {3},
  pages    = {931--939},
  pmid     = {6284982},
  issn     = {0022-538X},
  support  = {AI-07099/AI/NIAID NIH HHS/United States ; CA 17281/CA/NCI NIH HHS/United States ; CA 19264/CA/NCI NIH HHS/United States ; },
  mesh     = {Base Sequence ; DNA Restriction Enzymes ; DNA, Viral/*analysis ; Herpesviridae/classification/*genetics ; Molecular Weight ; Nucleic Acid Hybridization ; Phylogeny ; Repetitive Sequences, Nucleic Acid ; Species Specificity ; },
  abstract = {The DNA of herpesvirus pan, a primate B-lymphotropic herpesvirus, shares about 40\% well-conserved sequence relatedness with Epstein-Barr virus (EBV) and herpesvirus papio DNAs. Labeled cloned fragments from the EBV recombinant DNA library were cross hybridized to blots of EcoRI, XbaI, and BamHI restriction endonuclease fragments of herpesvirus pan DNA to identify and map homologous sequences in the herpesvirus pan genome. Regions of colinear homology were demonstrated between 6 x 10(6) daltons and 108 x 10(6) daltons in the DNAs. The structural organization of herpesvirus pan DNA was similar to the format of Epstein-Barr virus and herpesvirus papio DNAs. The DNA consists of two domains of largely unique sequence complexity, a segment US of 9 x 10(6) daltons and a segment UL of 88 x 10(6) daltons. US and UL are separated by a variable number of tandem repetitions of a sequence IR (2 x 10(6) daltons). There was homology between DNA which mapped at 26 to 28 x 10(6) daltons and 93 to 95 x 10(6) daltons in UL.
The terminal reiteration component, TR, of herpesvirus pan DNA and sequences which mapped to the left of 6 x 10(6) daltons and to the right of 108 x 10(6) daltons had no detectable homology with the corresponding regions of Epstein-Barr virus DNA.}, }